# Zeroshot Prompting for Blurb Generation  


Using a single zero-shot prompt, this notebook uses llama3 or OpenAI LLMs to produce a blurb for each article in a collection of articles from Towards Data Science.

Inputs:
- Articles in .txt format in the relative folder destination `./tds`
- Choice of LLM `params['model']`
- Hyperparameters in the variables `params['model']` and `params['temperature']`
- OpenAI API key set as the variable `os.environ['OPENAI_API_KEY']`

Outputs:
- Log file saved in the current working directory as `./zeroshot_blurb.csv`

In [None]:
from langchain_openai import ChatOpenAI
from langchain_community.llms import Ollama
from langchain_community.document_loaders import TextLoader
from langchain.chains.llm import LLMChain
from langchain_core.prompts import ChatPromptTemplate

import os
os.environ['OPENAI_API_KEY'] = "[key]"

from datetime import datetime
import pandas as pd

### Summarisation Hyperparameters  

Adjust the following variables to match your preferences:
- Articles in .txt format in the relative folder destination `./tds`
- Choice of LLM `params['model']`
- Hyperparameters in the variables `params['model']` and `params['temperature']`

In [None]:
# Run configuration: log destination, input articles, model choice and LLM client.
# CSV file that each run's hyperparameters and outputs are written to.
log_file = "./zeroshot_blurb.csv"
params = {}

# Collect every regular file in the article folder, in sorted order.
doc_folder = 'tds'
params['doc_names'] = sorted(
    os.path.join(doc_folder, f)
    for f in os.listdir(doc_folder)
    if os.path.isfile(os.path.join(doc_folder, f))
)
if not params['doc_names']:
    # Fail with a clear message instead of an IndexError on the [-1] below.
    raise FileNotFoundError(f"No article files found in folder '{doc_folder}'")

# Slice the *file name*, not the full path: every path starts with the folder
# name, so slicing the path directly would yield e.g. 'tds/2024-0'.
# NOTE(review): assumes file names begin with a 10-character date such as
# YYYY-MM-DD, so that the last name in sorted order is the latest — confirm.
params['latest_article_date'] = os.path.basename(params['doc_names'][-1])[:10]

# Latest models available as of 25 June 2024.
# Leave exactly one of the following assignments uncommented.
#params['model'] = "llama2:70b"
#params['model'] = "llama3"
#params['model'] = "gpt-3.5-turbo-0125"
params['model'] = "gpt-4-turbo-2024-04-09"

params['temperature'] = 0

# Build the LLM client that matches the chosen model name.
if 'llama3' in params['model']:
    # The stop list holds the Llama 3 special tokens, passed to Ollama as stop sequences.
    llm = Ollama(
        model=params['model'],
        num_ctx=4096,
        num_predict=-1,
        num_gpu=1,
        temperature=params['temperature'],
        stop=["<|start_header_id|>", "<|end_header_id|>", "<|eot_id|>", "<|reserved_special_token"],
    )
elif 'llama2' in params['model']:
    llm = Ollama(
        model=params['model'],
        num_ctx=4096,
        num_predict=-1,
        num_gpu=1,
        temperature=params['temperature'],
    )
else:
    # Use the configured temperature so the value written to the log is the one
    # actually used (it was previously hard-coded to 0 here).
    llm = ChatOpenAI(temperature=params['temperature'], model_name=params['model'])

### Prompts  
Set up your prompt templates:
- System prompt
- Summarisation template to write a blurb for a magazine article

In [None]:
# Prompt text shared by every model. Keeping a single copy prevents the llama
# and non-llama variants from drifting apart.
# NOTE(review): "a an" below looks like a typo; it is left unchanged because
# editing the prompt changes the model input — fix deliberately if desired.
system_text = "You are a an experienced writer with expertise in presenting technical information to a general audience in a tone that is educational, informative, and approachable."
user_text = "The following is a magazine article:\n{docs}\n Based on this article, write a paragraph under 100 words that will convince readers to read the full article. Start the paragraph with a key reason, following up with key points and a brief context that helps to situate the article within broader trends or ongoing discussions. \nHelpful Answer:"

if 'llama' in params['model']:
    # Wrap the text in the header / end-of-turn special tokens.
    # NOTE(review): this branch also matches llama2 model names, which then
    # receive the same special tokens — confirm that is intended.
    # System Prompt
    params['system'] = "<|begin_of_text|><|start_header_id|>system<|end_header_id|>" + system_text + "<|eot_id|>"
    # User Prompt
    params['user'] = "<|start_header_id|>user<|end_header_id|>" + user_text + "<|eot_id|>"
else:
    # Non-llama models get the plain text. The system prompt previously ended
    # with a stray "<|eot_id|>" token copied from the llama branch, which was
    # sent to the model as literal text.
    params['system'] = system_text
    params['user'] = user_text

# {docs} is the only template variable; it is filled with one article per call.
prompt = ChatPromptTemplate.from_messages([("system", params['system']), ("user", params['user'])])
chain = LLMChain(llm=llm, prompt=prompt)

### Load Magazine Articles

In [None]:
def _read_article(path):
    """Return the full contents of the file at *path*, decoded as UTF-8."""
    with open(path, encoding="utf-8") as handle:
        return handle.read()


# One text per entry in params['doc_names'], in the same order as that list.
doc_texts = [_read_article(path) for path in params['doc_names']]

### Generate Blurbs

In [None]:
# Run the chain once per article; each blurb is stored in params['blurbs']
# (so it ends up in the log) and printed as soon as it is produced.
blurbs = params['blurbs'] = []

for article_text in doc_texts:
    blurb = chain.invoke(article_text)['text']
    blurbs.append(blurb)
    print(blurb)

### Write Log File  
Write all input hyperparameters and LLM outputs to a log file saved in the current working directory as `./zeroshot_blurb.csv`

In [None]:
# Stamp this run with the current local date and time so each logged row
# records when it was produced.
now = datetime.now()
params['experiment_date'] = now.strftime("%Y-%m-%d")
params['experiment_time'] = now.strftime("%H:%M:%S")

# One row holding every entry of params for this run.
params_df = pd.DataFrame([params])

try:
    all_params_df = pd.read_csv(log_file)
except (FileNotFoundError, pd.errors.EmptyDataError):
    # No log yet, or an empty log file: this run becomes the first row.
    # Any other failure (unreadable or corrupt log, permissions) is left to
    # propagate, so an existing log is never silently overwritten with only
    # the newest row.
    all_params_df = params_df
else:
    # Append this run below the previously logged runs.
    all_params_df = pd.concat([all_params_df, params_df], ignore_index=True)

all_params_df.to_csv(log_file, index=False)