# Zeroshot Prompting for Summary Generation  


Using a single zero-shot prompt, have a llama3 or OpenAI LLM summarise a collection of articles on wealth management news and advice.

Inputs:
- Articles in .txt format in the relative folder destination `./insight_docs`
- Choice of LLM `params['model']`
- Hyperparameters such as the sampling temperature `params['temperature']`
- OpenAI API key set as the variable `os.environ['OPENAI_API_KEY']`

Outputs:
- Log file saved in the current working directory as `./zeroshot_summarise_ubs_week_log.csv`

In [None]:
from langchain_openai import ChatOpenAI
from langchain_community.llms import Ollama
from langchain_community.document_loaders import TextLoader
from langchain.chains.llm import LLMChain
from langchain_core.prompts import ChatPromptTemplate
from langchain_core.documents import Document

import os
# Replace the "[key]" placeholder with your own OpenAI API key before running.
# NOTE: this assignment overwrites any OPENAI_API_KEY already set in the
# environment, and a real key should not be committed with the notebook.
os.environ['OPENAI_API_KEY'] = "[key]"

from datetime import datetime
import time
import pandas as pd

### Summarisation Hyperparameters  
Adjust the following variables to match your preferences:
- Articles in .txt format in the relative folder destination `./insight_docs`
- Choice of LLM `params['model']`
- Hyperparameters such as the sampling temperature `params['temperature']`

In [None]:
# CSV file that accumulates one row of parameters and outputs per run.
log_file = "./zeroshot_summarise_ubs_week_log.csv"
params = {}

# Collect every regular file in the article folder, sorted by path.
doc_folder = 'insight_docs'
params['doc_names'] = sorted(
    os.path.join(doc_folder, f)
    for f in os.listdir(doc_folder)
    if os.path.isfile(os.path.join(doc_folder, f))
)
if not params['doc_names']:
    # Fail with a clear message instead of an IndexError on the lookup below.
    raise FileNotFoundError(f"No article files found in '{doc_folder}'")

# The first 10 characters of the last (sorted) file name are recorded as the
# latest article date (assumes names start with a date such as YYYY-MM-DD --
# confirm against your files). The slice is taken from the file name only:
# slicing the joined path would return the start of the folder name instead.
params['latest_article_date'] = os.path.basename(params['doc_names'][-1])[:10]

# Latest models available as of 25 June 2024.
# Uncomment exactly one of the lines below to choose the model.
#params['model'] = "llama2:70b"
#params['model'] = "llama3"
#params['model'] = "gpt-3.5-turbo-0125"
params['model'] = "gpt-4-turbo-2024-04-09"

params['temperature'] = 0

# Build the LLM client: local Ollama for llama models, OpenAI otherwise.
if 'llama3' in params['model']:
    # Explicit stop sequences are passed for llama3's special header/end tokens.
    llm = Ollama(model=params['model'], num_ctx=4096, num_predict=-1, num_gpu=1,
                 temperature=params['temperature'],
                 stop=["<|start_header_id|>", "<|end_header_id|>", "<|eot_id|>", "<|reserved_special_token"])
elif 'llama2' in params['model']:
    llm = Ollama(model=params['model'], num_ctx=4096, num_predict=-1, num_gpu=1,
                 temperature=params['temperature'])
else:
    # Use the configured temperature so the value written to the log matches
    # what the model was actually run with.
    llm = ChatOpenAI(temperature=params['temperature'], model_name=params['model'])

### Prompts  
Setup your prompt templates:
- System prompt
- Summarisation template to summarise all information into an appropriate output format.

In [None]:
# System message: fixes the persona the model writes as.
params['system'] = "You are an experienced investment analyst who writes for the Chief Investment Office at a wealth management firm."

# User message: instructions, required output structure, and the {docs}
# placeholder that receives the combined article text. Written line by line
# with explicit newlines so the trailing whitespace in the prompt is visible.
params['summarise_template'] = (
    'There is a set of seven articles in quotation marks below. Take these articles and distill them into multiple bite-sized paragraphs. Each paragraph must be structured as shown below. \n'
    'Summary: [summary of key news]\n'
    'Actionable Insights: [point form]\n'
    '"""{docs}"""   \n'
)

# Assemble the two-message chat prompt and bind it to the configured LLM.
prompt_messages = [
    ("system", params['system']),
    ("user", params['summarise_template']),
]
summarise_prompt = ChatPromptTemplate.from_messages(prompt_messages)
summarise_chain = LLMChain(prompt=summarise_prompt, llm=llm)


### Load documents

In [None]:
# Derive titles from the same sorted list used to load the documents so that
# titles[i] always belongs to docs[i]. os.listdir() returns entries in
# arbitrary order, so a separate directory listing could pair a title with
# the wrong article.
# The slice drops the first 11 characters of the file name and the last 4
# (presumably a date prefix plus separator, and the ".txt" extension).
titles = [os.path.basename(f)[11:-4] for f in params['doc_names']]

# Load each article; only the first Document returned by the loader is kept.
docs = []
for f in params['doc_names']:
    loader = TextLoader(f, encoding='UTF-8')
    docs.append(loader.load()[0])

Combine all articles into a single string `all_docs`.  
Each article's text is prefixed by its number and title.

In [None]:
# Build one string holding every article: a numbered header line with the
# title, then the article text, then a blank line before the next article.
all_docs = "".join(
    f"Article {idx + 1}: {titles[idx]}\n{doc.page_content}\n\n"
    for idx, doc in enumerate(docs)
)

print(all_docs)

### Summarise

In [None]:
# Run the summarisation chain on the combined articles and time the call.
start = time.time()
response = summarise_chain.invoke(all_docs)
params['summary'] = response['text']
end = time.time()

# Wall-clock duration of the LLM call, in seconds.
params['duration'] = end - start
print(params['summary'])

### Write Log File  
Write all input hyperparameters and LLM outputs into a log file saved in the current working directory as `./zeroshot_summarise_ubs_week_log.csv`

In [None]:
# datetime object containing current date and time
now = datetime.now()
params['experiment_date'] = now.strftime("%Y-%m-%d")
params['experiment_time'] = now.strftime("%H:%M:%S")
try:  
    all_params_df = pd.read_csv(log_file)
except:
    all_params_df = pd.DataFrame()

params_df = pd.DataFrame([params])
all_params_df = pd.concat([all_params_df,params_df])

all_params_df.to_csv(log_file, index=False)