# Chained Prompting for Summary Generation  


Using a sequence of chained prompts, this notebook uses llama3 or OpenAI LLMs to summarise a collection of articles on wealth management news and advice.

Inputs:
- Articles in .txt format in the relative folder destination `./insights_docs`
- Choice of LLM `params['model']`
- Hyperparameters in the variables `params['reduce_token_max']` and `params['temperature']`
- OpenAI API key set as the variable `os.environ['OPENAI_API_KEY']`

Outputs:
- Log file saved in the current working directory as `./custom_summarise_ubs_week_log.csv`

In [None]:
from langchain_openai import ChatOpenAI
from langchain_community.llms import Ollama
from langchain_community.document_loaders import TextLoader
from langchain.chains.llm import LLMChain
from langchain_core.prompts import ChatPromptTemplate

import os
os.environ['OPENAI_API_KEY'] = "[key]"

from datetime import datetime
import time
import pandas as pd

### Summarisation Hyperparameters  

Adjust the following variables to match your preferences:
- Articles in .txt format in the relative folder destination `./insights_docs`
- Choice of LLM `params['model']`
- Hyperparameters in the variables `params['reduce_token_max']` and `params['temperature']`

In [None]:
# Path of the CSV log that receives one row per experiment run.
log_file = "./custom_summarise_ubs_week_log.csv"
# Every hyperparameter and LLM output is collected in this dict and written to the log.
params = {}

# Folder holding the input articles (one file per article).
doc_folder = 'insights_docs'
# Sorted paths of all regular files in the folder; sorting makes the last entry
# the lexicographically greatest file name.
params['doc_names'] = sorted(
    os.path.join(doc_folder, f)
    for f in os.listdir(doc_folder)
    if os.path.isfile(os.path.join(doc_folder, f))
)
if not params['doc_names']:
    # Fail with a clear message instead of an IndexError on the lookup below.
    raise FileNotFoundError(f"No article files found in '{doc_folder}'")
# Take the first 10 characters of the file name itself, not of the joined path
# (which would return part of the folder name).
# NOTE(review): presumably file names start with a 10-character date such as YYYY-MM-DD - confirm.
params['latest_article_date'] = os.path.basename(params['doc_names'][-1])[:10]

# Latest models available as of 25 June 2024
#params['model'] = "llama2:70b"
#params['model'] = "llama3"
params['model'] = "gpt-3.5-turbo-0125"
#params['model'] = "gpt-4-turbo-2024-04-09"

params['reduce_token_max'] = 4000
params['temperature'] = 0

# Pick the LLM client from the model name: Ollama for llama models, OpenAI otherwise.
if 'llama3' in params['model']:
    # The stop list ends generation at the special tokens named below.
    llm = Ollama(model=params['model'], num_ctx=4096, num_predict=-1, num_gpu=1, temperature=params['temperature'],
                 stop=["<|start_header_id|>", "<|end_header_id|>", "<|eot_id|>", "<|reserved_special_token"])
elif 'llama2' in params['model']:
    llm = Ollama(model=params['model'], num_ctx=4096, num_predict=-1, num_gpu=1, temperature=params['temperature'])
else:
    # Use the configured temperature so the logged hyperparameter matches the one actually applied.
    llm = ChatOpenAI(temperature=params['temperature'], model_name=params['model'])

### Prompts  
Setup your prompt templates:
- System prompt
- "Map" prompt for summarising individual articles
- "Reduce" prompt to combine all summarised information into an appropriate output format.

In [None]:
# Llama models receive prompts wrapped in special-token markup
# (<|begin_of_text|>, <|start_header_id|>, <|eot_id|>); every other model gets plain text.
uses_llama = 'llama' in params['model']

if uses_llama:
    # System prompt
    params['system'] = "<|begin_of_text|><|start_header_id|>system<|end_header_id|>You are an experienced investment analyst who writes for the Chief Investment Office at a wealth management firm.<|eot_id|>"
    # "Map" user prompt: applied to one article at a time via {docs}
    params['map_template'] = '''<|begin_of_text|><|start_header_id|>user<|end_header_id|>The following text delimited in triple quotes is an article:
"""{docs}"""
Based on this article, please identify all key news and calls to action.
Helpful Answer:<|eot_id|>'''
    # "Reduce" user prompt: applied once to all article summaries via {doc_summaries}
    params['reduce_template'] = '''<|begin_of_text|><|start_header_id|>user<|end_header_id|>The following is a set of seven article summaries:
"""{doc_summaries}"""
Rewrite all seven article summaries using the following structure. Your entire response should be 2000 words long.
Key News: [in prose form]
Actionable Insights: [in point form]<|eot_id|>'''
else:
    # System prompt
    params['system'] = "You are an experienced investment analyst who writes for the Chief Investment Office at a wealth management firm."
    # "Map" user prompt: applied to one article at a time via {docs}
    params['map_template'] = '''The following text delimited in triple quotes is an article:
"""{docs}"""   
Based on this article, please identify all key news and calls to action.
Helpful Answer:'''
    # "Reduce" user prompt: applied once to all article summaries via {doc_summaries}
    params['reduce_template'] = '''The following is a set of seven article summaries:
"""{doc_summaries}"""
Rewrite all seven article summaries using the following structure. Your entire response should be 2000 words long.
Key News: [in prose form]
Actionable Insights: [in point form]'''

# Both chains share the same system message and differ only in the user template.
map_prompt = ChatPromptTemplate.from_messages([
    ("system", params['system']),
    ("user", params['map_template']),
])
map_chain = LLMChain(llm=llm, prompt=map_prompt)

reduce_prompt = ChatPromptTemplate.from_messages([
    ("system", params['system']),
    ("user", params['reduce_template']),
])
reduce_chain = LLMChain(llm=llm, prompt=reduce_prompt)

Load documents

In [None]:
# Read every article as UTF-8 and keep the first document each loader returns,
# in the same order as params['doc_names'].
docs = [
    TextLoader(path, encoding='UTF-8').load()[0]
    for path in params['doc_names']
]

### Map
Summarise each document individually.

In [None]:
# One summary string per article, in the same order as `docs`.
map_results = []

start = time.time()
for doc in docs:
    # Pass only the article text under the template's `docs` variable; handing over
    # the whole document object would format its string form (including metadata)
    # into the prompt instead of the plain article.
    output = map_chain.invoke({"docs": doc.page_content})
    map_results.append(output['text'])
    print(output['text'])
end = time.time()

# Wall-clock seconds spent on the whole map step, recorded for the log.
params['map_duration'] = end - start

params['intermediate_outputs'] = map_results

Combine all summaries into a single string `mapped_docs`.  
Each article summary is prefixed by the article title.

In [None]:
# Get the titles of all articles from the same sorted path list that was used to
# load and summarise them, so each title lines up with its summary (a fresh
# os.listdir() call returns names in arbitrary order).
# The title is the file name without its extension and without its first 11 characters.
# NOTE(review): presumably those 11 characters are a date prefix plus a separator - confirm.
titles = [
    os.path.splitext(os.path.basename(path))[0][11:]
    for path in params['doc_names']
]

# Build one string with every summary preceded by a numbered title line.
# Reading from `map_results` (the list of per-article summaries) keeps this cell
# safe to re-run after params['intermediate_outputs'] has been replaced by the joined string.
mapped_docs = ''.join(
    f"Article {number}: {title}\n{summary}\n\n"
    for number, (title, summary) in enumerate(zip(titles, map_results), start=1)
)
params['intermediate_outputs'] = mapped_docs
print(mapped_docs)

### Reduce  
Combine the summaries of all articles into a single "weekly digest" with an output format as demanded by you.

In [None]:
# Run the reduce chain once over the combined summaries and time the call.
reduce_started = time.time()
reduce_output = reduce_chain.invoke(mapped_docs)
reduce_finished = time.time()

# Keep the generated digest and the wall-clock seconds it took for the log.
params['summary'] = reduce_output['text']
params['reduce_duration'] = reduce_finished - reduce_started
print(params['summary'])

### Write Log File  
Write all input hyperparameters and LLM outputs into a log file saved in the current working directory as `./custom_summarise_ubs_week_log.csv` (the path set in `log_file`).

In [None]:
# Stamp this run with the current local date and time.
now = datetime.now()
params['experiment_date'] = now.strftime("%Y-%m-%d")
params['experiment_time'] = now.strftime("%H:%M:%S")

# Load the existing log if there is one. Only a missing or empty log file means
# "start fresh"; any other error (e.g. a corrupt file) should surface rather than
# silently overwrite earlier results.
try:
    all_params_df = pd.read_csv(log_file)
except (FileNotFoundError, pd.errors.EmptyDataError):
    all_params_df = pd.DataFrame()

# Append this run as one new row and write the whole log back.
params_df = pd.DataFrame([params])
all_params_df = pd.concat([all_params_df, params_df], ignore_index=True)

all_params_df.to_csv(log_file, index=False)