# LLM Prompts and Chains

### Imports

In [1]:
import pandas as pd
import numpy as np

from tqdm import tqdm
import os

import openai
from dotenv import load_dotenv, find_dotenv


# Read the local .env file, then hand the API key to the openai module.
# A missing OPENAI_API_KEY variable raises KeyError on the lookup below.
dotenv_path = find_dotenv()
_ = load_dotenv(dotenv_path)
openai.api_key = os.environ['OPENAI_API_KEY']

In [2]:
from langchain_openai import ChatOpenAI
from langchain.prompts import ChatPromptTemplate

from langchain.output_parsers import ResponseSchema
from langchain.chains import SequentialChain, LLMChain
from langchain.output_parsers import StructuredOutputParser

In [3]:
# Identifier of the chat model handed to ChatOpenAI further down
llm_model = "gpt-3.5-turbo-0125"

### Output Response Schema

In [4]:
# Fields requested from the model for every article, one ResponseSchema each.
# NOTE(review): the literal "{company}" inside these descriptions is presumably
# NOT substituted once the text is embedded in format_instructions and passed
# to the prompt as a plain input value -- confirm this is intended.

# Sentiment score
sentiment_schema = ResponseSchema(
    name="sentiment",
    description=(
        "Determine the degree of sentiment of the news article with respect to {company}.\n"
        "Output in the range of (-1, 1) where -1 is Negative, and 1 is Positive."
    ),
)

# Supporting evidence
evidence_schema = ResponseSchema(
    name="evidence",
    description=(
        "Extract any sentences that provide evidence for the extracted sentiment, "
        "and output as a Python List."
    ),
)

# Stock movement direction
stock_movement_schema = ResponseSchema(
    name="stock_movement",
    description=(
        "Given your expertise in the field, determine stock movement of the {company}.\n"
        "Output as Up or Down."
    ),
)

# Explanation (the key is spelled "explaination" at runtime; keep it in sync
# with whatever consumes the parsed output)
explaination_schema = ResponseSchema(
    name="explaination",
    description="Explain your thoughts and thinking process. Output as string.",
)

response_schemas = [
    sentiment_schema,
    evidence_schema,
    stock_movement_schema,
    explaination_schema,
]

# Parser for the structured reply, and the format instructions to place in the prompt
output_parser = StructuredOutputParser.from_response_schemas(response_schemas)
format_instructions = output_parser.get_format_instructions()

### LLM Chains

In [5]:
# Step 1 of the pipeline: identify the company an article is mainly about.

# Chat model at temperature 0, using the model name chosen earlier
llm = ChatOpenAI(model=llm_model, temperature=0.0)

# Prompt asking for exactly one company name; {text} is the article
first_prompt = ChatPromptTemplate.from_template(
    "\n"
    "    You are an expert in the field of finance news, stock market and trading. "
    "Which company does the news article talk about majorly? "
    "Output only one company name.\n"
    "    article: {text}\n"
    "    "
)

# The reply of this chain is exposed under the 'company' key
chain_one = LLMChain(prompt=first_prompt, llm=llm, output_key='company')

In [6]:
# 2nd Chain
# Analysis prompt. It takes three inputs -- {company}, {text} and
# {format_instructions} -- and asks for sentiment, evidence, stock movement
# and an explanation for the given article.
# NOTE: every line of the literal below carries its four-space indentation
# into the prompt text, because the string is not dedented.
second_prompt = ChatPromptTemplate.from_template(
    """\
    You are an expert in analyzing financial news, stock market and trading for {company} given below. For the following financial news article, do the following:

    sentiment: Determine the degree of sentiment of the news article with respect to {company}.
    Output in the range of (-1, 1) where -1 is Negative, and 1 is Positive.

    evidence: Extract any sentences that provide evidence for the extracted sentiment, and output as a Python List.

    stock_movement: Given your expertise in the field, determine stock movement of the {company}.
    Output as Up or Down.

    Explaination: Explain your thoughts and thinking process. Output as string.

    article: {text}
    {format_instructions}
    """
)

# chain 2
# The model's reply is exposed under the 'json_output' key.
chain_two = LLMChain(llm=llm, prompt=second_prompt, output_key='json_output')

In [7]:
# Two-step pipeline: chain_one runs first, then chain_two.
# Inputs:  "text", "format_instructions"
# Outputs: "company", "json_output"
pipeline_steps = [chain_one, chain_two]
overall_chain = SequentialChain(
    chains=pipeline_steps,
    input_variables=["text", "format_instructions"],
    output_variables=["company", "json_output"],
    verbose=False,
)

### Run on All Content

In [2]:
# Articles to process; the loop below reads the 'GOID', 'Title' and 'Content' columns
articles_csv = '../data/final/artilces_with_content.csv'
df = pd.read_csv(articles_csv)

In [13]:
# Run the two-step chain over every article and checkpoint progress to disk.
#   df_outputs -- one dict per article that was processed and parsed successfully
#   not_worked -- GOIDs of articles whose chain call or output parsing failed
df_outputs = []
not_worked = []


def _save_progress():
    """Write the results and the failed GOIDs collected so far to disk."""
    pd.DataFrame.from_dict(df_outputs).to_csv('../data/final/llm_outputs.csv', index=False)
    np.array(not_worked).tofile('../data/final/did_not_work.txt', sep=',')


for i, row in tqdm(df.iterrows(), total=len(df)):
    try:
        # Outer double quotes: reusing the same quote character inside the
        # replacement fields is a SyntaxError before Python 3.12.
        article_text = f"Title: {row['Title']}, Content: {row['Content']}"
        outputs = overall_chain.invoke({
            'text': article_text,
            'format_instructions': format_instructions
        })
        output_dict = {'GOID': row['GOID'], 'company': outputs['company']}
        output_dict.update(output_parser.parse(outputs['json_output']))
        df_outputs.append(output_dict)
    except Exception:
        # Best effort: remember the article and keep going. `Exception` (not a
        # bare except) lets Ctrl-C / SystemExit still stop the long-running job.
        not_worked.append(row['GOID'])

    # Periodic checkpoint (every even index label)
    if i % 2 == 0:
        _save_progress()

# Final save so rows processed after the last checkpoint are not lost
_save_progress()

  4%|▍         | 125/2781 [59:28<20:53:19, 28.31s/it]

## LLM Prompts

In [None]:
from langchain.chat_models import ChatOpenAI
from langchain.prompts import ChatPromptTemplate

from langchain.output_parsers import ResponseSchema
from langchain.chains import SequentialChain, LLMChain
from langchain.output_parsers import StructuredOutputParser

import os
import openai

import pandas as pd
from tqdm import tqdm

from dotenv import load_dotenv, find_dotenv
# Read the local .env file, then hand the API key to the openai module.
# A missing OPENAI_API_KEY variable raises KeyError on the lookup below.
dotenv_path = find_dotenv()
_ = load_dotenv(dotenv_path)
openai.api_key = os.environ['OPENAI_API_KEY']

# Account for deprecation of the LLM model: pick the model name by date.
import datetime

# After this date "gpt-3.5-turbo" is used instead of "gpt-3.5-turbo-0301"
target_date = datetime.date(2024, 6, 12)

# Today's date according to the local clock
current_date = datetime.date.today()

# Strictly after the target date -> new name; on or before it -> old name
llm_model = "gpt-3.5-turbo" if current_date > target_date else "gpt-3.5-turbo-0301"

# Chat model at temperature 0, using the model name selected above
chat = ChatOpenAI(model=llm_model, temperature=0.0)


In [None]:
# Load the dataset and pick one cell (first row, second column) as a sample article
df = pd.read_csv('sf.csv')
sample_row, sample_col = 0, 1
news_article = df.iloc[sample_row, sample_col]

In [None]:
# Output fields for the single-prompt experiment: sentiment, evidence, prediction.
# NOTE: the backslash line continuations sit inside the string literals, so the
# leading indentation of each continued line becomes part of the description
# text that is sent along with the prompt.

# Sentiment score in (-1, 1)
sentiment_schema = ResponseSchema(name="sentiment",
                             description="What is the degree of sentiment\
                             of the news article? \
                             Answer in the range of (-1, 1) where,\
                             -1 being Negative and \
                             1 being Positive.")
# Sentences backing the sentiment
evidence_schema = ResponseSchema(name="evidence",
                                      description="Extract any\
                                    sentences that provide evidence \
                                    for the sentiment of the article, \
                                    and output them as a \
                                    comma separated Python list.")
# Expected stock direction: Up, Down or Not Known
prediction_schema = ResponseSchema(name="prediction",
                                    description="For the company talked \
                                    about in the article, What would be\
                                    the movement of its stock price after the publishing of the article? \
                                    Answer Up if upwards,\
                                    Down if downwards \
                                    and Not Known if not known.")

response_schemas = [sentiment_schema, 
                    evidence_schema,
                    prediction_schema]

# Parser for the structured reply, and the format instructions to place in the prompt
output_parser = StructuredOutputParser.from_response_schemas(response_schemas)
format_instructions = output_parser.get_format_instructions()

In [None]:
# Single-prompt template: asks for sentiment, evidence and a stock prediction.
# Placeholders: {text} (the article) and {format_instructions}.
review_template_2 = (
    "For the following news article, extract the following information:\n"
    "\n"
    "sentiment: What is the sentiment of the news article? \n"
    "Answer in the range of (-1, 1) where, -1 being Negative and 1 being Positive.\n"
    "\n"
    "evidence: Extract any sentences that provide evidence for the sentiment of the article,\n"
    "and output them as a comma separated Python list.\n"
    "\n"
    "prediction: For the company talked about in the article, What would be the movement of its stock price after the publishing of the article?\n"
    "Answer Up if upwards, Down if downwards and Not Known if not known.\n"
    "\n"
    "news article: {text}\n"
    "\n"
    "{format_instructions}\n"
)

# Chat prompt built from the template text
prompt = ChatPromptTemplate.from_template(review_template_2)

# Render the prompt once for the sample article
prompt_inputs = {
    'text': news_article,
    'format_instructions': format_instructions,
}
messages = prompt.format_messages(**prompt_inputs)

6.283799999999999

In [None]:
# Trial run of the single-prompt approach on the first rows of df.
#   outputs            -- parsed results, one dict per successful article
#   output_not_correct -- GOID -> raw model reply that could not be parsed, or
#                         the error's repr when no reply was obtained at all
outputs = []
output_not_correct = {}
for i, row in tqdm(df.iterrows(), total=len(df)):
    # Reset per row so the except branch never reads a reply left over from a
    # previous iteration (or an unbound name on the very first row).
    response = None
    try:
        output_dict = {'GOID': row['GOID']}
        messages = prompt.format_messages(text=row['Content'], 
                                    format_instructions=format_instructions)
        # .invoke() rather than calling the model object directly, matching
        # how the chain is run elsewhere in this file
        response = chat.invoke(messages)
        output_dict.update(output_parser.parse(response.content))
        outputs.append(output_dict)
    except Exception as err:
        # Narrowed from a bare except so Ctrl-C still interrupts the loop
        if response is not None:
            output_not_correct[row['GOID']] = response.content
        else:
            output_not_correct[row['GOID']] = repr(err)

    # Stop early: only a small sample is needed for this experiment
    if i == 30:
        break

## Summary and ticker

In [None]:
# Output fields for the summary / company-detection experiment.
# NOTE: the triple-quoted description below keeps its line breaks and leading
# indentation as part of the string.

# Article summary (at most 200 words)
summary_schema = ResponseSchema(name="summary",
                             description="Write a summary of the news article in at most 200 words.")
# Which of the four tracked companies the article is about
company_schema = ResponseSchema(name="companies",
                                description='''Out of Apple, Microsoft, Salesforce and Alphabet, which 
                                companies does the entire news article talks about? 
                                Output them as a comma separated Python list.''')

response_schemas = [summary_schema, 
                    company_schema]

# Parser for the structured reply, and the format instructions to place in the prompt
output_parser = StructuredOutputParser.from_response_schemas(response_schemas)
format_instructions = output_parser.get_format_instructions()

# Prompt text for this experiment; placeholders are {text} and {format_instructions}.
# This cell only defines the string -- no prompt object is built from it here.
review_template_2 = """\
For the following news article, extract the following information:

summary: Write a summary of the news article in at most 200 words.

companies: Out of Apple, Microsoft, Salesforce and Alphabet, which companies does the entire news article talks about? 
Output them as a comma separated Python list.

news article: {text}

{format_instructions}
"""

## Per Company

In [None]:
# Output fields for the per-company analysis.
# NOTE(review): the literal "{company}" inside these descriptions is presumably
# NOT substituted once the text is embedded in format_instructions and passed
# to the prompt as a plain input value -- confirm this is intended.
sentiment_schema = ResponseSchema(
    name="sentiment",
    description=(
        "Determine the degree of sentiment of the news article with respect to {company}.\n"
        "Output in the range of (-1, 1) where -1 is Negative, and 1 is Positive."
    ),
)

evidence_schema = ResponseSchema(
    name="evidence",
    description=(
        "Extract any sentences that provide evidence for the extracted sentiment, "
        "and output as a Python List."
    ),
)

stock_movement_schema = ResponseSchema(
    name="stock_movement",
    description=(
        "Given your expertise in the field, determine stock movement of the {company}.\n"
        "Output as Up or Down."
    ),
)

explaination_schema = ResponseSchema(
    name="explaination",
    description="Explain your thoughts and thinking process. Output as string.",
)

response_schemas = [
    sentiment_schema,
    evidence_schema,
    stock_movement_schema,
    explaination_schema,
]

# Parser for the structured reply, and the format instructions to place in the prompt
output_parser = StructuredOutputParser.from_response_schemas(response_schemas)
format_instructions = output_parser.get_format_instructions()

# Prompt text with {company}, {text} and {format_instructions} placeholders
review_template_2 = (
    "You are an expert in analyzing financial news, stock market and trading for {company} given below. "
    "For the following financial news article, do the following:\n"
    "\n"
    "sentiment: Determine the degree of sentiment of the news article with respect to {company}.\n"
    "Output in the range of (-1, 1) where -1 is Negative, and 1 is Positive.\n"
    "\n"
    "evidence: Extract any sentences that provide evidence for the extracted sentiment, and output as a Python List.\n"
    "\n"
    "stock_movement: Given your expertise in the field, determine stock movement of the {company}.\n"
    "Output as Up or Down.\n"
    "\n"
    "Explaination: Explain your thoughts and thinking process. Output as string.\n"
    "\n"
    "article: {text}\n"
    "{format_instructions}\n"
)

In [None]:
# Company-specific prompt templates. All four share the same wording and only
# differ in the company name, so they are derived from one skeleton.
# str.replace (not str.format) is used on purpose: the {text} and
# {format_instructions} placeholders must survive untouched for later formatting.
_company_prompt_skeleton = (
    "You are an expert in analyzing financial news, stock market and trading for <COMPANY>. "
    "For the following financial news article, do the following:\n"
    "\n"
    "sentiment: Determine the degree of sentiment of the news article with respect to <COMPANY>.\n"
    "Output in the range of (-1, 1) where -1 is Negative, and 1 is Positive.\n"
    "\n"
    "evidence: Extract any sentences that provide evidence for the extracted sentiment, and output as a Python List.\n"
    "\n"
    "stock_movement: Given your expertise in the field, determine stock movement of the <COMPANY>.\n"
    "Output as Up or Down.\n"
    "\n"
    "Explaination: Explain your thoughts and thinking process. Output as string.\n"
    "\n"
    "article: {text}\n"
    "{format_instructions}"
)

apple_template = _company_prompt_skeleton.replace("<COMPANY>", "Apple")
alphabet_template = _company_prompt_skeleton.replace("<COMPANY>", "Alphabet")
microsoft_template = _company_prompt_skeleton.replace("<COMPANY>", "Microsoft")
salesforce_template = _company_prompt_skeleton.replace("<COMPANY>", "Salesforce")

## Sequential Chains

In [None]:
# Output fields for the sequential-chain experiment.
# NOTE(review): the literal "{company}" inside these descriptions is presumably
# NOT substituted once the text is embedded in format_instructions and passed
# to the prompt as a plain input value -- confirm this is intended.
sentiment_schema = ResponseSchema(
    name="sentiment",
    description=(
        "Determine the degree of sentiment of the news article with respect to {company}.\n"
        "Output in the range of (-1, 1) where -1 is Negative, and 1 is Positive."
    ),
)

evidence_schema = ResponseSchema(
    name="evidence",
    description=(
        "Extract any sentences that provide evidence for the extracted sentiment, "
        "and output as a Python List."
    ),
)

stock_movement_schema = ResponseSchema(
    name="stock_movement",
    description=(
        "Given your expertise in the field, determine stock movement of the {company}.\n"
        "Output as Up or Down."
    ),
)

explaination_schema = ResponseSchema(
    name="explaination",
    description="Explain your thoughts and thinking process. Output as string.",
)

response_schemas = [
    sentiment_schema,
    evidence_schema,
    stock_movement_schema,
    explaination_schema,
]

# Parser for the structured reply, and the format instructions to place in the prompt
output_parser = StructuredOutputParser.from_response_schemas(response_schemas)
format_instructions = output_parser.get_format_instructions()

# Prompt text with {company}, {text} and {format_instructions} placeholders
review_template_2 = (
    "You are an expert in analyzing financial news for {company} given below. "
    "For the following financial news article, do the following:\n"
    "\n"
    "sentiment: Determine the degree of sentiment of the news article with respect to {company}.\n"
    "Output in the range of (-1, 1) where -1 is Negative, and 1 is Positive.\n"
    "\n"
    "evidence: Extract any sentences that provide evidence for the extracted sentiment, and output as a Python List.\n"
    "\n"
    "stock_movement: Given your expertise in the field, determine stock movement of the {company}.\n"
    "Output as Up or Down.\n"
    "\n"
    "Explaination: Explain your thoughts and thinking process. Output as string.\n"
    "\n"
    "article: {text}\n"
    "{format_instructions}\n"
)

In [None]:
# Chat model at temperature 0, using the model name chosen earlier
llm = ChatOpenAI(model=llm_model, temperature=0.0)

# Prompt 1: ask for the single company the article is mainly about
first_prompt = ChatPromptTemplate.from_template(
    "\n"
    "    You are an expert in the field of finance news, stock market and trading. "
    "Which company does the news article talk about majorly? "
    "Output only one company name.\n"
    "    article: {text}\n"
    "    "
)

# Chain 1: its reply is exposed under the 'company' key
chain_one = LLMChain(prompt=first_prompt, llm=llm, output_key='company')

In [None]:
# prompt template 2
# Analysis prompt. It takes three inputs -- {company}, {text} and
# {format_instructions} -- and asks for sentiment, evidence, stock movement
# and an explanation for the given article.
# NOTE: every line of the literal below carries its four-space indentation
# into the prompt text, because the string is not dedented.
second_prompt = ChatPromptTemplate.from_template(
    """\
    You are an expert in analyzing financial news, stock market and trading for {company} given below. For the following financial news article, do the following:

    sentiment: Determine the degree of sentiment of the news article with respect to {company}.
    Output in the range of (-1, 1) where -1 is Negative, and 1 is Positive.

    evidence: Extract any sentences that provide evidence for the extracted sentiment, and output as a Python List.

    stock_movement: Given your expertise in the field, determine stock movement of the {company}.
    Output as Up or Down.

    Explaination: Explain your thoughts and thinking process. Output as string.

    article: {text}
    {format_instructions}
    """
)
# chain 2
# The model's reply is exposed under the 'json_output' key.
chain_two = LLMChain(llm=llm, prompt=second_prompt, output_key='json_output')

In [None]:
# Two-step pipeline: chain_one runs first, then chain_two.
# Inputs:  "text", "format_instructions"
# Outputs: "company", "json_output"
pipeline_steps = [chain_one, chain_two]
overall_chain = SequentialChain(
    chains=pipeline_steps,
    input_variables=["text", "format_instructions"],
    output_variables=["company", "json_output"],
    verbose=False,
)

In [None]:
# Try the full pipeline on a single article (row 16, column 1 of df).
# .invoke() replaces the direct call on the chain object: it matches how the
# chain is run elsewhere in this file and avoids the deprecated call style.
outputs = overall_chain.invoke({
    'text': df.iloc[16, 1],
    'format_instructions': format_instructions
})

# Print the detected company followed by the parsed structured fields
print(outputs['company'], output_parser.parse(outputs['json_output']))

In [None]:
# Short trial run of the two-step chain over the first few rows of df.
#   df_outputs -- one dict per article that was processed and parsed successfully
#   not_worked -- GOIDs of articles whose chain call or output parsing failed
df_outputs = []
not_worked = []

for i, row in tqdm(df.iterrows(), total=len(df)):
    try:
        # .invoke() rather than calling the chain object directly, matching
        # how the chain is run elsewhere in this file
        outputs = overall_chain.invoke({
            'text': row['Content'],
            'format_instructions': format_instructions
        })
        output_dict = {'GOID': row['GOID'], 'company': outputs['company']}
        output_dict.update(output_parser.parse(outputs['json_output']))
        df_outputs.append(output_dict)
    except Exception:
        # Best effort: note the failing article and continue. `Exception`
        # (not a bare except) lets Ctrl-C / SystemExit still stop the loop.
        not_worked.append(row['GOID'])

    # Stop after the index label reaches 2 -- this is only a smoke test
    if i == 2:
        break