In [1]:
import os

import pandas as pd
from dotenv import load_dotenv
from tqdm import tqdm
from openai import OpenAI

In [2]:
# Load the variables defined in the .env file one directory up, then read the
# OpenAI key from the process environment (None when the variable is not set).
load_dotenv(dotenv_path='../.env')
OPENAI_API_KEY = os.environ.get("OPENAI_API_KEY")

In [3]:
# Shared API client; parse_article_gpt calls client.chat.completions.create on it.
# NOTE(review): no api_key is passed here, so the SDK presumably picks the key up
# from the OPENAI_API_KEY environment variable loaded from .env above — confirm.
client = OpenAI()

## Preprocessing with GPT

In [5]:
def parse_article_gpt(article, instructions, model="gpt-4o-mini"):
    """Send one article to an OpenAI chat model and return the model's reply.

    Uses the module-level ``client`` to make a single chat-completion request.

    Parameters
    ----------
    article : str
        Text sent as the user message. Non-string values are converted
        with ``str()`` before being sent.
    instructions : str
        Text sent as the system message (the prompt that tells the model
        what to do with ``article``).
    model : str, optional
        Name of the chat model to call. Defaults to ``"gpt-4o-mini"``, the
        value that was previously hard-coded.

    Returns
    -------
    The ``content`` of the first choice's message in the completion.
    """
    completion = client.chat.completions.create(
        model=model,
        messages=[
            {"role": "system", "content": instructions},
            {"role": "user", "content": str(article)},
        ],
    )

    # Only the first choice is used; no `n` is requested above.
    return completion.choices[0].message.content

### Test prompt

In [6]:
# Smoke test of the prompt on a single hand-written article; the call is the
# cell's last expression, so the model's reply is displayed as the cell output.
# NOTE(review): the prompt contains the typo "postiive"; it is kept verbatim
# here because changing it changes the text sent to the model.
parse_article_gpt(
    article=(
        "Tesla reported record quarterly deliveries on Wednesday, putting the "
        "electric-car maker within reach of its goal for the year. But analysts "
        "remain skeptical that the company will show a profit for the period."
    ),
    instructions=(
        "Evaluate, in one paragraph, how this piece of news would affect the "
        "Tesla stock price, while only using strictly either postiive or "
        "negative words before passing it into another sentiment scoring model."
    ),
)

"This piece of news is likely to have a mixed effect on Tesla's stock price. The record quarterly deliveries serve as a strong positive indicator of the company's growth and market demand for its vehicles, suggesting potential for increased revenue. However, the skepticism regarding profitability introduces a negative sentiment that could concern investors about the sustainability of this growth. Overall, the combination of record deliveries and profit uncertainty could lead to volatility in Tesla's stock price, influencing investors' sentiments either positively or negatively depending on their focus on growth potential versus profitability concerns."

### Load Data

### Tesla

In [None]:
# Load the Tesla news rows. The summarisation loop below reads the
# 'lead_paragraph' and 'abstract' columns of this frame.
tesla_headlines = pd.read_csv('../data/Tesla/tesla_headlines.csv')

In [None]:
# System prompt for the Tesla pass; passed to parse_article_gpt as `instructions`.
# The misspelling "postiive" was corrected to "positive" so the model receives
# the intended wording. This changes the prompt text, so summaries produced with
# the old wording are not directly comparable.
instructions_tesla = "Evaluate, in one paragraph, how this piece of news would affect the Tesla stock price, while only using strictly either positive or negative words before passing it into another sentiment scoring model."


In [None]:
# Summarise every Tesla row with GPT, writing the reply into 'gpt_summary'.
#
# The pass is resumable: rows that already hold a summary are skipped, so
# re-running this cell after an API failure does not repeat (and re-bill)
# requests that already completed. Re-run the load cell first to force a
# full regeneration.
if 'gpt_summary' not in tesla_headlines.columns:
    tesla_headlines['gpt_summary'] = None

for i, row in tqdm(tesla_headlines.iterrows(), total=tesla_headlines.shape[0]):
    if pd.notna(row['gpt_summary']):
        continue

    # Missing CSV fields arrive as NaN; drop them rather than sending the
    # literal text "nan" to the model.
    text_parts = [
        str(row[col])
        for col in ('lead_paragraph', 'abstract')
        if pd.notna(row[col])
    ]
    if not text_parts:
        # Nothing to summarise: leave 'gpt_summary' empty instead of paying
        # for a reply about an empty article.
        continue

    # Same separator as before, so rows with both fields produce the same text.
    combined_string = " \n ".join(text_parts)
    gpt_summary = parse_article_gpt(combined_string, instructions_tesla)
    tesla_headlines.loc[i, 'gpt_summary'] = gpt_summary


100%|██████████| 1365/1365 [6:31:58<00:00, 17.23s/it]  


In [None]:
# Save the Tesla frame, now carrying the 'gpt_summary' column filled in by the loop above.
# NOTE(review): no index= argument is given, so pandas' default applies and the row
# index is written as an extra unnamed first column — confirm downstream readers expect that.
tesla_headlines.to_csv('../data/Tesla/tesla_gpt_summarised_sentiments.csv')

### S&P

In [None]:
# Load the S&P news rows. The summarisation loop below reads the
# 'lead_paragraph' and 'abstract' columns of this frame.
spy_headlines = pd.read_csv('../data/S&P_market/s&p_headlines.csv')

In [None]:
# System prompt for the S&P pass; passed to parse_article_gpt as `instructions`.
# The misspelling "postiive" was corrected to "positive" so the model receives
# the intended wording. This changes the prompt text, so summaries produced with
# the old wording are not directly comparable.
instructions_spy = "Evaluate, in one paragraph, how this piece of news would affect the S&P index price, while only using strictly either positive or negative words before passing it into another sentiment scoring model."


In [None]:
# Summarise every S&P row with GPT, writing the reply into 'gpt_summary'.
#
# The pass is resumable: rows that already hold a summary are skipped, so
# re-running this cell after an API failure does not repeat (and re-bill)
# requests that already completed. Re-run the load cell first to force a
# full regeneration.
if 'gpt_summary' not in spy_headlines.columns:
    spy_headlines['gpt_summary'] = None

for i, row in tqdm(spy_headlines.iterrows(), total=spy_headlines.shape[0]):
    if pd.notna(row['gpt_summary']):
        continue

    # Missing CSV fields arrive as NaN; drop them rather than sending the
    # literal text "nan" to the model.
    text_parts = [
        str(row[col])
        for col in ('lead_paragraph', 'abstract')
        if pd.notna(row[col])
    ]
    if not text_parts:
        # Nothing to summarise: leave 'gpt_summary' empty instead of paying
        # for a reply about an empty article.
        continue

    # Same separator as before, so rows with both fields produce the same text.
    combined_string = " \n ".join(text_parts)
    gpt_summary = parse_article_gpt(combined_string, instructions_spy)
    spy_headlines.loc[i, 'gpt_summary'] = gpt_summary


100%|██████████| 1365/1365 [6:31:58<00:00, 17.23s/it]  


In [None]:
# Save the S&P frame, now carrying the 'gpt_summary' column filled in by the loop above.
# NOTE(review): no index= argument is given, so pandas' default applies and the row
# index is written as an extra unnamed first column — confirm downstream readers expect that.
spy_headlines.to_csv('../data/S&P_market/s&p_gpt_summarised_sentiments.csv')