In [30]:
import os
import sys
from dotenv import load_dotenv

import pandas as pd

In [14]:
# Treat the parent of the current working directory as the project root.
project_root = os.path.abspath(os.path.join(os.getcwd(), os.pardir))

# The .env file is expected directly under the project root.
env_path = os.path.join(project_root, ".env")

# Load the variables defined in .env; the call stays the last expression so the
# cell displays its boolean result.
load_dotenv(env_path)

True

In [15]:
# Look up the OpenAI key in the process environment; the value is None when the
# variable is not set.
openai_api_key = os.environ.get("OPENAI_API_KEY")

In [16]:
# Locate the repository relative to the notebook instead of through a
# machine-specific absolute path. Like the .env lookup, this treats the parent of
# the working directory as the project root, so the notebook runs from any checkout.
repo_root = os.path.abspath(os.path.join(os.getcwd(), ".."))
scripts_dir = os.path.join(repo_root, "scripts")

# Guard the append so re-running this cell does not stack duplicate sys.path entries.
if scripts_dir not in sys.path:
    sys.path.append(scripts_dir)


In [31]:
# Python script imports
from finbert_news_classifier import FinBERTNewsClassifier
from gpt4o_sentiment import analyze_sentiment_dataframe
from category_sentiment_scores import compute_category_sentiment_scores

In [None]:
# Small hand-written sample: eight headlines that all share one date, built as
# (date, text) records.
sample_df = pd.DataFrame(
    [
        ("2024-01-02", headline)
        for headline in (
            "Tesla shares rally after record vehicle deliveries and strong Q4 guidance.",
            "Oil prices tumble as OPEC signals potential production increase.",
            "U.S. President announces new trade measures targeting Chinese tech firms.",
            "U.S. economy crashes",
            "Apple and Google to invest 500 million in AI research partnership in the US",
            "U.S. economy shows no signs of recovery amid rising unemployment rates.",
            "U.S. Federal Reserve hints at possible interest rate hike in upcoming meeting.",
            "U.S. federal reserve hints at positive economic outlook despite inflation concerns.",
        )
    ],
    columns=["date", "text"],
)

sample_df


Unnamed: 0,date,text
0,2024-01-02,Tesla shares rally after record vehicle delive...
1,2024-01-02,Oil prices tumble as OPEC signals potential pr...
2,2024-01-02,U.S. President announces new trade measures ta...
3,2024-01-02,U.S. economy crashes
4,2024-01-02,Apple and Google to invest 500 million in AI r...
5,2024-01-02,U.S. economy shows no signs of recovery amid r...
6,2024-01-02,U.S. Federal Reserve hints at possible interes...
7,2024-01-02,U.S. federal reserve hints at positive economi...


In [None]:
# Instantiate the classifier without arguments, i.e. with its default categories.
classifier = FinBERTNewsClassifier()

# Classify the headlines held in the "text" column and display the resulting frame.
df_with_topics = classifier.classify_dataframe(
    sample_df,
    text_col="text",
)
df_with_topics


Loading FinBERT model: ProsusAI/finbert ...
✓ FinBERT model loaded.
✓ Category embeddings built for 9 categories.


Classifying texts: 100%|██████████| 1/1 [00:00<00:00,  5.84it/s]


Unnamed: 0,date,text,topic_category,topic_confidence
0,2024-01-02,Tesla shares rally after record vehicle delive...,Corporate,0.417803
1,2024-01-02,Oil prices tumble as OPEC signals potential pr...,Economy,0.894801
2,2024-01-02,U.S. President announces new trade measures ta...,US Politics,0.569499
3,2024-01-02,U.S. economy crashes,Economy,0.684717
4,2024-01-02,Apple and Google to invest 500 million in AI r...,Technology,0.775182
5,2024-01-02,U.S. economy shows no signs of recovery amid r...,Economy,0.989989
6,2024-01-02,U.S. Federal Reserve hints at possible interes...,Economy,0.934091
7,2024-01-02,U.S. federal reserve hints at positive economi...,Economy,0.983869


In [None]:
# Run sentiment analysis on the topic-tagged headlines. The helper returns two
# frames: per-article results (df_sent) and an aggregate keyed by date (daily).
df_sent, daily = analyze_sentiment_dataframe(
    df_with_topics,
    text_col="text",
    date_col="date",
)

df_sent

Sentiment analysis (GPT-4o-mini): 100%|██████████| 8/8 [00:09<00:00,  1.22s/it]


Unnamed: 0,date,text,topic_category,topic_confidence,sentiment,confidence,raw_output
0,2024-01-02,Tesla shares rally after record vehicle delive...,Corporate,0.417803,Positive,0.95,Sentiment: Positive \nConfidence: 0.95
1,2024-01-02,Oil prices tumble as OPEC signals potential pr...,Economy,0.894801,Negative,0.85,Sentiment: Negative \nConfidence: 0.85
2,2024-01-02,U.S. President announces new trade measures ta...,US Politics,0.569499,Negative,0.85,Sentiment: Negative \nConfidence: 0.85
3,2024-01-02,U.S. economy crashes,Economy,0.684717,Negative,0.95,Sentiment: Negative \nConfidence: 0.95
4,2024-01-02,Apple and Google to invest 500 million in AI r...,Technology,0.775182,Positive,0.9,Sentiment: Positive \nConfidence: 0.90
5,2024-01-02,U.S. economy shows no signs of recovery amid r...,Economy,0.989989,Negative,0.85,Sentiment: Negative \nConfidence: 0.85
6,2024-01-02,U.S. Federal Reserve hints at possible interes...,Economy,0.934091,Negative,0.85,Sentiment: Negative \nConfidence: 0.85
7,2024-01-02,U.S. federal reserve hints at positive economi...,Economy,0.983869,Positive,0.85,Sentiment: Positive \nConfidence: 0.85


In [20]:
# Display the aggregate frame returned alongside df_sent by the sentiment step.
daily

Unnamed: 0_level_0,Corporate_Positive,Corporate_Negative,Corporate_Neutral,Economy_Positive,Economy_Negative,Economy_Neutral,US Politics_Positive,US Politics_Negative,US Politics_Neutral,Technology_Positive,Technology_Negative,Technology_Neutral,Total_Articles
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1
2024-01-02,1.0,0,0,1.0,4.0,0,0,1.0,0,1.0,0,0,8.0


In [21]:
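# Optional (development only): uncomment the two magics below so edits to imported
# modules are picked up without restarting the kernel.
# %load_ext autoreload
# %autoreload 2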

In [22]:
# Category names handed to the category-score computation.
# NOTE(review): presumably mirrors the classifier's default category set — confirm.
CATEGORY_LIST = [
    "Corporate", "Technology", "Geo-Political",
    "US Politics", "Economy", "Energy",
    "Healthcare", "Automobile", "Airlines",
]

In [None]:
# Pass the daily aggregate and the category list to the scoring helper; the
# result is kept as score_df for the steps that follow.
score_df = compute_category_sentiment_scores(
    daily,
    categories=CATEGORY_LIST,
)


In [26]:
# Net sentiment over all analysed articles: (#Positive - #Negative) / #articles.
# The counts are converted to plain ints so the division behaves exactly like the
# row-count arithmetic it replaces.
# NOTE(review): the result is a single scalar, so every row of score_df receives
# the same value — confirm that is intended when several dates are present.
score_df["overall_sentiment"] = (
    int((df_sent["sentiment"] == "Positive").sum())
    - int((df_sent["sentiment"] == "Negative").sum())
) / len(df_sent)

In [29]:
import json

import requests

# Upper bound (seconds) on the HTTP call so the cell cannot hang forever when the
# model server is down or stalls.
REQUEST_TIMEOUT_SECONDS = 30

# feature_names.json lists the columns, in order, that are sent to the model.
with open("../feature_names.json", "r", encoding="utf-8") as f:
    feature_list = json.load(f)

# Report every missing feature at once; plain column selection would also raise a
# KeyError, but with a less direct message.
missing_features = [name for name in feature_list if name not in score_df.columns]
if missing_features:
    raise KeyError(f"score_df is missing required feature columns: {missing_features}")

df_new = score_df.copy(deep=True)
X_new = df_new[feature_list]

# Build the dataframe_split payload MLflow expects
payload = {"dataframe_split": X_new.to_dict(orient="split")}

url = "http://127.0.0.1:5001/invocations"
headers = {"Content-Type": "application/json"}

response = requests.post(
    url,
    headers=headers,
    data=json.dumps(payload),
    timeout=REQUEST_TIMEOUT_SECONDS,
)

print("Status:", response.status_code)
print("Raw response text:", response.text)  # for debugging

# response.json() signals an undecodable body with a ValueError subclass; catch
# only that so unrelated errors are not silently hidden.
try:
    print("Predictions:", response.json())
except ValueError:
    print("Response is not valid JSON.")


Status: 200
Raw response text: {"predictions": ["slightly_up"]}
Predictions: {'predictions': ['slightly_up']}
