In [None]:
import os
import sys
import pandas as pd
import matplotlib.pyplot as plt
from textblob import TextBlob

In [None]:
# Load the dataset
def load_data(filepath):
    """Read a CSV file into a pandas DataFrame.

    Args:
        filepath: Anything ``pd.read_csv`` accepts as its first argument
            (the notebook passes a path string).

    Returns:
        The DataFrame produced by ``pd.read_csv`` with default options.
    """
    frame = pd.read_csv(filepath)
    return frame

# Compute sentiment scores
def compute_sentiment(df, text_col="headline"):
    """Add TextBlob sentiment columns to ``df`` and return it.

    The frame is modified in place: ``text_col`` is cast to ``str`` and two
    columns are written, ``"Polarity"`` and ``"Subjectivity"``, taken from
    ``TextBlob(text).sentiment`` for each row.

    Args:
        df: DataFrame containing ``text_col``.
        text_col: Name of the column holding the text to score.

    Returns:
        The same ``df`` object that was passed in, with the two sentiment
        columns added.

    Raises:
        KeyError: If ``text_col`` is not a column of ``df``.
    """
    df[text_col] = df[text_col].astype(str)

    # Analyse every text exactly once and read both scores from that single
    # result; building a separate TextBlob per score doubles the work.
    sentiments = [TextBlob(text).sentiment for text in df[text_col]]

    # Reuse df.index so the assignment is row-for-row; float64 keeps the
    # columns numeric even when the frame has no rows.
    df["Polarity"] = pd.Series(
        [s.polarity for s in sentiments], index=df.index, dtype="float64"
    )
    df["Subjectivity"] = pd.Series(
        [s.subjectivity for s in sentiments], index=df.index, dtype="float64"
    )
    return df

# Filter news for a specific company
def filter_company_news(df, company_name, text_col="headline", regex=False):
    """Return the rows of ``df`` whose text mentions ``company_name``.

    Matching is case-insensitive and rows with a missing text value are
    excluded. By default ``company_name`` is matched as a literal substring,
    so names containing regex metacharacters (``"BRK.B"``, ``"C++"``) match
    only themselves and never raise a pattern error.

    Args:
        df: DataFrame containing ``text_col``.
        company_name: Name or ticker to look for.
        text_col: Column to search; defaults to ``"headline"``.
        regex: Pass ``True`` to treat ``company_name`` as a regular
            expression instead of a literal string.

    Returns:
        The subset of ``df`` whose ``text_col`` contains ``company_name``.
    """
    mask = df[text_col].str.contains(
        company_name, case=False, na=False, regex=regex
    )
    return df[mask]

# Summarize sentiment
def summarize_sentiment(df):
    """Summarize the sentiment columns of ``df``.

    Args:
        df: DataFrame with ``"Polarity"``, ``"Subjectivity"`` and
            ``"headline"`` columns.

    Returns:
        A dict with keys ``"average_polarity"``, ``"average_subjectivity"``,
        ``"most_positive"`` and ``"most_negative"``. The averages are the
        column means (NaN for an empty frame). The last two are the headlines
        of the rows with the highest and lowest polarity, or ``None`` when
        the frame has no non-missing polarity to rank.
    """
    polarity = df["Polarity"]

    if polarity.notna().any():
        # Look rows up by position rather than by index label, so a frame
        # with repeated index labels still yields a single headline.
        headlines = df["headline"]
        most_positive = headlines.iloc[polarity.argmax()]
        most_negative = headlines.iloc[polarity.argmin()]
    else:
        # Empty (or all-missing) input: there is no extreme row to report.
        most_positive = None
        most_negative = None

    return {
        "average_polarity": polarity.mean(),
        "average_subjectivity": df["Subjectivity"].mean(),
        "most_positive": most_positive,
        "most_negative": most_negative,
    }

In [None]:
# Relative to the notebook's working directory: up one level, then into data/.
# (The separator after ".." is required; "..data/" would name a sibling
# directory literally called "..data".)
csv_file = "../data/raw_analyst_ratings.csv"
df = load_data(csv_file)

In [None]:
# Tickers whose headlines are summarized and plotted further down
companies = [
    "AAPL",
    "GOOG",
    "AMZN",
    "TSLA",
    "MSFT",
]

# Score every headline; adds the "Polarity" and "Subjectivity" columns
df = compute_sentiment(df)

In [None]:
# For each ticker: print its sentiment summary, then plot the polarity histogram.
for company in companies:
    print(f"\nSentiment summary for {company}:")
    filtered_df = filter_company_news(df, company)

    # A company with no matching headlines has nothing to summarize or plot;
    # skip it instead of passing an empty frame downstream.
    if filtered_df.empty:
        print("No matching headlines found.")
        continue

    summary = summarize_sentiment(filtered_df)
    for k, v in summary.items():
        print(f"{k}: {v}")

    # Plot sentiment distribution on an explicit figure/axes pair so each
    # company gets its own figure, independent of pyplot's global state.
    fig, ax = plt.subplots(figsize=(8, 4))
    ax.hist(filtered_df["Polarity"], bins=20, color="skyblue", edgecolor="black")
    ax.set_title(f"Sentiment Polarity Distribution for {company}")
    ax.set_xlabel("Polarity")
    ax.set_ylabel("Frequency")
    fig.tight_layout()
    plt.show()
    # Release the figure so repeated iterations do not accumulate open figures.
    plt.close(fig)