In [2]:
import pandas as pd
import yfinance as yf

In [17]:
stock_symbol = "AAPL"
start_date = "2024-12-01"
end_date = "2025-02-01"

print(f"Fetching stock price data for {stock_symbol} from {start_date} to {end_date}...")
stock_data = yf.download(stock_symbol, start=start_date, end=end_date)
stock_data.reset_index(inplace=True)


stock_data.columns = ["Date", "Open", "High", "Low", "Close", "Volume"]
stock_data["Date"] = stock_data["Date"].astype(str)

print("Fixed Stock Data Columns:", stock_data.columns)

[*********************100%***********************]  1 of 1 completed

📈 Fetching stock price data for AAPL from 2024-12-01 to 2025-02-01...
📊 Fixed Stock Data Columns: Index(['Date', 'Open', 'High', 'Low', 'Close', 'Volume'], dtype='object')





In [None]:
sentiment_data = pd.read_csv("../data/sentiment_stock_data.csv")

if "Date" not in sentiment_data.columns:
    print("No Date column found in Sentiment Data. Assigning estimated dates...")
    sentiment_data["Date"] = pd.date_range(end=pd.Timestamp.today(), periods=len(sentiment_data), freq="D")

sentiment_data["Date"] = pd.to_datetime(sentiment_data["Date"]).dt.strftime("%Y-%m-%d")

if "Close" in sentiment_data.columns:
    print("Removing existing `Close` column from Sentiment Data before merging.")
    sentiment_data.drop(columns=["Close"], inplace=True)


df_merged = pd.merge(sentiment_data, stock_data, on="Date", how="left")
df_merged.rename(columns={"Close_y": "Close"}, inplace=True)
df_merged.drop(columns=["Close_x"], inplace=True, errors="ignore")
df_merged.fillna(method="ffill", inplace=True)
df_merged["Close_Change"] = df_merged["Close"].diff().fillna(0)
df_merged["Price_Volatility"] = ((df_merged["High"] - df_merged["Low"]) / df_merged["Close"]) * 100
df_merged["Tweet_Count"] = df_merged.groupby("Date")["Tweet"].transform("count")

print("Data merged successfully! Sample:")
print(df_merged.head())

⚠️ Removing existing `Close` column from Sentiment Data before merging.
✅ Data merged successfully! Sample:
                                               Tweet  Sentiment_Score  \
0  Crypto Daily : \n\n#BTCUSD  Bitcoin back so do...           0.0000   
1  Cutoshi barely down, #AAPL barely down. Pocket...           0.4939   
2  36 Featured Stock Charts updated at 01/31/2025...           0.2960   
3  Portfolio of February 2025 - Our Choices:\nhtt...           0.0000   
4  Portfolio of January 2025 - Results:\nhttps://...           0.0000   

         Date  Stock Movement        Open        High         Low       Close  \
0  2025-01-23               0  223.660004  227.029999  222.300003  224.740005   
1  2025-01-24               0  222.779999  225.630005  221.410004  224.779999   
2  2025-01-25               0  222.779999  225.630005  221.410004  224.779999   
3  2025-01-26               0  222.779999  225.630005  221.410004  224.779999   
4  2025-01-27               1  229.860001  232.1

  df_merged.fillna(method="ffill", inplace=True)


In [22]:
df_merged["Date"] = pd.to_datetime(df_merged["Date"])
df_merged = df_merged[df_merged["Date"].dt.weekday < 5]
df_merged["Sentiment_MA3"] = df_merged["Sentiment_Score"].rolling(3, min_periods=1).mean()
df_merged["Next_Close"] = df_merged["Close"].shift(-1)
df_merged["Stock Movement"] = (df_merged["Next_Close"] > df_merged["Close"]).astype(int)
df_merged.drop(columns=["Next_Close"], inplace=True)

df_merged.to_csv("../data/final_stock_sentiment.csv", index=False)

print("Final Cleaned CSV Saved Successfully!")


Final Cleaned CSV Saved Successfully!
