In [69]:
import pandas as pd
import plotly.express as px
from dotenv import load_dotenv
from nltk.sentiment.vader import SentimentIntensityAnalyzer
import nltk
import ta

In [2]:
nltk.download('vader_lexicon')
sid = SentimentIntensityAnalyzer()

# Load Alpaca credentials
load_dotenv()

[nltk_data] Downloading package vader_lexicon to
[nltk_data]     /Users/niroren/nltk_data...
[nltk_data]   Package vader_lexicon is already up-to-date!


False

In [59]:
def score_sentiment(news_df):
    news_df["sentiment"] = news_df["headline"].apply(lambda x: sid.polarity_scores(x)["compound"])
    print("done")
    return news_df

In [None]:
clean = pd.read_csv("data/cleaned_news.csv").drop(columns=['Unnamed: 0'])

# add sentiments, and extract date
news_df = score_sentiment(clean)
news_df.created_at = pd.to_datetime(news_df.created_at)
news_df['date'] = news_df.created_at.dt.date

In [11]:
tickers = ['AAPL','GOOGL']

In [None]:
ticker_news = {}
daily_sentiment = {}
for symbol in tickers:
    # extract news articles that have to do with the ticker
    ticker_news[symbol] = news_df[news_df['symbols'].apply(lambda x: f"'{symbol}'" in x)]
    
    # calculate average sentiment per day
    daily_sentiment[symbol] = ticker_news[symbol].groupby('date')['sentiment'].mean()


In [79]:
# merge sentiment, add other indicators

price_dfs = {}
for ticker in tickers:
    price_dfs[ticker] = pd.read_csv(f'data/{ticker}.csv').set_index('Date')
    price_dfs[ticker]['news_sentiment'] = (daily_sentiment[ticker])
    price_dfs[ticker]['return'] = price_dfs[ticker]['Close'].diff()
    price_dfs[ticker]['fwd_return'] = price_dfs[ticker]['Close'].diff().shift(-1)

    # EMA
    price_dfs[ticker]['ema'] = ta.trend.ema_indicator(
        price_dfs[ticker]['Close'], window=20)
    
    # RSI
    price_dfs[ticker]['rsi_14'] = ta.momentum.rsi(price_dfs[ticker]['Close'], window=14)

    # PVT
    price_dfs[ticker]['pvt'] = ta.volume.volume_price_trend(price_dfs[ticker]['Close'], price_dfs[ticker]['Volume'])

    # MACD
    price_dfs[ticker]['macd'] = ta.trend.macd(price_dfs[ticker]['Close'])
    price_dfs[ticker]['macd_signal'] = ta.trend.macd_signal(price_dfs[ticker]['Close'])
    price_dfs[ticker]['macd_diff'] = ta.trend.macd_diff(price_dfs[ticker]['Close'])

In [80]:
price_dfs['AAPL']

Unnamed: 0_level_0,Open,High,Low,Close,Volume,news_sentiment,return,fwd_return,ema,rsi_14,pvt,macd,macd_signal,macd_diff
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1
2022-12-30,128.41,129.95,127.43,129.93,77034211,-0.012300,,-0.32,,,,,,
2022-12-29,127.99,130.48,127.73,129.61,75703711,0.015200,-0.32,-3.57,,,-1.864480e+05,,,
2022-12-28,129.67,131.03,125.87,126.04,85438391,0.122017,-3.57,3.99,,,-2.539778e+06,,,
2022-12-27,131.38,131.41,128.72,130.03,69007828,0.039906,3.99,1.83,,,-3.552233e+05,,,
2022-12-23,130.92,132.42,129.64,131.86,63814887,0.121450,1.83,0.37,,,5.428867e+05,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2025-01-08,241.92,243.71,240.05,242.70,37628941,0.220200,5.85,-0.49,233.209777,60.551618,5.086962e+07,1.149247,0.073271,1.075976
2025-01-07,242.98,245.55,241.35,242.21,40855961,0.034377,-0.49,2.79,234.066941,59.935485,5.078714e+07,1.727602,0.404137,1.323465
2025-01-06,244.31,247.33,243.20,245.00,45045566,0.073488,2.79,-1.64,235.108185,62.288453,5.130601e+07,2.383606,0.800031,1.583575
2025-01-03,243.36,244.18,241.89,243.36,40244113,0.057607,-1.64,0.49,235.894072,60.055729,5.103663e+07,2.739579,1.187941,1.551639
