# Unsupervised Learning Trading Strategy

## 1. Load Twitter Sentiment Data

In [9]:
# Load twitter sentiment dataset, set the index, calculate engagement ratio, and filter out stocks with no significant twitter activity.
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import datetime as dt
import yfinance as yf
import os
plt.style.use('ggplot')
data_folder = '/Users/vishnu/home/resume_projects/TradingAlgorithm'
sentiment_df = pd.read_csv(os.path.join(data_folder, 'sentiment_data.csv'))

sentiment_df['date'] = pd.to_datetime(sentiment_df['date'])
sentiment_df = sentiment_df.set_index(['date', 'symbol'])
sentiment_df['engagement_ratio'] = sentiment_df['twitterComments']/sentiment_df['twitterLikes']

# Filtering Only Stocks That Have 20+ Likes and 10+ Comments
sentiment_df = sentiment_df[(sentiment_df['twitterLikes'] > 20) & (sentiment_df['twitterLikes']>10)]
sentiment_df

Unnamed: 0_level_0,Unnamed: 1_level_0,twitterPosts,twitterComments,twitterLikes,twitterImpressions,twitterSentiment,engagement_ratio
date,symbol,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
2021-11-18,AAPL,811.0,2592.0,21674.0,7981808.0,,0.119590
2021-11-18,AMD,150.0,675.0,2949.0,1645270.0,,0.228891
2021-11-18,AMZN,557.0,1315.0,12969.0,5590695.0,,0.101396
2021-11-18,ATVI,82.0,36.0,131.0,1310715.0,,0.274809
2021-11-18,BA,61.0,55.0,342.0,425847.0,,0.160819
...,...,...,...,...,...,...,...
2023-01-04,TMO,21.0,2.0,32.0,30857.0,0.610020,0.062500
2023-01-04,TSLA,6767.0,540711.0,3810688.0,55464921.0,0.543057,0.141893
2023-01-04,TSN,35.0,168.0,460.0,57207.0,0.561900,0.365217
2023-01-04,V,132.0,1008.0,5943.0,139835.0,0.567286,0.169611


## 2. Aggregate Monthly and Calculate Average Sentiment for the Month

In [17]:
# Aggregate on a montly level and calculating average monthly metrics for chosen stocks
aggregated_df = (sentiment_df.reset_index('symbol').groupby([pd.Grouper(freq='M'), 'symbol'])
 [['engagement_ratio']].mean())

# Stocks with highest engagement ratio have highest rank
aggregated_df['rank'] = (aggregated_df.groupby(level=0)['engagement_ratio']
                         .transform(lambda x: x.rank(ascending=False)))

aggregated_df

Unnamed: 0_level_0,Unnamed: 1_level_0,engagement_ratio,rank
date,symbol,Unnamed: 2_level_1,Unnamed: 3_level_1
2021-11-30,AAL,0.203835,38.0
2021-11-30,AAPL,0.256318,23.0
2021-11-30,ABBV,0.244677,26.0
2021-11-30,ABT,0.285456,17.0
2021-11-30,AES,0.864613,2.0
...,...,...,...
2023-01-31,TMO,0.243042,39.0
2023-01-31,TSLA,0.151992,73.0
2023-01-31,TSN,0.280553,27.0
2023-01-31,V,0.194045,61.0
