##### Import statements

In [1]:
import pandas as pd

##### Load sentiment score

In [2]:
# Read the semicolon-separated table of texts and their sentiment scores.
sentiment_df = pd.read_csv(
	filepath_or_buffer="data/result/clean-data-with-sentiment.csv",
	sep=";"
)

In [3]:
# Preview the first five rows of the loaded sentiment table.
sentiment_df.head( n=5)

Unnamed: 0,date,text,neg,neu,pos,compound
0,2024-03-08T14:00:00.000Z,What’s Next For Southwest Stock After A 20% Ri...,0.0,1.0,0.0,0.0
1,2024-03-08T13:57:54.752Z,Another Strong Jobs Number But A Smaller Numbe...,0.104,0.701,0.195,0.1027
2,2024-03-08T13:25:15.999Z,One Size Does NOT Fit All - Talking To Women A...,0.129,0.734,0.137,0.3318
3,2024-03-08T13:00:00.000Z,Should You Pick CVS Stock At $75 After A 6% Fa...,0.094,0.906,0.0,-0.4588
4,2024-03-08T13:00:00.000Z,Communicating Effectively In The Competitive P...,0.0,0.642,0.358,0.8689


##### Group sentiment scores by date (averaging scores)

In [4]:
# Keep only the first ten characters of each "date" string (the YYYY-MM-DD part
# of the ISO timestamp) so that rows can be grouped per calendar day.
# The vectorised `.str` slice replaces a Python loop of label-based lookups
# (`sentiment_df[ "date"][ i]`), which only worked with a default RangeIndex.
dates = sentiment_df[ "date"].str[ :10].tolist()

# Assigning a plain list is positional, so row order is preserved regardless of the index.
sentiment_df[ "published_date"] = dates

In [5]:
# Average every sentiment score over all rows that share the same published day.
# The result is indexed by "published_date".
sentiment_df_group_by_date = (
	sentiment_df
	.groupby( "published_date")[ [ "neg", "neu", "pos", "compound"]]
	.mean()
)

In [6]:
# Preview the first five daily averages.
sentiment_df_group_by_date.head( n=5)

Unnamed: 0_level_0,neg,neu,pos,compound
published_date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
2020-08-01,0.071105,0.857632,0.071263,-0.038842
2020-08-02,0.047577,0.847115,0.105269,0.264081
2020-08-03,0.07006,0.841299,0.088701,0.061833
2020-08-04,0.052134,0.851244,0.096622,0.175166
2020-08-05,0.070892,0.839919,0.089257,0.079282


##### Function for merging historical data with sentiment scores

In [7]:
def merge_historical_with_sentiment( historical, sentiment):
	"""Left-join daily sentiment scores onto a table of historical prices.

	Parameters
	----------
	historical : pandas.DataFrame
		Price data with a string "Date" column and "Open", "High", "Low",
		"Close" and "Volume" columns. The first ten characters of "Date"
		(e.g. "2020-08-03") are used as the join key. The frame is not modified.
	sentiment : pandas.DataFrame
		Sentiment scores with "neg", "neu", "pos" and "compound" columns,
		keyed by "published_date" (a column or an index level of that name).

	Returns
	-------
	pandas.DataFrame
		The merged rows with columns "Open", "High", "Low", "Close", "Volume",
		"Neg", "Neu", "Pos" and "Compound", indexed by the day string under the
		index name "Date". Because the join is a left join, days without a
		matching sentiment row are kept with missing sentiment values.
	"""
	# Build the join key on a new frame instead of writing a "day" column into
	# the caller's DataFrame. The vectorised `.str` slice also works for any
	# index, whereas label lookups such as `historical[ "Date"][ i]` require a
	# default RangeIndex.
	prices = historical.assign( day=historical[ "Date"].str[ :10])

	merged = pd.merge(
		left=prices,
		right=sentiment,
		how="left",
		left_on="day",
		right_on="published_date"
	)

	# Source column -> output column, in output order.
	column_names = {
		"Open": "Open",
		"High": "High",
		"Low": "Low",
		"Close": "Close",
		"Volume": "Volume",
		"neg": "Neg",
		"neu": "Neu",
		"pos": "Pos",
		"compound": "Compound"
	}

	new_df = merged[ list( column_names)].rename( columns=column_names)
	new_df.index = merged[ "day"]
	new_df.index.name = "Date"

	return new_df

##### Prepare S&P 500

In [8]:
# Load the S&P 500 historical price table.
sp500_data = pd.read_csv(
	filepath_or_buffer="data/input/sp500.csv"
)

In [9]:
# Attach the per-day average sentiment scores to the S&P 500 prices.
sp500_with_sentiment = merge_historical_with_sentiment(
	historical=sp500_data,
	sentiment=sentiment_df_group_by_date
)

In [10]:
# Preview the first five merged S&P 500 rows.
sp500_with_sentiment.head( n=5)

Unnamed: 0_level_0,Open,High,Low,Close,Volume,Neg,Neu,Pos,Compound
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
2020-08-03,3288.26001,3302.72998,3284.530029,3294.610107,4438940000,0.07006,0.841299,0.088701,0.061833
2020-08-04,3289.919922,3306.840088,3286.370117,3306.51001,4414380000,0.052134,0.851244,0.096622,0.175166
2020-08-05,3317.370117,3330.77002,3317.370117,3327.77002,4535060000,0.070892,0.839919,0.089257,0.079282
2020-08-06,3323.169922,3351.030029,3318.139893,3349.159912,4278930000,0.076577,0.815756,0.107628,0.156222
2020-08-07,3340.050049,3352.540039,3328.719971,3351.280029,4110030000,0.072143,0.843195,0.084597,0.079914


In [11]:
# Persist the merged S&P 500 table (the "Date" index is written as the first column).
sp500_with_sentiment.to_csv(
	path_or_buf="data/result/sp500-with-sentiment.csv"
)

##### Prepare DJIA

In [12]:
# Load the DJIA historical price table.
djia_data = pd.read_csv(
	filepath_or_buffer="data/input/djia.csv"
)

In [13]:
# Attach the per-day average sentiment scores to the DJIA prices.
djia_with_sentiment = merge_historical_with_sentiment(
	historical=djia_data,
	sentiment=sentiment_df_group_by_date
)

In [14]:
# Preview the first five merged DJIA rows.
djia_with_sentiment.head( n=5)

Unnamed: 0_level_0,Open,High,Low,Close,Volume,Neg,Neu,Pos,Compound
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
2020-08-03,26542.320312,26707.259766,26534.380859,26664.400391,439380000,0.07006,0.841299,0.088701,0.061833
2020-08-04,26664.609375,26832.720703,26597.820312,26828.470703,346350000,0.052134,0.851244,0.096622,0.175166
2020-08-05,26924.779297,27221.669922,26924.779297,27201.519531,366000000,0.070892,0.839919,0.089257,0.079282
2020-08-06,27170.820312,27394.099609,27145.25,27386.980469,322460000,0.076577,0.815756,0.107628,0.156222
2020-08-07,27321.679688,27456.240234,27223.550781,27433.480469,321170000,0.072143,0.843195,0.084597,0.079914


In [15]:
# Persist the merged DJIA table (the "Date" index is written as the first column).
djia_with_sentiment.to_csv(
	path_or_buf="data/result/djia-with-sentiment.csv"
)

##### Prepare NYSE

In [16]:
# Load the NYSE historical price table.
nyse_data = pd.read_csv(
	filepath_or_buffer="data/input/nyse.csv"
)

In [17]:
# Attach the per-day average sentiment scores to the NYSE prices.
nyse_with_sentiment = merge_historical_with_sentiment(
	historical=nyse_data,
	sentiment=sentiment_df_group_by_date
)

In [18]:
# Preview the first five merged NYSE rows.
nyse_with_sentiment.head( n=5)

Unnamed: 0_level_0,Open,High,Low,Close,Volume,Neg,Neu,Pos,Compound
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
2020-08-03,12513.209961,12562.269531,12470.469727,12536.80957,4438940000,0.07006,0.841299,0.088701,0.061833
2020-08-04,12548.929688,12612.469727,12532.610352,12612.110352,4414380000,0.052134,0.851244,0.096622,0.175166
2020-08-05,12612.089844,12751.360352,12612.089844,12731.549805,4535060000,0.070892,0.839919,0.089257,0.079282
2020-08-06,12702.879883,12731.290039,12659.30957,12729.169922,4278930000,0.076577,0.815756,0.107628,0.156222
2020-08-07,12655.700195,12766.049805,12650.679688,12765.839844,4110030000,0.072143,0.843195,0.084597,0.079914


In [19]:
# Persist the merged NYSE table (the "Date" index is written as the first column).
nyse_with_sentiment.to_csv(
	path_or_buf="data/result/nyse-with-sentiment.csv"
)

##### Prepare NASDAQ

In [20]:
# Load the NASDAQ historical price table.
nasdaq_data = pd.read_csv(
	filepath_or_buffer="data/input/nasdaq.csv"
)

In [21]:
# Attach the per-day average sentiment scores to the NASDAQ prices.
nasdaq_with_sentiment = merge_historical_with_sentiment(
	historical=nasdaq_data,
	sentiment=sentiment_df_group_by_date
)

In [22]:
# Preview the first five merged NASDAQ rows.
nasdaq_with_sentiment.head( n=5)

Unnamed: 0_level_0,Open,High,Low,Close,Volume,Neg,Neu,Pos,Compound
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
2020-08-03,10848.639648,10927.55957,10831.150391,10902.799805,4203890000,0.07006,0.841299,0.088701,0.061833
2020-08-04,10897.889648,10941.910156,10852.900391,10941.169922,4016520000,0.052134,0.851244,0.096622,0.175166
2020-08-05,10967.870117,11002.110352,10943.719727,10998.400391,4160740000,0.070892,0.839919,0.089257,0.079282
2020-08-06,10989.980469,11121.19043,10963.410156,11108.070312,4120000000,0.076577,0.815756,0.107628,0.156222
2020-08-07,11072.530273,11126.040039,10920.370117,11010.980469,4263930000,0.072143,0.843195,0.084597,0.079914


In [23]:
# Persist the merged NASDAQ table under its own file name.
# The path previously pointed at "djia-with-sentiment.csv", which overwrote the
# DJIA result with NASDAQ data and never produced a NASDAQ file.
nasdaq_with_sentiment.to_csv( "data/result/nasdaq-with-sentiment.csv")