In [1]:
import pandas as pd
from pathlib import Path

In [8]:
sentiment_df = pd.DataFrame()

In [4]:
# read in tickers csv to create dictionary of tickers

file = Path('./tickers.csv')
tickers_df = pd.read_csv(file)
tickers_df.dropna(inplace=True)
tickers_df.sort_values(by=['End'])
tickers_df = tickers_df.iloc[0:720]
tickers_dict = tickers_df.set_index('Ticker').to_dict()['Name']

In [9]:
# read in sentiment csvs from api calls to create one dataframe. If calls resulted in no results, add in as zero value.

for key, value in tickers_dict.items():
    sentiments = []
    ticker_file = Path(f"./data_files/webhose_sentiments/{key}.csv")
    try:
        sentiment_df = sentiment_df.append(pd.read_csv(ticker_file))
        
    except:
        sentiments.append({
                "ticker": key,
                "compound": 0,
                "positive": 0,
                "negative": 0,
                "neutral": 0
            
        })
        sentiment_df = sentiment_df.append(pd.DataFrame(sentiments))

In [24]:
sentiment_df.head()

Unnamed: 0,compound,name,negative,neutral,positive,ticker
0,0.9789,REBGLO,0.007,0.89,0.103,REP
1,0.7518,REBGLO,0.074,0.83,0.097,REP
2,0.9982,REBGLO,0.086,0.787,0.127,REP
3,0.7518,REBGLO,0.074,0.83,0.097,REP
4,0.9844,REBGLO,0.014,0.841,0.145,REP


In [26]:
# set the index to the ticker and remove the 'name' column

sentiment_df = sentiment_df.set_index("ticker")
sentiment_df = sentiment_df.drop(columns=['name'])
sentiment_df.head()

Unnamed: 0_level_0,compound,name,negative,neutral,positive
ticker,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
REP,0.9789,REBGLO,0.007,0.89,0.103
REP,0.7518,REBGLO,0.074,0.83,0.097
REP,0.9982,REBGLO,0.086,0.787,0.127
REP,0.7518,REBGLO,0.074,0.83,0.097
REP,0.9844,REBGLO,0.014,0.841,0.145


In [28]:
# The average sentiment for each ticker

sentiment_avg = sentiment_df.groupby(['ticker']).mean()
sentiment_avg.head()

Unnamed: 0_level_0,compound,negative,neutral,positive
ticker,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
1ST,0.260376,0.02819,0.92276,0.04911
21MCoin,0.477738,0.03135,0.8962,0.07246
3DT,0.991483,0.060833,0.818167,0.120833
8BT,0.0,0.0,0.0,0.0
A2A,0.427912,0.02964,0.89717,0.07318


In [29]:
# The number of posts for each ticker

sentiment_count = sentiment_df.groupby(['ticker']).count()
sentiment_count.head()

Unnamed: 0_level_0,compound,negative,neutral,positive
ticker,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
1ST,100,100,100,100
21MCoin,100,100,100,100
3DT,6,6,6,6
8BT,1,1,1,1
A2A,100,100,100,100


In [30]:
sentiment_count = sentiment_count.drop(columns=['negative','neutral','positive'])
sentiment_count = sentiment_count.rename(columns={'compound':'no_of_posts'})
sentiment_count.head()

Unnamed: 0_level_0,no_of_posts
ticker,Unnamed: 1_level_1
1ST,100
21MCoin,100
3DT,6
8BT,1
A2A,100


In [46]:
# if the api call returned no results then the post count value should also be zero

for index in sentiment_count.iterrows():
    if sentiment_avg.loc[index[0]]['compound'] == 0.0000:
        sentiment_count.loc[index[0]] = 0

In [47]:
sentiment_count.head()

Unnamed: 0_level_0,no_of_posts
ticker,Unnamed: 1_level_1
1ST,100
21MCoin,100
3DT,6
8BT,0
A2A,100


In [51]:
# concantenate both dataframes into one

ticker_sentiments = pd.concat([sentiment_avg, sentiment_count], axis=1)
ticker_sentiments.head()

Unnamed: 0_level_0,compound,negative,neutral,positive,no_of_posts
ticker,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
1ST,0.260376,0.02819,0.92276,0.04911,100
21MCoin,0.477738,0.03135,0.8962,0.07246,100
3DT,0.991483,0.060833,0.818167,0.120833,6
8BT,0.0,0.0,0.0,0.0,0
A2A,0.427912,0.02964,0.89717,0.07318,100


In [52]:
len(ticker_sentiments)

703

In [53]:
# create a csv of the single dataframe

file = Path(f"./ticker_sentiments.csv")
ticker_sentiments.to_csv(file)