## Potential sentiment-time analyses plots

Next, we analyze how the sentiment of quotes changes over time. To make the result easier to read and interpret (unlike plot 1), we apply low-pass smoothing so that the underlying trends are easier to observe.

In [None]:
def getPartOfDataFrame(df, start_date, end_date):
    """Return the rows of ``df`` whose ``date`` lies within a closed interval.

    Args:
        df: DataFrame with a ``date`` column that can be compared against
            ``start_date`` and ``end_date``.
        start_date: Lower bound (inclusive).
        end_date: Upper bound (inclusive).

    Returns:
        The subset of ``df`` with ``start_date <= date <= end_date``; the
        original index labels are kept.
    """
    # Both conditions are combined into one mask. Chaining two selections
    # (df[mask1][mask2]) applies a full-length mask to an already filtered
    # frame, which makes pandas reindex the mask and emit a UserWarning.
    in_range = (df['date'] >= start_date) & (df['date'] <= end_date)
    return df[in_range]

def sentimentAcrossDates(df, start_date='2019-01-01', end_date='2019-12-31'):
    """Compute the mean sentiment for every distinct date in a date range.

    Args:
        df: DataFrame with ``date`` and ``sentiment`` columns.
        start_date: Lower bound of the range (inclusive).
        end_date: Upper bound of the range (inclusive).

    Returns:
        A tuple ``(dates, avgSentiments)`` of two equally long 1-D arrays:
        the distinct dates in ascending order and the mean of ``sentiment``
        over all rows sharing that date.
    """
    df_dates = getPartOfDataFrame(df, start_date, end_date)

    # One grouped aggregation instead of one full-frame query per date.
    # Only the numeric 'sentiment' column is averaged, so the result does not
    # depend on pandas dropping the non-numeric 'date' column on its own.
    # groupby sorts the keys, giving the ascending order np.unique produced.
    daily_mean = df_dates.groupby('date')['sentiment'].mean()

    return daily_mean.index.to_numpy(), daily_mean.to_numpy()

def smooth_filter(y, l=51):
    """Low-pass filter ``y`` with a uniform (box) kernel of ``l`` taps.

    Args:
        y: 1-D sequence of samples.
        l: Number of taps in the averaging kernel; every tap weighs ``1/l``.

    Returns:
        The result of ``np.convolve(..., mode='same')`` of ``y`` with the
        kernel.
    """
    box_kernel = np.full(l, 1.0 / l)
    return np.convolve(y, box_kernel, mode='same')

def moving_average(y, L_=200):
    """Average ``y`` over a trailing window of ``L_`` samples.

    For every ``s`` in ``range(L_, len(y))`` the mean of ``y[s-L_:s]`` is
    taken, so the result holds ``len(y) - L_`` values (none when ``y`` has at
    most ``L_`` samples).

    Args:
        y: Sliceable sequence of samples.
        L_: Window length in samples.

    Returns:
        A list with one window mean per position.
    """
    return [np.mean(y[stop - L_:stop]) for stop in range(L_, len(y))]

def plotSentimentTimeseries(df, start_date, end_date):
    """Plot the daily mean sentiment between two dates in three stacked panels.

    Panel 0 shows the unfiltered daily means, panel 1 the series after one
    pass of ``smooth_filter`` (51 taps) and panel 2 after a second pass
    (21 taps). Panels 1 and 2 also show the filtered series with its own mean
    subtracted. Every panel has a horizontal reference line at zero.

    Args:
        df: DataFrame passed on to ``sentimentAcrossDates``.
        start_date: First date of the plotted range (inclusive).
        end_date: Last date of the plotted range (inclusive).

    Returns:
        None. The figure is displayed with ``plt.show()``.
    """
    dates, unfiltered = sentimentAcrossDates(df, start_date=start_date, end_date=end_date)
    filtered_once = smooth_filter(unfiltered, 51)
    filtered_twice = smooth_filter(filtered_once, 21)

    # Up to 12 evenly spread tick positions, computed with integer arithmetic.
    # np.arange with a fractional step and dtype=int truncates the step, so
    # the ticks drift and all collapse to 0 when there are fewer than 12 dates.
    n_ticks = min(12, len(dates))
    xtick_pos = (np.arange(n_ticks) * len(dates)) // max(n_ticks, 1)

    fig, axs = plt.subplots(3, figsize=(12,8), sharex=True)

    axs[0].plot(np.arange(len(unfiltered)), unfiltered, '--.')
    axs[0].axhline(color='grey', linestyle='--')

    axs[1].plot(np.arange(len(filtered_once)), filtered_once)
    axs[1].plot(np.arange(len(filtered_once)), filtered_once-np.mean(filtered_once), '-')
    axs[1].axhline(color='grey', linestyle='--')
    axs[1].set_ylabel("Average sentiment", size=12)

    axs[2].plot(np.arange(len(filtered_twice)), filtered_twice, label='Lowpass')
    axs[2].plot(np.arange(len(filtered_twice)), filtered_twice-np.mean(filtered_twice), label='Lowpass with zero mean')
    axs[2].axhline(color='grey', linestyle='--', label='Neutral sentiment')

    # The pyplot calls below act on the current axes, i.e. the bottom panel;
    # the x axis is shared, so its ticks apply to all three panels.
    plt.xlabel("Date", size=12)
    plt.xticks(xtick_pos, dates[xtick_pos], rotation=45)
    plt.legend(bbox_to_anchor=(1,1.45), loc="lower left")
    fig.suptitle(f"Daily sentiment of quotes between {start_date} and {end_date} \n(w. smoothness filter)", size=20)
    plt.show()

In [None]:
# Plot the sentiment time series over the complete span of dates found in df.
earliest_date, latest_date = df.date.min(), df.date.max()
plotSentimentTimeseries(df, earliest_date, latest_date)
print(f"Dates range from {earliest_date} to {latest_date} !")

In [None]:
# Restrict the data to a fixed window; df_dates holds the rows inside it.
start_date, end_date = '2015-10-15', '2016-12-16'
df_dates = getPartOfDataFrame(df, start_date, end_date)

print(f"Dates range from {start_date} to {end_date} !")

In [None]:
# Find the row(s) with the lowest sentiment inside the selected window and
# take the speaker ID of the first of them.
min_score_idx = df_dates[df_dates.sentiment == np.min(df_dates.sentiment)].index
negative_speaker_ID = df_dates.loc[min_score_idx, 'shared_ID'].iloc[0]

# Build the speaker mask from df_dates itself. A mask derived from the full
# df has a different index than df_dates, which forces pandas to reindex it
# (with a UserWarning) before it can be applied.
is_negative_speaker = df_dates.shared_ID == negative_speaker_ID
speaker_sentiment = df_dates.sentiment[is_negative_speaker]

min_mean = np.mean(speaker_sentiment)
min_std = np.std(speaker_sentiment)

# "\\pm" prints the same text as before without the invalid "\p" escape.
print(f"Sentiment associated with speaker ID with minimum (single) sentiment:\n\n {min_mean} \\pm {min_std}")

# Last expression of the cell: show all quotes of that speaker in the window.
df_dates[is_negative_speaker]