# Get All Data

In [1]:
# Input locations, relative to the directory the notebook is run from.
# Directory that is globbed for the NASDAQ *.csv files.
local_dir_nasdaq = 'input_data/nasdaq'
# Directory that holds the saved flatten_data.csv table.
local_dir_data_tables = 'input_data/saved_data_tables'

In [2]:
import glob
from datetime import timedelta, datetime

import numpy as np
import pandas as pd
from bokeh.io import show, output_notebook, push_notebook
from bokeh.models import Range1d, LinearAxis
from bokeh.plotting import figure
from ipywidgets import interact

# Read the saved per-submission table and parse both timestamp columns
# ('YYYY-MM-DD HH:MM:SS' strings) into datetimes in a single chained expression.
flatten_data = pd.read_csv(
    f'{local_dir_data_tables}/flatten_data.csv', index_col=None, header=0
).assign(
    created_utc=lambda frame: pd.to_datetime(frame['created_utc'], format='%Y-%m-%d %H:%M:%S'),
    update_dt=lambda frame: pd.to_datetime(frame['update_dt'], format='%Y-%m-%d %H:%M:%S'),
)

## Group Data by Ticker and Date

In [3]:
# Collapse the per-submission rows into one row per (calendar day, ticker).
# The day key is created_utc formatted as e.g. '23 Nov 2021'. Per group this keeps:
#   - the mean of the positive / neutral / negative / compound columns,
#   - the group size under 'id' (renamed to a submission count further down),
#   - the first 'ticker' and the first 'created_utc' value seen in the group.
# NOTE(review): 'ticker' is both a grouping key and an aggregated column with
# as_index=False; how pandas resolves that has varied between releases — confirm
# on the pandas version in use before changing this statement.
grouped_by_ticker_and_date = flatten_data.groupby([flatten_data.created_utc.dt.strftime('%d %b %Y'), 'ticker'],
                                                  as_index=False).agg(
    {'positive': 'mean', 'neutral': 'mean', 'negative': 'mean', 'compound': 'mean', 'id': 'size', 'ticker': 'first', 'created_utc': 'first'})
# Reduce the retained timestamp to a 'YYYY-MM-DD' string (drops the time of day).
grouped_by_ticker_and_date['created_utc'] = grouped_by_ticker_and_date['created_utc'].dt.strftime('%Y-%m-%d')

# Load every NASDAQ CSV in the input directory into one long frame.
path_of_nasdaq_csv_files = local_dir_nasdaq
# glob returns matches in arbitrary (filesystem) order; sorting makes the row
# order of the concatenated frame reproducible between runs and machines.
all_files = sorted(glob.glob(path_of_nasdaq_csv_files + "/*.csv"))

if not all_files:
    # pd.concat([]) would otherwise fail with the opaque "No objects to concatenate".
    raise FileNotFoundError(f'No NASDAQ CSV files found in {path_of_nasdaq_csv_files!r}')

frames = [pd.read_csv(csv_path, index_col=None, header=0) for csv_path in all_files]

all_nasdaq_files_df = pd.concat(frames, axis=0, ignore_index=True)

# Give the aggregated columns descriptive names. ('lastsale' is not renamed here:
# it only exists after the merge with the NASDAQ data below.)
grouped_by_ticker_and_date = grouped_by_ticker_and_date.rename(
    columns={'positive': 'average_positive', 'negative': 'average_negative', 'neutral': 'average_neutral',
             'compound': 'average_compound', 'id': 'number_of_text_submissions', 'created_utc': 'date'})

# Attach the quote for the same symbol and day. The inner join drops any
# (ticker, date) pair without a matching NASDAQ row; the join keys coming from
# the NASDAQ side are redundant afterwards and are removed.
grouped_by_ticker_and_date = (
    grouped_by_ticker_and_date
    .merge(all_nasdaq_files_df[['symbol', 'update_dt', 'lastsale']], how='inner',
           left_on=['ticker', 'date'], right_on=['symbol', 'update_dt'])
    .drop(columns=['update_dt', 'symbol'])
    .rename(columns={'lastsale': 'end_of_day_stock_price'})
)

# Strip the currency sign before converting to float. regex=False is required:
# as a regular expression '$' is the end-of-string anchor, so on pandas >= 2.0
# regex=True removes nothing and the float conversion fails on the '$'.
grouped_by_ticker_and_date['end_of_day_stock_price'] = (
    grouped_by_ticker_and_date['end_of_day_stock_price'].str.replace('$', '', regex=False).astype(float)
)

grouped_by_ticker_and_date['date'] = pd.to_datetime(grouped_by_ticker_and_date['date'])

# Keep only tickers that appear on more than one day, ordered chronologically.
# sort_values returns a new frame, which avoids sorting a .loc slice in place.
grouped_by_ticker_and_date = grouped_by_ticker_and_date.loc[
    grouped_by_ticker_and_date.duplicated(subset='ticker', keep=False), :
].sort_values(by='date')

# Distinct tickers in order of first appearance; used as the dropdown options.
all_unique_tickers_with_at_least_one_submissions = grouped_by_ticker_and_date['ticker'].unique()

## Add Function to get Ticker Data

In [4]:
def get_ticker_data(ticker):
    """Return the plotting series and value ranges for a single ticker.

    The rows of ``grouped_by_ticker_and_date`` whose ``ticker`` column equals
    ``ticker`` are selected once and every returned value is derived from that
    selection.

    Args:
        ticker: Symbol to look up in the ``ticker`` column.

    Returns:
        A 10-tuple, in this order:
        dates, average positive scores, average negative scores, average
        neutral scores, average compound scores, end-of-day prices (all lists),
        followed by the smallest and largest value across the four sentiment
        columns, and the smallest and largest end-of-day price.
    """
    # Filter once instead of re-evaluating the same boolean mask for every field.
    ticker_rows = grouped_by_ticker_and_date.loc[grouped_by_ticker_and_date['ticker'] == ticker]
    sentiment_scores = ticker_rows[
        ['average_positive', 'average_negative', 'average_neutral', 'average_compound']]
    prices = ticker_rows['end_of_day_stock_price']

    return (
        ticker_rows['date'].tolist(),
        ticker_rows['average_positive'].tolist(),
        ticker_rows['average_negative'].tolist(),
        ticker_rows['average_neutral'].tolist(),
        ticker_rows['average_compound'].tolist(),
        prices.tolist(),
        # Column-wise extreme first, then the extreme across the four columns.
        sentiment_scores.min().min(),
        sentiment_scores.max().max(),
        prices.min(),
        prices.max(),
    )

## Initialize Bokeh Chart

In [5]:
# Build the figure once for the default ticker; update() later swaps the data
# of the renderers created here.
# NOTE(review): `plot_width` is the older Bokeh spelling of `width` — confirm it
# is still accepted by the installed Bokeh release before upgrading.
p = figure(x_axis_type='datetime', x_axis_label='Date', y_axis_label='Price', plot_width=900)

(date_axis, average_positive, average_negative, average_neutral, average_compound,
 end_of_day_prices, min_sentiment, max_sentiment, min_price, max_price) = get_ticker_data('TSLA')

# Secondary y-range for the sentiment scores, registered before any glyph
# refers to it through y_range_name.
p.extra_y_ranges['sentiment'] = Range1d(
    start=min_sentiment,
    end=max_sentiment)

# Every calendar day spanned by the sentiment data, as 'YYYY-MM-DD' strings,
# minus the one date the notebook has always excluded from the price series.
sdate = min(date_axis)
edate = max(date_axis)
all_dates_for_stock_prices = [
    day for day in pd.date_range(sdate, edate, freq='d').strftime('%Y-%m-%d')
    if day != '2021-11-23'
]

# One quote per day: a single filtered selection instead of a full-frame scan
# per date. Days without a TSLA row are simply absent rather than raising
# IndexError; the first row wins if a day occurs more than once.
tsla_quotes = (
    all_nasdaq_files_df
    .loc[(all_nasdaq_files_df['symbol'] == 'TSLA')
         & all_nasdaq_files_df['update_dt'].isin(all_dates_for_stock_prices),
         ['update_dt', 'lastsale']]
    .drop_duplicates(subset='update_dt', keep='first')
    .sort_values(by='update_dt')
)
# regex=False: as a regular expression '$' is the end-of-string anchor and would
# leave the currency sign in place on pandas >= 2.0.
list_of_stock_prices = tsla_quotes['lastsale'].str.replace('$', '', regex=False).astype(float).tolist()
timestamps_for_stocks = pd.to_datetime(tsla_quotes['update_dt'], format='%Y-%m-%d').tolist()


def _add_line_with_markers(x_values, y_values, label, color, **glyph_options):
    """Draw one series as a line plus circle markers sharing a legend entry."""
    line_renderer = p.line(x_values, y_values, legend_label=label, line_width=2, color=color,
                           **glyph_options)
    marker_renderer = p.circle(x_values, y_values, legend_label=label, line_width=2, color=color,
                               **glyph_options)
    return line_renderer, marker_renderer


average_positive_line, average_positive_scatter = _add_line_with_markers(
    date_axis, average_positive, "Daily's Average Positive Sentiment Score", 'green',
    y_range_name='sentiment')
average_negative_line, average_negative_scatter = _add_line_with_markers(
    date_axis, average_negative, "Daily's Average Negative Sentiment Score", 'red',
    y_range_name='sentiment')
average_neutral_line, average_neutral_scatter = _add_line_with_markers(
    date_axis, average_neutral, "Daily's Average Neutral Sentiment Score", 'gray',
    y_range_name='sentiment')
average_compound_line, average_compound_scatter = _add_line_with_markers(
    date_axis, average_compound, "Daily's Average Compound Sentiment Score", 'yellow',
    y_range_name='sentiment')
# The price series stays on the figure's default (left-hand) y-range.
price_line, price_scatter = _add_line_with_markers(
    timestamps_for_stocks, list_of_stock_prices, 'Stock Price', 'blue')

p.legend.location = "top_left"
# Clicking a legend entry toggles the visibility of its glyphs.
p.legend.click_policy = "hide"

# Right-hand axis bound to the sentiment range.
p.add_layout(LinearAxis(y_range_name="sentiment", axis_label='Sentiment Percent'), 'right')

## Add Update Function for Chart Based on Selection and Enable Interaction

In [6]:
def update(ticker='TSLA'):
    """Redraw the chart for ``ticker`` and push the change to the notebook.

    Fetches the ticker's sentiment series via ``get_ticker_data``, rebuilds its
    daily price series from ``all_nasdaq_files_df``, rescales the sentiment
    axis, replaces the data of every renderer and calls ``push_notebook()``.

    Args:
        ticker: Symbol to display. It must have at least one row in
            ``grouped_by_ticker_and_date``; otherwise ``min()`` on the empty
            date list raises ``ValueError``.
    """
    # Prices and price bounds from get_ticker_data are not needed here: the
    # price series is rebuilt below for every available day in the date span.
    (date_axis, average_positive, average_negative, average_neutral, average_compound,
     _, min_sentiment, max_sentiment, _, _) = get_ticker_data(ticker)

    # Every calendar day spanned by the sentiment data, as 'YYYY-MM-DD' strings,
    # minus the one date the notebook has always excluded from the price series.
    wanted_dates = [
        day for day in pd.date_range(min(date_axis), max(date_axis), freq='d').strftime('%Y-%m-%d')
        if day != '2021-11-23'
    ]

    # One quote per day from a single filtered selection. Days without a row for
    # this ticker are skipped instead of raising IndexError; the first row wins
    # if a day occurs more than once.
    quotes = (
        all_nasdaq_files_df
        .loc[(all_nasdaq_files_df['symbol'] == ticker)
             & all_nasdaq_files_df['update_dt'].isin(wanted_dates),
             ['update_dt', 'lastsale']]
        .drop_duplicates(subset='update_dt', keep='first')
        .sort_values(by='update_dt')
    )
    # regex=False: as a regular expression '$' is the end-of-string anchor and
    # would leave the currency sign in place on pandas >= 2.0.
    list_of_stock_prices = quotes['lastsale'].str.replace('$', '', regex=False).astype(float).tolist()
    timestamps_for_stocks = pd.to_datetime(quotes['update_dt'], format='%Y-%m-%d').tolist()

    sentiment_range = p.extra_y_ranges['sentiment']
    sentiment_range.start = min_sentiment
    sentiment_range.end = max_sentiment

    # Replace each source's data dict in one assignment so x and y always change
    # together (one update per renderer, never mismatched column lengths).
    sentiment_renderers = (
        (average_positive_line, average_positive_scatter, average_positive),
        (average_negative_line, average_negative_scatter, average_negative),
        (average_neutral_line, average_neutral_scatter, average_neutral),
        (average_compound_line, average_compound_scatter, average_compound),
    )
    for line_renderer, scatter_renderer, scores in sentiment_renderers:
        line_renderer.data_source.data = {'x': date_axis, 'y': scores}
        scatter_renderer.data_source.data = {'x': date_axis, 'y': scores}

    price_line.data_source.data = {'x': timestamps_for_stocks, 'y': list_of_stock_prices}
    price_scatter.data_source.data = {'x': timestamps_for_stocks, 'y': list_of_stock_prices}

    push_notebook()

## Show Chart of Stock Price and the Daily Average Positive, Neutral, Negative, and Compound Sentiment Scores for All Text Submissions

## Pan the Chart and Click a Legend Entry to Hide/Show Its Line

In [7]:
# Route Bokeh output to the notebook.
output_notebook()
# notebook_handle=True is what allows the push_notebook() call in update()
# to refresh this already-rendered chart.
show(p, notebook_handle=True)
# Dropdown of tickers wired to update(). The trailing semicolon keeps the cell
# from echoing the `<function update>` repr underneath the widget.
interact(update, ticker=all_unique_tickers_with_at_least_one_submissions);

interactive(children=(Dropdown(description='ticker', index=26, options=('IP', 'PLUG', 'PLTR', 'PATH', 'OI', 'O…

<function __main__.update(ticker='TSLA')>