In [1]:
import pandas as pd
import numpy as np

# Load data

In [2]:
# Directory that holds the exported CSV snapshots read by this notebook.
outdir = 'output/backup4'

In [3]:
# Timestamp layout used for every date-like column parsed in this cell.
_TS_FORMAT = '%Y-%m-%d %H:%M:%S'


def _read_table(name):
    """Read ``<outdir>/<name>.csv`` using its first row as the header."""
    return pd.read_csv(f'{outdir}/{name}.csv', index_col=None, header=0)


def _parse_timestamps(frame, columns):
    """Replace the listed columns of ``frame`` with their parsed datetimes."""
    for column_name in columns:
        frame[column_name] = pd.to_datetime(frame[column_name], format=_TS_FORMAT)


# Raw tables, read in the same order as before.
author_data = _read_table('author_data')
comment_data = _read_table('comment_data')
submission_data = _read_table('submission_data')
text_data = _read_table('text_data')
sentiment_data = _read_table('sentiment_data')
ticker_data = _read_table('ticker_data')
submission_comment_data = _read_table('submission_comment_data')
flatten_data = _read_table('flatten_data')

# Tables that carry both a creation and an update timestamp.
_parse_timestamps(author_data, ('created_utc', 'update_dt'))
_parse_timestamps(comment_data, ('created_utc', 'update_dt'))
_parse_timestamps(submission_data, ('created_utc', 'update_dt'))

# text_data only has its update timestamp parsed.
_parse_timestamps(text_data, ('update_dt',))

_parse_timestamps(submission_comment_data, ('created_utc', 'update_dt'))
_parse_timestamps(flatten_data, ('created_utc', 'update_dt'))

# Merged sentiment/price table: parse 'Date', then express it in New York time.
# NOTE(review): tz_convert needs tz-aware values — confirm the CSV stores UTC offsets.
sentiment_price_data = _read_table('sentiment_price_data')
_parse_timestamps(sentiment_price_data, ('Date',))
sentiment_price_data['Date'] = sentiment_price_data['Date'].dt.tz_convert('America/New_York')



In [4]:
# Sorted, de-duplicated ticker symbols found in the merged sentiment/price table.
ticker_column = sentiment_price_data['Ticker']
reddit_tickers = list(np.unique(ticker_column))
print(len(reddit_tickers))

1098


# Build the plot

In [5]:
from ipywidgets import interact, interactive
import ipywidgets as widgets
import numpy as np

from bokeh.io import push_notebook, show, output_notebook
from bokeh.plotting import figure
from bokeh.models import ColumnDataSource, FactorRange

# Tell Bokeh to render figures inline in the notebook's output cells.
output_notebook()

In [6]:
def get_dataset_ticker_sentiment(src, ticker = 'NVDA'):
    """Build a Bokeh data source holding the price and sentiment rows of one ticker.

    Parameters
    ----------
    src : pandas.DataFrame
        Table providing the columns ``Ticker``, ``Date``, ``Close``,
        ``sent_negative``, ``sent_neutral``, ``sent_positive`` and ``mentions``.
    ticker : str
        Symbol to select; rows are kept when ``src['Ticker'] == ticker``.

    Returns
    -------
    ColumnDataSource or None
        A source with the columns ``date``, ``close``, ``negative``,
        ``neutral``, ``positive`` and ``mentions``, or ``None`` (after printing
        ``'ticker not found!'``) when ``src`` has no row for ``ticker``.
    """
    filtered_data = src[src['Ticker'] == ticker]

    # Decide from the frame that was actually passed in instead of a
    # module-level ticker list, so the function has no hidden dependency on
    # notebook state and never returns an empty source for a "known" ticker.
    if filtered_data.empty:
        print('ticker not found!')
        return None

    print('found ticker')

    dates = np.array(filtered_data['Date'])
    return ColumnDataSource(data=dict(date=dates,
                                      close=filtered_data['Close'],
                                      negative=filtered_data['sent_negative'],
                                      neutral=filtered_data['sent_neutral'],
                                      positive=filtered_data['sent_positive'],
                                      mentions=filtered_data['mentions']))


In [7]:
import numpy as np
from datetime import timedelta

from bokeh.layouts import column
from bokeh.models import ColumnDataSource, RangeTool
from bokeh.plotting import figure, show
# from bokeh.sampledata.stocks import AAPL
from bokeh.models import LinearAxis, Range1d

def make_ticker_plot(source):
    """Lay out a price/mentions chart above a range-selector strip.

    The upper figure draws ``close`` against ``date`` as a line on the left
    axis (labelled "Price") and stacks the ``positive``, ``negative`` and
    ``neutral`` columns as bars on a secondary right-hand axis (range name
    ``"y2"``, labelled "Mentions"). The lower figure repeats the close line
    and carries a ``RangeTool`` bound to the upper figure's x-range.

    Parameters
    ----------
    source : ColumnDataSource
        Must provide the columns ``date``, ``close``, ``mentions``,
        ``positive``, ``negative`` and ``neutral``.

    Returns
    -------
    The ``column`` layout whose first child is the main figure and whose
    second child is the selector figure.
    """
    columns = source.data

    stack_keys = ['positive', 'negative', 'neutral']
    stack_palette = ["#718dbf", "#e84d60", "#c9d9d3"]

    # Initial axis extents are taken from the data currently in the source.
    first_date, last_date = np.min(columns['date']), np.max(columns['date'])
    lowest_close, highest_close = np.min(columns['close']), np.max(columns['close'])
    peak_mentions = np.max(columns['mentions'])

    main_fig = figure(
        height=300,
        width=800,
        tools="xpan",
        toolbar_location=None,
        x_axis_type="datetime",
        x_axis_location="above",
        background_fill_color="#efefef",
        x_range=(first_date, last_date),
        y_range=(lowest_close, highest_close),
    )
    main_fig.yaxis.axis_label = 'Price'
    main_fig.line('date', 'close', source=source)

    print(peak_mentions)

    # Secondary axis so mention counts do not share the price scale.
    main_fig.extra_y_ranges = {"y2": Range1d(start=0, end=peak_mentions)}
    mentions_axis = LinearAxis(y_range_name="y2", axis_label="Mentions")
    main_fig.add_layout(mentions_axis, 'right')

    main_fig.vbar_stack(
        stack_keys,
        x='date',
        width=timedelta(hours=1),
        color=stack_palette,
        source=source,
        legend_label=stack_keys,
        y_range_name="y2",
    )

    main_fig.legend.location = "top_left"
    main_fig.legend.orientation = "horizontal"

    # Overview strip: shares the main figure's y-range object.
    overview_fig = figure(
        title="Drag the middle and edges of the selection box to change the range above",
        height=130,
        width=800,
        y_range=main_fig.y_range,
        x_axis_type="datetime",
        y_axis_type=None,
        tools="",
        toolbar_location=None,
        background_fill_color="#efefef",
    )

    # The range tool drives the main figure's x-range.
    brush = RangeTool(x_range=main_fig.x_range)
    brush.overlay.fill_color = "navy"
    brush.overlay.fill_alpha = 0.2

    overview_fig.line('date', 'close', source=source)
    overview_fig.ygrid.grid_line_color = None
    overview_fig.add_tools(brush)
    overview_fig.toolbar.active_multi = brush

    return column(main_fig, overview_fig)


In [8]:
def update_plot(ticker = 'NVDA'):
    """Refresh the displayed chart so it shows ``ticker``.

    Looks the ticker up in the module-level ``sentiment_price_data`` table,
    rescales the price axis and the ``'y2'`` mentions axis of the main figure
    (``plot2.children[0]``), copies the new columns into the module-level
    ``source2`` and pushes the change to the notebook.

    Parameters
    ----------
    ticker : str
        Symbol to display. When no data is available for it (for example the
        empty or partially typed text of the ticker input box) the current
        plot is left unchanged.

    Returns
    -------
    None
    """
    src = get_dataset_ticker_sentiment(sentiment_price_data, ticker)

    # The lookup yields None for an unknown symbol; dereferencing it would
    # raise AttributeError on every keystroke that is not a full ticker.
    if src is None:
        return

    data = src.data

    min_price = np.min(data['close'])
    max_price = np.max(data['close'])
    max_mentions = np.max(data['mentions'])

    print(max_mentions)

    # First child of the layout is the main price/mentions figure.
    f = plot2.children[0]

    f.y_range.start = min_price
    f.y_range.end   = max_price

    f.extra_y_ranges['y2'].start = 0 #new secondary axis min
    f.extra_y_ranges['y2'].end = max_mentions #new secondary axis max

    source2.data.update(src.data)
    push_notebook()

In [9]:
from ipywidgets import Layout, Button, Box, FloatText, Textarea, Dropdown, Label, IntSlider, Combobox

# Flex row layout applied to each label/input pair of the form.
form_item_layout = Layout(display='flex', flex_flow='row', justify_content='space-between')

# Free-text input offering the known tickers as suggestions.
r_t = Combobox(options=reddit_tickers)

form_items = [
    Box(children=[Label(value='Ticker'), r_t], layout=form_item_layout),
]

# Bordered column that stacks the form rows.
form = Box(
    children=form_items,
    layout=Layout(
        display='flex',
        flex_flow='column',
        border='solid 2px',
        align_items='stretch',
        width='50%',
    ),
)



In [10]:
# Seed the shared data source with the function's default ticker ('NVDA').
# update_plot later writes new columns into this same object.
source2 = get_dataset_ticker_sentiment(sentiment_price_data)

# Build the layout once; update_plot reaches the main figure via plot2.children[0].
plot2 = make_ticker_plot(source2)

# Call update_plot with the Combobox value as `ticker`; its output is collected in `out`.
out = widgets.interactive_output(update_plot, {'ticker': r_t})

# notebook_handle=True so that the push_notebook() call in update_plot can refresh this plot.
show(plot2, notebook_handle=True)
# Render the ticker form and the callback's output area below the plot.
display(form, out)

found ticker
116.0


Box(children=(Box(children=(Label(value='Ticker'), Combobox(value='', options=('AA', 'AAL', 'AAON', 'AAP', 'AA…

Output()