In [79]:
from bokeh.plotting import figure, output_file, show
from bokeh.models import Span
import pandas as pd

In [203]:
# Load the per-interval sentiment aggregates (one row per interval and entity).
got_senti = pd.read_csv('results/got_sentiment_noon_to_3am.csv')

# Parse the timestamps explicitly instead of through read_csv's `date_parser`
# hook (deprecated in pandas 2.0). The strings are expected to match the format
# below, i.e. to end in a timezone name; `utc=True` yields a tz-aware UTC series
# on every pandas version, so the conversions below never have to guess.
interval_utc = pd.to_datetime(got_senti['interval_start'],
                              format="%Y-%m-%d %H:%M:%S %Z", utc=True)

# Keep `interval_start` as naive UTC wall-clock time (datetime64[ns]) so the
# column has the same dtype the rest of the notebook was written against.
got_senti['interval_start'] = interval_utc.dt.tz_localize(None)

# Elapsed minutes relative to the first row, as floats.
# `.astype('timedelta64[m]')` is not used here: pandas >= 2.0 rejects minute
# resolution, and older versions silently truncated to whole minutes.
got_senti['minutes_since_start'] = (
    (got_senti['interval_start'] - got_senti['interval_start'].iloc[0]).dt.total_seconds() / 60
)

# Same instants expressed in US Eastern time; this is the x-axis column for the plots.
got_senti['est'] = interval_utc.dt.tz_convert('US/Eastern')

got_senti.head()

Unnamed: 0,interval_start,entity,low_senti,avg_senti,high_senti,num_datapoints,minutes_since_start,est
0,2019-05-20 00:35:00,Daenerys,-0.4199,0.09,0.5994,2,0.0,2019-05-19 20:35:00-04:00
1,2019-05-20 00:35:00,Game of Thrones,-0.4019,-0.402,-0.4019,1,0.0,2019-05-19 20:35:00-04:00
2,2019-05-20 00:35:00,Drogon,-0.8481,-0.848,-0.8481,1,0.0,2019-05-19 20:35:00-04:00
3,2019-05-20 00:35:00,Game of Thrones,-0.4767,0.103,0.657,3,0.0,2019-05-19 20:35:00-04:00
4,2019-05-20 00:35:00,Jon Snow,0.4404,0.44,0.4404,1,0.0,2019-05-19 20:35:00-04:00


In [204]:
# Number of interval rows available per entity, most frequent first.
got_senti['entity'].value_counts(sort=True, ascending=False)

Game of Thrones       511
Daenerys              508
Jon Snow              507
Arya                  506
Jaime                 505
Sansa                 505
Cersei                504
Drogon                504
Tyrion                504
Bran                  504
Brienne               491
Winterfell            490
Tormund               467
Bronn                 457
Gendry                437
George R.R. Martin    430
Ser Davos             428
Varys                 403
Missandei             352
Theon                 193
Gilly                 111
Cleganebowl            24
Name: entity, dtype: int64

In [205]:
# Entities that get their own panel. Keep the order stable: the plotting loop
# picks each entity's palette colour by its position in this list.
entities = [
    'Tyrion',
    'Sansa',
    'Daenerys',
    'Varys',
    'Jon Snow',
    'Arya',
    'Bran',
    'Drogon',
    'Bronn',
    'Gendry',
    'Brienne',
    'Tormund',
    'Gilly',
    'Cersei',
    'Jaime',
    'Missandei',
    'George R.R. Martin',
    'Game of Thrones',
    'Theon',
    'Ser Davos',
    'Winterfell',
]

# Deliberately left out -- the author flagged the data for these as "screwed up":
# 'Grey Worm', 'The Hound', 'Sam', 'King\'s Landing', 'Cleganebowl'

In [206]:
# Inspect the column dtypes to confirm the timestamp columns were parsed as datetimes.
got_senti.dtypes

interval_start                     datetime64[ns]
entity                                     object
low_senti                                 float64
avg_senti                                 float64
high_senti                                float64
num_datapoints                              int64
minutes_since_start                       float64
est                    datetime64[ns, US/Eastern]
dtype: object

In [216]:
import os

from bokeh.layouts import gridplot
from bokeh.models import ColumnDataSource, CDSView, GroupFilter, FuncTickFormatter, Span
from bokeh.plotting import figure, show, output_file
# export_png was called below without ever being imported (NameError on a fresh kernel).
from bokeh.io import export_png, export_svgs

import colorcet as cc

import numpy as np

output_file("got_results.html")

# export_png does not create missing directories.
os.makedirs("plots", exist_ok=True)

got_palette = cc.glasbey_dark[:30]

plot_size_and_tools = {'plot_height': 300, 'plot_width': 400,
                        'tools':['box_select', 'reset', 'help']}

# Instants marked on every panel with dashed vertical lines.
# NOTE(review): these values and the 5-hour shift in the tick formatter below both
# treat local time as UTC-5, while the `est` column shows a -04:00 offset for this
# date -- confirm which one is intended before changing either.
EPISODE_START_UTC = "2019-05-20 02:00:00 UTC"
EPISODE_END_UTC = "2019-05-20 03:20:00 UTC"


def _add_episode_markers(plot):
    """Add dashed vertical lines at the episode start and end to ``plot``.

    Fresh ``Span`` objects are created on every call so that each figure owns
    its annotations. Returns the ``(start_span, end_span)`` pair.
    """
    markers = []
    for timestamp, colour in ((EPISODE_START_UTC, got_palette[-1]),
                              (EPISODE_END_UTC, got_palette[-2])):
        marker = Span(location=pd.to_datetime(timestamp, format="%Y-%m-%d %H:%M:%S %Z"),
                      dimension='height', line_color=colour,
                      line_dash='dashed', line_width=2)
        plot.add_layout(marker)
        markers.append(marker)
    return tuple(markers)


# Axis labels such as "9PM". `tick` is milliseconds since the epoch, so the hour
# is read with getUTCHours(): getHours() would depend on the timezone of whichever
# browser renders the plot. Every variable is declared (the original assigned to an
# undeclared `mil_time`), and hours wrap properly so afternoon ticks are labelled
# "1PM" rather than "13AM".
js_formatter = FuncTickFormatter(code="""
    var utc_time = new Date(tick);
    var hour_24 = (utc_time.getUTCHours() - 5 + 24) % 24;
    var suffix = hour_24 < 12 ? "AM" : "PM";
    var hour_12 = hour_24 % 12;
    if (hour_12 == 0) {
        hour_12 = 12;
    }
    return hour_12 + suffix;
""")

# Let's start with all the data

source = ColumnDataSource(got_senti)

p1 = figure(title="All Data", x_axis_type='datetime',
            x_range=(got_senti.loc[0,'est'], got_senti.loc[0,'est'] + pd.to_timedelta(6,'h')),
            **plot_size_and_tools)

p1.xaxis.formatter = js_formatter
p1.xaxis.major_label_text_font_size="12pt"
p1.yaxis.major_label_text_font_size="12pt"
p1.title.text_font_size="18pt"

p1.circle(x='est', y='avg_senti', source=source, color='black', alpha=0.2, size=8)

episode_start, episode_end = _add_episode_markers(p1)

p1.toolbar.logo = None
p1.toolbar_location = None

# For vector output instead, set `p1.output_backend = "svg"` and use
# export_svgs(p1, "plots/overall.svg").
export_png(p1, "plots/All_Data.png",height=400, width=600)

# Now the rest: one panel per entity, each with its own linear trend line.
got_views = dict()
got_plots = dict()

for idx, entity in enumerate(entities):

    # Work on this entity's rows only. The original fitted the regression on the
    # whole frame, so every panel showed the same global trend, and it also added
    # a `y_predicted` column to the shared `got_senti` on every pass.
    entity_rows = got_senti.loc[got_senti['entity'] == entity].copy()

    # A straight-line fit needs at least two distinct x values.
    has_trend = entity_rows['minutes_since_start'].nunique() >= 2
    if has_trend:
        slope, intercept = np.polyfit(entity_rows['minutes_since_start'],
                                      entity_rows['avg_senti'], 1)
        entity_rows['y_predicted'] = intercept + slope * entity_rows['minutes_since_start']
        # Line glyphs connect points in row order.
        entity_rows = entity_rows.sort_values('minutes_since_start')

    entity_source = ColumnDataSource(entity_rows)

    # The source is already restricted to this entity; the view is kept so that
    # `got_views` is still populated for any code that looks it up.
    got_views[entity] = CDSView(source=entity_source,
                                filters=[GroupFilter(column_name='entity', group=entity)])

    got_plots[entity] = figure(title=f"{entity}", x_axis_type='datetime', x_range=p1.x_range, y_range=(-1,1), **plot_size_and_tools)

    got_plots[entity].xaxis.formatter = js_formatter
    got_plots[entity].ygrid.grid_line_alpha = 0.2
    got_plots[entity].xgrid.grid_line_color = None
    got_plots[entity].xaxis.major_label_text_font_size="12pt"
    got_plots[entity].yaxis.major_label_text_font_size="12pt"
    got_plots[entity].title.text_font_size="18pt"
    got_plots[entity].toolbar.logo = None
    got_plots[entity].toolbar_location = None

    got_plots[entity].circle(x='est', y='avg_senti', source=entity_source, view=got_views[entity], color=got_palette[idx], alpha=0.2, size=8)

    if has_trend:
        # No `view=` here: CDSView filters are not supported for connected glyphs
        # such as lines, which is why the line gets a pre-filtered source.
        got_plots[entity].line(x='est', y='y_predicted', source=entity_source, line_width=2, color=got_palette[idx])

    episode_start, episode_end = _add_episode_markers(got_plots[entity])

    # For vector output instead, set `output_backend = "svg"` on the figure and
    # use export_svgs(got_plots[entity], f"plots/{entity}.svg").
    export_png(got_plots[entity], f"plots/{entity}.png", height=400, width=600)

# To browse every panel on one HTML page (written to got_results.html), uncomment:
# show(gridplot([p1] + [got_plots[ent] for ent in entities], ncols=4))

### VADER Scratch area

In [169]:
from vaderSentiment.vaderSentiment import SentimentIntensityAnalyzer

In [170]:
# Create one VADER analyzer instance for the scratch experiments in this section.
analyzer = SentimentIntensityAnalyzer()

In [219]:
# Sample text used to sanity-check the sentiment scorer. Kept verbatim
# (including its typos); adjacent literals are joined into a single string.
TEST_TEXT = (
    "tie for some positivity! i'm grateful that: "
    "- sansa is still alive & thriving "
    "- cersei wasn't murdered by her lover "
    "- gilly is pregnant "
    "- tormund finally realized brienne doesn't like him "
    "- brienne's a knight "
    "- ghost didn't participate in dany's war "
    "- arya's having a good season"
)

# Score the sample text; the result is a dict with 'neg', 'neu', 'pos' and 'compound' entries.
analyzer.polarity_scores(TEST_TEXT)

{'neg': 0.142, 'neu': 0.528, 'pos': 0.331, 'compound': 0.9021}

In [None]:
# NOTE(review): stray numeric literal in a scratch cell that was never executed;
# what it records is not evident from the notebook -- label it or delete the cell.
-0.57