In [28]:
import os
import pandas as pd
import numpy as np
from dplython import (DplyFrame, X, diamonds, select, sift, sample_n, sample_frac, arrange, mutate, group_by, summarize)

import plotly.plotly
import plotly.graph_objs as go
import plotly.offline as offline
from plotly.graph_objs import *
from plotly.offline import download_plotlyjs, init_notebook_mode, plot, iplot
import plotly.figure_factory as ff
from plotly import tools

import matplotlib.pyplot as plt
%matplotlib inline
%config IPCompleter.greedy=True

# Directory that receives every HTML chart written by this notebook.
out_dir = '/Users/genie/dev/projects/jupyter_lab/out'
# Create the directory on first use so the chdir below does not raise
# FileNotFoundError on a machine where it has not been made yet.
os.makedirs(out_dir, exist_ok=True)
# The plotting cells pass bare filenames to plotly.offline.plot(), so the
# working directory decides where the charts are written.
os.chdir(out_dir)

# Forwarded unchanged as the include_plotlyjs argument of every
# plotly.offline.plot() call in this notebook.
include_plotlyjs=True

In [29]:
# Location of the character-level CSV covering seasons 1 to 7.
data_path = '/Users/genie/data/game_of_thrones/got_characters_s1_to_s7.csv'

# Fields are double-quoted, empty strings count as missing values, and the
# file is decoded as ISO-8859-1.
character_df = pd.read_csv(
    data_path,
    quotechar='"',
    na_values='',
    encoding="ISO-8859-1",
)

In [30]:
# Per-character totals over seasons 1-7, computed column-wise in one pass per
# metric instead of a Python-level lambda evaluated for every row.
season_numbers = range(1, 8)
for total_column, per_season_suffix in [
    ('total_screen_time', '_screenTime'),
    ('num_of_episodes_appeared', '_episodes'),
    ('num_of_people_killed', '_numKilled'),
]:
    # e.g. ['s1_screenTime', ..., 's7_screenTime']
    per_season_columns = ['s' + str(i) + per_season_suffix for i in season_numbers]
    # skipna=False mirrors the built-in sum() used before: a missing value in
    # any season makes the total NaN instead of being silently ignored.
    character_df[total_column] = character_df[per_season_columns].sum(axis=1, skipna=False)

In [31]:
# Print a summary of the frame (index range, columns, non-null counts, dtypes)
# to check the load and the derived total columns.
character_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 368 entries, 0 to 367
Data columns (total 66 columns):
character_name                      368 non-null object
gender                              147 non-null object
house                               86 non-null object
spouse                              43 non-null object
parents                             56 non-null object
siblings                            56 non-null object
parent_of                           37 non-null object
royal                               368 non-null int64
kingsguard                          368 non-null int64
s1_episodes                         368 non-null int64
s1_screenTime                       368 non-null float64
s1_numOfCharactersInteractedWith    368 non-null int64
s1_numKilled                        368 non-null int64
s1_bc                               368 non-null float64
s1_ec                               368 non-null float64
s1_shareOfScreenTime                368 non-null float64
s2_ep

In [49]:
# chart of characters
# Keep only characters with a known house. The explicit .copy() makes df2 an
# independent frame, so rewriting 'house' below does not trigger pandas'
# SettingWithCopyWarning and never depends on whether the filter gave a view.
df2 = character_df[character_df.house.notnull()].copy()
# 'house' can hold several ';'-separated values; keep only the first one so
# each character ends up in exactly one category.
df2['house'] = df2['house'].str.split(';').str[0]

# chart
# One trace (legend entry) per house and one animation frame per season.
categories = list(df2['house'].unique())
seasons = [1,2,3,4,5,6,7]

# Plain-dict figure spec; 'data' and 'frames' are filled in further down.
figure = {
    'data': [],
    'layout': {},
    'frames': []
}

figure['layout']['xaxis'] = {'range': [0, 150], 'title': 'screen time (mins)'}
figure['layout']['yaxis'] = {'range': [0, 0.6], 'tick0': 0, 'dtick':0.05, 'title': 'eigen vector centrality'}
figure['layout']['title'] = 'Game of Thrones Character Importance (Season1-7) <br><span style="font-size:x-small;width:50%;">NOTE: This is an interactive chart, please hover over individual bubbles for more detailed information. Also you may selectively enable or disable legend items to isolate individual category or more.</span>'

figure['layout']['hovermode'] = 'closest'
figure['layout']['legend'] = { 'font' : {'family':'sans-serif','size':12} }
# No placeholder is stored under figure['layout']['sliders'] here: that key is
# assigned [sliders_dict] at the end of this cell, once the per-season steps
# exist, so any value set at this point would be overwritten unread.

# Play / Pause buttons driving the frame animation.
figure['layout']['updatemenus'] = [
    {
        'buttons': [
            {
                'args': [None, {'frame': {'duration': 500, 'redraw': False},
                         'fromcurrent': True, 'transition': {'duration': 300, 'easing': 'quadratic-in-out'}}],
                'label': 'Play',
                'method': 'animate'
            },
            {
                'args': [[None], {'frame': {'duration': 0, 'redraw': False}, 'mode': 'immediate',
                'transition': {'duration': 0}}],
                'label': 'Pause',
                'method': 'animate'
            }
        ],
        'direction': 'left',
        'pad': {'r': 10, 't': 87},
        'showactive': False,
        'type': 'buttons',
        'x': 0.1,
        'xanchor': 'right',
        'y': 0,
        'yanchor': 'top'
    }
]

# Slider template; one entry per season is appended to 'steps' while the
# frames are built.
sliders_dict = {
    'active': 0,
    'yanchor': 'top',
    'xanchor': 'left',
    'currentvalue': {
        'font': {'size': 20},
        'prefix': '',
        'visible': True,
        'xanchor': 'right'
    },
    'transition': {'duration': 300, 'easing': 'cubic-in-out'},
    'pad': {'b': 10, 't': 50},
    'len': 0.9,
    'x': 0.1,
    'y': 0,
    'steps': []
}

def _season_trace(house_df, season, house):
    """Build the marker ("bubble") trace of one house for one season.

    Parameters
    ----------
    house_df : DataFrame
        Rows of a single house. Must provide 'character_name' and the
        's<season>_screenTime' / 's<season>_ec' columns.
    season : int
        Season number; selects the 's<season>_' column prefix.
    house : str
        Trace name shown in the legend.

    Returns
    -------
    dict
        Trace spec with x = screen time, y = the season's 'ec' column,
        marker area = screen time and the character names as hover text.
    """
    season_prefix = 's' + str(season) + '_'
    screen_time = list(house_df[season_prefix + 'screenTime'])
    return {
        'x': screen_time,
        'y': list(house_df[season_prefix + 'ec']),
        'mode': 'markers',
        'text': list(house_df['character_name']),
        'marker': {
            'sizemode': 'area',
            'sizeref': 0.1,
            'size': list(screen_time)
        },
        'name': house
    }


# Split df2 by house once; the rows of a house do not change between seasons,
# so there is no need to re-filter for every (season, house) pair.
house_frames = [(category, df2[df2.house == category]) for category in categories]

# make data: the initial view is the first season, built by the same helper as
# the frames so the two can never drift apart.
figure['data'].extend(
    _season_trace(house_df, seasons[0], category) for category, house_df in house_frames
)

# make frames: one frame and one slider step per season
for season in seasons:
    frame_name = str(season)
    figure['frames'].append({
        'data': [_season_trace(house_df, season, category) for category, house_df in house_frames],
        'name': frame_name
    })
    sliders_dict['steps'].append({
        'args': [
            # Frames are addressed by name, so pass the exact string used as
            # the frame's 'name' rather than the bare integer.
            [frame_name],
            {'frame': {'duration': 300, 'redraw': False},
             'mode': 'immediate',
             'transition': {'duration': 300}}
        ],
        'label': 'Season' + str(season),
        'method': 'animate'
    })

figure['layout']['sliders'] = [sliders_dict]

plotly.offline.plot(figure, include_plotlyjs=include_plotlyjs, filename='got_character_importance_s1_s7.html')

'file:///Users/genie/dev/projects/jupyter_lab/out/test.html'

In [69]:
### death count by season

seasons = list(range(1, 8))

x_labels = seasons
y_labels = ['death count']

# One single-element row per season: the number of non-null character names
# among the rows whose dead_in_season equals 's<season>'.
x_data = [
    [character_df.loc[character_df.dead_in_season == 's' + str(season_no), 'character_name'].count()]
    for season_no in seasons
]

# One bar trace per y label; column `series_idx` of x_data holds its values.
traces = [
    go.Bar(x=seasons, y=[row[series_idx] for row in x_data], name=series_name)
    for series_idx, series_name in enumerate(y_labels)
]

data = list(traces)

title = 'Game of Thrones Character Death Count (Season1-7) <br><span style="font-size:x-small;width:50%;">NOTE: only characters with considerable screen time included in death counts, but not the army people</span>'

layout = go.Layout(
    barmode='stack',
    title=title,
    yaxis=dict(title='death count'),
)

fig = go.Figure(data=data, layout=layout)
plotly.offline.plot(
    fig,
    include_plotlyjs=include_plotlyjs,
    filename='got_death_count_by_season.html',
)

'file:///Users/genie/dev/projects/jupyter_lab/out/test2.html'

In [90]:
## num of people killed by every character (all seasons combined)

# Characters with more than one kill, ordered by kill count (highest first)
# and capped at the top 50.
kill_columns = ['character_name', 'num_of_people_killed']
multi_killers = character_df.loc[character_df['num_of_people_killed'] > 1, kill_columns]
df2 = multi_killers.sort_values(by=['num_of_people_killed'], ascending=False).head(50)

kills_bar = go.Bar(
    x=list(df2['character_name'].values),
    y=list(df2['num_of_people_killed'].values),
)
data = [kills_bar]

layout = go.Layout(
    barmode='stack',
    title='Num of People Killed by Characters in GoT (Season1-7)',
    yaxis=dict(title='death count'),
)

fig = go.Figure(data=data, layout=layout)
plotly.offline.plot(
    fig,
    include_plotlyjs=include_plotlyjs,
    filename='got_num_of_people_killed_by_every_character.html',
)


'file:///Users/genie/dev/projects/jupyter_lab/out/got_num_of_people_killed_by_every_character.html'

In [86]:
# common death types

# Count characters per manner of death (rows without one are dropped first),
# then keep the 20 most frequent manners.
death_type_counts = (
    DplyFrame(character_df)
    >> sift(X.manner_of_death.notnull())
    >> group_by(X.manner_of_death)
    >> summarize(total=X.character_name.count())
)
df2 = death_type_counts.sort_values(by=['total'], ascending=False).head(20)

death_type_bar = go.Bar(
    x=list(df2['manner_of_death'].values),
    y=list(df2['total'].values),
)
data = [death_type_bar]

layout = go.Layout(
    barmode='stack',
    title='Most common type of death in GoT (Season1-7)',
    yaxis=dict(title='death count'),
)

fig = go.Figure(data=data, layout=layout)
plotly.offline.plot(
    fig,
    include_plotlyjs=include_plotlyjs,
    filename='got_common_death_types.html',
)

'file:///Users/genie/dev/projects/jupyter_lab/out/got_common_death_types.html'

In [103]:
# How do they kill the most?

def manners_of_death_by(killer):
    """Return the distinct manners of death of the characters killed by `killer`.

    Parameters
    ----------
    killer : str
        Name looked up as a literal substring of the `killed_by` column.

    Returns
    -------
    list
        One entry per distinct non-null `manner_of_death` among the matching
        rows of `character_df`, in the order the grouped summary yields them.
    """
    # na=False folds the former "killed_by is not null" guard into the match;
    # regex=False makes the name match as plain text, not as a regex pattern.
    victims = character_df[character_df.killed_by.str.contains(killer, na=False, regex=False)]
    by_manner = (
        DplyFrame(victims)
        >> sift(X.manner_of_death.notnull())
        >> group_by(X.manner_of_death)
        >> summarize(total=X.character_name.count())
    )
    return list(by_manner['manner_of_death'].unique())


for killer in ['Daenerys Targaryen', 'Jon Snow', 'Arya Stark', 'Cersei Lannister']:
    # Two print arguments on purpose: the default separator reproduces the
    # "<name>:  [...]" layout (two spaces) of the earlier output.
    print(killer + ': ', manners_of_death_by(killer))

Daenerys Targaryen:  ['Burning', 'Dragon', 'Safe']
Jon Snow:  ['Arrow', 'Burning', 'Chest stab', 'Decapitation', 'Face stab', 'Head crush']
Arya Stark:  ['Chest stab', 'Multiple stabs', 'Neck stab', 'Throat slash']
Cersei Lannister:  ['Poison', 'Wildfire']
