In [1]:
import os
import pandas as pd
import numpy as np
from dplython import (DplyFrame, X, diamonds, select, sift, sample_n, sample_frac, arrange, mutate, group_by, summarize)

import plotly.plotly
import plotly.graph_objs as go
import plotly.offline as offline
from plotly.graph_objs import *
from plotly.offline import download_plotlyjs, init_notebook_mode, plot, iplot
import plotly.figure_factory as ff
from plotly import tools

import matplotlib.pyplot as plt
%matplotlib inline
%config IPCompleter.greedy=True

# Project root. Override with the GOT_PROJECT_DIR environment variable to run
# the notebook on another machine; the default is the original location.
project_dir = os.environ.get('GOT_PROJECT_DIR', '/Users/genie/dev/projects/github/got_survival_analysis')

# Every chart below is written with a bare file name ('chart1.html', ...), so
# the working directory is switched to the output folder once, up front.
out_dir = os.path.join(project_dir, 'out')
os.chdir(out_dir)

# Forwarded unchanged to every plotly.offline.plot(...) call in this notebook.
include_plotlyjs = False

  from collections import Callable

can't resolve package from __spec__ or __package__, falling back on __name__ and __path__



In [2]:
# Project root. Override with the GOT_PROJECT_DIR environment variable to run
# the notebook on another machine; the default is the original location.
project_dir = os.environ.get('GOT_PROJECT_DIR', '/Users/genie/dev/projects/github/got_survival_analysis')

# One row per character, with per-season columns prefixed s1_ ... s7_.
data_path = os.path.join(project_dir, 'data', 'got_characters_s1_to_s7.csv')
character_df = pd.read_csv(data_path, quotechar='"', na_values='', encoding="ISO-8859-1")

In [3]:
def _sum_over_seasons(frame, suffix):
    """Add up the per-season columns ``s1_<suffix>`` ... ``s7_<suffix>`` row by row.

    ``skipna=False`` keeps a missing season value as NaN in the total, which is
    what summing the raw values with Python's ``sum`` would produce.
    """
    season_columns = ['s' + str(i) + '_' + suffix for i in range(1, 8)]
    return frame[season_columns].sum(axis=1, skipna=False)


# Whole-series totals per character, computed column-wise instead of with a
# Python lambda per row.
character_df['total_screen_time'] = _sum_over_seasons(character_df, 'screenTime')
character_df['num_of_episodes_appeared'] = _sum_over_seasons(character_df, 'episodes')
character_df['num_of_people_killed'] = _sum_over_seasons(character_df, 'numKilled')

In [4]:
# Print each column's dtype and non-null count to see how sparse the fields are.
character_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 368 entries, 0 to 367
Data columns (total 71 columns):
character_name                      368 non-null object
gender                              337 non-null object
house                               86 non-null object
spouse                              43 non-null object
parents                             56 non-null object
siblings                            56 non-null object
parent_of                           37 non-null object
royal                               368 non-null int64
kingsguard                          368 non-null int64
guardian_of                         12 non-null object
guarded_by                          9 non-null object
served_by                           4 non-null object
serves                              14 non-null object
allies                              5 non-null object
s1_episodes                         368 non-null int64
s1_screenTime                       368 non-null float64
s1_numOfCharac

In [5]:
# Chart 1: animated bubble chart of characters by screen time and influence.
# x = per-season screen time, y = the per-season `ec` column (labelled
# "eigen vector centrality" on the axis), one trace per house and one
# animation frame per season.

def _season_traces(frame, houses, season):
    """Build one marker-trace dict per house for the given season number.

    frame   -- DataFrame with `house`, `character_name` and the
               `s<season>_screenTime` / `s<season>_ec` columns.
    houses  -- iterable of house names; one trace is produced for each.
    season  -- season number used to pick the `s<season>_` columns.

    Returns a list of plain dicts in plotly's scatter-trace layout. Bubble
    area is driven by the same screen-time values used for the x axis.
    """
    prefix = 's' + str(season) + '_'
    traces = []
    for house in houses:
        members = frame[frame.house == house]
        traces.append({
            'x': list(members[prefix + 'screenTime']),
            'y': list(members[prefix + 'ec']),
            'mode': 'markers',
            'text': list(members['character_name']),
            'marker': {
                'sizemode': 'area',
                'sizeref': 0.1,
                'size': list(members[prefix + 'screenTime'])
            },
            'name': house
        })
    return traces


# Keep only characters with a known house. Work on an explicit copy so the
# column assignment below never writes into a slice of character_df.
df2 = character_df[character_df.house.notnull()].copy()
# A character may list several ';'-separated houses; chart them under the first.
df2['house'] = df2['house'].str.split(';').str[0]

categories = list(df2['house'].unique())
seasons = [1,2,3,4,5,6,7]

figure = {
    # The figure opens on the first season; the frames below animate the rest.
    'data': _season_traces(df2, categories, seasons[0]),
    'layout': {},
    'frames': []
}

figure['layout']['xaxis'] = {'range': [0, 150], 'title': 'screen time (mins)'}
figure['layout']['yaxis'] = {'range': [0, 0.6], 'tick0': 0, 'dtick':0.05, 'title': 'eigen vector centrality'}
figure['layout']['title'] = 'Chart-1: Game of Thrones Character Importance (Season1-7) <br><span style="font-size:x-small;width:50%;">NOTE: This is an interactive chart, please hover over individual bubbles for more detailed information. Also you may selectively enable or disable legend items to isolate individual category or more.</span>'

figure['layout']['hovermode'] = 'closest'
figure['layout']['legend'] = { 'font' : {'family':'sans-serif','size':12} }

# Play / Pause buttons that drive the frame animation.
figure['layout']['updatemenus'] = [
    {
        'buttons': [
            {
                'args': [None, {'frame': {'duration': 500, 'redraw': False},
                         'fromcurrent': True, 'transition': {'duration': 300, 'easing': 'quadratic-in-out'}}],
                'label': 'Play',
                'method': 'animate'
            },
            {
                'args': [[None], {'frame': {'duration': 0, 'redraw': False}, 'mode': 'immediate',
                'transition': {'duration': 0}}],
                'label': 'Pause',
                'method': 'animate'
            }
        ],
        'direction': 'left',
        'pad': {'r': 10, 't': 87},
        'showactive': False,
        'type': 'buttons',
        'x': 0.1,
        'xanchor': 'right',
        'y': 0,
        'yanchor': 'top'
    }
]

# Season slider; one step per frame is appended in the loop below.
sliders_dict = {
    'active': 0,
    'yanchor': 'top',
    'xanchor': 'left',
    'currentvalue': {
        'font': {'size': 20},
        'prefix': '',
        'visible': True,
        'xanchor': 'right'
    },
    'transition': {'duration': 300, 'easing': 'cubic-in-out'},
    'pad': {'b': 10, 't': 50},
    'len': 0.9,
    'x': 0.1,
    'y': 0,
    'steps': []
}

# One animation frame and one matching slider step per season.
for season in seasons:
    frame_name = str(season)
    figure['frames'].append({
        'data': _season_traces(df2, categories, season),
        'name': frame_name
    })
    sliders_dict['steps'].append({
        'args': [
            # Refer to the frame by exactly the string it was named with.
            [frame_name],
            {'frame': {'duration': 300, 'redraw': False},
             'mode': 'immediate',
             'transition': {'duration': 300}}
        ],
        'label': 'Season' + str(season),
        'method': 'animate'
    })

figure['layout']['sliders'] = [sliders_dict]

plotly.offline.plot(figure, include_plotlyjs=include_plotlyjs, filename='chart1.html')

'file:///Users/genie/dev/projects/github/got_survival_analysis/out/chart1.html'

In [9]:
## Chart 2: stacked bars of screen time per season for the 30 characters
## with the most total screen time.

season_numbers = list(range(1, 8))
screen_time_columns = ['s' + str(n) + '_screenTime' for n in season_numbers]

df2 = (
    character_df[['character_name', 'total_screen_time'] + screen_time_columns]
    .sort_values(by=['total_screen_time'], ascending=False)
    .head(30)
)

# One bar series per season; stacking them gives each character's total.
traces = [
    go.Bar(
        x=list(df2['character_name'].values),
        y=list(df2[column].values),
        name='Season ' + str(n)
    )
    for n, column in zip(season_numbers, screen_time_columns)
]

data = list(traces)
title = (
    'Chart-2: Game of Thrones Character Overall Screen Times (Season1-7) '
    '<br><span style="font-size:x-small;width:50%;">NOTE: click on individual legend items to the right to selectively enable or disable a color group</span>'
)
layout = go.Layout(barmode='stack', title=title, yaxis=dict(title='screen time (mins)'))

fig = go.Figure(data=data, layout=layout)
plotly.offline.plot(fig, include_plotlyjs=include_plotlyjs, filename='chart2.html')



'file:///Users/genie/dev/projects/github/got_survival_analysis/out/chart2.html'

In [7]:
# Ten characters with the most total screen time, also expressed in hours.
# `.copy()` makes df1 an independent frame, so adding a column does not write
# into a slice of character_df.
df1 = character_df[['character_name','total_screen_time']].copy()
df1['total_screen_time_in_hrs'] = (df1['total_screen_time'] / 60).round(2)
df1.sort_values(by=['total_screen_time'], ascending=False).head(10)

Unnamed: 0,character_name,total_screen_time,total_screen_time_in_hrs
330,Tyrion Lannister,549.29,9.15
119,Jon Snow,547.56,9.13
44,Daenerys Targaryen,420.02,7.0
38,Cersei Lannister,399.53,6.66
285,Sansa Stark,362.74,6.05
15,Arya Stark,332.67,5.54
109,Jaime Lannister,329.04,5.48
121,Jorah Mormont,298.53,4.98
317,Theon Greyjoy,246.46,4.11
47,Davos Seaworth,241.75,4.03


In [8]:
### Chart 3: number of character deaths recorded in each season

seasons = list(range(1, 8))

x_labels = seasons
y_labels = ['death count']

# One row per season, one entry per y label: characters whose
# `dead_in_season` value is 's<season>'.
x_data = [
    [character_df.loc[character_df.dead_in_season == 's' + str(season_no), 'character_name'].count()]
    for season_no in seasons
]

# One bar series per y label, reading that label's column out of x_data.
traces = [
    go.Bar(x=seasons, y=[row[label_pos] for row in x_data], name=label)
    for label_pos, label in enumerate(y_labels)
]

data = list(traces)

title = 'Chart-3: Game of Thrones Character Death Count (Season1-7) <br><span style="font-size:x-small;width:50%;">NOTE: only characters with considerable screen time included in death counts, but not the army people</span>'

layout = go.Layout(barmode='stack', title=title, yaxis=dict(title='death count'))

fig = go.Figure(data=data, layout=layout)
plotly.offline.plot(fig, include_plotlyjs=include_plotlyjs, filename='chart3.html')

'file:///Users/genie/dev/projects/github/got_survival_analysis/out/chart3.html'

In [9]:
## Chart 4: kill count per character over all seasons.
## Only characters with more than one kill are shown, capped at the top 50.

repeat_killers = character_df[character_df.num_of_people_killed > 1]
df2 = (
    repeat_killers[['character_name', 'num_of_people_killed']]
    .sort_values(by=['num_of_people_killed'], ascending=False)
    .head(50)
)

kill_bars = go.Bar(
    x=list(df2['character_name'].values),
    y=list(df2['num_of_people_killed'].values)
)
data = [kill_bars]

layout = go.Layout(
    barmode='stack',
    title='Chart-4: Num of People Killed by Characters in GoT (Season1-7)',
    yaxis=dict(title='death count')
)

fig = go.Figure(data=data, layout=layout)
plotly.offline.plot(fig, include_plotlyjs=include_plotlyjs, filename='chart4.html')


'file:///Users/genie/dev/projects/github/got_survival_analysis/out/chart4.html'

In [10]:
# Chart 5: the 20 most frequent manners of death.

# Count characters per recorded manner of death (rows without one are dropped).
death_counts = (
    DplyFrame(character_df)
    >> sift(X.manner_of_death.notnull())
    >> group_by(X.manner_of_death)
    >> summarize(total=X.character_name.count())
)
df2 = death_counts.sort_values(by=['total'], ascending=False).head(20)

manner_bars = go.Bar(
    x=list(df2['manner_of_death'].values),
    y=list(df2['total'].values)
)
data = [manner_bars]

layout = go.Layout(
    barmode='stack',
    title='Chart-5: Most common type of death in GoT (Season1-7)',
    yaxis=dict(title='death count')
)

fig = go.Figure(data=data, layout=layout)
plotly.offline.plot(fig, include_plotlyjs=include_plotlyjs, filename='chart5.html')

'file:///Users/genie/dev/projects/github/got_survival_analysis/out/chart5.html'

In [11]:
# How do they kill the most?

def _manners_of_death_by(killer):
    """Summarise the manners of death of every character killed by `killer`.

    A character counts as a victim when `killer` appears anywhere in their
    `killed_by` value; rows with no `killed_by` never match (`na=False`).
    The name is matched literally (`regex=False`).

    Returns a frame with one row per manner of death and its `total` count.
    """
    victims = character_df[character_df.killed_by.str.contains(killer, na=False, regex=False)]
    return (
        DplyFrame(victims)
        >> sift(X.manner_of_death.notnull())
        >> group_by(X.manner_of_death)
        >> summarize(total=X.character_name.count())
    )


# df2 is left holding the summary for the last killer in the list.
for killer in ['Daenerys Targaryen', 'Jon Snow', 'Arya Stark', 'Cersei Lannister']:
    df2 = _manners_of_death_by(killer)
    print(killer + ': ', list(df2['manner_of_death'].unique()))

Daenerys Targaryen:  ['Burning', 'Dragon', 'Safe']
Jon Snow:  ['Arrow', 'Burning', 'Chest stab', 'Decapitation', 'Face stab', 'Head crush']
Arya Stark:  ['Chest stab', 'Multiple stabs', 'Neck stab', 'Throat slash']
Cersei Lannister:  ['Poison', 'Wildfire']


In [36]:
def _death_pct_by(frame, column):
    """Return the percentage of dead characters for each value of `column`.

    frame  -- DataFrame with `character_name`, `is_dead` and `column`.
    column -- name of the grouping column; rows where it is null are ignored.

    The result has two columns, `column` and `death_pct` (rounded to two
    decimals). Because the deaths table is the left side of the merge, a
    group with no dead character does not appear in the result.
    """
    # A fresh `getattr(X, column)` is built for every use so no dplython
    # expression object is shared between pipeline steps.
    killed = (
        DplyFrame(frame)
        >> sift(getattr(X, column).notnull(), X.is_dead == 1)
        >> group_by(getattr(X, column))
        >> summarize(total_killed=X.character_name.count())
    )
    totals = (
        DplyFrame(frame)
        >> sift(getattr(X, column).notnull())
        >> group_by(getattr(X, column))
        >> summarize(total=X.character_name.count())
    )
    merged = pd.merge(killed, totals, how='left', on=[column])
    merged['death_pct'] = (merged['total_killed'] / merged['total'] * 100).round(2)
    return merged.drop(['total', 'total_killed'], axis=1)


# Death percentage by gender.
df1 = _death_pct_by(character_df, 'gender')

# Death percentage by house. Work on a copy: assigning to df2['house'] must
# not rewrite the `house` column of character_df itself.
df2 = character_df.copy()
raw_house = df2['house']
# Keep only the first of several ';'-separated houses; a missing or empty
# house stays null so it is excluded from the grouping.
df2['house'] = raw_house.str.split(';').str[0].where(raw_house != '')
df2_x = _death_pct_by(df2, 'house')

# Death percentage by royal status, with readable labels for the chart.
df3 = _death_pct_by(character_df, 'royal')
df3['royal'] = np.where(df3['royal'] == 1, 'Royal', 'Not-Royal')

# Death percentage by Kingsguard membership, with readable labels for the chart.
df4 = _death_pct_by(character_df, 'kingsguard')
df4['kingsguard'] = np.where(df4['kingsguard'] == 1, 'Kingsguard', 'Not-Kingsguard')


In [40]:
# Chart 6: 2x2 grid of death-percentage bar charts, one per grouping.
trace1 = go.Bar(x=df1['gender'], y=df1['death_pct'])
trace2 = go.Bar(x=df2_x['house'], y=df2_x['death_pct'])
trace3 = go.Bar(x=df3['royal'], y=df3['death_pct'])
trace4 = go.Bar(x=df4['kingsguard'], y=df4['death_pct'])

panel_titles = ('By Gender', 'By House','By being Royal', 'By being Kingsguard')
fig = tools.make_subplots(rows=2, cols=2, subplot_titles=panel_titles)

# Fill the grid row by row, in the same order as the panel titles.
grid_cells = [(1, 1), (1, 2), (2, 1), (2, 2)]
for (grid_row, grid_col), panel_trace in zip(grid_cells, [trace1, trace2, trace3, trace4]):
    fig.append_trace(panel_trace, grid_row, grid_col)

fig['layout'].update(
    height=600,
    width=800,
    title='Chart-6: Death Percentage by various groups',
    showlegend=False
)
plotly.offline.plot(fig, include_plotlyjs=include_plotlyjs, filename='chart6.html')

This is the format of your plot grid:
[ (1,1) x1,y1 ]  [ (1,2) x2,y2 ]
[ (2,1) x3,y3 ]  [ (2,2) x4,y4 ]



'file:///Users/genie/dev/projects/github/got_survival_analysis/out/chart6.html'

In [12]:

# from plotly import tools
# import plotly.plotly as py
# import plotly.graph_objs as go

# trace1 = go.Scatter(x=[1, 2, 3], y=[4, 5, 6])
# trace2 = go.Scatter(x=[20, 30, 40], y=[50, 60, 70])
# trace3 = go.Scatter(x=[300, 400, 500], y=[600, 700, 800])
# trace4 = go.Scatter(x=[4000, 5000, 6000], y=[7000, 8000, 9000])

# fig = tools.make_subplots(rows=2, cols=2, subplot_titles=('Plot 1', 'Plot 2',
#                                                           'Plot 3', 'Plot 4'))

# fig.append_trace(trace1, 1, 1)
# fig.append_trace(trace2, 1, 2)
# fig.append_trace(trace3, 2, 1)
# fig.append_trace(trace4, 2, 2)

# fig['layout'].update(height=600, width=600, title='Multiple Subplots' +
#                                                   ' with Titles')

# py.iplot(fig, filename='make-subplots-multiple-with-titles')