In [1]:
# data source: https://www.kaggle.com/datasets/mylesoneill/game-of-thrones?select=character-deaths.csvb

In [2]:
import pandas as pd
import numpy as np
import os

In [3]:
import plotly as py
import plotly.graph_objs as go
# from plotly.graph_objs import *
from plotly.offline import download_plotlyjs, init_notebook_mode, plot, iplot
init_notebook_mode(connected=True)

In [4]:
# read in dataset
# I'm using a data set of all the deaths from A Song of Ice and Fire
GoT = pd.read_csv("data/asoiaf_book_deaths.csv")


In [5]:
# info on data set
print(GoT.columns)
print(GoT.shape)
pd.DataFrame(GoT.dtypes, columns=["DataTypes"])

Index(['Name', 'Allegiances', 'Death Year', 'Book of Death', 'Death Chapter',
       'Book Intro Chapter', 'Gender', 'Nobility', 'GoT', 'CoK', 'SoS', 'FfC',
       'DwD'],
      dtype='object')
(917, 13)


Unnamed: 0,DataTypes
Name,object
Allegiances,object
Death Year,float64
Book of Death,float64
Death Chapter,float64
Book Intro Chapter,float64
Gender,int64
Nobility,int64
GoT,int64
CoK,int64


In [6]:
# pull in first 5 rows
GoT[GoT["Name"]=="Jon Snow"]

Unnamed: 0,Name,Allegiances,Death Year,Book of Death,Death Chapter,Book Intro Chapter,Gender,Nobility,GoT,CoK,SoS,FfC,DwD
409,Jon Snow,Night's Watch,,,,1.0,1,1,1,1,1,1,1


In [7]:
# B. Which column has the most missing data? Sort the columns by sum of null values
GoT.isnull().sum().sort_values(ascending=False)

Death Chapter         618
Death Year            612
Book of Death         610
Book Intro Chapter     12
Name                    0
Allegiances             0
Gender                  0
Nobility                0
GoT                     0
CoK                     0
SoS                     0
FfC                     0
DwD                     0
dtype: int64

In [8]:
# C. Looks like Death Chapter is our biggest problem, along with Death Year, and Book of Death
# Use a combination of filtering, isnull, and sum to count how many rows are missing all 3 columns
GoT[GoT['Death Chapter'].isnull()]['Death Year'].isnull().sum()

608

In [9]:
## let's make all null values equal to 666 --- these are characters that have not died
GoT['Death Chapter'].fillna(666, inplace=True)
GoT['Death Year'].fillna(666, inplace=True)
GoT['Book of Death'].fillna(666, inplace=True)
GoT['Book Intro Chapter'].fillna(666, inplace=True)
GoT.isnull().sum()

Name                  0
Allegiances           0
Death Year            0
Book of Death         0
Death Chapter         0
Book Intro Chapter    0
Gender                0
Nobility              0
GoT                   0
CoK                   0
SoS                   0
FfC                   0
DwD                   0
dtype: int64

In [10]:
## remove Null/666's values -- these characters are still alive
GoT_dead = GoT[GoT['Death Chapter'] != 666]

In [11]:
# stats on each column
GoT_dead.describe()

Unnamed: 0,Death Year,Book of Death,Death Chapter,Book Intro Chapter,Gender,Nobility,GoT,CoK,SoS,FfC,DwD
count,299.0,299.0,299.0,299.0,299.0,299.0,299.0,299.0,299.0,299.0,299.0
mean,304.043478,7.334448,40.070234,35.257525,0.889632,0.364548,0.344482,0.424749,0.421405,0.100334,0.167224
std,42.224251,54.157568,20.47027,66.618446,0.313873,0.48211,0.475996,0.495134,0.494612,0.300949,0.373801
min,297.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
25%,299.0,2.0,25.5,11.5,1.0,0.0,0.0,0.0,0.0,0.0,0.0
50%,299.0,3.0,39.0,29.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0
75%,300.0,4.0,57.0,43.0,1.0,1.0,1.0,1.0,1.0,0.0,0.0
max,666.0,666.0,80.0,666.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0


In [12]:
# print all Allegiances
print(GoT_dead["Allegiances"].unique())

['None' 'House Greyjoy' "Night's Watch" 'House Stark' 'Baratheon'
 'House Lannister' 'Lannister' 'Stark' 'Greyjoy' 'Wildling' 'House Tully'
 'House Targaryen' 'Targaryen' 'Arryn' 'House Baratheon' 'House Arryn'
 'House Tyrell' 'Tully' 'House Martell' 'Martell' 'Tyrell']


In [13]:
# deaths by Allegiance 
GoT_dead["Allegiances"].value_counts()

None               73
Night's Watch      56
Stark              27
Wildling           23
House Stark        19
Baratheon          18
Lannister          18
House Greyjoy      14
House Lannister    11
Greyjoy             8
Targaryen           5
House Baratheon     5
House Tully         5
House Targaryen     4
Tully               4
Arryn               3
Martell             2
House Arryn         1
House Tyrell        1
House Martell       1
Tyrell              1
Name: Allegiances, dtype: int64

In [18]:
# which allegiance has the most deaths in the first 10 chapters? 
first_ten = GoT_dead[GoT_dead["Book Intro Chapter"] <= 10]
first_ten.head()
#df = first_ten.groupby('Allegiances')['Death count'].sum()
df2 = first_ten.groupby('Allegiances').count().reset_index()

deaths_by_alliance = df2[["Allegiances","Death count"]]
#deaths_by_alliance.sort_values("Death count", ascending=False)

Unnamed: 0,Allegiances,Name,Death Year,Book of Death,Death Chapter,Book Intro Chapter,Gender,Nobility,GoT,CoK,SoS,FfC,DwD
0,Baratheon,8,8,8,8,8,8,8,8,8,8,8,8
1,Greyjoy,1,1,1,1,1,1,1,1,1,1,1,1
2,House Baratheon,3,3,3,3,3,3,3,3,3,3,3,3
3,House Lannister,2,2,2,2,2,2,2,2,2,2,2,2
4,House Stark,9,9,9,9,9,9,9,9,9,9,9,9
5,House Targaryen,1,1,1,1,1,1,1,1,1,1,1,1
6,Lannister,3,3,3,3,3,3,3,3,3,3,3,3
7,Martell,2,2,2,2,2,2,2,2,2,2,2,2
8,Night's Watch,19,19,19,19,19,19,19,19,19,19,19,19
9,,9,9,9,9,9,9,9,9,9,9,9,9


In [None]:
# data objects
go.Bar
# figure objects
#go.Format
go.Figure


In [None]:
deaths_by_alliance

In [1]:

plot = deaths_by_alliance.set_index("Allegiances").sort_values("Death count", ascending=False).plot(kind = 'bar',color="black");
plot.set_title("Deaths by allegiance in first 10 chapters of ASOIAF")
plot.set_ylabel("# of deaths")
plot

NameError: name 'deaths_by_alliance' is not defined