In [288]:
import numpy as np
import pandas as pd
import requests

In [289]:
data_path = "./lord-of-the-rings-data/"

In [290]:
#load characters data
chars = pd.read_csv(data_path + 'lotr_characters.csv')
chars.head()

Unnamed: 0,birth,death,gender,hair,height,name,race,realm,spouse
0,,,Female,,,Adanel,Men,,Belemir
1,TA 2978,"February 26 ,3019",Male,Dark (book) Light brown (movie),,Boromir,Men,,
2,,"March ,3019",Male,,,Lagduf,Orcs,,
3,TA 280,TA 515,Male,,,Tarcil,Men,Arnor,Unnamed wife
4,,,Male,,,Fire-drake of Gondolin,Dragon,,


**Data Cleaning**

In [326]:
#map the races
race_mappings = {
    "Dragons" : "Dragon",
    "Dwarven" : "Dwarf",
    "Dwarves" : "Dwarf",
    "Eagles" : "Eagle",
    "Elves" : "Elf",
    "Hobbits" : "Hobbit",
    "Orcs" : "Orc",
    "Balrogs" : "Balrog"
}
chars['race'].replace(race_mappings, regex = True, inplace = True)
chars.head()

Unnamed: 0,birth,death,gender,hair,height,name,race,realm,spouse,birth_era,death_era
0,,,Female,,,Adanel,Men,,Belemir,,
1,TA 2978,"February 26 ,3019",Male,Dark (book) Light brown (movie),,Boromir,Men,,,TA,Fe
2,,"March ,3019",Male,,,Lagduf,Orc,,,,Ma
3,TA 280,TA 515,Male,,,Tarcil,Men,Arnor,Unnamed wife,TA,TA
4,,,Male,,,Fire-drake of Gondolin,Dragon,,,,


In [327]:
#get some stats about races with grouping
race_counts = chars.groupby('race').size().reset_index(name='size')
race_counts.sort_values('size', ascending = False).head(5)

Unnamed: 0,race,size
23,Men,388
19,Hobbit,142
8,Elf,106
6,Dwarf,44
0,Ainur,24


In [328]:
#map the gender and get some stats
gender_mappings = {
    "Males" : "Male",
    "male" : "Male",
    "Most likely male" : "Male"
}
chars['gender'].replace(gender_mappings, inplace = True)

gender_counts = chars.groupby('gender').size().reset_index(name='size')
gender_counts.sort_values('size', ascending = False)

Unnamed: 0,gender,size
1,Male,633
0,Female,135


In [329]:
#same for hair color
hair_counts = chars.groupby('hair').size().reset_index(name='size')
hair_counts.sort_values('size', ascending = False).head()

Unnamed: 0,hair,size
26,Dark,29
45,Golden,21
5,Black,12
16,Brown,6
60,,5


In [330]:
#and realm...
realm_counts = chars.groupby('realm').size().reset_index(name='size')
realm_counts.sort_values('size', ascending = False).head()

Unnamed: 0,realm,size
22,Gondor,33
47,Númenor,28
56,Rohan,20
7,Arthedain,9
4,Arnor,7


*Date for birth and death are not categorical. In order to get some insight, we are categorizing them based on the era*

In [302]:
timeline_mappings = {
    "Third Age" : "TA",
    "Second Age" : "SA",
    "First Age" : "FA",
    "Years of the Trees" : "YT",
    "Fourth Age" : "FO"
}
def birth_death(d, cname):
    d[cname].replace(timeline_mappings, regex = True, inplace = True)
    d.loc[d[cname].str.startswith("Late", na=False), cname] = d[cname].str[-2:]
    d.loc[d[cname].str.startswith("Mid", na=False), cname] = d[cname].str[-2:]
    d.loc[d[cname].str.startswith("Early", na=False), cname] = d[cname].str[-2:]
    d.loc[d[cname].str.startswith("Perhaps", na=False), cname] = d[cname].str.replace("Perhaps ,", "")
    d.loc[d[cname].str.contains("Arda", na=False), cname] = "BA"
    era = cname + '_era'
    d[era] = d[cname].str[:2]
    d.loc[d[era] == 'Un', era] = 'Unknown'
    if cname == 'death':
        d.loc[d[era] == 'Im', era] = 'Immortal'
    return d

In [331]:
#grouping data based on birth and death era to get some insight about lifespan
chars = birth_death(chars, 'birth')
chars = birth_death(chars, 'death')
birth_era_counts = chars.groupby(['birth_era', 'death_era']).size().reset_index(name='size')
birth_era_counts.sort_values('size', ascending = False).head()

Unnamed: 0,birth_era,death_era,size
62,TA,TA,211
21,FA,FA,60
39,SA,SA,41
55,TA,FO,37
10,BA,Immortal,21


**Playing & showing some facts from data**

Battle of the Pelennor Fields is at March 15, 3019. Let's see who has died in this battle.

In [319]:
chars[(chars.death.str.contains('3019', na=False)) & \
      (chars.death.str.contains('March', na=False)) & \
      (chars.death.str.contains('15', na=False))][['name', 'birth', 'death']]

Unnamed: 0,name,birth,death
59,Fastred (Pelennor Fields),TA,"March 15 ,3019"
164,Horn,"Possibly late ,TA","March 15 ,TA 3019"
222,Dúnhere,TA,"March 15 ,3019"
226,Déorwine,TA,"March 15 ,3019"
260,Hirluin,TA,"March 15 ,3019"
266,Herubrand,TA,"March 15 ,3019"
269,Herefara,TA,"March 15 ,3019"
277,Harding,TA,"March 15 ,3019"
291,Halbarad,TA 2916,"March 15 ,3019"
314,Denethor II,TA 2930,"March 15 ,3019"


Getting information about some key characters

In [325]:
chars[chars.name == 'Gandalf']

Unnamed: 0,birth,death,gender,hair,height,name,race,realm,spouse,birth_era,death_era
667,BA,"January 253019 ,Battle of the Peak immortal",Male,"Grey, later white",,Gandalf,Maiar,,,BA,Ja


In [332]:
chars[chars.name == 'Legolas']

Unnamed: 0,birth,death,gender,hair,height,name,race,realm,spouse,birth_era,death_era
661,,"Still alive, departed to ,Aman ,FO 120",Male,"Uncertain (book), Blonde (films)",,Legolas,Elf,,,,St


In [333]:
chars[chars.name == 'Gollum']

Unnamed: 0,birth,death,gender,hair,height,name,race,realm,spouse,birth_era,death_era
473,TA 2430,"March 25 ,3019",Male,,,Gollum,Hobbit,,,TA,Ma


In [339]:
chars[chars.name == 'Aragorn II Elessar']

Unnamed: 0,birth,death,gender,hair,height,name,race,realm,spouse,birth_era,death_era
873,"March 1 ,2931",FO 120,Male,Dark,"198cm (6'6"")",Aragorn II Elessar,Men,"Reunited Kingdom,Arnor,Gondor",Arwen,Ma,FO
