# Simpsons - Soluciones

In [1]:
import pandas as pd

## Leemos todos los ficheros y nos aseguramos de que están bien leídos

In [2]:
# Load the four semicolon-separated CSV files into DataFrames, reading them in
# the order scripts, characters, locations, episodes.
scripts, characters, locations, episodes = (
    pd.read_csv(csv_file, sep=';')
    for csv_file in ('scripts.csv', 'characters.csv', 'locations.csv', 'episodes.csv')
)

## ¿Cuál es la temporada con más episodios?

In [3]:
# Count the episodes in each season, then show the five largest counts.
episodes_per_season = episodes.groupby('season').size()
episodes_per_season.sort_values(ascending=False).iloc[:5]

season
6    25
7    25
8    25
9    25
3    24
dtype: int64

## ¿Cuál es la primera frase de la serie? ¿Quién la dice?

In [4]:
# Restrict the script to episode 1 and keep only the rows flagged as spoken lines.
first_episode = scripts.loc[scripts['episode_id'] == 1]
first_episode_lines = first_episode.loc[first_episode['speaking_line'] == True]

# Join with the characters table so each line carries the speaker's name.
first_episode_lines = first_episode_lines.merge(
    characters, left_on='character_id', right_on='id'
)

# The line with the smallest timestamp is the first one spoken.
earliest_line = first_episode_lines.sort_values('timestamp_in_ms').iloc[:1]
earliest_line[['spoken_words', 'name']]

Unnamed: 0,spoken_words,name
0,"Ooo, careful, Homer.",Marge Simpson


## ¿Cuáles son los 10 personajes que más hablan en los episodios?

In [5]:
# Only rows flagged as spoken lines count towards a character's total.
script_lines = scripts.loc[scripts['speaking_line'] == True]

# Number of spoken lines per character, as a two-column frame.
lines_by_character = (
    script_lines
    .groupby('character_id')
    .size()
    .reset_index(name='lines')
)

# Keep the ten characters with the most lines and look up their names.
top_characters = lines_by_character.sort_values('lines', ascending=False).iloc[:10]
top_characters = top_characters.merge(characters, left_on='character_id', right_on='id')
top_characters[['name', 'lines']]

Unnamed: 0,name,lines
0,Homer Simpson,28059
1,Marge Simpson,13233
2,Bart Simpson,13145
3,Lisa Simpson,10854
4,C. Montgomery Burns,3116
5,Moe Szyslak,2808
6,Seymour Skinner,2381
7,Ned Flanders,2007
8,Grampa Simpson,1875
9,Milhouse Van Houten,1798


## ¿Cuáles son las 10 localizaciones más recurrentes en los episodios?

In [6]:
# Number of script rows recorded at each location, as a two-column frame.
used_locations = (
    scripts
    .groupby('location_id')
    .size()
    .reset_index(name='times')
)

# Keep the ten most frequent locations and look up their names.
top_locations = used_locations.sort_values('times', ascending=False).iloc[:10]
top_locations = top_locations.merge(locations, left_on='location_id', right_on='id')
top_locations[['name', 'times']]

Unnamed: 0,name,times
0,Simpson Home,34952
1,Springfield Elementary School,7047
2,Moe's Tavern,4599
3,Springfield Nuclear Power Plant,3589
4,Kwik-E-Mart,1476
5,First Church of Springfield,1416
6,Simpson Living Room,1378
7,Springfield Street,1301
8,Springfield,1301
9,Simpson Car,1234


## ¿Cuál es el mejor episodio según IMDB?

In [7]:
# Five highest-rated episodes on IMDB, showing only the title and the rating.
episode_ratings = episodes[['title', 'imdb_rating']]
episode_ratings.sort_values('imdb_rating', ascending=False).iloc[:5]

Unnamed: 0,title,imdb_rating
537,Homer's Enemy,9.2
535,You Only Move Twice,9.2
323,The City of New York vs. Homer Simpson,9.1
534,Who Shot Mr. Burns? (Part One),9.1
261,Last Exit to Springfield,9.0


## ¿Cuál es la mejor temporada (en media) según IMDB?

In [8]:
# Mean IMDB rating of each season, then the five seasons with the highest mean.
mean_rating_by_season = episodes.groupby('season')['imdb_rating'].mean()
mean_rating_by_season.sort_values(ascending=False).iloc[:5]

season
5    8.336364
7    8.324000
6    8.312000
4    8.268182
8    8.220000
Name: imdb_rating, dtype: float64

## ¿Cuál es la localización preferida de Bart?

In [9]:
# List the characters whose name contains 'Bart' (lowest ids first) so the id
# of the character we want can be read off the result.
bart_name_mask = characters['name'].str.contains('Bart')
characters[bart_name_mask].sort_values('id').iloc[:5]

Unnamed: 0,id,name,normalized_name,gender
71,8,Bart Simpson,bart simpson,m
221,233,Bart's Voice,barts voice,
487,513,Bart's Head,barts head,
640,659,Bartender,bartender,
641,660,One-eyed Bartender,one-eyed bartender,


In [10]:
# Script rows attributed to character id 8 (listed as "Bart Simpson" in the
# character lookup output above).
bart_lines = scripts.loc[scripts['character_id'] == 8]

# Number of those rows per location, as a two-column frame.
bart_locations = (
    bart_lines
    .groupby('location_id')
    .size()
    .reset_index(name='times')
)

# Keep the ten most frequent locations and look up their names.
top_locations = bart_locations.sort_values('times', ascending=False).iloc[:10]
top_locations = top_locations.merge(locations, left_on='location_id', right_on='id')
top_locations[['name', 'times']]

Unnamed: 0,name,times
0,Simpson Home,4132
1,Springfield Elementary School,904
2,Bart's Bedroom,272
3,Bart's Treehouse,224
4,Simpson Living Room,142
5,Springfield Street,141
6,Simpson Car,134
7,Lisa's Bedroom,106
8,Simpson Kitchen,99
9,Park,95


## ¿Cuál es el episodio más visto en USA?

In [11]:
# Five episodes with the largest US audience, showing only title and viewers.
episode_viewers = episodes[['title', 'us_viewers_in_millions']]
episode_viewers.sort_values('us_viewers_in_millions', ascending=False).iloc[:5]

Unnamed: 0,title,us_viewers_in_millions
2,"Bart Gets an ""F""",33.6
217,Life on the Fast Lane,33.5
218,The Crepes of Wrath,31.2
1,Krusty Gets Busted,30.4
0,Homer's Night Out,30.3


## ¿Cuál es el personaje que más veces habla el primero en los episodios?

In [12]:
# Count how many episodes each character opens, i.e. delivers the first
# spoken line of the episode.
speaking_lines = scripts[scripts.speaking_line == True]

# Pick the chronologically first spoken line of every episode as a whole row.
# groupby('episode_id').first() is not used here because it returns the first
# non-null value of each column independently (so an opening line with a
# missing character_id would be replaced by a later speaker) and because it
# depends on the CSV row order rather than on the timestamp.
# mergesort is stable, so lines sharing a timestamp keep their file order.
first_lines = (
    speaking_lines
    .dropna(subset=['episode_id'])  # groupby ignored rows without an episode too
    .sort_values('timestamp_in_ms', kind='mergesort')
    .drop_duplicates(subset='episode_id', keep='first')
)

# Number of episodes opened by each character.
first_characters = first_lines.groupby('character_id').size().reset_index()
first_characters.columns = ['character_id', 'times']

# Keep the ten most frequent openers and look up their names.
top_characters = first_characters.sort_values('times', ascending=False).head(10)
top_characters = pd.merge(top_characters, characters, left_on='character_id', right_on='id')
top_characters[['name', 'times']]

Unnamed: 0,name,times
0,Homer Simpson,106
1,Marge Simpson,58
2,Bart Simpson,56
3,Lisa Simpson,45
4,Milhouse Van Houten,12
5,Moe Szyslak,11
6,Kent Brockman,11
7,Seymour Skinner,10
8,Announcer,10
9,Edna Krabappel-Flanders,8


## ¿Cuál es el episodio con más personajes involucrados? ¿Y el que menos?

In [13]:
# Number of distinct character ids appearing in each episode's script rows.
characters_by_episode = (
    scripts
    .groupby('episode_id')['character_id']
    .nunique()
    .reset_index(name='num_characters')
)

# Attach the episode metadata so titles can be shown.
characters_by_episode = characters_by_episode.merge(
    episodes, left_on='episode_id', right_on='id'
)

# Five episodes with the most distinct characters.
most_characters = characters_by_episode.sort_values('num_characters', ascending=False).iloc[:5]
most_characters[['title', 'num_characters']]

Unnamed: 0,title,num_characters
214,"Sunday, Cruddy Sunday",68
79,Marge in Chains,64
294,Large Marge,62
342,Homer and Ned's Hail Mary Pass,62
319,'Tis the Fifteenth Season,60


In [14]:
# Five episodes with the fewest distinct characters (ascending sort).
character_counts = characters_by_episode[['title', 'num_characters']]
character_counts.sort_values('num_characters').iloc[:5]

Unnamed: 0,title,num_characters
18,Dead Putting Society,11
15,Treehouse of Horror,11
188,"All Singing, All Dancing",13
144,Homer the Smithers,15
8,Life on the Fast Lane,17


## ¿Cuál es el episodio con más líneas de diálogo? ¿Y el que menos?

In [15]:
# Only rows flagged as spoken lines count as dialogue.
lines = scripts.loc[scripts['speaking_line'] == True]

# Number of spoken lines per episode, as a two-column frame.
lines_by_episode = (
    lines
    .groupby('episode_id')
    .size()
    .reset_index(name='num_lines')
)

# Attach the episode metadata so titles can be shown.
lines_by_episode = lines_by_episode.merge(episodes, left_on='episode_id', right_on='id')

# Five episodes with the most spoken lines.
most_lines = lines_by_episode.sort_values('num_lines', ascending=False).iloc[:5]
most_lines[['title', 'num_lines']]

Unnamed: 0,title,num_lines
227,Brother's Little Helper,334
0,Simpsons Roasting on an Open Fire,333
27,"Oh Brother, Where Art Thou?",331
33,Three Men and a Comic Book,318
24,The Way We Was,311


In [16]:
# Five episodes with the fewest spoken lines (ascending sort).
line_counts = lines_by_episode[['title', 'num_lines']]
line_counts.sort_values('num_lines').iloc[:5]

Unnamed: 0,title,num_lines
188,"All Singing, All Dancing",78
335,Treehouse of Horror XV,113
285,Gump Roast,127
307,Three Gays of the Condo,128
563,Sky Police,149


## ¿En qué episodio aparece por primera vez Krusty?

In [17]:
# List the characters whose name contains 'Krusty' (lowest ids first) so the
# id of the character we want can be read off the result.
krusty_name_mask = characters['name'].str.contains('Krusty')
characters[krusty_name_mask].sort_values('id').iloc[:5]

Unnamed: 0,id,name,normalized_name,gender
6661,139,Krusty the Clown,krusty the clown,m
206,191,Krusty Doll,krusty doll,m
874,897,Zombie Krusty,zombie krusty,
1108,1136,Krusty Announcer,krusty announcer,
1589,1623,Krusty Burglar,krusty burglar,


In [18]:
# Script rows attributed to character id 139 (listed as "Krusty the Clown" in
# the character lookup output above), ordered by episode id.
krusty_rows = scripts.loc[scripts['character_id'] == 139].sort_values('episode_id')

# Keep the lowest episode id as a one-element Series named 'episode_id'.
first_krusty_episode = krusty_rows['episode_id'].iloc[:1]

# Look up that episode's metadata and show where it sits in the series.
first_krusty_episode = pd.merge(
    first_krusty_episode, episodes, left_on='episode_id', right_on='id'
)
first_krusty_episode[['title', 'season', 'number_in_season']]

Unnamed: 0,title,season,number_in_season
0,The Telltale Head,1,8
