In [1]:
import pandas as pd

### Download the data and load it into pandas.

You can find the data files [here](https://drive.google.com/file/d/1NY6cmF9Shjw-dD7BD6bNmfcIVz-kQcFR/view?usp=sharing).

In [2]:
# Load the film titles table; index_col=None means no CSV column is used as the row index.
titles_csv = 'data/titles.csv'
titles = pd.read_csv(titles_csv, index_col=None)
titles.head()

Unnamed: 0,title,year
0,The Rising Son,1990
1,The Thousand Plane Raid,1969
2,Crucea de piatra,1993
3,Country,2000
4,Gaiking II,2011


In [3]:
# Load the cast/credits table; index_col=None means no CSV column is used as the row index.
cast_csv = 'data/cast.csv'
cast = pd.read_csv(cast_csv, index_col=None)
cast.head()

Unnamed: 0,title,year,name,type,character,n
0,Closet Monster,2015,Buffy #1,actor,Buffy 4,
1,Suuri illusioni,1985,Homo $,actor,Guests,22.0
2,Battle of the Sexes,2017,$hutter,actor,Bobby Riggs Fan,10.0
3,Secret in Their Eyes,2015,$hutter,actor,2002 Dodger Fan,
4,Steve Jobs,2015,$hutter,actor,1988 Opera House Patron,


### What are the ten most common movie names of all time?

In [9]:
# Tally how often each title string occurs, then show the ten largest tallies.
title_counts = titles["title"].value_counts()
title_counts.head(10)

Hamlet                  18
Carmen                  16
Macbeth                 15
The Outsider            12
Maya                    12
Temptation              11
Kismet                  11
The Three Musketeers    11
Freedom                 11
Othello                 11
Name: title, dtype: int64

### Which three years in the 1930s saw the most films released?

In [10]:
# Add a "decade" column: the release year floored to a multiple of ten.
titles["decade"] = titles["year"].floordiv(10).mul(10)

In [17]:
# Restrict to rows whose decade is 1930, count rows per year, keep the top three.
(
    titles.loc[titles["decade"] == 1930]
    .value_counts(subset="year")
    .head(3)
)

year
1937    1196
1936    1138
1938    1129
dtype: int64

### Count the number of films that have been released in each decade over the history of cinema.

In [29]:
# Rows per decade, kept as a one-column frame whose column is labelled "# of films".
decade_counts = titles.value_counts(subset="decade")
films_by_decade = decade_counts.to_frame(name="# of films")
films_by_decade

Unnamed: 0_level_0,# of films
decade,Unnamed: 1_level_1
2010,67977
2000,38713
1990,22225
1980,20243
1970,18714
1960,17515
1950,12711
1930,10097
1920,8797
1940,8576


### Count the number of "Hamlet" films made in each decade.

In [30]:
# Rows titled exactly "Hamlet", counted per decade.
is_hamlet_title = titles["title"] == "Hamlet"
titles.loc[is_hamlet_title].value_counts(subset="decade")

decade
1910    3
2010    3
1960    2
1970    2
1990    2
2000    2
1920    1
1940    1
1950    1
1980    1
dtype: int64

### Count the number of "Rustler" characters in each decade of the history of film.

In [31]:
# Add a "decade" column to the cast table: the year floored to a multiple of ten.
cast["decade"] = cast["year"].floordiv(10).mul(10)

In [33]:
# Credits whose character is exactly "Rustler", counted per decade.
is_rustler = cast["character"] == "Rustler"
cast.loc[is_rustler].value_counts(subset="decade")

decade
1930    84
1940    18
1970     9
1950     8
1920     6
1990     4
1960     2
2000     2
1980     1
dtype: int64

### Count the number of "Hamlet" characters in each decade.

In [34]:
# Credits whose character is exactly "Hamlet", counted per decade.
(
    cast.loc[cast["character"] == "Hamlet"]
    .value_counts(subset="decade")
)

decade
2010    28
2000    19
1990    11
1970     8
1960     7
1910     6
1930     6
1950     3
1980     2
1920     1
1940     1
dtype: int64

### What are the 11 most common character names in movie history?

In [36]:
# Frequency of every character name; the first eleven entries are the most common.
character_counts = cast["character"].value_counts()
character_counts.iloc[:11]

Himself        19600
Dancer         12096
Extra          10953
Reporter        8154
Doctor          7436
Student         7177
Policeman       6883
Party Guest     6742
Nurse           6676
Bartender       6613
Minor Role      6255
Name: character, dtype: int64

### Who are the 10 people most often credited as "Herself" in film history?

In [43]:
# Credits listed as "Herself", counted per performer name; show the top ten.
credited_as_herself = cast["character"] == "Herself"
cast.loc[credited_as_herself].value_counts(subset="name").head(10)

name
Queen Elizabeth II    11
Joyce Brothers         9
Margaret Thatcher      8
Hillary Clinton        7
Mary Jo Pehl           7
Joan Rivers            6
Sumie Sakai            6
Bunny Yeager           5
Kareena Kapoor         5
Rekha                  5
dtype: int64

### Who are the 10 people most often credited as "Himself" in film history?

In [44]:
# Credits listed as "Himself", counted per performer name; show the top ten.
credited_as_himself = cast["character"] == "Himself"
cast.loc[credited_as_himself].value_counts(subset="name").head(10)

name
Adolf Hitler             100
Richard Nixon             46
Ronald Reagan             39
John F. Kennedy           34
Winston Churchill         24
Benito Mussolini          23
Ron Jeremy                23
George W. Bush            23
Martin Luther King        21
Franklin D. Roosevelt     20
dtype: int64

### Which actors or actresses appeared in the most movies in the year 1945?

In [45]:
# Credit rows dated 1945, counted per performer name; show the five largest counts.
(
    cast.loc[cast["year"] == 1945]
    .value_counts(subset="name")
    .head(5)
)

name
Emmett Vogan       39
Sam (II) Harris    32
Bess Flowers       29
Harold Miller      29
Nolan Leary        27
dtype: int64

### Which actors or actresses appeared in the most movies in the year 1985?

In [58]:
# Credit rows dated 1985, counted per performer name; show the five largest counts.
(
    cast.loc[cast["year"] == 1985]
    .value_counts(subset="name")
    .head(5)
)

name
Mammootty        22
Shakti Kapoor    21
Sukumari         20
Raj Babbar       15
Lou Scheimer     15
dtype: int64

In [59]:
# Same 1985 ranking computed with an explicit groupby.
# Only the "title" column is selected before counting, so the aggregation does
# not compute (and then discard) non-null counts for every other column.
(
    cast[cast["year"].eq(1985)]
    .groupby("name")[["title"]]
    .count()
    .sort_values("title", ascending=False)
    .head()["title"]
)

name
Mammootty        22
Shakti Kapoor    21
Sukumari         20
Raj Babbar       15
Lou Scheimer     15
Name: title, dtype: int64

### How many roles has "Mammootty" played in each year of his career?

In [63]:
# Credits for the name "Mammootty", counted per year and listed in year order.
(
    cast.loc[cast["name"] == "Mammootty"]
    .value_counts(subset="year")
    .sort_index()
)

year
1971     1
1980     2
1981     3
1982     7
1983    18
1984    16
1985    22
1986    18
1987     9
1988     7
1989    10
1990    15
1991     6
1992     3
1993     5
1994     5
1995     5
1996     3
1997     2
1998     4
1999     4
2000     7
2001     3
2002     3
2003     2
2004     8
2005     6
2006     8
2007     7
2008     9
2009    12
2010     9
2011     7
2012     7
2013     5
2014     6
2015     6
2016     4
2017     1
dtype: int64

### What are the 10 most frequent roles that start with the phrase "Patron in"?

In [66]:
# Characters beginning with "Patron in", ranked by frequency (top ten).
# na=False makes rows with a missing character name count as non-matches, so the
# mask is strictly boolean and the row filter cannot fail on NaN entries.
starts_with_patron_in = cast["character"].str.startswith("Patron in", na=False)
cast[starts_with_patron_in].value_counts("character").head(10)

character
Patron in Frisky Rabbit         16
Patron in Chinese Restaurant     9
Patron in the Coffee House       9
Patron in Billiard Parlor        5
Patron in Bar                    4
Patron in restaurant             4
Patron in Club                   3
Patron in Restaurant             3
Patron in cabaret                3
Patron in Audience               2
dtype: int64

### What are the 10 most frequent roles that start with the word "Science"?

In [67]:
# Characters beginning with "Science", ranked by frequency (top ten).
# na=False makes rows with a missing character name count as non-matches, so the
# mask is strictly boolean and the row filter cannot fail on NaN entries.
starts_with_science = cast["character"].str.startswith("Science", na=False)
cast[starts_with_science].value_counts("character").head(10)

character
Science Teacher         60
Science Student          9
Science Fair Student     9
Science Fair Judge       6
Science Reporter         5
Science Promo Cadet      5
Science Club Member      5
Science Kid              5
Science                  4
Science Officer          3
dtype: int64

### Count the number of roles at each n-value that Judi Dench has played over her career.

In [69]:
# Credits for the name "Judi Dench", counted per value of the "n" column.
is_judi_dench = cast["name"] == "Judi Dench"
cast.loc[is_judi_dench].value_counts(subset="n")

n   
1.0     6
3.0     6
2.0     6
7.0     4
4.0     4
6.0     4
12.0    3
5.0     2
13.0    2
16.0    2
18.0    2
8.0     1
9.0     1
20.0    1
23.0    1
26.0    1
29.0    1
40.0    1
dtype: int64

### Count number of n-values of Cary Grant's roles through his career.

In [70]:
# Credits for the name "Cary Grant", counted per value of the "n" column.
is_cary_grant = cast["name"] == "Cary Grant"
cast.loc[is_cary_grant].value_counts(subset="n")

n   
1.0     46
2.0     20
3.0      2
9.0      2
10.0     2
4.0      1
5.0      1
8.0      1
dtype: int64

### Count the number of roles at each n-value that Sidney Poitier has played over the years.

In [71]:
# Credits for the name "Sidney Poitier", counted per value of the "n" column.
is_sidney_poitier = cast["name"] == "Sidney Poitier"
cast.loc[is_sidney_poitier].value_counts(subset="n")

n   
1.0     21
2.0      8
3.0      6
4.0      3
5.0      1
10.0     1
13.0     1
21.0     1
dtype: int64

### How many leading (n=1) roles were available to actors, and how many to actresses, in the 1950s?

In [73]:
# Number of credit rows with n == 1, type "actor", and decade 1950.
lead_actor_1950s = (
    (cast["n"] == 1)
    & (cast["type"] == "actor")
    & (cast["decade"] == 1950)
)
len(cast.loc[lead_actor_1950s])

6616

In [74]:
# Number of credit rows with n == 1, type "actress", and decade 1950.
lead_actress_1950s = (
    (cast["n"] == 1)
    & (cast["type"] == "actress")
    & (cast["decade"] == 1950)
)
len(cast.loc[lead_actress_1950s])

2965

### How many supporting (n=2) roles were available to actors, and how many to actresses, in the 1950s?

In [75]:
# Number of credit rows with n == 2, type "actor", and decade 1950.
supporting_actor_1950s = (
    (cast["n"] == 2)
    & (cast["type"] == "actor")
    & (cast["decade"] == 1950)
)
len(cast.loc[supporting_actor_1950s])

4564

In [77]:
# Number of credit rows with n == 2, type "actress", and decade 1950.
supporting_actress_1950s = (
    (cast["n"] == 2)
    & (cast["type"] == "actress")
    & (cast["decade"] == 1950)
)
len(cast.loc[supporting_actress_1950s])

4556