In [1]:
import pandas as pd

### Download the data and load it into pandas.

You can find the data files [here](https://drive.google.com/file/d/1NY6cmF9Shjw-dD7BD6bNmfcIVz-kQcFR/view?usp=sharing).

In [2]:
# Read the film-title table from the working directory into a DataFrame.
titles = pd.read_csv(filepath_or_buffer='titles.csv')
# Preview the first five rows.
titles.head(n=5)

Unnamed: 0,title,year
0,The Rising Son,1990
1,The Thousand Plane Raid,1969
2,Crucea de piatra,1993
3,Country,2000
4,Gaiking II,2011


In [3]:
# Read the cast/role table from the working directory into a DataFrame.
cast = pd.read_csv(filepath_or_buffer='cast.csv')
# Preview the first five rows.
cast.head(n=5)

Unnamed: 0,title,year,name,type,character,n
0,Closet Monster,2015,Buffy #1,actor,Buffy 4,
1,Suuri illusioni,1985,Homo $,actor,Guests,22.0
2,Battle of the Sexes,2017,$hutter,actor,Bobby Riggs Fan,10.0
3,Secret in Their Eyes,2015,$hutter,actor,2002 Dodger Fan,
4,Steve Jobs,2015,$hutter,actor,1988 Opera House Patron,


### Using groupby(), count the number of films that have been released in each decade in the history of cinema.

In [5]:
from math import ceil

In [7]:
# Floor each release year to the start of its decade (e.g. 1993 -> 1990).
titles['decade'] = titles['year'].floordiv(10).mul(10)

In [10]:
# Count the non-null titles within each decade bucket.
titles.groupby(by='decade')['title'].agg('count')

decade
1890        3
1900       37
1910     6512
1920     8797
1930    10097
1940     8576
1950    12711
1960    17515
1970    18714
1980    20243
1990    22225
2000    38713
2010    67977
2020      210
Name: title, dtype: int64

### Use groupby() to count the number of "Hamlet" films made in each decade.

In [11]:
# Keep only the films whose title is exactly 'Hamlet'.
hamlet = titles.loc[titles['title'].eq('Hamlet')]

In [13]:
# Count the 'Hamlet' films in each decade.
hamlet.groupby(by='decade')['title'].agg('count')

decade
1910    3
1920    1
1940    1
1950    1
1960    2
1970    2
1980    1
1990    2
2000    2
2010    3
Name: title, dtype: int64

### How many leading (n=1) roles were available to actors, and how many to actresses, in each year of the 1950s?

In [14]:
# Leading roles (n == 1) played by actors or actresses in films
# released from 1950 up to, but not including, 1960.
conditions = cast[
    cast['n'].eq(1)
    & cast['type'].isin(['actor', 'actress'])
    & cast['year'].ge(1950)
    & cast['year'].lt(1960)
]

In [20]:
# Count the selected leading roles for every (year, type) pair.
conditions.groupby(by=['year', 'type'])['n'].agg('count')

year  type   
1950  actor      626
      actress    281
1951  actor      647
      actress    281
1952  actor      607
      actress    293
1953  actor      652
      actress    301
1954  actor      634
      actress    310
1955  actor      641
      actress    281
1956  actor      643
      actress    305
1957  actor      735
      actress    301
1958  actor      710
      actress    295
1959  actor      721
      actress    317
Name: n, dtype: int64

### In the 1950s taken as a whole, how many total roles were available to actors, and how many to actresses, for each "n" number 1 through 5?

In [24]:
# Floor each role's film year to the start of its decade (e.g. 1985 -> 1980).
cast['decade'] = cast['year'].floordiv(10).mul(10)

In [32]:
# Roles with billing position n between 1 and 5 (inclusive), played by
# actors or actresses, in films whose decade bucket is 1950.
conditions = cast[
    cast['n'].between(1, 5)
    & cast['type'].isin(['actor', 'actress'])
    & cast['decade'].eq(1950)
]

In [36]:
# Count the selected roles for every (decade, type, n) combination.
conditions.groupby(by=['decade', 'type', 'n'])['n'].agg('count')

decade  type     n  
1950    actor    1.0    6616
                 2.0    4564
                 3.0    5587
                 4.0    5594
                 5.0    5611
        actress  1.0    2965
                 2.0    4556
                 3.0    3148
                 4.0    2849
                 5.0    2544
Name: n, dtype: int64

### Use groupby() to determine how many roles are listed for each of the Pink Panther movies.

### List, in order by year, each of the films in which Frank Oz has played more than 1 role.

In [37]:
# Select every cast row credited to 'Frank Oz'.
filter_df = cast.loc[cast['name'].eq('Frank Oz')]

In [38]:
# Display the Frank Oz subset of `cast` selected in the previous cell.
filter_df

Unnamed: 0,title,year,name,type,character,n,decade
1671990,An American Werewolf in London,1981,Frank Oz,actor,Mr. Collins,13.0,1980
1671991,An American Werewolf in London,1981,Frank Oz,actor,Miss Piggy,13.0,1980
1671992,Blues Brothers 2000,1998,Frank Oz,actor,Warden,4.0,1990
1671993,Follow That Bird,1985,Frank Oz,actor,Cookie Monster,3.0,1980
1671994,Follow That Bird,1985,Frank Oz,actor,Bert,3.0,1980
...,...,...,...,...,...,...,...
1672047,The Muppets Take Manhattan,1984,Frank Oz,actor,Cookie Monster,2.0,1980
1672048,The Muppets Take Manhattan,1984,Frank Oz,actor,Ocean Breeze Soap Board Member,2.0,1980
1672049,The Muppets Take Manhattan,1984,Frank Oz,actor,Sam the Eagle,2.0,1980
1672050,Trading Places,1983,Frank Oz,actor,Corrupt Cop,46.0,1980


In [45]:
# For each (title, year) film, count the non-null characters Frank Oz
# played, then order the films by release year.
result = (
    filter_df
    .groupby(by=['title', 'year'])
    .agg(n_roles=('character', 'count'))
    .sort_values(by='year')
)
result

Unnamed: 0_level_0,Unnamed: 1_level_0,n_roles
title,year,Unnamed: 2_level_1
The Muppet Movie,1979,8
The Blues Brothers,1980,1
Star Wars: Episode V - The Empire Strikes Back,1980,1
An American Werewolf in London,1981,2
The Great Muppet Caper,1981,6
The Dark Crystal,1982,2
Superman III,1983,1
Star Wars: Episode VI - Return of the Jedi,1983,1
Trading Places,1983,1
The Muppets Take Manhattan,1984,7


In [46]:
# Keep only the films in which more than one role was counted.
result.loc[result['n_roles'].gt(1)]

Unnamed: 0_level_0,Unnamed: 1_level_0,n_roles
title,year,Unnamed: 2_level_1
The Muppet Movie,1979,8
An American Werewolf in London,1981,2
The Great Muppet Caper,1981,6
The Dark Crystal,1982,2
The Muppets Take Manhattan,1984,7
Follow That Bird,1985,3
The Muppet Christmas Carol,1992,7
Muppet Treasure Island,1996,4
Muppets from Space,1999,4
The Adventures of Elmo in Grouchland,1999,3


### List each of the characters that Frank Oz has portrayed at least twice.

In [47]:
# Select the rows credited to 'Frank Oz' and count how many times each
# character name appears among them.
filter_df = cast.loc[cast['name'].eq('Frank Oz')]
result = (
    filter_df
    .groupby(by=['character'])
    .agg(n_roles=('character', 'count'))
)

In [48]:
# Display the per-character role counts computed in the previous cell.
result

Unnamed: 0_level_0,n_roles
character,Unnamed: 1_level_1
Animal,6
"Aughra, a Keeper Of Secrets (performer)",1
Bert,3
Brain Surgeon,1
Chamberlain (performer),1
Cookie Monster,3
Corrections Officer,1
Corrupt Cop,1
Doc Hopper's Men,1
Fozzie,1


In [53]:
# Keep characters counted at least twice, most frequent first.
result.loc[result['n_roles'].ge(2)].sort_values(by='n_roles', ascending=False)

Unnamed: 0_level_0,n_roles
character,Unnamed: 1_level_1
Animal,6
Miss Piggy,6
Yoda,6
Sam the Eagle,5
Fozzie Bear,4
Bert,3
Cookie Monster,3
Grover,2
