# Oscar Award Dataset exploration
## 1. Which films have won the most awards?
## 2. Which actors & actresses have won the most awards?
## 3. For each year, which films have the most nominations and awards?


In [6]:
import numpy as np
import seaborn as sns
import pandas as pd

In [11]:
# Load the Oscar awards CSV from the relative assets/datasets path into a DataFrame.
oscar_df = pd.read_csv(
    filepath_or_buffer='./../assets/datasets/the_oscar_award.csv',
)

In [12]:
# Show the loaded table; as the last expression of the cell it is rendered as the cell output.
oscar_df

Unnamed: 0,year_film,year_ceremony,ceremony,category,name,film,winner
0,1927,1928,1,ACTOR,Richard Barthelmess,The Noose,False
1,1927,1928,1,ACTOR,Emil Jannings,The Last Command,True
2,1927,1928,1,ACTRESS,Louise Dresser,A Ship Comes In,False
3,1927,1928,1,ACTRESS,Janet Gaynor,7th Heaven,True
4,1927,1928,1,ACTRESS,Gloria Swanson,Sadie Thompson,False
...,...,...,...,...,...,...,...
10390,2019,2020,92,WRITING (Original Screenplay),Parasite,Parasite,True
10391,2019,2020,92,JEAN HERSHOLT HUMANITARIAN AWARD,Geena Davis,,True
10392,2019,2020,92,HONORARY AWARD,David Lynch,,True
10393,2019,2020,92,HONORARY AWARD,Wes Studi,,True


In [13]:
# List every distinct value found in the 'category' column.
oscar_df['category'].unique()

array(['ACTOR', 'ACTRESS', 'ART DIRECTION', 'CINEMATOGRAPHY',
       'DIRECTING (Comedy Picture)', 'DIRECTING (Dramatic Picture)',
       'ENGINEERING EFFECTS', 'OUTSTANDING PICTURE',
       'UNIQUE AND ARTISTIC PICTURE', 'WRITING (Adaptation)',
       'WRITING (Original Story)', 'WRITING (Title Writing)',
       'SPECIAL AWARD', 'DIRECTING', 'WRITING', 'OUTSTANDING PRODUCTION',
       'SOUND RECORDING', 'SHORT SUBJECT (Cartoon)',
       'SHORT SUBJECT (Comedy)', 'SHORT SUBJECT (Novelty)',
       'ASSISTANT DIRECTOR', 'FILM EDITING', 'MUSIC (Scoring)',
       'MUSIC (Song)', 'DANCE DIRECTION', 'WRITING (Screenplay)',
       'ACTOR IN A SUPPORTING ROLE', 'ACTRESS IN A SUPPORTING ROLE',
       'SHORT SUBJECT (Color)', 'SHORT SUBJECT (One-reel)',
       'SHORT SUBJECT (Two-reel)', 'IRVING G. THALBERG MEMORIAL AWARD',
       'MUSIC (Original Score)', 'CINEMATOGRAPHY (Black-and-White)',
       'CINEMATOGRAPHY (Color)', 'SPECIAL EFFECTS',
       'ART DIRECTION (Black-and-White)', 'ART DIRECT

In [46]:
# Keep only the rows whose category is exactly 'OUTSTANDING PICTURE'.
films_df = oscar_df.loc[oscar_df['category'].eq('OUTSTANDING PICTURE')]
# NOTE(review): this count runs over every row of oscar_df, not over the
# filtered films_df built on the previous line -- confirm which was intended.
oscar_df.film.value_counts()

In [49]:
# Select the rows flagged as winners, then count how many such rows each film title has.
win_df = oscar_df.loc[oscar_df['winner'].eq(True)]
win_df.film.value_counts()

Titanic                                          12
The Lord of the Rings: The Return of the King    11
Ben-Hur                                          11
West Side Story                                  10
The English Patient                               9
                                                 ..
A Close Shave                                     1
The War of the Worlds                             1
Declaration of Independence                       1
Mona Lisa Descending a Staircase                  1
When Magoo Flew                                   1
Name: film, Length: 1273, dtype: int64

In [55]:
# Number of rows per film title across the whole table (winning or not).
oscar_df.film.value_counts()

A Star Is Born                     25
Titanic                            16
Moulin Rouge                       15
Mutiny on the Bounty               15
Little Women                       14
                                   ..
Rehearsing a Dream                  1
Nine Lives                          1
White Shadows in the South Seas     1
Searching for Sugar Man             1
The Rock                            1
Name: film, Length: 4833, dtype: int64

In [50]:
# Pick the 'film' column for winning rows in one .loc selection, then count rows per title.
oscar_df.loc[oscar_df.winner == True, 'film'].value_counts()

Titanic                                          12
The Lord of the Rings: The Return of the King    11
Ben-Hur                                          11
West Side Story                                  10
The English Patient                               9
                                                 ..
A Close Shave                                     1
The War of the Worlds                             1
Declaration of Independence                       1
Mona Lisa Descending a Staircase                  1
When Magoo Flew                                   1
Name: film, Length: 1273, dtype: int64

In [63]:
# Wrap the per-title row counts in a DataFrame with a single 'count' column,
# indexed by film title.
films_df = pd.DataFrame(data={'count': oscar_df.film.value_counts()})

In [98]:
# Build a two-column table ('film', 'nominated') holding the number of rows per title.
# NOTE(review): rows are grouped by the title string only, so separate
# productions that share a title are pooled into a single count.
nominated_films = (
    oscar_df['film']
    .value_counts()
    .rename_axis('film')
    .reset_index(name='nominated')
)

In [99]:
# Show the per-title nomination counts as the cell output.
nominated_films

Unnamed: 0,film,nominated
0,A Star Is Born,25
1,Titanic,16
2,Moulin Rouge,15
3,Mutiny on the Bounty,15
4,Little Women,14
...,...,...
4828,Rehearsing a Dream,1
4829,Nine Lives,1
4830,White Shadows in the South Seas,1
4831,Searching for Sugar Man,1


In [100]:
# Build a two-column table ('film', 'won') holding the number of winning rows per title.
# NOTE(review): rows are grouped by the title string only, so separate
# productions that share a title are pooled into a single count.
awarded_films = (
    oscar_df.loc[oscar_df['winner'] == True, 'film']
    .value_counts()
    .rename_axis('film')
    .reset_index(name='won')
)

In [101]:
# Show the per-title win counts as the cell output.
awarded_films

Unnamed: 0,film,won
0,Titanic,12
1,The Lord of the Rings: The Return of the King,11
2,Ben-Hur,11
3,West Side Story,10
4,The English Patient,9
...,...,...
1268,A Close Shave,1
1269,The War of the Worlds,1
1270,Declaration of Independence,1
1271,Mona Lisa Descending a Staircase,1


In [102]:
# Outer-join the two per-title tables on 'film' so each title carries both counts;
# a title missing from one table gets NaN in that table's column.
merged_df = nominated_films.merge(awarded_films, how='outer', on='film')

In [112]:
# Titles without any winning row have NaN in 'won' after the outer merge; count them as 0.
# The filled column is assigned back rather than calling fillna(inplace=True) on
# the merged_df['won'] selection: that chained in-place form emits a warning on
# recent pandas and leaves merged_df unchanged under copy-on-write.
merged_df['won'] = pd.to_numeric(merged_df['won'].fillna(0), downcast='integer')
# Downcast both count columns to the smallest integer dtype that can hold them.
merged_df['nominated'] = pd.to_numeric(merged_df['nominated'], downcast='integer')
# Show the cleaned table as the cell output.
merged_df

Unnamed: 0,film,nominated,won
0,A Star Is Born,25,3
1,Titanic,16,12
2,Moulin Rouge,15,4
3,Mutiny on the Bounty,15,1
4,Little Women,14,3
...,...,...,...
4828,Rehearsing a Dream,1,0
4829,Nine Lives,1,0
4830,White Shadows in the South Seas,1,1
4831,Searching for Sugar Man,1,1


In [114]:
# Write the combined nominations/wins table to awarded_films.csv (relative path).
merged_df.to_csv(path_or_buf="awarded_films.csv")

In [116]:
# Sanity check: gather every row that still has a missing value in any column.
df1 = merged_df.loc[merged_df.isnull().any(axis='columns')]
df1

Unnamed: 0,film,nominated,won
