# Oscar Award Dataset exploration
## 1. Which films have won the most awards
## 2. Which actors & actresses have won the most awards
## 3. At each year, which films have the most nominations and awards?


In [3]:
import numpy as np
import seaborn as sns
import pandas as pd

In [8]:
oscar_df = pd.read_csv('../src/datasets/the_oscar_award.csv')

In [9]:
oscar_df

Unnamed: 0,year_film,year_ceremony,ceremony,category,name,film,winner
0,1927,1928,1,ACTOR,Richard Barthelmess,The Noose,False
1,1927,1928,1,ACTOR,Emil Jannings,The Last Command,True
2,1927,1928,1,ACTRESS,Louise Dresser,A Ship Comes In,False
3,1927,1928,1,ACTRESS,Janet Gaynor,7th Heaven,True
4,1927,1928,1,ACTRESS,Gloria Swanson,Sadie Thompson,False
...,...,...,...,...,...,...,...
10390,2019,2020,92,WRITING (Original Screenplay),Parasite,Parasite,True
10391,2019,2020,92,JEAN HERSHOLT HUMANITARIAN AWARD,Geena Davis,,True
10392,2019,2020,92,HONORARY AWARD,David Lynch,,True
10393,2019,2020,92,HONORARY AWARD,Wes Studi,,True


In [10]:
# taking a peek at unique award categories
oscar_df.category.unique()

array(['ACTOR', 'ACTRESS', 'ART DIRECTION', 'CINEMATOGRAPHY',
       'DIRECTING (Comedy Picture)', 'DIRECTING (Dramatic Picture)',
       'ENGINEERING EFFECTS', 'OUTSTANDING PICTURE',
       'UNIQUE AND ARTISTIC PICTURE', 'WRITING (Adaptation)',
       'WRITING (Original Story)', 'WRITING (Title Writing)',
       'SPECIAL AWARD', 'DIRECTING', 'WRITING', 'OUTSTANDING PRODUCTION',
       'SOUND RECORDING', 'SHORT SUBJECT (Cartoon)',
       'SHORT SUBJECT (Comedy)', 'SHORT SUBJECT (Novelty)',
       'ASSISTANT DIRECTOR', 'FILM EDITING', 'MUSIC (Scoring)',
       'MUSIC (Song)', 'DANCE DIRECTION', 'WRITING (Screenplay)',
       'ACTOR IN A SUPPORTING ROLE', 'ACTRESS IN A SUPPORTING ROLE',
       'SHORT SUBJECT (Color)', 'SHORT SUBJECT (One-reel)',
       'SHORT SUBJECT (Two-reel)', 'IRVING G. THALBERG MEMORIAL AWARD',
       'MUSIC (Original Score)', 'CINEMATOGRAPHY (Black-and-White)',
       'CINEMATOGRAPHY (Color)', 'SPECIAL EFFECTS',
       'ART DIRECTION (Black-and-White)', 'ART DIRECT

## 1. Film Nomations and wins

In [45]:
oscar_df['film'].value_counts()

A Star Is Born                                                            25
Titanic                                                                   16
Mutiny on the Bounty                                                      15
Moulin Rouge                                                              15
Cleopatra                                                                 14
                                                                          ..
The Preacher's Wife                                                        1
Forbidden Planet                                                           1
Conquer by the Clock                                                       1
The Children of Soong Ching Ling                                           1
Eyes on the Prize: America's Civil Rights Years/Bridge to Freedom 1965     1
Name: film, Length: 4833, dtype: int64

In [12]:
win_df = oscar_df[(oscar_df.winner == True)]
win_df['film'].value_counts()

Titanic                                          12
The Lord of the Rings: The Return of the King    11
Ben-Hur                                          11
West Side Story                                  10
The English Patient                               9
                                                 ..
Interstellar                                      1
The Milagro Beanfield War                         1
San Francisco                                     1
The Red Violin                                    1
Knighty Knight Bugs                               1
Name: film, Length: 1273, dtype: int64

In [13]:
oscar_df['film'].value_counts()

A Star Is Born                                                            25
Titanic                                                                   16
Mutiny on the Bounty                                                      15
Moulin Rouge                                                              15
Cleopatra                                                                 14
                                                                          ..
The Preacher's Wife                                                        1
Forbidden Planet                                                           1
Conquer by the Clock                                                       1
The Children of Soong Ching Ling                                           1
Eyes on the Prize: America's Civil Rights Years/Bridge to Freedom 1965     1
Name: film, Length: 4833, dtype: int64

In [14]:
oscar_df['film'][(oscar_df.winner == True)].value_counts()

Titanic                                          12
The Lord of the Rings: The Return of the King    11
Ben-Hur                                          11
West Side Story                                  10
The English Patient                               9
                                                 ..
Interstellar                                      1
The Milagro Beanfield War                         1
San Francisco                                     1
The Red Violin                                    1
Knighty Knight Bugs                               1
Name: film, Length: 1273, dtype: int64

In [15]:
films_df = pd.DataFrame({'count':oscar_df['film'].value_counts()})

In [16]:
nominated_films = pd.DataFrame(oscar_df.film.value_counts().reset_index())
nominated_films.columns = ['film', 'nominated']

In [46]:
nominated_films.head(10)

Unnamed: 0,film,nominated
0,A Star Is Born,25
1,Titanic,16
2,Mutiny on the Bounty,15
3,Moulin Rouge,15
4,Cleopatra,14
5,La La Land,14
6,All about Eve,14
7,Little Women,14
8,The Lord of the Rings: The Fellowship of the Ring,13
9,Gone with the Wind,13


In [47]:
top_10_nominated = nominated_films.head(10)['film'].to_list()
print(top_10_nominated)

['A Star Is Born', 'Titanic', 'Mutiny on the Bounty', 'Moulin Rouge', 'Cleopatra', 'La La Land', 'All about Eve', 'Little Women', 'The Lord of the Rings: The Fellowship of the Ring', 'Gone with the Wind']


In [48]:
awarded_films = pd.DataFrame(oscar_df.film[(oscar_df.winner == True)].value_counts().reset_index())
awarded_films.columns = ['film', 'won']
awarded_films['won'].fillna(0, inplace=True)

In [49]:
top_10_awarded = awarded_films.head(10)['film'].to_list()
print(top_10_awarded)

['Titanic', 'The Lord of the Rings: The Return of the King', 'Ben-Hur', 'West Side Story', 'The English Patient', 'Gigi', 'The Last Emperor', 'Gandhi', 'From Here to Eternity', 'Amadeus']


In [35]:
awarded_films.to_csv("../src/datasets/awarded_only_films.csv")

In [20]:
merged_df = pd.merge(nominated_films, awarded_films, on='film',how = 'outer')

In [37]:
merged_df['won'].fillna(0, inplace=True)
merged_df['won']=pd.to_numeric(merged_df['won'], downcast='integer')
merged_df['nominated']=pd.to_numeric(merged_df['nominated'], downcast='integer')
merged_df

Unnamed: 0,film,nominated,won
0,A Star Is Born,25,3
1,Titanic,16,12
2,Mutiny on the Bounty,15,1
3,Moulin Rouge,15,4
4,Cleopatra,14,5
...,...,...,...
4828,The Preacher's Wife,1,0
4829,Forbidden Planet,1,0
4830,Conquer by the Clock,1,0
4831,The Children of Soong Ching Ling,1,0


In [36]:
merged_df.to_csv("../src/datasets/nominated_films.csv")

In [22]:
# just double check if there are any NA values
df1 = merged_df[merged_df.isna().any(axis=1)]
df1

Unnamed: 0,film,nominated,won


In [50]:
# getting all fields just for top 10
top_10_awarded_df = oscar_df[oscar_df['film'].isin(top_10_awarded)]
top_10_nominated_df = oscar_df[oscar_df['film'].isin(top_10_nominated)]

In [52]:
top_10_awarded_df

Unnamed: 0,year_film,year_ceremony,ceremony,category,name,film,winner
2758,1953,1954,26,ACTOR,Montgomery Clift,From Here to Eternity,False
2760,1953,1954,26,ACTOR,Burt Lancaster,From Here to Eternity,False
2764,1953,1954,26,ACTOR IN A SUPPORTING ROLE,Frank Sinatra,From Here to Eternity,True
2769,1953,1954,26,ACTRESS,Deborah Kerr,From Here to Eternity,False
2774,1953,1954,26,ACTRESS IN A SUPPORTING ROLE,Donna Reed,From Here to Eternity,True
...,...,...,...,...,...,...,...
8411,2003,2004,76,MUSIC (Original Song),The Lord of the Rings: The Return of the King,The Lord of the Rings: The Return of the King,True
8415,2003,2004,76,BEST PICTURE,The Lord of the Rings: The Return of the King,The Lord of the Rings: The Return of the King,True
8434,2003,2004,76,SOUND MIXING,The Lord of the Rings: The Return of the King,The Lord of the Rings: The Return of the King,True
8438,2003,2004,76,VISUAL EFFECTS,The Lord of the Rings: The Return of the King,The Lord of the Rings: The Return of the King,True


In [53]:
top_10_nominated_df

Unnamed: 0,year_film,year_ceremony,ceremony,category,name,film,winner
236,1932,1933,6,DIRECTING,Little Women,Little Women,False
242,1932,1933,6,OUTSTANDING PRODUCTION,Little Women,Little Women,False
261,1932,1933,6,WRITING (Adaptation),Little Women,Little Women,True
276,1934,1935,7,ASSISTANT DIRECTOR,Cleopatra,Cleopatra,False
280,1934,1935,7,CINEMATOGRAPHY,Cleopatra,Cleopatra,True
...,...,...,...,...,...,...,...
10285,2019,2020,92,ACTRESS IN A SUPPORTING ROLE,Florence Pugh,Little Women,False
10300,2019,2020,92,COSTUME DESIGN,Little Women,Little Women,True
10333,2019,2020,92,MUSIC (Original Score),Little Women,Little Women,False
10346,2019,2020,92,BEST PICTURE,Little Women,Little Women,False


In [55]:
top_10_nominated_df.to_csv('../src/datasets/top_10_nominated.csv')
top_10_awarded_df.to_csv('../src/datasets/top_10_awarded.csv')

## 2. Actors and Actresses 

In [41]:
actors_df = oscar_df.loc[oscar_df.category == 'ACTOR']
actors_df

Unnamed: 0,year_film,year_ceremony,ceremony,category,name,film,winner
0,1927,1928,1,ACTOR,Richard Barthelmess,The Noose,False
1,1927,1928,1,ACTOR,Emil Jannings,The Last Command,True
35,1928,1929,2,ACTOR,George Bancroft,Thunderbolt,False
36,1928,1929,2,ACTOR,Warner Baxter,In Old Arizona,True
37,1928,1929,2,ACTOR,Chester Morris,Alibi,False
...,...,...,...,...,...,...,...
5298,1975,1976,48,ACTOR,Walter Matthau,The Sunshine Boys,False
5299,1975,1976,48,ACTOR,Jack Nicholson,One Flew over the Cuckoo's Nest,True
5300,1975,1976,48,ACTOR,Al Pacino,Dog Day Afternoon,False
5301,1975,1976,48,ACTOR,Maximilian Schell,The Man in the Glass Booth,False


In [42]:
actress_df = oscar_df.loc[oscar_df.category == 'ACTRESS']
actress_df

Unnamed: 0,year_film,year_ceremony,ceremony,category,name,film,winner
2,1927,1928,1,ACTRESS,Louise Dresser,A Ship Comes In,False
3,1927,1928,1,ACTRESS,Janet Gaynor,7th Heaven,True
4,1927,1928,1,ACTRESS,Gloria Swanson,Sadie Thompson,False
40,1928,1929,2,ACTRESS,Ruth Chatterton,Madame X,False
41,1928,1929,2,ACTRESS,Betty Compson,The Barker,False


In [43]:
actor_supporting_df = oscar_df.loc[oscar_df.category == 'ACTOR IN A SUPPORTING ROLE']
actor_supporting_df

Unnamed: 0,year_film,year_ceremony,ceremony,category,name,film,winner
420,1936,1937,9,ACTOR IN A SUPPORTING ROLE,Mischa Auer,My Man Godfrey,False
421,1936,1937,9,ACTOR IN A SUPPORTING ROLE,Walter Brennan,Come and Get It,True
422,1936,1937,9,ACTOR IN A SUPPORTING ROLE,Stuart Erwin,Pigskin Parade,False
423,1936,1937,9,ACTOR IN A SUPPORTING ROLE,Basil Rathbone,Romeo and Juliet,False
424,1936,1937,9,ACTOR IN A SUPPORTING ROLE,Akim Tamiroff,The General Died at Dawn,False
...,...,...,...,...,...,...,...
10272,2019,2020,92,ACTOR IN A SUPPORTING ROLE,Tom Hanks,A Beautiful Day in the Neighborhood,False
10273,2019,2020,92,ACTOR IN A SUPPORTING ROLE,Anthony Hopkins,The Two Popes,False
10274,2019,2020,92,ACTOR IN A SUPPORTING ROLE,Al Pacino,The Irishman,False
10275,2019,2020,92,ACTOR IN A SUPPORTING ROLE,Joe Pesci,The Irishman,False


In [44]:
actress_supporting_df = oscar_df.loc[oscar_df.category == 'ACTRESS IN A SUPPORTING ROLE']
actress_supporting_df

Unnamed: 0,year_film,year_ceremony,ceremony,category,name,film,winner
430,1936,1937,9,ACTRESS IN A SUPPORTING ROLE,Beulah Bondi,The Gorgeous Hussy,False
431,1936,1937,9,ACTRESS IN A SUPPORTING ROLE,Alice Brady,My Man Godfrey,False
432,1936,1937,9,ACTRESS IN A SUPPORTING ROLE,Bonita Granville,These Three,False
433,1936,1937,9,ACTRESS IN A SUPPORTING ROLE,Maria Ouspenskaya,Dodsworth,False
434,1936,1937,9,ACTRESS IN A SUPPORTING ROLE,Gale Sondergaard,Anthony Adverse,True
...,...,...,...,...,...,...,...
10282,2019,2020,92,ACTRESS IN A SUPPORTING ROLE,Kathy Bates,Richard Jewell,False
10283,2019,2020,92,ACTRESS IN A SUPPORTING ROLE,Laura Dern,Marriage Story,True
10284,2019,2020,92,ACTRESS IN A SUPPORTING ROLE,Scarlett Johansson,Jojo Rabbit,False
10285,2019,2020,92,ACTRESS IN A SUPPORTING ROLE,Florence Pugh,Little Women,False


In [None]:
actress_df.to_csv('../src/datasets/actress.csv')
actress_supporting_df.to_csv('../src/datasets/actress.csv')