# Oscar Award Dataset exploration
## 1. Which films have won the most awards
## 2. Which actors & actresses have won the most awards
## 3. At each year, which films have the most nominations and awards?


In [1]:
import numpy as np
import seaborn as sns
import pandas as pd

In [7]:
oscar_df = pd.read_csv('../src/datasets/the_oscar_award.csv')

In [8]:
oscar_df

Unnamed: 0,year_film,year_ceremony,ceremony,category,name,film,winner
0,1927,1928,1,ACTOR,Richard Barthelmess,The Noose,False
1,1927,1928,1,ACTOR,Emil Jannings,The Last Command,True
2,1927,1928,1,ACTRESS,Louise Dresser,A Ship Comes In,False
3,1927,1928,1,ACTRESS,Janet Gaynor,7th Heaven,True
4,1927,1928,1,ACTRESS,Gloria Swanson,Sadie Thompson,False
...,...,...,...,...,...,...,...
10390,2019,2020,92,WRITING (Original Screenplay),Parasite,Parasite,True
10391,2019,2020,92,JEAN HERSHOLT HUMANITARIAN AWARD,Geena Davis,,True
10392,2019,2020,92,HONORARY AWARD,David Lynch,,True
10393,2019,2020,92,HONORARY AWARD,Wes Studi,,True


In [4]:
# taking a peek at unique award categories
oscar_df.category.unique()

array(['ACTOR', 'ACTRESS', 'ART DIRECTION', 'CINEMATOGRAPHY',
       'DIRECTING (Comedy Picture)', 'DIRECTING (Dramatic Picture)',
       'ENGINEERING EFFECTS', 'OUTSTANDING PICTURE',
       'UNIQUE AND ARTISTIC PICTURE', 'WRITING (Adaptation)',
       'WRITING (Original Story)', 'WRITING (Title Writing)',
       'SPECIAL AWARD', 'DIRECTING', 'WRITING', 'OUTSTANDING PRODUCTION',
       'SOUND RECORDING', 'SHORT SUBJECT (Cartoon)',
       'SHORT SUBJECT (Comedy)', 'SHORT SUBJECT (Novelty)',
       'ASSISTANT DIRECTOR', 'FILM EDITING', 'MUSIC (Scoring)',
       'MUSIC (Song)', 'DANCE DIRECTION', 'WRITING (Screenplay)',
       'ACTOR IN A SUPPORTING ROLE', 'ACTRESS IN A SUPPORTING ROLE',
       'SHORT SUBJECT (Color)', 'SHORT SUBJECT (One-reel)',
       'SHORT SUBJECT (Two-reel)', 'IRVING G. THALBERG MEMORIAL AWARD',
       'MUSIC (Original Score)', 'CINEMATOGRAPHY (Black-and-White)',
       'CINEMATOGRAPHY (Color)', 'SPECIAL EFFECTS',
       'ART DIRECTION (Black-and-White)', 'ART DIRECT

In [9]:
oscar_df['film'] = oscar_df['film'] + " (" + oscar_df['year_film'].astype(str) + ")"
oscar_df

Unnamed: 0,year_film,year_ceremony,ceremony,category,name,film,winner
0,1927,1928,1,ACTOR,Richard Barthelmess,The Noose (1927),False
1,1927,1928,1,ACTOR,Emil Jannings,The Last Command (1927),True
2,1927,1928,1,ACTRESS,Louise Dresser,A Ship Comes In (1927),False
3,1927,1928,1,ACTRESS,Janet Gaynor,7th Heaven (1927),True
4,1927,1928,1,ACTRESS,Gloria Swanson,Sadie Thompson (1927),False
...,...,...,...,...,...,...,...
10390,2019,2020,92,WRITING (Original Screenplay),Parasite,Parasite (2019),True
10391,2019,2020,92,JEAN HERSHOLT HUMANITARIAN AWARD,Geena Davis,,True
10392,2019,2020,92,HONORARY AWARD,David Lynch,,True
10393,2019,2020,92,HONORARY AWARD,Wes Studi,,True


## 1. Film Nomations and wins

In [10]:
oscar_df['film'].value_counts()

Titanic (1997)                      14
All about Eve (1950)                14
La La Land (2016)                   14
The Shape of Water (2017)           13
Gone with the Wind (1939)           13
                                    ..
Suzy (1936)                          1
Paris--Underground (1945)            1
Sing (2016)                          1
Snow Falling on Cedars (1999)        1
The Invisible Man Returns (1940)     1
Name: film, Length: 4934, dtype: int64

In [11]:
win_df = oscar_df[(oscar_df.winner == True)]
win_df['film'].value_counts()

Ben-Hur (1959)                                               11
The Lord of the Rings: The Return of the King (2003)         11
Titanic (1997)                                               11
West Side Story (1961)                                       10
The English Patient (1996)                                    9
                                                             ..
The Sand Castle (1977)                                        1
Wonder Man (1945)                                             1
My Mother Dreams the Satan's Disciples in New York (1999)     1
Moscow Strikes Back (1942)                                    1
Exodus (1960)                                                 1
Name: film, Length: 1286, dtype: int64

In [13]:
oscar_df['film'][(oscar_df.winner == True)].value_counts()

Ben-Hur (1959)                                               11
The Lord of the Rings: The Return of the King (2003)         11
Titanic (1997)                                               11
West Side Story (1961)                                       10
The English Patient (1996)                                    9
                                                             ..
The Sand Castle (1977)                                        1
Wonder Man (1945)                                             1
My Mother Dreams the Satan's Disciples in New York (1999)     1
Moscow Strikes Back (1942)                                    1
Exodus (1960)                                                 1
Name: film, Length: 1286, dtype: int64

In [14]:
films_df = pd.DataFrame({'count':oscar_df['film'].value_counts()})

In [15]:
nominated_films = pd.DataFrame(oscar_df.film.value_counts().reset_index())
nominated_films.columns = ['film', 'nominated']

In [16]:
nominated_films.head(10)

Unnamed: 0,film,nominated
0,Titanic (1997),14
1,All about Eve (1950),14
2,La La Land (2016),14
3,The Shape of Water (2017),13
4,Gone with the Wind (1939),13
5,Mary Poppins (1964),13
6,Forrest Gump (1994),13
7,The Curious Case of Benjamin Button (2008),13
8,The Lord of the Rings: The Fellowship of the R...,13
9,Who's Afraid of Virginia Woolf? (1966),13


In [17]:
top_10_nominated = nominated_films.head(10)['film'].to_list()
print(top_10_nominated)

['Titanic (1997)', 'All about Eve (1950)', 'La La Land (2016)', 'The Shape of Water (2017)', 'Gone with the Wind (1939)', 'Mary Poppins (1964)', 'Forrest Gump (1994)', 'The Curious Case of Benjamin Button (2008)', 'The Lord of the Rings: The Fellowship of the Ring (2001)', "Who's Afraid of Virginia Woolf? (1966)"]


In [18]:
awarded_films = pd.DataFrame(oscar_df.film[(oscar_df.winner == True)].value_counts().reset_index())
awarded_films.columns = ['film', 'won']
awarded_films['won'].fillna(0, inplace=True)

In [19]:
top_10_awarded = awarded_films.head(10)['film'].to_list()
print(top_10_awarded)

['Ben-Hur (1959)', 'The Lord of the Rings: The Return of the King (2003)', 'Titanic (1997)', 'West Side Story (1961)', 'The English Patient (1996)', 'The Last Emperor (1987)', 'Gigi (1958)', 'My Fair Lady (1964)', 'Amadeus (1984)', 'Gone with the Wind (1939)']


In [20]:
awarded_films.to_csv("../src/datasets/awarded_only_films.csv")

In [21]:
merged_df = pd.merge(nominated_films, awarded_films, on='film',how = 'outer')

In [22]:
merged_df['won'].fillna(0, inplace=True)
merged_df['won']=pd.to_numeric(merged_df['won'], downcast='integer')
merged_df['nominated']=pd.to_numeric(merged_df['nominated'], downcast='integer')
merged_df

Unnamed: 0,film,nominated,won
0,Titanic (1997),14,11
1,All about Eve (1950),14,6
2,La La Land (2016),14,6
3,The Shape of Water (2017),13,4
4,Gone with the Wind (1939),13,8
...,...,...,...
4929,Suzy (1936),1,0
4930,Paris--Underground (1945),1,0
4931,Sing (2016),1,1
4932,Snow Falling on Cedars (1999),1,0


In [23]:
merged_df.to_csv("../src/datasets/nominated_films.csv")

In [24]:
# just double check if there are any NA values
df1 = merged_df[merged_df.isna().any(axis=1)]
df1

Unnamed: 0,film,nominated,won


In [25]:
# getting all fields just for top 10
top_10_awarded_df = oscar_df[oscar_df['film'].isin(top_10_awarded)]
top_10_nominated_df = oscar_df[oscar_df['film'].isin(top_10_nominated)]

In [26]:
top_10_awarded_df

Unnamed: 0,year_film,year_ceremony,ceremony,category,name,film,winner
785,1939,1940,12,ACTOR,Clark Gable,Gone with the Wind (1939),False
798,1939,1940,12,ACTRESS,Vivien Leigh,Gone with the Wind (1939),True
799,1939,1940,12,ACTRESS IN A SUPPORTING ROLE,Olivia de Havilland,Gone with the Wind (1939),False
801,1939,1940,12,ACTRESS IN A SUPPORTING ROLE,Hattie McDaniel,Gone with the Wind (1939),True
807,1939,1940,12,ART DIRECTION,Gone with the Wind,Gone with the Wind (1939),True
...,...,...,...,...,...,...,...
8411,2003,2004,76,MUSIC (Original Song),The Lord of the Rings: The Return of the King,The Lord of the Rings: The Return of the King ...,True
8415,2003,2004,76,BEST PICTURE,The Lord of the Rings: The Return of the King,The Lord of the Rings: The Return of the King ...,True
8434,2003,2004,76,SOUND MIXING,The Lord of the Rings: The Return of the King,The Lord of the Rings: The Return of the King ...,True
8438,2003,2004,76,VISUAL EFFECTS,The Lord of the Rings: The Return of the King,The Lord of the Rings: The Return of the King ...,True


In [27]:
top_10_nominated_df

Unnamed: 0,year_film,year_ceremony,ceremony,category,name,film,winner
785,1939,1940,12,ACTOR,Clark Gable,Gone with the Wind (1939),False
798,1939,1940,12,ACTRESS,Vivien Leigh,Gone with the Wind (1939),True
799,1939,1940,12,ACTRESS IN A SUPPORTING ROLE,Olivia de Havilland,Gone with the Wind (1939),False
801,1939,1940,12,ACTRESS IN A SUPPORTING ROLE,Hattie McDaniel,Gone with the Wind (1939),True
807,1939,1940,12,ART DIRECTION,Gone with the Wind,Gone with the Wind (1939),True
...,...,...,...,...,...,...,...
10095,2017,2018,90,BEST PICTURE,The Shape of Water,The Shape of Water (2017),True
10101,2017,2018,90,PRODUCTION DESIGN,The Shape of Water,The Shape of Water (2017),True
10115,2017,2018,90,SOUND EDITING,The Shape of Water,The Shape of Water (2017),False
10120,2017,2018,90,SOUND MIXING,The Shape of Water,The Shape of Water (2017),False


In [28]:
top_10_nominated_df.to_csv('../src/datasets/top_10_nominated.csv')
top_10_awarded_df.to_csv('../src/datasets/top_10_awarded.csv')

## 2. Actors and Actresses 

In [29]:
actor_df = oscar_df.loc[oscar_df.category == 'ACTOR']
actor_df

Unnamed: 0,year_film,year_ceremony,ceremony,category,name,film,winner
0,1927,1928,1,ACTOR,Richard Barthelmess,The Noose (1927),False
1,1927,1928,1,ACTOR,Emil Jannings,The Last Command (1927),True
35,1928,1929,2,ACTOR,George Bancroft,Thunderbolt (1928),False
36,1928,1929,2,ACTOR,Warner Baxter,In Old Arizona (1928),True
37,1928,1929,2,ACTOR,Chester Morris,Alibi (1928),False
...,...,...,...,...,...,...,...
5298,1975,1976,48,ACTOR,Walter Matthau,The Sunshine Boys (1975),False
5299,1975,1976,48,ACTOR,Jack Nicholson,One Flew over the Cuckoo's Nest (1975),True
5300,1975,1976,48,ACTOR,Al Pacino,Dog Day Afternoon (1975),False
5301,1975,1976,48,ACTOR,Maximilian Schell,The Man in the Glass Booth (1975),False


In [30]:
actress_df = oscar_df.loc[oscar_df.category == 'ACTRESS']
actress_df

Unnamed: 0,year_film,year_ceremony,ceremony,category,name,film,winner
2,1927,1928,1,ACTRESS,Louise Dresser,A Ship Comes In (1927),False
3,1927,1928,1,ACTRESS,Janet Gaynor,7th Heaven (1927),True
4,1927,1928,1,ACTRESS,Gloria Swanson,Sadie Thompson (1927),False
40,1928,1929,2,ACTRESS,Ruth Chatterton,Madame X (1928),False
41,1928,1929,2,ACTRESS,Betty Compson,The Barker (1928),False
...,...,...,...,...,...,...,...
5308,1975,1976,48,ACTRESS,Isabelle Adjani,The Story of Adele H. (1975),False
5309,1975,1976,48,ACTRESS,Ann-Margret,Tommy (1975),False
5310,1975,1976,48,ACTRESS,Louise Fletcher,One Flew over the Cuckoo's Nest (1975),True
5311,1975,1976,48,ACTRESS,Glenda Jackson,Hedda (1975),False


In [31]:
actor_supporting_df = oscar_df.loc[oscar_df.category == 'ACTOR IN A SUPPORTING ROLE']
actor_supporting_df

Unnamed: 0,year_film,year_ceremony,ceremony,category,name,film,winner
420,1936,1937,9,ACTOR IN A SUPPORTING ROLE,Mischa Auer,My Man Godfrey (1936),False
421,1936,1937,9,ACTOR IN A SUPPORTING ROLE,Walter Brennan,Come and Get It (1936),True
422,1936,1937,9,ACTOR IN A SUPPORTING ROLE,Stuart Erwin,Pigskin Parade (1936),False
423,1936,1937,9,ACTOR IN A SUPPORTING ROLE,Basil Rathbone,Romeo and Juliet (1936),False
424,1936,1937,9,ACTOR IN A SUPPORTING ROLE,Akim Tamiroff,The General Died at Dawn (1936),False
...,...,...,...,...,...,...,...
10272,2019,2020,92,ACTOR IN A SUPPORTING ROLE,Tom Hanks,A Beautiful Day in the Neighborhood (2019),False
10273,2019,2020,92,ACTOR IN A SUPPORTING ROLE,Anthony Hopkins,The Two Popes (2019),False
10274,2019,2020,92,ACTOR IN A SUPPORTING ROLE,Al Pacino,The Irishman (2019),False
10275,2019,2020,92,ACTOR IN A SUPPORTING ROLE,Joe Pesci,The Irishman (2019),False


In [32]:
actress_supporting_df = oscar_df.loc[oscar_df.category == 'ACTRESS IN A SUPPORTING ROLE']
actress_supporting_df

Unnamed: 0,year_film,year_ceremony,ceremony,category,name,film,winner
430,1936,1937,9,ACTRESS IN A SUPPORTING ROLE,Beulah Bondi,The Gorgeous Hussy (1936),False
431,1936,1937,9,ACTRESS IN A SUPPORTING ROLE,Alice Brady,My Man Godfrey (1936),False
432,1936,1937,9,ACTRESS IN A SUPPORTING ROLE,Bonita Granville,These Three (1936),False
433,1936,1937,9,ACTRESS IN A SUPPORTING ROLE,Maria Ouspenskaya,Dodsworth (1936),False
434,1936,1937,9,ACTRESS IN A SUPPORTING ROLE,Gale Sondergaard,Anthony Adverse (1936),True
...,...,...,...,...,...,...,...
10282,2019,2020,92,ACTRESS IN A SUPPORTING ROLE,Kathy Bates,Richard Jewell (2019),False
10283,2019,2020,92,ACTRESS IN A SUPPORTING ROLE,Laura Dern,Marriage Story (2019),True
10284,2019,2020,92,ACTRESS IN A SUPPORTING ROLE,Scarlett Johansson,Jojo Rabbit (2019),False
10285,2019,2020,92,ACTRESS IN A SUPPORTING ROLE,Florence Pugh,Little Women (2019),False


In [33]:
actor_df.to_csv('../src/datasets/actor.csv')
actor_supporting_df.to_csv('../src/datasets/actor.csv')
actress_df.to_csv('../src/datasets/actress.csv')
actress_supporting_df.to_csv('../src/datasets/actress_supporting.csv')