# Academy Awards Ratings Analysis Project

In [1]:
#import libraries
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

## Academy Award Dataset

In [2]:
#import dataset 
#kaggle URL https://www.kaggle.com/datasets/unanimad/the-oscar-award?resource=download
full_oscars = pd.read_csv(r"C:\Users\chlot\OneDrive\Desktop\the_oscars.csv")

In [3]:
full_oscars.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 10395 entries, 0 to 10394
Data columns (total 7 columns):
 #   Column         Non-Null Count  Dtype 
---  ------         --------------  ----- 
 0   year_film      10395 non-null  int64 
 1   year_ceremony  10395 non-null  int64 
 2   ceremony       10395 non-null  int64 
 3   category       10395 non-null  object
 4   name           10395 non-null  object
 5   film           10091 non-null  object
 6   winner         10395 non-null  bool  
dtypes: bool(1), int64(3), object(3)
memory usage: 497.5+ KB


In [4]:
full_oscars.head()

Unnamed: 0,year_film,year_ceremony,ceremony,category,name,film,winner
0,1927,1928,1,ACTOR,Richard Barthelmess,The Noose,False
1,1927,1928,1,ACTOR,Emil Jannings,The Last Command,True
2,1927,1928,1,ACTRESS,Louise Dresser,A Ship Comes In,False
3,1927,1928,1,ACTRESS,Janet Gaynor,7th Heaven,True
4,1927,1928,1,ACTRESS,Gloria Swanson,Sadie Thompson,False


In [5]:
#filtering out the columns ceremony because the year of the ceremony is what counts for this analysis. 
oscars = full_oscars[["year_film", "year_ceremony", "category", "name", "film", "winner"]]

In [6]:
oscars.head()

Unnamed: 0,year_film,year_ceremony,category,name,film,winner
0,1927,1928,ACTOR,Richard Barthelmess,The Noose,False
1,1927,1928,ACTOR,Emil Jannings,The Last Command,True
2,1927,1928,ACTRESS,Louise Dresser,A Ship Comes In,False
3,1927,1928,ACTRESS,Janet Gaynor,7th Heaven,True
4,1927,1928,ACTRESS,Gloria Swanson,Sadie Thompson,False


### Analysis

In [7]:
winners = oscars[oscars["winner"] == True]

In [8]:
winners

Unnamed: 0,year_film,year_ceremony,category,name,film,winner
1,1927,1928,ACTOR,Emil Jannings,The Last Command,True
3,1927,1928,ACTRESS,Janet Gaynor,7th Heaven,True
6,1927,1928,ART DIRECTION,William Cameron Menzies,The Dove;,True
9,1927,1928,CINEMATOGRAPHY,Charles Rosher,Sunrise,True
10,1927,1928,CINEMATOGRAPHY,Karl Struss,Sunrise,True
...,...,...,...,...,...,...
10390,2019,2020,WRITING (Original Screenplay),"Screenplay by Bong Joon Ho, Han Jin Won; Story...",Parasite,True
10391,2019,2020,JEAN HERSHOLT HUMANITARIAN AWARD,Geena Davis,,True
10392,2019,2020,HONORARY AWARD,David Lynch,,True
10393,2019,2020,HONORARY AWARD,Wes Studi,,True


In [9]:
nominated = oscars[oscars["winner"] == False]

In [10]:
#what are the most nominted movies
oscars[["film"]].value_counts()

film                       
A Star Is Born                 25
Titanic                        16
Moulin Rouge                   15
Mutiny on the Bounty           15
Little Women                   14
                               ..
Let's Make Love                 1
Let's Pollute                   1
Lethal Weapon                   1
Lethal Weapon 2                 1
Éramos Pocos (One Too Many)     1
Length: 4833, dtype: int64

In [12]:
#The results show that A Star is Born was nominated far more than any other movie and seems like an outlier.
#There might be more than one film with that name in the list.

#### Outlier Analysis 

In [13]:
star_born = oscars[oscars["film"] == "A Star Is Born"]
star_born

Unnamed: 0,year_film,year_ceremony,category,name,film,winner
523,1937,1938,ACTOR,Fredric March,A Star Is Born,False
534,1937,1938,ACTRESS,Janet Gaynor,A Star Is Born,False
558,1937,1938,ASSISTANT DIRECTOR,Eric Stacey,A Star Is Born,False
573,1937,1938,DIRECTING,William Wellman,A Star Is Born,False
607,1937,1938,OUTSTANDING PRODUCTION,Selznick International Pictures,A Star Is Born,False
634,1937,1938,WRITING (Original Story),"William A. Wellman, Robert Carson",A Star Is Born,True
639,1937,1938,WRITING (Screenplay),"Dorothy Parker, Alan Campbell, Robert Carson",A Star Is Born,False
2888,1954,1955,ACTOR,James Mason,A Star Is Born,False
2896,1954,1955,ACTRESS,Judy Garland,A Star Is Born,False
2913,1954,1955,ART DIRECTION (Color),"Art Direction: Malcolm Bert, Gene Allen, Iren...",A Star Is Born,False


In [14]:
#There are 4 different A Star Is Born Movies: 1938, 1955, 1977 and 2019

In [15]:
#adding the release year to the table helps differentiate the films from different years with the same title
count = oscars[["film", "year_film"]].value_counts() 

In [16]:
#another way to find the list of individual movies would be using .drop_duplicates
unique_movies = oscars.drop_duplicates(subset=["film", "year_film"])
unique_movies

Unnamed: 0,year_film,year_ceremony,category,name,film,winner
0,1927,1928,ACTOR,Richard Barthelmess,The Noose,False
1,1927,1928,ACTOR,Emil Jannings,The Last Command,True
2,1927,1928,ACTRESS,Louise Dresser,A Ship Comes In,False
3,1927,1928,ACTRESS,Janet Gaynor,7th Heaven,True
4,1927,1928,ACTRESS,Gloria Swanson,Sadie Thompson,False
...,...,...,...,...,...,...
10371,2019,2020,SOUND MIXING,"Gary Rydstrom, Tom Johnson and Mark Ulano",Ad Astra,False
10376,2019,2020,VISUAL EFFECTS,"Dan DeLeeuw, Russell Earl, Matt Aitken and Dan...",Avengers: Endgame,False
10378,2019,2020,VISUAL EFFECTS,"Robert Legato, Adam Valdez, Andrew R. Jones an...",The Lion King,False
10386,2019,2020,WRITING (Original Screenplay),Written by Rian Johnson,Knives Out,False


In [17]:
#Certain awards are givin out without a movie related to it, let's drop those rows
unique_movies.dropna()

Unnamed: 0,year_film,year_ceremony,category,name,film,winner
0,1927,1928,ACTOR,Richard Barthelmess,The Noose,False
1,1927,1928,ACTOR,Emil Jannings,The Last Command,True
2,1927,1928,ACTRESS,Louise Dresser,A Ship Comes In,False
3,1927,1928,ACTRESS,Janet Gaynor,7th Heaven,True
4,1927,1928,ACTRESS,Gloria Swanson,Sadie Thompson,False
...,...,...,...,...,...,...
10365,2019,2020,SHORT FILM (Live Action),Delphine Girard,A Sister,False
10371,2019,2020,SOUND MIXING,"Gary Rydstrom, Tom Johnson and Mark Ulano",Ad Astra,False
10376,2019,2020,VISUAL EFFECTS,"Dan DeLeeuw, Russell Earl, Matt Aitken and Dan...",Avengers: Endgame,False
10378,2019,2020,VISUAL EFFECTS,"Robert Legato, Adam Valdez, Andrew R. Jones an...",The Lion King,False


In [18]:
#this confirms that 4934 movies were nominated across all academy award ceremonies

#### Analysis of the highest winning movies

In [19]:
biggest_winners = winners[["film", "year_film"]].value_counts()
biggest_winners

film                                           year_film
Titanic                                        1997         11
Ben-Hur                                        1959         11
The Lord of the Rings: The Return of the King  2003         11
West Side Story                                1961         10
The Last Emperor                               1987          9
                                                            ..
Knighty Knight Bugs                            1958          1
Kokoda Front Line!                             1942          1
Kolya                                          1996          1
Kon-Tiki                                       1951          1
tom thumb                                      1958          1
Length: 1286, dtype: int64

In [20]:
df_winners = pd.DataFrame(biggest_winners)

In [22]:
#slicing the top 10
df_winners[:10]

Unnamed: 0_level_0,Unnamed: 1_level_0,0
film,year_film,Unnamed: 2_level_1
Titanic,1997,11
Ben-Hur,1959,11
The Lord of the Rings: The Return of the King,2003,11
West Side Story,1961,10
The Last Emperor,1987,9
Gigi,1958,9
The English Patient,1996,9
Slumdog Millionaire,2008,8
Cabaret,1972,8
On the Waterfront,1954,8


### Joining the top 10 with the original dataset

In [23]:
top10 = df_winners[:10].merge(oscars, on=["year_film", "film"], how="inner")
top10

Unnamed: 0,year_film,film,0,year_ceremony,category,name,winner
0,1997,Titanic,11,1998,ACTRESS IN A LEADING ROLE,Kate Winslet,False
1,1997,Titanic,11,1998,ACTRESS IN A SUPPORTING ROLE,Gloria Stuart,False
2,1997,Titanic,11,1998,ART DIRECTION,Art Direction: Peter Lamont; Set Decoration:...,True
3,1997,Titanic,11,1998,CINEMATOGRAPHY,Russell Carpenter,True
4,1997,Titanic,11,1998,COSTUME DESIGN,Deborah L. Scott,True
...,...,...,...,...,...,...,...
105,1954,On the Waterfront,8,1955,DIRECTING,Elia Kazan,True
106,1954,On the Waterfront,8,1955,FILM EDITING,Gene Milford,True
107,1954,On the Waterfront,8,1955,MUSIC (Music Score of a Dramatic or Comedy Pic...,Leonard Bernstein,False
108,1954,On the Waterfront,8,1955,BEST MOTION PICTURE,"Sam Spiegel, Producer",True


In [24]:
top10[top10["category"] == "BEST PICTURE"]

Unnamed: 0,year_film,film,0,year_ceremony,category,name,winner
10,1997,Titanic,11,1998,BEST PICTURE,"James Cameron and Jon Landau, Producers",True
33,2003,The Lord of the Rings: The Return of the King,11,2004,BEST PICTURE,"Barrie M. Osborne, Peter Jackson and Fran Wals...",True
54,1987,The Last Emperor,9,1988,BEST PICTURE,"Jeremy Thomas, Producer",True
75,1996,The English Patient,9,1997,BEST PICTURE,"Saul Zaentz, Producer",True
84,2008,Slumdog Millionaire,8,2009,BEST PICTURE,"Christian Colson, Producer",True
95,1972,Cabaret,8,1973,BEST PICTURE,"Cy Feuer, Producer",False


In [25]:
top10[top10["category"] == "BEST PICTURE"]

Unnamed: 0,year_film,film,0,year_ceremony,category,name,winner
10,1997,Titanic,11,1998,BEST PICTURE,"James Cameron and Jon Landau, Producers",True
33,2003,The Lord of the Rings: The Return of the King,11,2004,BEST PICTURE,"Barrie M. Osborne, Peter Jackson and Fran Wals...",True
54,1987,The Last Emperor,9,1988,BEST PICTURE,"Jeremy Thomas, Producer",True
75,1996,The English Patient,9,1997,BEST PICTURE,"Saul Zaentz, Producer",True
84,2008,Slumdog Millionaire,8,2009,BEST PICTURE,"Christian Colson, Producer",True
95,1972,Cabaret,8,1973,BEST PICTURE,"Cy Feuer, Producer",False
