In [16]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

In [17]:
picture = pd.read_excel('Film_Awards.xlsx', skiprows=1, nrows=3, engine="openpyxl")
picture = picture.rename(columns={"Director": "AA-Director", "Director.1": "GGD-Director", "Director.2": "GGMC-Director"})
picture

Unnamed: 0,Year,Academy Award,AA-Director,Golden Globe - Drama,GGD-Director,Golden Globe - Musical or Comedy,GGMC-Director
0,2019,Parasite,Bong Joon-ho,1917,Sam Mendes,Once Upon a Time in Hollywood,Quentin Tarantino
1,2018,Green Book,Peter Farrelly,Bohemian Rhapsody,Bryan Singer,Green Book,Peter Farelly
2,2017,The Shape of Water,Guillermo del Toro,"Three Billboards Outside Ebbing, Missouri",Martin McDonagh,Lady Bird,Greta Gerwig


In [18]:
pic_melt = picture.melt(
    id_vars = ["Year"],
    var_name="Key",
    value_name="value"
).set_index(['Year'])

In [19]:
picture = picture.astype(
    {'Year': np.uint16,
    'Academy Award': pd.StringDtype(),
    'AA-Director': pd.StringDtype(),
    'Golden Globe - Drama': pd.StringDtype(),
    'GGD-Director': pd.StringDtype(),
    'Golden Globe - Musical or Comedy': pd.StringDtype(),
    'GGMC-Director': pd.StringDtype()})
print(picture.dtypes)

Year                                uint16
Academy Award                       string
AA-Director                         string
Golden Globe - Drama                string
GGD-Director                        string
Golden Globe - Musical or Comedy    string
GGMC-Director                       string
dtype: object


In [20]:
actor = pd.read_excel("Film_Awards.xlsx", skiprows=8, nrows=3, engine='openpyxl')
actor = actor.rename(columns={"Film": "AA-Film", "Film.1": "Film-GGD", "Film.2": "Film-GGMC"})
actor


Unnamed: 0,Year,Academy Award,AA-Film,Golden Globe - Drama,Film-GGD,Golden Globe - Musical or Comedy,Film-GGMC
0,2019,Joaquin Phoenix,Joker,Joaquin Phoenix,Joker,Taron Egerton,Rocketman
1,2018,Rami Malek,Bohemian Rhapsody,Rami Malek,Bohemian Rhapsody,Christian Bale,Vice
2,2017,Gary Oldman,Darkest Hour,Gary Oldman,Darkest Hour,James Franco,The Disaster Artist


In [21]:
actor_film_drop = actor.drop(['Academy Award', 'Golden Globe - Drama', 'Golden Globe - Musical or Comedy'], axis=1)
actor_melt = actor_film_drop.melt(
    id_vars = ["Year"],
    var_name="Key",
    value_name="value"
).set_index(['Year'])
actor_melt

Unnamed: 0_level_0,Key,value
Year,Unnamed: 1_level_1,Unnamed: 2_level_1
2019,AA-Film,Joker
2018,AA-Film,Bohemian Rhapsody
2017,AA-Film,Darkest Hour
2019,Film-GGD,Joker
2018,Film-GGD,Bohemian Rhapsody
2017,Film-GGD,Darkest Hour
2019,Film-GGMC,Rocketman
2018,Film-GGMC,Vice
2017,Film-GGMC,The Disaster Artist


In [22]:
actress = pd.read_excel('Film_Awards.xlsx', skiprows=15, nrows=3, engine='openpyxl')
actress = actress.rename(columns={"Film": "AA-Film", "Film.1": "Film-GGD", "Film.2": "Film-GGMC"})
actress

Unnamed: 0,Year,Academy Award,AA-Film,Golden Globe - Drama,Film-GGD,Golden Globe - Musical or Comedy,Film-GGMC
0,2019,Renée Zellweger,Judy,Renée Zellweger,Judy,Awkwafina,The Farewell
1,2018,Olivia Colman,The Favourite,Glenn Close,The Wife,Olivia Coleman,The Favourite\n
2,2017,Frances McDormand,"Three Billboards Outside Ebbing, Missouri",Frances McDormand,"Three Billboards Outside Ebbing, Missouri",Saoirse Ronan,Lady Bird


In [23]:
# Fixes misspelling of "The Favourite"
actress.at[1, 'Film-GGMC']='The Favourite'
actress.at[1, 'Golden Globe - Musical or Comedy']='Olivia Colman'
actress

Unnamed: 0,Year,Academy Award,AA-Film,Golden Globe - Drama,Film-GGD,Golden Globe - Musical or Comedy,Film-GGMC
0,2019,Renée Zellweger,Judy,Renée Zellweger,Judy,Awkwafina,The Farewell
1,2018,Olivia Colman,The Favourite,Glenn Close,The Wife,Olivia Colman,The Favourite
2,2017,Frances McDormand,"Three Billboards Outside Ebbing, Missouri",Frances McDormand,"Three Billboards Outside Ebbing, Missouri",Saoirse Ronan,Lady Bird


In [24]:
actress_film_drop = actress.drop(['Academy Award', 'Golden Globe - Drama', 'Golden Globe - Musical or Comedy'], axis=1)
actress_melt = actress_film_drop.melt(
    id_vars = ["Year"],
    var_name="Key",
    value_name="value"
).set_index(['Year'])
actress_melt

Unnamed: 0_level_0,Key,value
Year,Unnamed: 1_level_1,Unnamed: 2_level_1
2019,AA-Film,Judy
2018,AA-Film,The Favourite
2017,AA-Film,"Three Billboards Outside Ebbing, Missouri"
2019,Film-GGD,Judy
2018,Film-GGD,The Wife
2017,Film-GGD,"Three Billboards Outside Ebbing, Missouri"
2019,Film-GGMC,The Farewell
2018,Film-GGMC,The Favourite
2017,Film-GGMC,Lady Bird


In [25]:
# Checks datatype of each cell
ids = picture['Golden Globe - Drama']
for i, id_ in enumerate(ids):
    print(f"id {i}: {id_}\t{id_.__class__.__name__}") 
# Shows that 1917 is fixed to be a string

id 0: 1917	str
id 1: Bohemian Rhapsody	str
id 2: Three Billboards Outside Ebbing, Missouri	str


In [26]:
# For each film involved in multiple awards, list the award and year it is associated with
pic_melt_nondupes = pic_melt['value'].value_counts()
pic_melt_nondupes_return = pic_melt[pic_melt['value'].isin(pic_melt_nondupes[pic_melt_nondupes>1].index)]
pic_melt_nondupes_return

Unnamed: 0_level_0,Key,value
Year,Unnamed: 1_level_1,Unnamed: 2_level_1
2018,Academy Award,Green Book
2018,Golden Globe - Musical or Comedy,Green Book


In [27]:
# For each actor winning multiple awards, list the film and award they are associated with
actor_melt_nondupes = actor_melt['value'].value_counts()
actor_melt_nondupes_return = actor_melt[actor_melt['value'].isin(actor_melt_nondupes[actor_melt_nondupes>1].index)]
actor_melt_nondupes_return

Unnamed: 0_level_0,Key,value
Year,Unnamed: 1_level_1,Unnamed: 2_level_1
2019,AA-Film,Joker
2017,AA-Film,Darkest Hour
2019,Film-GGD,Joker
2017,Film-GGD,Darkest Hour


In [28]:
# For each actress winning multiple awards, list the film and award they are associated with
actress_melt_nondupes = actress_melt['value'].value_counts()
actress_melt_nondupes_return = actress_melt[actress_melt['value'].isin(actress_melt_nondupes[actress_melt_nondupes>1].index)]
actress_melt_nondupes_return

Unnamed: 0_level_0,Key,value
Year,Unnamed: 1_level_1,Unnamed: 2_level_1
2019,AA-Film,Judy
2018,AA-Film,The Favourite
2017,AA-Film,"Three Billboards Outside Ebbing, Missouri"
2019,Film-GGD,Judy
2017,Film-GGD,"Three Billboards Outside Ebbing, Missouri"
2018,Film-GGMC,The Favourite
