In [207]:
import numpy as np
import pandas as pd
import scipy.stats as stats

In [208]:
# Load the Best Actress and Best Actor winner tables straight from the source CSVs.
# NOTE(review): movie titles that contain a comma appear to spill into an extra
# column (handled as 'Unknown' in later cells). `skipinitialspace=True` may
# avoid that, but the later cells rely on the current layout -- confirm first.
female_oscars_df = pd.read_csv(
    "https://people.sc.fsu.edu/~jburkardt/data/csv/oscar_age_female.csv"
)
male_oscars_df = pd.read_csv(
    "https://people.sc.fsu.edu/~jburkardt/data/csv/oscar_age_male.csv"
)

In [209]:
# Replace the parsed header with clean labels that carry no quotation marks.
# 'Unknown' is a placeholder for the fifth column, whose meaning still has
# to be explored.

female_oscars_columns = ["Year", "Age", "Name", "Movie", "Unknown"]
female_oscars_df.columns = female_oscars_columns

In [210]:
# Renumber the rows so the index starts at zero (0..n-1).
# The length is read from the frame instead of being hard-coded: a fixed
# range(0, 89) raises a length-mismatch ValueError as soon as the CSV gains
# or loses a row.

female_oscars_df.index = range(len(female_oscars_df))

In [211]:
# List the distinct values of the 'Unknown' column to work out what it holds.

female_oscars_df['Unknown'].unique()

array([' Street Angel and Sunrise: A Song of Two Humans"', nan,
       ' Little Sheba"'], dtype=object)

In [212]:
# Trim leading/trailing spaces and double quotes from the three text columns
# in one column-wise pass.

female_oscars_df[['Name', 'Movie', 'Unknown']] = (
    female_oscars_df[['Name', 'Movie', 'Unknown']]
    .apply(lambda column: column.str.strip(' "'))
)

In [213]:
# Research showed that 'Unknown' holds additional movies for which the same
# actress won that year's award, so both columns are joined into a single
# '; '-separated string.

combined_column = (
    female_oscars_df['Movie'].fillna('')
    + "; "
    + female_oscars_df['Unknown'].fillna('')
)
# Remove the separator, quotes and whitespace left at either end
# (e.g. the trailing "; " when 'Unknown' was empty).
female_oscars_df['Movie_v2'] = combined_column.str.strip('"\t ;')

In [214]:
# The merged 'Movie_v2' column supersedes the original 'Movie' column.
female_oscars_df.drop(columns='Movie', inplace=True)

In [215]:
# 'Unknown' has been folded into 'Movie_v2', so it is no longer needed.
female_oscars_df.drop(columns='Unknown', inplace=True)

In [216]:
# Give the merged column the plain 'Movie' label.
female_oscars_df.rename(
    columns={'Movie_v2': 'Movie'},
    inplace=True,
)

In [217]:
# Hand-correct the first row: replace the merged text with one consistently
# '; '-separated list of the three winning films.
# The column is looked up by name instead of the magic position 3, so the
# write still lands on 'Movie' if columns are added, removed or reordered.
# Row 0 is still addressed by position.
female_oscars_df.iloc[0, female_oscars_df.columns.get_loc('Movie')] = (
    'Seventh Heaven; Street Angel; Sunrise: A Song of Two Humans'
)

In [218]:
# Preview the first rows of the cleaned Best Actress table.
female_oscars_df.head()

Unnamed: 0,Year,Age,Name,Movie
0,1928,22,Janet Gaynor,Seventh Heaven; Street Angel; Sunrise: A Song ...
1,1929,37,Mary Pickford,Coquette
2,1930,28,Norma Shearer,The Divorcee
3,1931,63,Marie Dressler,Min and Bill
4,1932,32,Helen Hayes,The Sin of Madelon Claudet


In [219]:
def female_oscar_winner(year_ask, df=None):
    '''
    Finds the winner of the Oscar for Best Actress for a given year.
    ---
    Input:
        year_ask: numeric year to look up (the notebook's data is described
            as covering 1928 to 2016).
        df: optional DataFrame with 'Year', 'Name' and 'Movie' columns.
            Defaults to the notebook-level female_oscars_df.
    Output: A string stating the actress and movie that won the Oscar of the
        input year, or None when no row has that year.
    '''
    table = female_oscars_df if df is None else df

    # Vectorised comparison instead of a Python loop over the column.
    matches = (table['Year'] == year_ask).to_numpy()
    if not matches.any():
        return None

    # First matching row, read by *position* (.iloc) so the lookup does not
    # depend on the frame carrying a 0..n-1 index.
    row = matches.argmax()
    return (
        f"In {table['Year'].iloc[row]} {table['Name'].iloc[row]}"
        f" won the Best Actress Oscar for her role in {table['Movie'].iloc[row]}."
    )


In [220]:
# Spot-check the lookup for a single year.
female_oscar_winner(1949)

'In 1949 Olivia de Havilland won the Best Actress Oscar for her role in The Heiress.'

In [222]:
# Replace the parsed header with clean labels; 'Unknown' is a placeholder
# name for the fifth column.

male_oscars_columns = ["Year", "Age", "Name", "Movie", "Unknown"]
male_oscars_df.columns = male_oscars_columns

In [223]:
# Trim leading/trailing spaces and double quotes from the three text columns
# in one column-wise pass.

male_oscars_df[['Name', 'Movie', 'Unknown']] = (
    male_oscars_df[['Name', 'Movie', 'Unknown']]
    .apply(lambda column: column.str.strip(' "'))
)

In [224]:
# Renumber the rows so the index starts at zero (0..n-1).
# The length is read from the frame instead of being hard-coded: a fixed
# range(0, 89) raises a length-mismatch ValueError as soon as the CSV gains
# or loses a row.

male_oscars_df.index = range(len(male_oscars_df))

In [225]:
# Move the raw titles aside as 'Movie1' so the merged result can take the
# 'Movie' name.
male_oscars_df.rename(
    columns={'Movie': 'Movie1'},
    inplace=True,
)

In [226]:
# Join the first title with any extra titles from 'Unknown', then strip the
# separator, spaces and quotes left at the end (e.g. the trailing "; " when
# 'Unknown' was empty).
male_oscars_df['Movie'] = (
    male_oscars_df['Movie1'] + '; ' + male_oscars_df['Unknown'].fillna('')
).str.rstrip(' ;"')

In [227]:
# Both source columns have been folded into 'Movie'; drop them together.
male_oscars_df.drop(columns=['Movie1', 'Unknown'], inplace=True)

In [228]:
def male_oscar_winner(year_ask, df=None):
    '''
    Finds the winner of the Oscar for Best Actor for a given year.
    ---
    Input:
        year_ask: numeric year to look up (the notebook's data is described
            as covering 1928 to 2016).
        df: optional DataFrame with 'Year', 'Name' and 'Movie' columns.
            Defaults to the notebook-level male_oscars_df.
    Output: A string stating the actor and movie that won the Oscar of the
        input year, or None when no row has that year.
    '''
    table = male_oscars_df if df is None else df

    # Vectorised comparison instead of a Python loop over the column.
    matches = (table['Year'] == year_ask).to_numpy()
    if not matches.any():
        return None

    # First matching row, read by *position* (.iloc) so the lookup does not
    # depend on the frame carrying a 0..n-1 index.
    row = matches.argmax()
    return (
        f"In {table['Year'].iloc[row]} {table['Name'].iloc[row]}"
        f" won the Best Actor Oscar for his role in {table['Movie'].iloc[row]}."
    )

In [229]:
# Spot-check the lookup for a single year.
male_oscar_winner(1928)

'In 1928 Emil Jannings won the Best Actor Oscar for his role in The Last Command; The Way of All Flesh.'

In [230]:
# Attach each year's Best Actor winner to the Best Actress table.
# Names are matched on 'Year' rather than on the two frames happening to
# share the same row index, so a sorted, filtered or re-indexed frame cannot
# silently pair people from different years. Years missing from
# male_oscars_df yield NaN.
# NOTE(review): this needs 'Year' to be unique in male_oscars_df; duplicate
# years are expected to make the lookup raise instead of guessing -- confirm.
female_oscars_df['Best_Actor'] = female_oscars_df['Year'].map(
    male_oscars_df.set_index('Year')['Name']
)

In [248]:
# Preview the first rows again, now including the Best_Actor column.
female_oscars_df.head()

Unnamed: 0,Year,Age,Name,Movie,Best_Actor
0,1928,22,Janet Gaynor,Seventh Heaven; Street Angel; Sunrise: A Song ...,Emil Jannings
1,1929,37,Mary Pickford,Coquette,Warner Baxter
2,1930,28,Norma Shearer,The Divorcee,George Arliss
3,1931,63,Marie Dressler,Min and Bill,Lionel Barrymore
4,1932,32,Helen Hayes,The Sin of Madelon Claudet,Wallace Beery


In [249]:
# Split every actress name on whitespace, giving one list of name parts per row.
list_test = female_oscars_df['Name'].str.split()