In [3]:
import pandas as pd
import numpy as np

year_info = [1950,1960] # Implement as callback
genre_input = 'Any' #Implement as callback

df = pd.read_json('https://raw.githubusercontent.com/vega/vega-datasets/master/data/movies.json', orient = 'columns')
df['Release_Date'] =  pd.to_datetime(df['Release_Date'], infer_datetime_format=True)
df['Release_Date'] = df['Release_Date'].dt.year

In [4]:
def biggest_flop(year_info, genre_input):
    '''This function gives you the biggest flop from the movies dataset.
    
    Inputs :
        year_info : List with 2 values, takes in the year information from the callback above
                    [1970,1985]
        genre_input : (To be programmed) : takes in the genre information from the callback above
                    'Drama', 'Any'
    Returns
        A string (see example)
    
    Example
        'The biggest worldwide flop was Men of War'
    
    '''
    #Condition to wrangle based on 'Any' genre
    if genre_input != 'Any': 
        k = df[df['Major_Genre'] == genre_input]
    
    #Condition to have data between those years
    k = (df[(df['Release_Date'] > year_info[0]) & (df['Release_Date'] < year_info[1]) ]
     .sort_values(by = "US_Gross",ascending = True))
    
    k_US_Gross = k.iloc[1].loc['Worldwide_Gross']/1000 #We won't use this for the flopped movie.
    k_movie = k.iloc[0].loc['Title']
    
    return "The biggest worldwide flop was " + k_movie

biggest_flop([1970,2000],"Drama")

'The biggest worldwide flop was Men of War'

In [5]:
def biggest_success(year_info, genre_input):
    '''This function gives you the highest WW grossing movie and the amount.
    
    Inputs :
        year_info : List with 2 values, takes in the year information from the callback above
                    [1970,1985]
        genre_input : (To be programmed) : takes in the genre information
                    'Drama', 'Any'
    
    Returns
        A string (see example)
    
    Example
        'The most succesful movie was Titanic at a worldwide gross of 797.9 Million USD'        
    
    '''
    #Condition to wrangle based on 'Any' genre
    if genre_input != 'Any': 
        k = df[df['Major_Genre'] == genre_input]
    
    #Condition to have data between those years
    k = (df[(df['Release_Date'] > year_info[0]) & (df['Release_Date'] < year_info[1]) ]
     .sort_values(by = "US_Gross",ascending = False))
    k_WW_Gross = k.iloc[1].loc['Worldwide_Gross']/1000000
    k_movie = k.iloc[0].loc['Title']
    return "The most succesful movie was " + k_movie + " at a worldwide gross of " + str(k_WW_Gross) + " Million USD"

biggest_success([1970,2000],"Drama")

'The most succesful movie was Titanic at a worldwide gross of 797.9 Million USD'

In [6]:
def average_returns(year_info, genre_input):
    '''This function gives you the average return on investment during the period.
    
    Inputs :
        year_info : List with 2 values, takes in the year information from the callback above
                    [1970,1985]
        genre_input : (To be programmed) : takes in the genre information
                    'Drama', 'Any'
    
    Returns
        A string (see example)
    
    Example
        ''The average return on investment during the period 1970-2000 was 56.73 Million USD'        
    
    '''
    
    #Condition to wrangle based on 'Any' genre
    if genre_input != 'Any': 
        k = df[df['Major_Genre'] == genre_input]
    
    #Condition to have data between those years
    k = df[(df['Release_Date'] > year_info[0]) & (df['Release_Date'] < year_info[1]) ]
    average_returns = round(np.mean(k['Worldwide_Gross'] - k['Production_Budget'])/1000000,2)
    return ("The average return on investment during the period " 
            + str(year_info[0]) + '-' + str(year_info[1]) 
            + ' was ' + str(average_returns) + " Million USD" )
    
average_returns([1970,2000],'Drama')


'The average return on investment during the period 1970-2000 was 56.73 Million USD'

In [7]:
def how_big(year_info, genre_input):
    '''This function gives you an estimate of how big the movie industry 
        was in that period for that genre
    
    Inputs :
        year_info : List with 2 values, takes in the year information from the callback above
                    [1970,1985]
        genre_input : (To be programmed) : takes in the genre information
                    'Drama', 'Any'
    
    Returns
        A string (see example)
    
    Example
        ''The average return on investment during the period 1970-2000 was 56.73 Million USD'        
    
    '''
    #Condition to wrangle based on 'Any' genre
    if genre_input != 'Any': 
        k = df[df['Major_Genre'] == genre_input]
    
    #Condition to have data between those years
    k = df[(df['Release_Date'] > year_info[0]) & (df['Release_Date'] < year_info[1]) ]
    average_returns = round(np.sum(k['Worldwide_Gross'] - k['Production_Budget'])/1000000000,2)
    return ("The movie industry during the period " 
            + str(year_info[0]) + '-' + str(year_info[1]) 
            + ' was ' + str(average_returns) + " Billion USD" )
    
how_big([1998,2000],'Drama')


'The movie industry during the period 1998-2000 was 6.94 Billion USD'