In [None]:
import pandas as pd
# Load the demo CSV once; MLB2022 and sampleData are two names for the same DataFrame.
MLB2022 = pd.read_csv('Worst Loss Search Demo.csv')
sampleData = MLB2022

In [None]:
def gameResult(gameDF):
    '''Summarize the rows of one game as a single-row DataFrame.

    Meant to be applied to every group of an initial groupby (one group per game). The
    summary holds the winning team, the losing team, the date, the final score, the
    biggest lead the losing team had in the game, and the latest inning / most outs at
    which the losing team still had that lead.

    Parameters:
        gameDF: DataFrame holding the rows of one game. It must contain the columns
            'home_score', 'away_score', 'post_home_score', 'post_away_score',
            'game_date', 'home_team', 'away_team', 'inning' and 'outs_when_up'.
            gameDF is only read, never modified.

    Returns:
        A one-row DataFrame whose columns are the entries of cols (below).
    '''
    # Home-minus-away score for every row. Kept as a local Series instead of being written
    # back as a new column: the group handed in by groupby().apply() must not be mutated.
    homeMinusAway = gameDF['home_score'] - gameDF['away_score']
    homeFinal = gameDF['post_home_score'].max()
    awayFinal = gameDF['post_away_score'].max()
    cols = ['Winning Team','Losing Team','Date','Score','Biggest Lead (blown)',
            'Latest Inning With Lead','Most Outs in the Inning with Lead']
    returnedSeries = {stat: None for stat in cols} #Was originally a Series but Dictionary was a better fit
    #Returned Series stores the data that's used to construct the DataFrame later on
    returnedSeries['Date'] = gameDF['game_date'].iloc[0] #LINE 12 (For "Check Your Understanding")

    def findLeadData(lead):
        '''finds the latest moment when the losing team had their biggest lead of the night
        Input: lead, as a home-minus-away differential (negative when the away team leads)
        Output:[Inning, Outs]'''
        withLead = gameDF[homeMinusAway == lead]
        inning = withLead['inning'].max()
        return [inning,withLead[withLead['inning'] == inning]['outs_when_up'].max()]

    def homeWon(): #fill returnedSeries with correct data if the home team wins
        returnedSeries['Winning Team'] = gameDF['home_team'].iloc[0]
        returnedSeries['Losing Team'] = gameDF['away_team'].iloc[0]
        returnedSeries['Score'] = f'{homeFinal} - {awayFinal}'
        #The away team lost, so their biggest lead is the most negative differential
        biggestAwayLead = homeMinusAway.min()
        returnedSeries['Biggest Lead (blown)'] = -1 * biggestAwayLead
        LeadData = findLeadData(biggestAwayLead)
        returnedSeries['Latest Inning With Lead'] = LeadData[0]
        returnedSeries['Most Outs in the Inning with Lead'] = LeadData[1]

    def awayWon(): #fill returnedSeries with correct data if the away team wins
        returnedSeries['Losing Team'] = gameDF['home_team'].iloc[0]
        returnedSeries['Winning Team'] = gameDF['away_team'].iloc[0]
        returnedSeries['Score'] = f'{awayFinal} - {homeFinal}'
        #The home team lost, so their biggest lead is the most positive differential
        biggestHomeLead = homeMinusAway.max()
        returnedSeries['Biggest Lead (blown)'] = biggestHomeLead
        LeadData = findLeadData(biggestHomeLead)
        returnedSeries['Latest Inning With Lead'] = LeadData[0]
        returnedSeries['Most Outs in the Inning with Lead'] = LeadData[1]

    #Anything that is not a home win (equal final scores included) is treated as an away win
    if homeFinal > awayFinal:
        homeWon()
    else:
        awayWon()
    return pd.DataFrame({col:[returnedSeries[col]] for col in cols})

In [None]:
def findByTeam(teamDF):
    '''Applied to each group of a groupby on losing team: keeps that team's worst loss(es).

    The rows are narrowed one tie-breaker at a time: first to the biggest blown lead,
    then to the latest inning with that lead, then to the most outs in that inning.
    Rows that are still tied after all three filters are all returned.'''
    tieBreakers = ('Biggest Lead (blown)',
                   'Latest Inning With Lead',
                   'Most Outs in the Inning with Lead')
    candidates = teamDF
    for stat in tieBreakers:
        # Each maximum is taken over the rows that survived the previous filter
        best = candidates[stat].max()
        candidates = candidates[candidates[stat] == best]
    return candidates

In [None]:
#First apply: group the rows by game_pk and run gameResult on each group
#to get the stats we want for every game
gameSummary = (
    sampleData
    .groupby('game_pk')
    .apply(gameResult)
)

In [None]:
#Second apply: group the per-game summaries by Losing Team and run findByTeam
#on each group to find the worst loss in our data for each team
teamSummary = (
    gameSummary
    .groupby('Losing Team')
    .apply(findByTeam)
)

In [None]:
### CHECK YOUR UNDERSTANDING
###
### If I grouped by Winning Team instead in the cell above, what would my chart represent?
###
### Why do I need to define homeWon, awayWon and findLeadData inside gameResult, rather than
###     outside of it (which would make gameResult less chunky)?
###
### Why was the line labelled "LINE 12" in cell 2 (above) put outside of the other functions that fill the returnedSeries data?