# Matchup Data Pull

Objectives:
- Scrape the ESPN Fantasy API to pull historical matchup data
- Clean & Aggregate data

## Step 1: Data Scrape

In [26]:
## Dependencies ##
import pandas as pd
import numpy as np
import requests
import os
from dotenv import load_dotenv, find_dotenv

In [27]:
# Read ESPN credentials from the nearest .env file (override=False is passed to load_dotenv)
load_dotenv(find_dotenv(), override=False)

swid, espn_s2 = os.getenv("SWID"), os.getenv("ESPNS2")

In [28]:
## Define Objects ##

## Seasons to pull (2017 through 2025 inclusive)
years = list(range(2017, 2026))

## ESPN team ID -> owner name lookup
data = {
    'id': list(range(1, 11)),
    'Name': [
        'Luca', 'Dylan', 'Cody', 'Matt', 'Nate',
        'Hayden', 'Gavin', 'Zachary', 'Alexander', 'Evan',
    ],
}
dfIdNameMatch = pd.DataFrame(data)

## Raw (flattened) ESPN field -> output column name
matchup_column_names = {
    'matchupPeriodId': 'Week',
    'away.teamId': 'Team1',
    'away.totalPoints': 'Score1',
    'home.teamId': 'Team2',
    'home.totalPoints': 'Score2',
    'playoffTierType': 'playoffType',
}

## Empty accumulator; each season's matchups are concatenated onto it
dfAllMatchups = pd.DataFrame(
    {
        column: []
        for column in ['Week', 'Team1', 'Score1', 'Team2', 'Score2', 'playoffType', 'Type', 'Year']
    }
)

In [29]:
## Define Cookies ##
## Cookie values sent with every ESPN request in the extraction loop
cookies = dict(swid=swid, espn_s2=espn_s2)

In [30]:
## Extract Data from ESPN ##

## ESPN league ID used in every request URL
league_id = 193193

## Last regular-season week per season; any later week is labelled 'Playoff'.
## Seasons missing from this table (currently only 2025) fall back to
## default_final_regular_week, matching the previous catch-all branch.
## IN FUTURE: Determine Playoffs by playoffTierType ##
final_regular_week_by_year = {
    2017: 13,
    2018: 12,
    2019: 12,
    2020: 13,
    2021: 14,
    2022: 15,
    2023: 11,
    2024: 13,
}
default_final_regular_week = 15

## Fail instead of hanging forever if ESPN stops responding (seconds)
request_timeout = 30

## Loop through years
for year in years:

    ## 2017 requires a different API call (leagueHistory endpoint, mMatchup view)
    is_history_season = year == 2017

    if is_history_season:
        url = "https://lm-api-reads.fantasy.espn.com/apis/v3/games/ffl/leagueHistory/" + str(league_id) + "?seasonId=" + str(year)
        view = "mMatchup"
    else:
        url = f"https://lm-api-reads.fantasy.espn.com/apis/v3/games/ffl/seasons/{year}/segments/0/leagues/{league_id}"
        view = "mMatchupScore"  # needs the cookies (permissions)

    ## Send GET request
    matchup_response = requests.get(url, params={"view": view}, cookies=cookies, timeout=request_timeout)

    ## Surface HTTP errors (e.g. missing/expired cookies) here rather than as a
    ## confusing KeyError / JSON decode error further down
    matchup_response.raise_for_status()

    ## Parse JSON response from API
    matchup_json = matchup_response.json()

    ## Normalize JSON data in table structure
    if is_history_season:
        ## The leagueHistory response is a list; the schedule is on its first element
        dfMatchup = pd.json_normalize(matchup_json[0]['schedule'])
        ## No tier is read from the API for 2017; default everything to 'NONE'
        dfMatchup['playoffTierType'] = 'NONE'
    else:
        dfMatchup = pd.json_normalize(matchup_json['schedule'])

    ## Keep only the mapped columns (missing ones become NaN) and rename them
    dfMatchup = dfMatchup.reindex(columns=matchup_column_names).rename(columns=matchup_column_names)

    ## Create column Type from the season's regular-season cutoff week
    final_regular_week = final_regular_week_by_year.get(year, default_final_regular_week)
    dfMatchup['Type'] = ['Regular' if week <= final_regular_week else 'Playoff' for week in dfMatchup['Week']]

    ## Create column Year (stored as a string)
    dfMatchup['Year'] = year
    dfMatchup['Year'] = dfMatchup['Year'].astype(str)

    ## Concatenates data from all years
    dfAllMatchups = pd.concat([dfAllMatchups, dfMatchup], ignore_index=True)

In [31]:
## Add Playoffs for 2017 data
## Column position 5 is written for row positions 52-59, one label per row
## NOTE(review): presumably these rows are the 2017 playoff matchups and column 5 is playoffType - confirm
playoff_labels_2017 = [
    'WINNERS_BRACKET',
    'WINNERS_BRACKET',
    'LOSERS_CONSOLATION_LADDER',
    'LOSERS_CONSOLATION_LADDER',
    'WINNERS_BRACKET',
    'WINNERS_CONSOLATION_LADDER',
    'LOSERS_CONSOLATION_LADDER',
    'LOSERS_CONSOLATION_LADDER',
]
for row_position, playoff_label in enumerate(playoff_labels_2017, start=52):
    dfAllMatchups.iloc[row_position, 5] = playoff_label

In [32]:
## By Week
## For the 2025 season keep only Week 1; every other season is left untouched
is_2025_season = dfAllMatchups['Year'] == '2025'
is_week_one = dfAllMatchups['Week'] == 1.0
dfAllMatchups = dfAllMatchups[~is_2025_season | is_week_one]

In [33]:
## Drop NAs
## Keep only rows that have a Team1 value
dfAllMatchups = dfAllMatchups[dfAllMatchups['Team1'].notna()]

In [34]:
## Attach the owner name for the Team1 ID
dfAllMatchupsMerge1 = pd.merge(
    dfAllMatchups,
    dfIdNameMatch,
    left_on='Team1',
    right_on='id',
    suffixes=('', '_Team1'),
)

## Attach the owner name for the Team2 ID (clashing columns get the _Team2 suffix)
dfAllMatchupsMerge2 = pd.merge(
    dfAllMatchupsMerge1,
    dfIdNameMatch,
    left_on='Team2',
    right_on='id',
    suffixes=('', '_Team2'),
)

## Replace the numeric ID columns with the owner names
dfAllMatchupsDrop = dfAllMatchupsMerge2.drop(columns=['id', 'id_Team2', 'Team1', 'Team2'])
dfAllMatchupsRename = dfAllMatchupsDrop.rename(columns={'Name': 'Team1', 'Name_Team2': 'Team2'})

## Reorder columns
new_order = ['Week', 'Team1', 'Score1', 'Team2', 'Score2', 'Type', 'playoffType', 'Year']
dfAllMatchupsNewOrder = dfAllMatchupsRename[new_order]

## Drop consolation playoff matchups
is_playoff_game = dfAllMatchupsNewOrder['Type'] == 'Playoff'
is_consolation_game = dfAllMatchupsNewOrder['playoffType'].str.lower().str.contains('consolation')
dfMatchupsPd = dfAllMatchupsNewOrder[~(is_playoff_game & is_consolation_game)].reset_index(drop=True)

## Step 2: Data Clean & Aggregation

### Head to Head Summary

In [35]:
## League members included in the head-to-head comparisons
league_members = [
    'Luca',
    'Dylan',
    'Cody',
    'Matt',
    'Nate',
    'Hayden',
    'Gavin',
    'Zachary',
    'Alexander',
    'Evan',
]

In [36]:
## Label each matchup with its Winner and Loser; both are 'Tie' when neither score is higher
team1_outscored = dfMatchupsPd['Score1'] > dfMatchupsPd['Score2']
team2_outscored = dfMatchupsPd['Score1'] < dfMatchupsPd['Score2']

dfMatchupsPd['Winner'] = np.where(
    team1_outscored,
    dfMatchupsPd['Team1'],
    np.where(team2_outscored, dfMatchupsPd['Team2'], 'Tie'),
)

dfMatchupsPd['Loser'] = np.where(
    team1_outscored,
    dfMatchupsPd['Team2'],
    np.where(team2_outscored, dfMatchupsPd['Team1'], 'Tie'),
)

In [37]:
## Function to Swap Teams
## Team 1 should be the overall winner of the matchup
## Identifies H2H winner and loser
def swap_teams(df, member1, member2):
    """Orient every matchup so the overall H2H leader sits in the Team1 slot.

    The member with more entries in the 'Winner' column is the H2H leader;
    on equal win counts (including an empty frame) ``member1`` is the leader.

    Args:
        df: Matchups between the two members, with columns 'Team1', 'Team2',
            'Score1', 'Score2' and 'Winner'.
        member1: Name of the first member.
        member2: Name of the second member.

    Returns:
        Tuple ``(oriented_df, winning_member, losing_member)``. ``oriented_df``
        is a copy of ``df`` in which rows whose 'Team1' was not the leader have
        their team and score columns swapped. The input frame is not modified.
    """
    ## Work on a copy: callers pass a filtered slice of a larger frame, and
    ## writing into it in place triggers SettingWithCopy behaviour
    df = df.copy()

    ## Calculate win counts for each member
    member1_wins = (df['Winner'] == member1).sum()
    member2_wins = (df['Winner'] == member2).sum()

    if member1_wins >= member2_wins:
        winning_member, losing_member = member1, member2
    else:
        winning_member, losing_member = member2, member1

    ## Rows where the leader is not already in the Team1 slot
    needs_swap = df['Team1'] != winning_member

    if needs_swap.any():
        ## .to_numpy() strips the column labels so values are assigned by position,
        ## which is what actually swaps them
        df.loc[needs_swap, ['Team1', 'Team2']] = df.loc[needs_swap, ['Team2', 'Team1']].to_numpy()
        df.loc[needs_swap, ['Score1', 'Score2']] = df.loc[needs_swap, ['Score2', 'Score1']].to_numpy()

    return df, winning_member, losing_member

In [38]:
## Function to calculate H2H Metrics
def h2h_metrics(df, winning_member, losing_member):
    """Summarise a head-to-head series between two members as a one-row DataFrame.

    Args:
        df: Matchups between the two members, with columns 'Team1', 'Team2',
            'Score1', 'Score2', 'Winner' and 'playoffType'.
        winning_member: Name whose 'Winner' entries are counted as Team 1 wins.
        losing_member: Name whose 'Winner' entries are counted as Team 2 wins.

    Returns:
        A one-row DataFrame of totals, margins, win counts and win percentages.
        Win percentages are NaN when there are no decisive (non-tie) games.
        For an empty ``df`` the team labels fall back to ``winning_member`` /
        ``losing_member`` instead of raising IndexError.
    """
    score_diff = df['Score1'] - df['Score2']

    ## Score margin
    avg_score_margin = round(score_diff.mean(), 1)  # team 1 margin
    avg_score_margin_abs = round(score_diff.abs().mean(), 1)  # winning team margin

    ## Number of matchups
    total_matchups = len(df)

    ## Ties
    tie_games = (df['Winner'] == 'Tie').sum()

    ## Teams: labels come from the first row; with no rows use the names passed in
    if total_matchups:
        team1 = df['Team1'].iloc[0]
        team2 = df['Team2'].iloc[0]
    else:
        team1, team2 = winning_member, losing_member

    ## Points
    team1_points = round(df['Score1'].sum(), 1)
    team2_points = round(df['Score2'].sum(), 1)
    allteam_points = team1_points + team2_points

    ## Wins
    team1_wins = (df['Winner'] == winning_member).sum()
    team2_wins = (df['Winner'] == losing_member).sum()

    ## Win pct over decisive games only; undefined (NaN) if every game was a tie
    decisive_games = total_matchups - tie_games
    if decisive_games > 0:
        team1_win_pct = round((team1_wins / decisive_games) * 100, 1)
        team2_win_pct = round((team2_wins / decisive_games) * 100, 1)
    else:
        team1_win_pct = float('nan')
        team2_win_pct = float('nan')

    ## Playoff games: any row whose playoffType is not 'NONE'
    playoff_games = (df['playoffType'] != 'NONE').sum()

    ## Define df
    results_df = pd.DataFrame({
        'Team 1': [team1],
        'Team 2': [team2],
        'Team 1 Points': [team1_points],
        'Team 2 Points': [team2_points],
        'Total H2H Points': [allteam_points],
        'Average Score Margin (Team 1)': [avg_score_margin],
        'Average Score Margin (Winning Team)': [avg_score_margin_abs],
        'Total Matchups': [total_matchups],
        'Team 1 Wins': [team1_wins],
        'Team 2 Wins': [team2_wins],
        'Team 1 Win %': [team1_win_pct],
        'Team 2 Win %': [team2_win_pct],
        'Tie Games': [tie_games],
        'Playoff Games': [playoff_games],
    })

    return results_df

In [39]:
## Empty accumulator for the H2H summaries; one row per pairing is concatenated onto it
data = {
    column: []
    for column in [
        'Team 1',
        'Team 2',
        'Team 1 Points',
        'Team 2 Points',
        'Total H2H Points',
        'Average Score Margin (Team 1)',
        'Average Score Margin (Winning Team)',
        'Total Matchups',
        'Team 1 Wins',
        'Team 2 Wins',
        'Team 1 Win %',
        'Team 2 Win %',
        'Tie Games',
        'Playoff Games',
    ]
}

dfH2HAll = pd.DataFrame(data)

In [40]:
## Build H2H summaries for every ordered pair of league members
for member1 in league_members:
    for member2 in league_members:

        ## Skip pairing a member with themselves
        if member1 == member2:
            continue

        ## Games between the two members, whichever slot each one was in
        member1_in_team1_slot = (dfMatchupsPd['Team1'] == member1) & (dfMatchupsPd['Team2'] == member2)
        member2_in_team1_slot = (dfMatchupsPd['Team1'] == member2) & (dfMatchupsPd['Team2'] == member1)
        dfH2HRaw = dfMatchupsPd[member1_in_team1_slot | member2_in_team1_slot]

        ## Swap teams if not aligned in H2H
        dfH2HSwap, winning_member, losing_member = swap_teams(dfH2HRaw, member1, member2)

        ## Calculate H2H Metrics
        dfH2H = h2h_metrics(dfH2HSwap, winning_member, losing_member)

        ## Append all H2H History
        dfH2HAll = pd.concat([dfH2HAll, dfH2H], ignore_index=True)

## Removes Duplicates
## Each pairing is produced twice (A vs B and B vs A); an order-independent key
## lets drop_duplicates keep only the first occurrence
dfH2HAll['key'] = dfH2HAll.apply(lambda row: tuple(sorted([row['Team 1'], row['Team 2']])), axis=1)
dfH2HFinal = dfH2HAll.drop_duplicates(subset='key', keep='first')
dfH2HFinal = dfH2HFinal.drop(columns='key').reset_index(drop=True)

### Matchups History

In [41]:
## Individual Matchup History

## Copy both team columns so the opponent can still be identified after melting
dfMatchupsPd['TeamA'] = dfMatchupsPd['Team1']
dfMatchupsPd['TeamB'] = dfMatchupsPd['Team2']

## Wide -> long: each matchup becomes two rows, one per team
carried_columns = ['Week', 'Type', 'Year', 'Score1', 'Score2', 'Winner', 'Loser', 'TeamA', 'TeamB']
melted_df = dfMatchupsPd.melt(
    id_vars=carried_columns,
    value_vars=['Team1', 'Team2'],
    var_name='team_type',
    value_name='Team',
)

## Outcome from the perspective of the row's team
tie_or_loss = np.where(melted_df['Winner'] == 'Tie', 'Tie', 'Loss')
melted_df['Outcome'] = np.where(melted_df['Team'] == melted_df['Winner'], 'Win', tie_or_loss)

## The opponent is whichever of TeamA / TeamB is not the row's team
melted_df['Opponent'] = np.where(melted_df['Team'] == melted_df['TeamA'], melted_df['TeamB'], melted_df['TeamA'])

## Own score and opponent score depend on which slot the team came from
came_from_team1 = melted_df['team_type'] == 'Team1'
melted_df['Score'] = np.where(came_from_team1, melted_df['Score1'], melted_df['Score2'])
melted_df['Opponent_score'] = np.where(came_from_team1, melted_df['Score2'], melted_df['Score1'])
melted_df['Score_margin'] = melted_df['Score'] - melted_df['Opponent_score']

## 1/0 flags for the highest and lowest score within each Year + Week
weekly_scores = melted_df.groupby(['Year', 'Week'])['Score']
melted_df['Top_scoring_week'] = weekly_scores.transform(lambda x: (x == x.max()).astype(int))
melted_df['Lowest_scoring_week'] = weekly_scores.transform(lambda x: (x == x.min()).astype(int))

## Drop helper columns that are no longer needed
melted_df = melted_df.drop(columns=['Score1', 'Score2', 'Winner', 'team_type', 'TeamA', 'TeamB'])

## Select desired columns, then sort by season, week, and score (highest first)
history_columns = ['Year', 'Week', 'Team', 'Score', 'Opponent', 'Opponent_score', 'Score_margin', 'Outcome', 'Type', 'Top_scoring_week', 'Lowest_scoring_week']
dfTeamHistory = (
    melted_df[history_columns]
    .sort_values(by=['Year', 'Week', 'Score'], ascending=[True, True, False])
    .reset_index(drop=True)
)

### Standings

#### Regular

In [42]:
## Regular Season Standings ##
## Builds dfRegSeasonStandings: one row per season listing the teams in
## regular-season finishing order ('1st' .. '10th').

## Empty frame with the target columns; each season's row is concatenated onto it
data = {
    'Year': [],
    '1st': [],
    '2nd': [],
    '3rd': [],
    '4th': [],
    '5th': [],
    '6th': [],
    '7th': [],
    '8th': [],
    '9th': [],
    '10th': []
}

dfRegSeasonStandings = pd.DataFrame(data)

## Cast Year to int so it can be compared with the integer years in the loop below
dfTeamHistory['Year'] = dfTeamHistory['Year'].astype(int)

## Only the seasons listed here are ranked (2025 is not in the list)
for year in [2017, 2018, 2019, 2020, 2021, 2022, 2023, 2024]:

  ## Filter team history
  dfTeamHistoryYear = dfTeamHistory[dfTeamHistory['Year'] == year]

  ## Non-playoff games only: per-team counts of each Outcome, and per-team total points
  dfRecordYear = dfTeamHistoryYear[dfTeamHistoryYear['Type'] != 'Playoff'].groupby('Team')['Outcome'].value_counts().unstack('Outcome')
  dfPointsYear = dfTeamHistoryYear[dfTeamHistoryYear['Type'] != 'Playoff'].groupby('Team')['Score'].sum().reset_index(name='Points')

  ## A season without ties has no 'Tie' column after unstacking; add it so the sort below works
  if 'Tie' not in dfRecordYear.columns:
    dfRecordYear['Tie'] = 0

  ## Missing outcome counts become 0; rank by wins, then ties, then points (all descending)
  dfRecordYear = dfRecordYear.rename(columns={'Win': 'Wins', 'Loss': 'Losses', 'Tie': 'Ties'}).fillna(0).reset_index()
  dfMergedYear = pd.merge(dfPointsYear, dfRecordYear, on='Team').sort_values(by=['Wins', 'Ties', 'Points'], ascending=[False, False, False]).reset_index(drop=True)

  ## Position in the sorted frame is the standing, stored as a string ('1', '2', ...)
  dfMergedYear['Standings'] = range(1, len(dfMergedYear) + 1)
  dfMergedYear['Standings'] = dfMergedYear['Standings'].astype(str)

  ## Reshape to a single row with one column per standing, holding the team name
  dfTranspose = dfMergedYear.pivot_table(columns='Standings', values='Team', aggfunc='first').reset_index(drop=True)
  dfTranspose.columns.name = None

  ## 2017 and 2018 get empty 9th/10th slots
  ## NOTE(review): presumably the league had only eight teams in those seasons - confirm
  if year in [2017, 2018]:
    dfTranspose['9th'] = None
    dfTranspose['10th'] = None

  ## Convert the numeric standing labels to ordinal column names
  dfTranspose = dfTranspose.rename(columns={'1': '1st', '2': '2nd', '3': '3rd', '4': '4th', '5': '5th'
                                          , '6': '6th', '7': '7th', '8': '8th', '9': '9th', '10': '10th'})
  dfTranspose['Year'] = year

  ## Append this season's row to the running standings table
  dfRegSeasonStandings = pd.concat([dfRegSeasonStandings, dfTranspose], ignore_index=True)

#### Final

In [43]:
## Final Standings ##
### HARD-CODED ###
## One list entry per season, aligned with the 'Year' list; None where no 9th/10th place is recorded
final_standings = {
    'Year': [2017, 2018, 2019, 2020, 2021, 2022, 2023, 2024],
    '1st':  ['Dylan', 'Luca', 'Gavin', 'Matt', 'Gavin', 'Luca', 'Alexander', 'Evan'],
    '2nd':  ['Luca', 'Matt', 'Alexander', 'Luca', 'Evan', 'Matt', 'Luca', 'Zachary'],
    '3rd':  ['Hayden', 'Nate', 'Dylan', 'Hayden', 'Cody', 'Gavin', 'Matt', 'Matt'],
    '4th':  ['Gavin', 'Gavin', 'Nate', 'Dylan', 'Zachary', 'Cody', 'Hayden', 'Gavin'],
    '5th':  ['Matt', 'Zachary', 'Hayden', 'Cody', 'Nate', 'Hayden', 'Cody', 'Dylan'],
    '6th':  ['Zachary', 'Hayden', 'Matt', 'Evan', 'Hayden', 'Dylan', 'Gavin', 'Cody'],
    '7th':  ['Nate', 'Dylan', 'Zachary', 'Alexander', 'Luca', 'Zachary', 'Nate', 'Nate'],
    '8th':  ['Cody', 'Cody', 'Cody', 'Gavin', 'Matt', 'Alexander', 'Evan', 'Alexander'],
    '9th':  [None, None, 'Evan', 'Nate', 'Alexander', 'Evan', 'Zachary', 'Luca'],
    '10th': [None, None, 'Luca', 'Zachary', 'Dylan', 'Nate', 'Dylan', 'Hayden'],
}

## Build the frame and order it by season, oldest first
dfFinalStandings = (
    pd.DataFrame(final_standings)
    .sort_values(by='Year', ascending=True)
    .reset_index(drop=True)
)

## Step 3: Write Out

In [26]:
## Switch the working directory to the dashboard folder; the CSV exports use relative file names
output_dir = r'C:\Users\hurst\OneDrive\Desktop\league-of-morons-reference\Dashboards\TeamPerformance'
os.chdir(output_dir)

In [27]:
## Write each result table to its CSV file in the current working directory (no index column)
for csv_name, frame in (
    ('headtoheadsummary.csv', dfH2HFinal),            ## H2H Summary ##
    ('matchups_clean.csv', dfTeamHistory),            ## Matchups ##
    ('standings_regular.csv', dfRegSeasonStandings),  ## Regular Season Standings ##
    ('standings_final.csv', dfFinalStandings),        ## Final Season Standings ##
):
    frame.to_csv(csv_name, index=False)