# Introduction

The purpose of this analysis is to compare the normalized EPA (via Statbotics) of FUM teams (those based in Minnesota, North Dakota, or South Dakota) against that of teams that travel to FUM events from elsewhere.

# Code

## Imports

In [None]:
import json
from frcpy import FRCPy
from frcpy.models import Location, Event, Team, TeamYearStats
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
import os


## Constants

In [None]:
# Two-letter abbreviation -> full name. is_fum() uses this table to harmonize
# locations that are reported as an abbreviation with those reported by name.
US_STATE_ABBREVIATIONS = {
    # --- The fifty states ---
    "AL": "Alabama",
    "AK": "Alaska",
    "AZ": "Arizona",
    "AR": "Arkansas",
    "CA": "California",
    "CO": "Colorado",
    "CT": "Connecticut",
    "DE": "Delaware",
    "FL": "Florida",
    "GA": "Georgia",
    "HI": "Hawaii",
    "ID": "Idaho",
    "IL": "Illinois",
    "IN": "Indiana",
    "IA": "Iowa",
    "KS": "Kansas",
    "KY": "Kentucky",
    "LA": "Louisiana",
    "ME": "Maine",
    "MD": "Maryland",
    "MA": "Massachusetts",
    "MI": "Michigan",
    "MN": "Minnesota",
    "MS": "Mississippi",
    "MO": "Missouri",
    "MT": "Montana",
    "NE": "Nebraska",
    "NV": "Nevada",
    "NH": "New Hampshire",
    "NJ": "New Jersey",
    "NM": "New Mexico",
    "NY": "New York",
    "NC": "North Carolina",
    "ND": "North Dakota",
    "OH": "Ohio",
    "OK": "Oklahoma",
    "OR": "Oregon",
    "PA": "Pennsylvania",
    "RI": "Rhode Island",
    "SC": "South Carolina",
    "SD": "South Dakota",
    "TN": "Tennessee",
    "TX": "Texas",
    "UT": "Utah",
    "VT": "Vermont",
    "VA": "Virginia",
    "WA": "Washington",
    "WV": "West Virginia",
    "WI": "Wisconsin",
    "WY": "Wyoming",
    # --- Federal district and other non-state codes ---
    "DC": "District of Columbia",
    "MP": "Northern Mariana Islands",
    "PW": "Palau",
    "PR": "Puerto Rico",
    "VI": "Virgin Islands",
    # --- Armed Forces codes ---
    "AA": "Armed Forces Americas (Except Canada)",
    "AE": "Armed Forces Other/Canada/Other/Middle East",
    "AP": "Armed Forces Pacific",
}


In [None]:
# Resolve the API credentials. Environment variables take priority; if either
# one is missing, BOTH tokens are read from a local token.json file instead.
try:
    # Load the tokens from the environment
    TBA_TOKEN = os.environ['SECRET_TBA_TOKEN']
    GMAPS_TOKEN = os.environ['SECRET_GMAPS_TOKEN']
except KeyError:
    # Or load from JSON file. 'utf-8' is the canonical codec name; the former
    # 'UTF+8' spelling only resolved through codec-alias normalization.
    with open('token.json', 'r', encoding='utf-8') as f:
        tokens = json.load(f)
    # The file is already closed here; a missing key raises KeyError.
    TBA_TOKEN = tokens['TBA']
    GMAPS_TOKEN = tokens['GMAPS']

# Shared client used by every data-loading cell below
API = FRCPy(TBA_TOKEN, GMAPS_TOKEN)


## Support functions

In [None]:
def is_fum(loc: Location) -> bool:
    """Tell whether a location lies in Minnesota, North Dakota or South Dakota.

    Args:
        loc: Location whose ``country()`` and ``state_prov()`` are inspected.

    Returns:
        True only when the country is ``'USA'`` and the state/province, given
        either as a full name or as an abbreviation listed in
        ``US_STATE_ABBREVIATIONS``, is one of the three states above.
    """
    if loc.country() == 'USA':
        reported = loc.state_prov()
        # Harmonize state names: known abbreviations become full names,
        # anything else is compared as-is.
        full_name = US_STATE_ABBREVIATIONS.get(reported, reported)
        return full_name in ('Minnesota', 'North Dakota', 'South Dakota')
    return False


## Read data from TBA & Statbotics

In [None]:
# Prepare team keys
# Fetch the full team collection from the API and report how many there are.
# Only the count is used in this cell; `teams` is rebound further down to a
# dict holding just the teams that attended FUM events.
teams = API.teams()
print(f"{len(teams)} teams")


In [None]:
# Prepare event keys
# Collect the event keys for every year the API reports.
year_range = API.year_range()
first_year, stop_year = year_range[0], year_range[1]

event_keys_by_year: dict[int, list[str]] = {}
# NOTE(review): range() excludes stop_year itself; confirm that
# API.year_range() returns an exclusive upper bound, otherwise the most
# recent season is skipped.
for year in range(first_year, stop_year):
    year_event_keys = API.year_events(year)
    event_keys_by_year[year] = year_event_keys
    print(f"Found {len(year_event_keys)} in {year}")


In [None]:
# Prepare event structures
# Resolve every event key into a full Event, one season at a time, printing a
# progress line after each season.
events: dict[str, Event] = {}
for year in event_keys_by_year:
    for key in event_keys_by_year[year]:
        events[key] = API.event(key)
    print(f"Completed {year}")


In [None]:
# Filter for only FUM events
# Keep only the events whose type code is 0 and whose venue passes is_fum().
# NOTE(review): type 0 is presumably TBA's "Regional" event type; confirm.
fum_events: list[Event] = [
    candidate
    for candidate in events.values()
    if candidate.event_type() == 0 and is_fum(candidate.location())
]
print(f"Found {len(fum_events)} FUM events")


In [None]:
# Grab team lists from FUM events
# Map each FUM event key to the list of team keys that attended that event.
fum_event_teams: dict[str, list[str]] = {
    fum_event.key(): API.event_teams(fum_event.key())
    for fum_event in fum_events
}
print(f"Found team lists for {len(fum_event_teams)} FUM events")


In [None]:
# Grab team data for all teams at FUM events
# Grab team data for all teams at FUM events.
# A team usually attends many FUM events over the years, so each team key is
# looked up only once rather than once per appearance.
teams: dict[str, Team] = {}
for team_keys in fum_event_teams.values():
    for team_key in team_keys:
        if team_key not in teams:
            teams[team_key] = API.team(team_key)
print(f"Found team data for {len(teams)} teams")


In [None]:
# Grab team stats for all teams at FUM events
# Grab team stats for all teams at FUM events, grouped as
# team_stats[year][team_key]. `count` is the number of team-year entries
# actually stored.
team_stats: dict[int, dict[str, TeamYearStats]] = {}
count = 0
for event_key, team_keys in fum_event_teams.items():
    year = Event.event_key_to_year(event_key)
    # Several FUM events share a year: reuse that year's dict rather than
    # resetting it, otherwise each event would wipe the stats gathered from
    # the earlier events of the same season.
    year_stats = team_stats.setdefault(year, {})
    for team_key in team_keys:
        # Team already recorded for this season via another event
        if team_key in year_stats:
            continue

        try:
            year_stats[team_key] = API.team_year_stats(team_key, year)
        except UserWarning:
            # Lookup failed; report it and leave the team out of this season
            print(f"Unable to access {team_key} for {year}")
        else:
            count += 1
print(f"Found team stats for {count} team-year entries")


## Reformat data into a DataFrame

In [None]:
# Flatten team_stats into column lists: one row per (team, year) pair, tagged
# 'FUM' or 'External' according to the team's home location.
data = {column: [] for column in ('Team', 'Year', 'Normalized EPA', 'Location')}
for year, year_data in team_stats.items():
    for team_key, team_year_stats in year_data.items():
        team_number = Team.team_key_to_number(team_key)
        norm_epa = team_year_stats.norm_epa_end()
        if is_fum(teams[team_key].location()):
            location = 'FUM'
        else:
            location = 'External'

        data['Team'].append(team_number)
        data['Year'].append(year)
        data['Normalized EPA'].append(norm_epa)
        data['Location'].append(location)

df = pd.DataFrame(data)


# Plot

In [None]:
# One box per (year, location group), drawn on the current axes, then saved
# to disk and displayed.
sns.set_theme(style='darkgrid', font_scale=0.625)
ax = sns.boxplot(data=df, x='Year', y='Normalized EPA', hue='Location')
ax.set_xlabel('Year')
ax.set_ylabel('Normalized EPA')
ax.legend(loc='upper left', title='Location')
plt.savefig('fum-vs-external-epa.png', dpi=512, bbox_inches='tight')
plt.show()
