In [1]:
%load_ext autoreload
%autoreload 2

In [2]:
import requests
import json
import pandas as pd

In [3]:
data_from_march = requests.get(
    'https://site.api.espn.com/apis/site/v2/sports/basketball/mens-college-basketball/scoreboard?dates=202403'
).json()

In [3]:
short_teams, long_teams = [], []
for event in data_from_march['events']:
    s_teams = event['shortName'].split(' VS ')
    assert len(s_teams) == 2
    l_teams = event['name'].split(' at ')
    assert len(l_teams) == 2
    short_teams += s_teams
    long_teams += l_teams

long_to_short = dict(zip(long_teams, short_teams))


In [4]:
short_to_long = dict(zip(short_teams, long_teams))

In [5]:
event_types = set()
for event in data_from_march['events']:
    event_types.add(event['season']['slug'])

In [6]:
test_df = pd.read_csv('monkey-madness-teams.csv')

In [7]:
def normalize_team_name(team_name):
    n = team_name.strip(".").strip()
    if n.endswith("'s"):
        return n[:-2]
    return n

In [8]:
def team_name_to_short_code(team_name_norm):
    short_team_code = None
    for long_team, short_team in long_to_short.items():
        if team_name_norm.lower() in long_team.lower() or long_team.lower() in team_name_norm.lower():
            short_team_code = short_team
        elif short_team.lower() in team_name_norm.lower():
            short_team_code = short_team
    return short_team_code

In [9]:
test_df['team_name_norm'] = test_df.team_name.apply(normalize_team_name)
test_df['team_code'] = test_df.team_name_norm.apply(team_name_to_short_code)

In [10]:
# fix empty and duplicates

In [11]:
code_counts = dict(test_df.team_code.value_counts())

In [12]:
dup_codes = [key for key, value in code_counts.items() if value > 1]

In [13]:
dup_codes

['LONG', 'FAU', 'TA&M', 'JMU', 'ORE', 'WKU', 'COLO']

In [14]:
no_code = test_df[pd.isna(test_df.team_code)]
print(len(no_code))
no_code.head(15)

0


Unnamed: 0,participant_name,team_name,seed,team_name_norm,team_code


In [15]:
dup_code = test_df[test_df.team_code.apply(lambda c: c in dup_codes)]
print(len(dup_code))
dup_code.head(15)

14


Unnamed: 0,participant_name,team_name,seed,team_name_norm,team_code
2,Molloy,Florida Atlantic,8,Florida Atlantic,FAU
11,Nicolai,Morehead St.,14,Morehead St,ORE
31,F-Cup,Long Beach St.,15,Long Beach St,LONG
33,Meshi (2),Longwood,16,Longwood,LONG
35,Gateson (2),Texas A&M,9,Texas A&M,TA&M
37,Katz (2),James Madison,12,James Madison,JMU
38,Scott (2),Duke,4,Duke,JMU
42,Cimi (2),Kentucky,3,Kentucky,WKU
44,Molloy (2),Florida,7,Florida,FAU
45,Bobby + Derham (2),Colorado,10,Colorado,COLO


In [16]:
fixes = {
    'Molloy (2)': 'FLA',
    'Nicolai': 'MORE',
    'F-Cup': 'LBSU',
    'Scott (2)': 'DUKE',
    'Cimi (2)': 'UK',
    'Peralo (2)': 'CSU',
    'Leff (2)': 'TEX'
}

In [17]:
test_df['team_code'] = test_df.apply(
    lambda row: fixes[row.participant_name] if row.participant_name in fixes else row.team_code,
    axis=1
)

In [18]:
test_df.isna().sum()

participant_name    0
team_name           0
seed                0
team_name_norm      0
team_code           0
dtype: int64

In [19]:
len(test_df.team_code.unique())

63

In [20]:
test_df.team_code.value_counts()

team_code
TEX     2
CONN    1
WKU     1
NEB     1
TA&M    1
       ..
BAY     1
COLG    1
DAY     1
NEV     1
SPU     1
Name: count, Length: 63, dtype: int64

In [25]:
test_df[test_df.team_code.values == 'TEX'].head()

Unnamed: 0,participant_name,team_name,seed,team_name_norm,team_code
40,John + Tyler (2),Texas Tech,6,Texas Tech,TEX
60,Leff (2),Texas,7,Texas,TEX


In [33]:
test_df[test_df.team_code.values == 'TTU'].head()

Unnamed: 0,participant_name,team_name,seed,team_name_norm,team_code


In [24]:
len(test_df.team_code.unique())

63

In [21]:
test_df.to_csv('mm-with-team-codes.csv', index=False)

In [34]:
# manually changed to TTU^

In [35]:
df_clean = pd.read_csv('mm-with-team-codes.csv')
assert len(df_clean.team_code.unique()) == 64

In [9]:
from construct_bracket import Bracket

In [12]:
bracket = Bracket.from_csv('mm-with-team-codes.csv')

In [13]:
bracket

<construct_bracket.Bracket at 0x132c98e20>

In [4]:
data_from_march.keys()

dict_keys(['leagues', 'groups', 'day', 'events', 'eventsDate'])

In [35]:
is_finished = data_from_march['events'][0]['status']['type']['completed']  # boolean
status = data_from_march['events'][0]['status']['type']['name']
# change last 0 to 1 for second team
team_to_score = {}
for i in range(2):
    score = data_from_march['events'][0]['competitions'][0]['competitors'][i]['score']
    team_code = data_from_march['events'][0]['competitions'][0]['competitors'][i]['team']['abbreviation']
    team_to_score[team_code] = score
status, is_finished, team_to_score

('STATUS_FINAL', True, {'UVA': '42', 'CSU': '67'})

In [46]:
data_from_march['events'][0]['competitions'][0]['competitors'][0]['winner']

False