final write-up

In [1]:
# ----- set-up -----
import pandas as pd
import os
import folium
from folium.features import DivIcon
# Anchor every path at the directory the notebook is launched from; the CSVs
# this notebook reads and writes live in its "outputs" subfolder.
main_dir = os.getcwd()
output_dir = "/".join((main_dir, "outputs"))

In [2]:
# ----- read in outputs -----
# Resolve the three input paths first, then load each CSV into its own frame.
travel_schedule_path = f"{output_dir}/OSU_outcome_travel_schedule.csv"
game_schedule_path = f"{output_dir}/schedule.csv"
arena_info_path = f"{output_dir}/arena_info.csv"

model_travel_schedule = pd.read_csv(travel_schedule_path)  # model's travel plan
game_schedule = pd.read_csv(game_schedule_path)            # raw game results table
arenas = pd.read_csv(arena_info_path)                      # per-institution arena info

In [5]:
# ----- rename schools to match the other dfs -----
# Lookup table: full institution name (as it appears in the arena table's
# 'Institution' column) -> compact, space-free team label used by the
# schedule frames.
matrixlist_to_team = {
    "University of Illinois Urbana-Champaign": "Illinois",
    "Indiana University Bloomington": "Indiana",
    "University of Iowa": "Iowa",
    "University of Maryland, College Park": "Maryland",
    "University of Michigan": "Michigan",
    "Michigan State University": "MichiganState",
    "University of Minnesota Twin Cities": "Minnesota",
    "University of Nebraska-Lincoln": "Nebraska",
    "Northwestern University": "Northwestern",
    "The Ohio State University": "OhioState",
    "University of Oregon": "Oregon",
    "Pennsylvania State University": "PennState",
    "Purdue University": "Purdue",
    "Rutgers, The State University of New Jersey, New Brunswick": "Rutgers",
    "University of Southern California": "SouthernCalifornia",
    "University of California, Los Angeles": "UCLA",
    "University of Washington": "Washington",
    "University of Wisconsin-Madison": "Wisconsin",
}
# Build a renamed copy (the loaded `arenas` frame is left untouched) whose
# 'Institution' values are translated through the lookup table; names that
# are not in the table come out as NaN.
arenas_renamed = arenas.assign(
    Institution=arenas['Institution'].map(matrixlist_to_team)
)

In [6]:
# ----- clean the game_schedule -----
# Relabel the raw columns positionally (the raw table must have exactly these
# 11 columns). NOTE: this cell rewrites `game_schedule` in place and narrows it
# to 9 columns, so re-running it requires re-running the load cell first.
game_schedule.columns = ['Unnamed', 'Rk', 'Date', 'Time', 'Day', 'Winner', 'Pts', 'LocationDesignation', 'Loser', 'Pts.1', 'Notes']

# '@' marks games where the listed winner played at the loser's venue. Any
# other value (including blank) treats the winner as the home side.
# NOTE(review): if the data has a separate neutral-site flag, those games are
# also attributed to the winner as "home" -- confirm that is acceptable.
winner_was_away = game_schedule['LocationDesignation'] == '@'

# create home/away assignment (vectorized: keep the first series where the
# mask is True, otherwise take the value from the second)
game_schedule['Home'] = game_schedule['Loser'].where(winner_was_away, game_schedule['Winner'])
game_schedule['Away'] = game_schedule['Winner'].where(winner_was_away, game_schedule['Loser'])

# assign points ('Pts' belongs to Winner, 'Pts.1' to Loser)
game_schedule['HomePts'] = game_schedule['Pts.1'].where(winner_was_away, game_schedule['Pts'])
game_schedule['AwayPts'] = game_schedule['Pts'].where(winner_was_away, game_schedule['Pts.1'])

# select and order desired columns; the explicit copy makes the result an
# independent frame so the column assignments below do not trigger
# SettingWithCopyWarning
game_schedule = game_schedule[['Rk', 'Date', 'Time', 'Day', 'Home', 'HomePts', 'Away', 'AwayPts', 'Notes']].copy()

# clean the Home and Away column to remove rankings: strip non-breaking
# spaces, then drop every space, parenthesis and digit (e.g. a "(3)" prefix),
# which also collapses multi-word names into one token
for team_col in ('Home', 'Away'):
    game_schedule[team_col] = (
        game_schedule[team_col]
        .str.replace('\xa0', '', regex=False)
        .str.replace(r"[ ()\d]", "", regex=True)
    )

In [7]:
# ----- merge model output schedule and game schedule to get final results (and clean it a bit) -----
# Both frames need a real datetime 'Date' column so the join keys compare equal.
model_travel_schedule['Date'] = pd.to_datetime(model_travel_schedule['date'])
game_schedule['Date'] = pd.to_datetime(game_schedule['Date'])

# final schedule: keep only the games hosted at the model's destination on
# that date (inner join, so travel rows without a matching home game drop out)
travel_game_schedule = model_travel_schedule.merge(
    game_schedule,
    how='inner',
    left_on=['Date', 'destination'],
    right_on=['Date', 'Home'],
)

# narrow to the reporting columns, in display order
report_columns = ['Date', 'Time', 'Day', 'Home', 'HomePts', 'Away', 'AwayPts', 'source', 'destination']
travel_game_schedule = travel_game_schedule[report_columns]

# friendlier names for the travel endpoints
travel_column_names = {'source': 'Travel_Start', 'destination': 'Travel_End'}
travel_game_schedule_clean = travel_game_schedule.rename(columns=travel_column_names)
travel_game_schedule_clean

Unnamed: 0,Date,Time,Day,Home,HomePts,Away,AwayPts,Travel_Start,Travel_End
0,2024-09-07,3:30 PM,Sat,Maryland,24,MichiganState,27,OhioState,Maryland
1,2024-09-14,7:30 PM,Sat,UCLA,13,Indiana,42,OhioState,UCLA
2,2024-09-20,8:00 PM,Fri,Nebraska,24,Illinois,31,OhioState,Nebraska
3,2024-09-27,8:00 PM,Fri,Rutgers,21,Washington,18,OhioState,Rutgers
4,2024-10-05,7:30 PM,Sat,Minnesota,24,SouthernCalifornia,17,OhioState,Minnesota
5,2024-10-18,8:00 PM,Fri,Purdue,0,Oregon,35,OhioState,Purdue
6,2024-10-19,3:30 PM,Sat,Illinois,21,Michigan,7,OhioState,Illinois
7,2024-10-25,11:00 PM,Fri,SouthernCalifornia,42,Rutgers,20,OhioState,SouthernCalifornia
8,2024-10-26,7:30 PM,Sat,Wisconsin,13,PennState,28,OhioState,Wisconsin
9,2024-11-02,12:00 PM,Sat,PennState,13,OhioState,20,OhioState,PennState


In [11]:
# ----- merge the final travel and game schedule with arena info for travel visualization -----
# Attach each home team's arena columns by matching 'Home' to the renamed
# 'Institution'; the join key from the arena side is dropped afterwards since
# it duplicates 'Home'.
arena_schedule = (
    travel_game_schedule_clean
    .merge(arenas_renamed, how='inner', left_on=['Home'], right_on=['Institution'])
    .drop(columns='Institution')
)

# ----- save to csv -----
# NOTE(review): the file is written before sorting and with pandas' default
# index column -- confirm that downstream readers expect that layout.
arena_schedule.to_csv(f"{output_dir}/final_results.csv")

# chronological view with a fresh 0..n-1 index, shown as the cell output
arena_schedule_sorted = arena_schedule.sort_values(by='Date', ignore_index=True)
arena_schedule_sorted

Unnamed: 0,Date,Time,Day,Home,HomePts,Away,AwayPts,Travel_Start,Travel_End,Latitude,Longitude,Location
0,2024-09-07,3:30 PM,Sat,Maryland,24,MichiganState,27,OhioState,Maryland,38.988056,-76.943056,"College Park, Maryland"
1,2024-09-14,7:30 PM,Sat,UCLA,13,Indiana,42,OhioState,UCLA,34.072222,-118.442778,"Los Angeles, California(Westwood)"
2,2024-09-20,8:00 PM,Fri,Nebraska,24,Illinois,31,OhioState,Nebraska,40.8175,-96.701389,"Lincoln, Nebraska"
3,2024-09-27,8:00 PM,Fri,Rutgers,21,Washington,18,OhioState,Rutgers,40.501667,-74.448056,"New Brunswick-Piscataway, New Jersey"
4,2024-10-05,7:30 PM,Sat,Minnesota,24,SouthernCalifornia,17,OhioState,Minnesota,44.975,-93.235278,"Minneapolis-St. Paul, Minnesota[e]"
5,2024-10-18,8:00 PM,Fri,Purdue,0,Oregon,35,OhioState,Purdue,40.425,-86.923056,"West Lafayette, Indiana"
6,2024-10-19,3:30 PM,Sat,Illinois,21,Michigan,7,OhioState,Illinois,40.110556,-88.228333,"Champaign-Urbana, Illinois[a]"
7,2024-10-25,11:00 PM,Fri,SouthernCalifornia,42,Rutgers,20,OhioState,SouthernCalifornia,34.020556,-118.284722,"Los Angeles, California(University Park)"
8,2024-10-26,7:30 PM,Sat,Wisconsin,13,PennState,28,OhioState,Wisconsin,43.075278,-89.404167,"Madison, Wisconsin"
9,2024-11-02,12:00 PM,Sat,PennState,13,OhioState,20,OhioState,PennState,40.798333,-77.86,"University Park, Pennsylvania"


In [None]:
# ----- visualize the final results -----
# Draws one straight line from Ohio State to each game location and drops a
# numbered badge (1 = first row of the date-sorted schedule) on the destination.

# columbus' coordinates for the graph; every line starts here
ohio_state_coords = [39.999998, -83.014702]

# rendered badge size in pixels: 20px content box + 2px border on each side
badge_px = 24

# create the map and center on usa
m = folium.Map(location=[41.5, -96], zoom_start=4)

# loop through each destination row and draw line from OhioState to destination;
# number stops by iteration position rather than by index label so the badges
# stay 1..n even if the frame's index is not a clean 0..n-1 range
for stop_number, (_, row) in enumerate(arena_schedule_sorted.iterrows(), start=1):
    end_coords = [row['Latitude'], row['Longitude']]

    # show only the calendar date; interpolating the raw Timestamp would add
    # a meaningless "00:00:00" time component to the labels
    date_label = pd.Timestamp(row['Date']).strftime('%Y-%m-%d')

    # travel line
    folium.PolyLine(
        locations=[ohio_state_coords, end_coords],
        color='#2C3E50',
        weight=2,
        tooltip=f"OhioState → {row['Travel_End']} on {date_label}"
    ).add_to(m)

    # adding numbered marker at Travel_End location; explicit size + anchor
    # center the badge on the coordinate so the line ends in its middle
    folium.Marker(
        location=end_coords,
        icon=folium.DivIcon(
            icon_size=(badge_px, badge_px),
            icon_anchor=(badge_px // 2, badge_px // 2),
            html=f"""
            <div style="
                background-color: #2C3E50;
                color: white;
                border-radius: 50%;
                text-align: center;
                width: 20px;
                height: 20px;
                line-height: 20px;
                font-weight: bold;
                border: 2px solid white;
                box-shadow: 0 0 2px #555;
            ">{stop_number}</div>
        """),
        popup=f"{row['Travel_End']} ({date_label})"
    ).add_to(m)

# preview
m