# Functions for Courses File

In [1]:
# Silence warnings for the rest of the session: with no message, category, or
# module argument, this filter matches every warning that is raised.
# NOTE(review): a blanket "ignore" also hides warnings that could point at real
# problems; consider narrowing it to a specific category once the noisy source is known.
import warnings
warnings.filterwarnings("ignore")

In [2]:
import pandas as pd
import sqlite3
import courses
from importlib import reload

In [19]:
# Run this if you change the 'courses.py' file but have already created the database in this notebook.
# It re-imports the 'courses' module so the kernel does not have to be restarted.
# Note: reload() re-executes the module, but objects created before the reload (such as an
# existing `db`) keep using the old class definition, so re-create `db` afterwards to pick up
# the changes (per the connection cell's comment, `create = True` is only meant for the very first run).
# The trailing ';' keeps the notebook from displaying the returned module object.
reload(courses);

In [4]:
# Open the courses database file.
# Pass create=True only on the very first run.
DB_PATH = 'courses.db'
db = courses.CoursesDB(DB_PATH, create=True)

Database created at courses.db


In [5]:
# Drop all tables, then build them again.
# Run this the first time you connect to the database, and again whenever the
# table-building code in 'courses.py' changes.
db.drop_all_tables(are_you_sure=True)
db.build_tables()

In [6]:
# Load the results for every meet you want to look at.
# Add or remove result-page links in this list; each one is passed to db.load_results(link).
# The links are loaded in list order (the race_id values in the saved output of
# db.see_loaded_races() appear to follow this order).
RESULT_LINKS = [
    'https://www.tfrrs.org/results/xc/23538/The_CNU_XC_Invitational',
    'https://www.tfrrs.org/results/xc/23362/2024_Pirate_Cross_Country_Invitational_',
    'https://www.tfrrs.org/results/xc/25186/Panorama_Farms_Invitational',
    'https://tfrrs.org/results/xc/24854/2024_CAA_Cross_Country_Championship',
    'https://www.tfrrs.org/results/xc/23502/Elon_Phoenix_XC_Invitational_',
    'https://www.tfrrs.org/results/xc/25322/NCAA_DI_Southeast_Regional_Cross_Country_Championships',
    'https://www.tfrrs.org/results/xc/25334/NCAA_Division_I_Cross_Country_Championships',
]
for link in RESULT_LINKS:
    db.load_results(link)

In [7]:
# Preview everything that has been loaded: runners joined to their results and to the races.
all_data_sql = '''SELECT * FROM tRunner
JOIN tRaceResult USING(runner_id)
JOIN tRace USING(race_id)'''
db.run_query(all_data_sql)

Unnamed: 0,runner_id,name,eligibility,school,race_id,raw_time,time,place,race,date
0,1,Peninah Mutisya,FR-1,Hampton,1,21:13.3,1273.3,1,The CNU XC Invitational,"September 13, 2024"
1,2,Molly Weithman,Freshman,Unattached,1,22:09.4,1329.4,2,The CNU XC Invitational,"September 13, 2024"
2,3,Nia Warren,SR-4,VCU,1,22:14.5,1334.5,3,The CNU XC Invitational,"September 13, 2024"
3,4,Jennifer Tsai,JR-3,William & Mary,1,22:16.8,1336.8,4,The CNU XC Invitational,"September 13, 2024"
4,5,Madelyn Gypson,Freshman,Unattached,1,22:20.3,1340.3,5,The CNU XC Invitational,"September 13, 2024"
...,...,...,...,...,...,...,...,...,...,...
1114,825,Alex Bauer,JR-3,Toledo,7,22:18.6,1338.6,250,NCAA Division I Cross Country Championships,"November 23, 2024"
1115,826,Jayden Harberts,FR-1,Syracuse,7,22:19.7,1339.7,251,NCAA Division I Cross Country Championships,"November 23, 2024"
1116,827,Elizabeth Stockman,SR-4,Texas,7,22:28.9,1348.9,252,NCAA Division I Cross Country Championships,"November 23, 2024"
1117,828,Anna Sentner,SR-4,Florida State,7,22:32.3,1352.3,253,NCAA Division I Cross Country Championships,"November 23, 2024"


In [8]:
# Show the races that are currently loaded.
loaded_races = db.see_loaded_races()
loaded_races

Unnamed: 0,race_id,race,date
0,1,The CNU XC Invitational,"September 13, 2024"
1,2,2024 Pirate Cross Country Invitational,"October 4, 2024"
2,3,Panorama Farms Invitational,"October 19, 2024"
3,4,2024 CAA Cross Country Championship,"November 1, 2024"
4,5,Elon Phoenix XC Invitational,"October 18, 2024"
5,6,NCAA DI Southeast Regional Cross Country Champ...,"November 15, 2024"
6,7,NCAA Division I Cross Country Championships,"November 23, 2024"


In [26]:
# Find a race from part of its name.
race_name_fragment = 'Panorama'
db.course_lookup(race_name_fragment)

Unnamed: 0,race_id,race,date,runner_id,raw_time,time,place
0,2,Panorama Farms Invitational,"October 19, 2024",210,20:09.7,1209.7,1
1,2,Panorama Farms Invitational,"October 19, 2024",211,20:27.9,1227.9,2
2,2,Panorama Farms Invitational,"October 19, 2024",212,20:28.7,1228.7,3
3,2,Panorama Farms Invitational,"October 19, 2024",213,20:38.2,1238.2,4
4,2,Panorama Farms Invitational,"October 19, 2024",214,20:44.5,1244.5,5
...,...,...,...,...,...,...,...
129,2,Panorama Farms Invitational,"October 19, 2024",279,26:23.6,1583.6,130
130,2,Panorama Farms Invitational,"October 19, 2024",280,26:52.9,1612.9,131
131,2,Panorama Farms Invitational,"October 19, 2024",281,27:00.1,1620.1,132
132,2,Panorama Farms Invitational,"October 19, 2024",282,27:18.7,1638.7,133


In [27]:
# Find runners from part of their name.
# The fragment can match anywhere in the name: the saved output for 'Anna'
# also lists names such as 'Hannah' and 'Savannah'.
runner_name_fragment = 'Anna'
db.runner_lookup(runner_name_fragment)

Unnamed: 0,runner_id,name,eligibility,school
0,4,Arianna DeBoer,SR-4,William & Mary
1,32,Anna Berquist,SR-4,Davidson
2,37,Savannah Stoutt,SO-2,Col. of Charleston
3,54,Hannah Brown,SR-4,Milligan
4,70,Anna Conrad,FR-1,Winthrop
5,88,Leanna Lewis,SO-2,Norfolk State
6,110,Gianna Del Pizzo,FR-1,UCF
7,148,Brianna Dooney,FR-1,Winthrop
8,164,Hannah Morgan,SO-2,Davidson
9,166,Anna Jones,SR-4,Milligan


In [28]:
# List every meet that two runners both ran in; the runners are given by runner_id.
first_runner_id, second_runner_id = 1, 2
db.find_races_in_common(first_runner_id, second_runner_id)

Unnamed: 0,race_id,race,date
0,1,2024 Pirate Cross Country Invitational,"October 4, 2024"
1,3,2024 CAA Cross Country Championship,"November 1, 2024"


In [29]:
# Compare two courses, each given by its race_id. The first course is the reference point.
# Only runners who competed in both meets are compared.
# Output columns:
#   Difference  - difference in average race time, in seconds: how much faster or slower the
#                 second course averages compared to the first. A negative value means the
#                 second course was faster.
#   Ratio       - ratio of average race times: the factor that times from the second course
#                 would need to be multiplied by to standardize them to the first course
#                 (average time on the second course * Ratio should give the average time on
#                 the first course).
#   NumCompared - number of runners the two courses have in common.
# NOTE(review): in the saved output Difference is positive (31.13) while Ratio is above 1
# (1.0228). Under the definitions above a positive Difference (second course slower) would
# imply a Ratio below 1, so one of the two descriptions may have its direction reversed;
# confirm against 'courses.py'.
reference_race_id, other_race_id = 1, 2
db.compare_two_courses(reference_race_id, other_race_id)

Unnamed: 0,Difference,Ratio,NumCompared
0,31.133333,1.022844,60


In [30]:
# Predict every runner's time on a single course.
# The argument is presumably a race_id as listed by db.see_loaded_races(); confirm in 'courses.py'.
target_course = 4
db.predict_times(target_course)

Unnamed: 0,runner_id,name,school,predicted_time,formatted_time
0,1,Peninah Mutisya,Hampton,1254.528911,20:54
1,2,Kyra Holland,William & Mary,1255.939115,20:55
2,3,Sofia Istnick,William & Mary,1270.809409,21:10
3,4,Arianna DeBoer,William & Mary,1277.245539,21:17
4,6,Catherine Garrison,William & Mary,1277.505323,21:17
...,...,...,...,...,...
100,243,Jennifer Tsai,William & Mary,1321.619579,22:01
101,248,Celia Dawson,William & Mary,1329.367507,22:09
102,350,Mekayla Wilson,Hampton,1531.532813,25:31
103,351,Kylee King,Hampton,1524.334674,25:24


In [31]:
# Predict how the runners from one team will do on a given course.
team_name = 'William & Mary'
target_course = 5
db.predict_team_results(team_name, target_course)

Unnamed: 0,runner_id,name,predicted_time,formatted_time
1,2,Kyra Holland,1419.306575,23:39
2,3,Sofia Istnick,1436.111136,23:56
3,4,Arianna DeBoer,1443.384452,24:03
4,6,Catherine Garrison,1443.678028,24:03
6,9,Molly Weithman,1452.614622,24:12
14,22,Abby Lane,1486.75085,24:46
17,30,Kelly Ann Sutterfield,1519.494661,25:19
20,34,Lucy Young,1517.445745,25:17
22,38,Emily Sell,1548.015814,25:48
54,90,Annika Griggs,1565.624064,26:05


In [9]:
# Show the conversion values (ratio_conversion and time_conversion) for every loaded race.
# NOTE(review): in the saved output the row for race_id 4 has ratio 1.0 and time 0.0, so the
# argument appears to select the reference course the other races are converted to; confirm
# in 'courses.py'.
reference_course = 4
db.conversions(reference_course)

Unnamed: 0,race_id,race,date,ratio_conversion,time_conversion
0,1,The CNU XC Invitational,"September 13, 2024",0.998651,-1.527685
1,2,2024 Pirate Cross Country Invitational,"October 4, 2024",0.985391,-20.968421
2,3,Panorama Farms Invitational,"October 19, 2024",1.007262,9.809339
3,4,2024 CAA Cross Country Championship,"November 1, 2024",1.0,0.0
4,5,Elon Phoenix XC Invitational,"October 18, 2024",1.006916,10.046154
5,6,NCAA DI Southeast Regional Cross Country Champ...,"November 15, 2024",0.967847,-43.756522
6,7,NCAA Division I Cross Country Championships,"November 23, 2024",0.990912,-14.391304
