# Import packages and initial settings

In [189]:
import glob
import os
from datetime import datetime
from io import StringIO
from urllib.request import Request, urlopen

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns

In [2]:
# IPython magic (not plain Python): selects matplotlib's inline backend for this notebook.
%matplotlib inline 
# Use seaborn's 'darkgrid' style for plots.
sns.set_style('darkgrid')

# Create function to download webpage
Downloads the webpage HTML, decodes it, and returns a list of the tables it contains

In [2]:
def get_webpage(url):
    """Download a web page and parse the HTML tables it contains.

    Args:
        url: Address of the page to fetch.

    Returns:
        The list of DataFrames produced by ``pandas.read_html`` for the
        page, each using its first column as the index.

    Raises:
        urllib.error.URLError: If the request fails.
        ValueError: If ``pandas.read_html`` finds no tables in the page.
    """
    # Send an explicit User-Agent header instead of urllib's default one.
    req = Request(url, headers={'User-Agent': 'Safari/12.0'})
    # The context manager closes the HTTP response as soon as the body is read.
    with urlopen(req) as response:
        raw = response.read()
    # 'ignore' drops undecodable bytes instead of raising UnicodeDecodeError.
    html = raw.decode('utf-8', 'ignore')
    # read_html deprecates literal HTML strings in newer pandas releases;
    # a file-like wrapper is accepted by old and new versions alike.
    tables = pd.read_html(StringIO(html), index_col=0)
    return tables

# Define lists of events and gender
Sets the order of events and genders to match the order in which TFRRS lists the events on the page. This makes it possible to assign an event and a gender to each table.

In [195]:
# Event code for each scraped table, in the order the tables appear on the page.
# Every event occupies two consecutive slots: the men's table, then the women's.
# NOTE(review): assumes TFRRS lists the men's table before the women's for
# every event - confirm against the downloaded page.
# The sprint hurdles and the multi-events differ by gender, so within those
# pairs the men's code comes first ('110H', 'Dec') and the women's second
# ('100H', 'Hep'), matching the M/W alternation below.
event_order = ['100','100','200','200','400','400',
               '800','800','1500','1500','5000','5000','10000','10000',
               '110H','100H','400H','400H','3000S','3000S',
               '4x100','4x100','4x400','4x400',
               'HJ','HJ','PV','PV','LJ','LJ','TJ','TJ',
               'SP','SP','DT','DT','HT','HT', 'JT','JT',
               'Dec','Hep']
# Gender label for each table. Derived from event_order so the two lists can
# never drift out of step in length.
gender_order = ['M', 'W'] * (len(event_order) // 2)

# Get url and download page

In [206]:
# Page to scrape: the TFRRS archived list for the 2014 NCAA Division I Outdoor
# Qualifying (FINAL) list. Point this at another archived list to scrape a
# different season.
url = 'https://www.tfrrs.org/archived_lists/1228/2014_NCAA_Division_I_Outdoor_Qualifying_(FINAL)/2014/o'

In [207]:
# Download the page (network request) and keep the list of tables parsed from it.
events = get_webpage(url)

# Add event and gender column and save
Save each year and event separately for safe keeping

In [208]:
# Tag each scraped table with the event code and gender found at the same
# position in event_order / gender_order.
for position, table in enumerate(events):
    event_code, gender_code = event_order[position], gender_order[position]
    table['EVENT'] = event_code
    table['GENDER'] = gender_code


In [209]:
#CHANGE YEAR
# The season is set in one place and is used as the prefix of every file name.
year = 2014
output_dir = './TFRRS'

# DataFrame.to_csv does not create missing directories, so make sure it exists.
os.makedirs(output_dir, exist_ok=True)

for event in events:
    if event.empty:
        # An empty table has no first row to read the GENDER/EVENT labels from.
        continue
    # Every row of a table carries the same labels, so the first row is enough.
    label = event.iloc[0]
    filename = '{}_{}_{}.csv'.format(year, label['GENDER'], label['EVENT'])
    event.to_csv(os.path.join(output_dir, filename))

# Compile Results from each year into one frame for each event
Also convert 'MEET DATE' column to datetime object and save each file

In [210]:
#COMPILE ALL YEARS

compiled_dir = './TFRRS_Compiled'
# DataFrame.to_csv does not create missing directories, so make sure it exists.
os.makedirs(compiled_dir, exist_ok=True)

# One combined frame per event/gender pair that has saved files.
compiled_events = []

for gender, event in zip(gender_order, event_order):
    # Every saved year of this event; sorted because glob returns matches in
    # arbitrary order and the row order of the output should be reproducible.
    filenames = sorted(glob.glob('./TFRRS/*{}_{}.csv'.format(gender, event)))
    if not filenames:
        # pd.concat raises ValueError on an empty list; nothing to compile here.
        continue
    compiled_years = pd.concat([pd.read_csv(name, index_col=0) for name in filenames])
    # Parse dates written like 'May 30, 2014' in a single vectorized call.
    compiled_years['MEET DATE'] = pd.to_datetime(compiled_years['MEET DATE'],
                                                 format='%b %d, %Y')
    compiled_events.append(compiled_years)
    # Name the output file from the labels stored inside the data itself.
    first_row = compiled_years.iloc[0]
    compiled_years.to_csv(os.path.join(
        compiled_dir, '{}_{}.csv'.format(first_row['GENDER'], first_row['EVENT'])))

### Optionally parse the 'ATHLETE' column into 'FIRST' and 'LAST' columns



In [110]:
# Split the ATHLETE column into FIRST and LAST name columns for every
# non-relay table (the two relay events are skipped).
for event in events:
    if event.empty or event.iloc[0]['EVENT'] in ('4x100', '4x400'):
        continue
    # Names are split on the first ', '. A comma-separated name reads
    # "Last, First", so the text before the comma is the surname.
    # NOTE(review): confirm against the scraped ATHLETE values.
    # partition always yields three columns (before, separator, after), so a
    # name without a comma gives an empty FIRST instead of raising IndexError.
    name_parts = event['ATHLETE'].str.partition(', ')
    event['FIRST'] = name_parts[2]
    event['LAST'] = name_parts[0]