In [1]:
import pandas as pd

In [2]:
# Read the three raw CSV extracts that the normalized tables below are built from.
events, event_results, event_details = (
    pd.read_csv(csv_path)
    for csv_path in (
        'data/events.csv',
        'data/event_results.csv',
        'data/event_details.csv',
    )
)

In [3]:
# 1. events table
# The regex removes everything up to and including the last " / " in `club`,
# leaving the trailing ACP code of the hosting club. The column is stored on
# `events` itself and then selected into the normalized frame.
events['host_acp_code'] = events['club'].str.replace(
    pat='^.* / ',
    repl='',
    regex=True,
)
events_norm = events[
    ['eid', 'host_acp_code', 'date', 'rtid', 'type']
]

In [4]:
# 2. event_details table
# Built as one chain so that no values are assigned onto a filtered subset of
# event_details (the pattern pandas flags with SettingWithCopyWarning).
# Resulting columns: eid, finishers, dnf, starters; the original row index is kept.
event_details_norm = (
    event_details[['eid', 'finishers', 'dnf']]
    # drop 0 participation events -- with no finishers and no dnf
    # (rows with finishers == 0 and a negative dnf are dropped here as well)
    .loc[lambda d: (d.finishers > 0) | (d.dnf > 0)]
    # some events have dnf = -1, we'll take these as 0
    .assign(dnf=lambda d: d.dnf.clip(lower=0))
    # starters is computed from the clamped dnf, so a negative dnf never
    # reduces the count below the number of finishers
    .assign(starters=lambda d: d.finishers + d.dnf)
)

In [28]:
# 3. riders, with most recent club affiliation from results
# Join every result to its event (which supplies `date`), order by date and
# keep the last row per `rusa`. The derived columns are added with .assign so
# that nothing is written onto the row subset returned by groupby().tail()
# (the pattern pandas flags with SettingWithCopyWarning).
# NOTE(review): read_csv is called without parse_dates, so `date` is sorted as
# loaded -- confirm its format orders chronologically.
riders = (
    event_results
    .merge(events_norm, on='eid')
    .sort_values('date')
    .groupby('rusa')
    .tail(1)
    .assign(
        # text after the last " / " of the "club / acp code" field
        acp_code=lambda d: d['club / acp code'].str.replace('^.* / ', '', regex=True),
        # `name` is split around its comma: text before the first comma ...
        last_name=lambda d: d['name'].str.replace(',.*$', '', regex=True),
        # ... and text after the last ", "
        first_name=lambda d: d['name'].str.replace('^.*, ', '', regex=True),
    )
)

riders_norm = riders[['rusa', 'acp_code', 'name', 'first_name', 'last_name']]

In [60]:
# 4. event results
# Keep the per-result fields and add finish_hours = hours + minutes / 60.
event_results_norm = (
    event_results[['cert', 'rusa', 'eid', 'hours', 'minutes', 'medal']]
    .assign(finish_hours=lambda res: res['hours'] + res['minutes'] / 60)
)


In [92]:
# 5. clubs
# One row per distinct (club, region) pair seen in events, split into parts:
#   club   looks like "<club_name> / <acp_code>"
#   region looks like "<state_cd>: <city>"
# All derived columns are added with .assign so nothing is written onto the
# subset returned by drop_duplicates() (the pattern pandas flags with
# SettingWithCopyWarning).
# NOTE(review): club_name is cut at the first " /" while acp_code keeps what
# follows the last " / "; the two only agree when club contains a single " / ".
clubs_norm = (
    events[['club', 'region']]
    .drop_duplicates()
    .assign(
        acp_code=lambda d: d.club.str.replace('^.* / ', '', regex=True),
        club_name=lambda d: d.club.str.replace(' /.*$', '', regex=True),
        state_cd=lambda d: d.region.str.replace(':.*$', '', regex=True),
        city=lambda d: d.region.str.replace('^.*: ', '', regex=True),
    )
    # fix the column order and renumber rows 0..n-1 after de-duplication
    .loc[:, ['acp_code', 'club_name', 'region', 'city', 'state_cd', 'club']]
    .reset_index(drop=True)
)

In [93]:
# Preview the normalized clubs table (the rendered output elides the middle rows).
clubs_norm

Unnamed: 0,acp_code,club_name,region,city,state_cd,club
0,910004,Audax Atlanta,GA: Atlanta,Atlanta,GA,Audax Atlanta / 910004
1,905051,Pacific Coast Highway Randonneurs,CA: Los Angeles,Los Angeles,CA,Pacific Coast Highway Randonneurs / 905051
2,943026,Lone Star Randonneurs,TX: Dallas,Dallas,TX,Lone Star Randonneurs / 943026
3,933057,Bicycle For Life Club,NC: High Point,High Point,NC,Bicycle For Life Club / 933057
4,938017,Pennsylvania Randonneurs,PA: Eastern,Eastern,PA,Pennsylvania Randonneurs / 938017
...,...,...,...,...,...,...
90,937004,Willamette Randonneurs,OR: Eugene,Eugene,OR,Willamette Randonneurs / 937004
91,909034,Northeast Florida Randonneurs,FL: Northeast,Northeast,FL,Northeast Florida Randonneurs / 909034
92,940046,Freewheelers of Spartanburg,SC: Spartanburg,Spartanburg,SC,Freewheelers of Spartanburg / 940046
93,904003,Central Arkansas Brevet Series,AR: Little Rock,Little Rock,AR,Central Arkansas Brevet Series / 904003
