In [1]:
import numpy as np
import pandas as pd
import os
import pickle 
import procyclingstats
import power_helper as ph


In [None]:
# Get stage data from 1985-2023
from procyclingstats import Race, Stage

stages_dict = {"year":[0],"Race_url": [""], 
                   "stage_departure": [""], "stage_arrival": [""], "stage_url":[""], "stage_type":[""],  "profile_icon":[""],
                   "stage_winner":[""], "stage_winner_url":[""], 
                   "stage_winner_time":[""], "stage_distance":[0.0], 
                   "stage_vertical_meters":[0.0], "gc_leader":[""], 
                   "gc_leader_url":[""], "gc_stage_time":[""]}

for year in range(1985,2024):
    race = Race("race/tour-de-france/" + str(year))
    for race_stage in race.stages("stage_url"):
        stage = Stage(race_stage["stage_url"])
        # No gc results in protested stages 
        # No results in TTT 
        if (stage.stage_type() != "TTT"):
            if len(stage.results("rider_name")) > 0 and len(stage.gc("rider_name")) > 0: 
                stages_dict["year"].append(year)
                stages_dict["Race_url"].append("race/tour-de-france/" + str(year))
                stages_dict["stage_departure"].append(stage.departure())
                stages_dict["stage_arrival"].append(stage.arrival())
                stages_dict["stage_url"].append(stage.url)
                stages_dict["stage_type"].append(stage.stage_type())
                stages_dict["profile_icon"].append(stage.profile_icon())
                stages_dict["stage_winner"].append(stage.results('rider_name')[0])
                stages_dict["stage_winner_url"].append(stage.results('rider_url')[0])
                stages_dict["stage_winner_time"].append(stage.results('time')[0])
                stages_dict["stage_distance"].append(stage.distance())
                stages_dict["stage_vertical_meters"].append(stage.vertical_meters())
                stages_dict["gc_leader"].append(stage.gc("rider_name")[0])
                stages_dict["gc_leader_url"].append(stage.gc("rider_url")[0])
                stages_dict["gc_stage_time"].append([d for d in stage.results("rider_name","time") if d['rider_name'] in stage.gc("rider_name")[0]['rider_name']][0]['time'])

        
    

df = pd.DataFrame(stages_dict).iloc[1:]

In [2]:
# Convert_dataset
import ast
def to_seconds(time):
    h, m, s = time.split(':')
    return int(h) * 3600 + int(m) * 60 + int(s)

# Read raw data
df = pd.read_csv("Data_raw.csv").iloc[1:]

df.stage_winner = df.stage_winner.apply(lambda x: ast.literal_eval(x)['rider_name']) # convert json to text
df.stage_winner_url = df.stage_winner_url.apply(lambda x: ast.literal_eval(x)['rider_url']) # convert json to text
df.stage_winner_time = df.stage_winner_time.apply(lambda x: ast.literal_eval(x)['time']) # convert json to text
df.insert(6, "stage_winner_time_str", df.stage_winner_time, True) # insert time
df.stage_winner_time = df.stage_winner_time.apply(to_seconds) # convert time to seconds for power calculations
df.gc_leader = df.gc_leader.apply(lambda x: ast.literal_eval(x)['rider_name']) # convert json to text
df.gc_leader_url = df.gc_leader_url.apply(lambda x: ast.literal_eval(x)['rider_url']) # convert json to text

df.insert(12, "gc_stage_time_str", df.gc_stage_time, True) # Insert gc time
df.gc_stage_time = df.gc_stage_time.apply(to_seconds) # convert gc time to seconds
df['power'] = np.zeros(len(df)) # add power column
df['gc_weight'] = np.zeros(len(df)) # add gc_weight column
df['gc_height'] = np.zeros(len(df)) # add gc_height column
df['stage_grade'] = df.stage_vertical_meters / (df.stage_distance * 1000) # add stage_grade
df['gc_speed'] = (df.stage_distance *1000) / df.gc_stage_time # add velocity

from procyclingstats import Stage, Rider

# Get height and weight data for gc riders
for i, stage in enumerate(df.itertuples(), 1):
    rider = Rider(stage.gc_leader_url)
    
    try:
        df.loc[i, 'gc_weight'] = rider.weight()
        df.loc[i, 'gc_height'] = rider.height()
    except (AttributeError, IndexError):
        # print("No weight/height for " + rider.name())
        df.loc[i, 'gc_weight'] = 65
        df.loc[i, 'gc_height'] = 1.70
        
powers = []
for row in df.itertuples():
    powers.append(ph.cycling_power_profile(row.stage_grade, row.gc_weight, row.gc_speed, row.profile_icon))
    
df.power = powers
df.to_csv("Data_edited.csv")


In [110]:
lats, lons, elevs = [],[],[]


In [112]:
import googlemaps
from keys import ClientKeys
import requests
from time import sleep

gmaps = googlemaps.Client(key=ClientKeys.GEO_KEY)

def get_geolocation(name):
    geocode_result = gmaps.geocode(address=name)
    print(geocode_result)
    if geocode_result:
        location = geocode_result[0]["geometry"]["location"]
        r = requests.get('https://api.open-elevation.com/api/v1/lookup?locations={0},{1}'.format(location["lat"],location["lng"]))
        return location["lat"], location["lng"]
    else:
        geocode_result = gmaps.geocode(address=name+', France')
        if geocode_result:
            location = geocode_result[0]["geometry"]["location"]
            r = requests.get('https://api.open-elevation.com/api/v1/lookup?locations={0},{1}'.format(location["lat"],location["lng"]))
            return location["lat"], location["lng"]
        else:
            return 0,0
        

# df = pd.read_csv('Data_edited.csv')

for i in range(len(lats),len(df)):
    lat, lon = get_geolocation(df.iloc[i].stage_departure)
    lats.append(lat)
    lons.append(lon)

df['stage_departure_lat'] = lats
df['stage_departure_lon'] = lons

# lats, lons, elevs = [],[],[]
# for i in range(len(lats), len(df)):
#     lat, lon = get_geolocation(df.iloc[i].stage_arrival)
#     lats.append(lat)
#     lons.append(lon)

# df['stage_arrival_lat'] = lats
# df['stage_arrival_lon'] = lons

[{'address_components': [{'long_name': 'Plumelec', 'short_name': 'Plumelec', 'types': ['locality', 'political']}, {'long_name': 'Morbihan', 'short_name': 'Morbihan', 'types': ['administrative_area_level_2', 'political']}, {'long_name': 'Brittany', 'short_name': 'Brittany', 'types': ['administrative_area_level_1', 'political']}, {'long_name': 'France', 'short_name': 'FR', 'types': ['country', 'political']}, {'long_name': '56420', 'short_name': '56420', 'types': ['postal_code']}], 'formatted_address': '56420 Plumelec, France', 'geometry': {'bounds': {'northeast': {'lat': 47.8660235, 'lng': -2.5479288}, 'southwest': {'lat': 47.781374, 'lng': -2.6974317}}, 'location': {'lat': 47.837699, 'lng': -2.639409}, 'location_type': 'APPROXIMATE', 'viewport': {'northeast': {'lat': 47.8660235, 'lng': -2.5479288}, 'southwest': {'lat': 47.781374, 'lng': -2.6974317}}}, 'place_id': 'ChIJ48lb9WPZD0gRV5Ns6nOZZyE', 'types': ['locality', 'political']}]
[{'address_components': [{'long_name': 'Vannes', 'short_n

In [115]:
df

Unnamed: 0.1,Unnamed: 0,year,Race_url,stage,stage_url,stage_type,stage_winner_time_str,profile_icon,stage_winner,stage_winner_url,...,gc_weight,gc_height,stage_grade,gc_speed,stage_departure,stage_arrival,stage_arrival_lat,stage_arrival_lon,stage_departure_lat,stage_departure_lon
0,0,1985,race/tour-de-france/1985,Plumelec,https://www.procyclingstats.com/race/tour-de-f...,RR,0:08:47,p1,HINAULT Bernard,rider/bernard-hinault,...,62.0,1.74,0.018824,12.903226,Plumelec,Plumelec,47.837699,-2.639409,47.837699,-2.639409
1,1,1985,race/tour-de-france/1985,Lanester,https://www.procyclingstats.com/race/tour-de-f...,RR,6:32:52,p1,MATTHIJS Rudy,rider/rudy-matthijs,...,74.0,1.82,0.012504,10.860343,Vannes,Lanester,47.763494,-3.347354,47.658617,-2.759902
2,2,1985,race/tour-de-france/1985,Vitré,https://www.procyclingstats.com/race/tour-de-f...,RR,6:29:21,p1,MATTHIJS Rudy,rider/rudy-matthijs,...,74.0,1.82,0.011372,10.359146,Lorient,Vitré,48.124746,-1.215430,47.748252,-3.370245
3,3,1985,race/tour-de-france/1985,Pont-Audemer,https://www.procyclingstats.com/race/tour-de-f...,RR,6:31:46,p1,SOLLEVELD Gerrit,rider/gerrit-solleveld,...,65.0,1.70,0.012722,10.188888,Fougères,Pont-Audemer,49.354883,0.514299,48.351561,-1.204626
4,4,1985,race/tour-de-france/1985,Roubaix,https://www.procyclingstats.com/race/tour-de-f...,RR,6:27:25,p1,MANDERS Henri,rider/henri-manders,...,65.0,1.70,0.008473,9.631923,Neufchâtel-en-Bray,Roubaix,50.692705,3.177847,49.732771,1.441891
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
821,821,2023,race/tour-de-france/2023,Courchevel,https://www.procyclingstats.com/race/tour-de-f...,RR,4:49:08,p4,GALL Felix,rider/felix-gall,...,60.0,1.75,0.032583,9.490263,Saint-Gervais Mont-Blanc,Courchevel,45.405105,6.631668,45.892013,6.712187
822,822,2023,race/tour-de-france/2023,Bourg-en-Bresse,https://www.procyclingstats.com/race/tour-de-f...,RR,4:06:48,p1,ASGREEN Kasper,rider/kasper-asgreen,...,60.0,1.75,0.006577,12.486494,Moûtiers,Bourg-en-Bresse,46.205167,5.225501,45.484615,6.529926
823,823,2023,race/tour-de-france/2023,Poligny,https://www.procyclingstats.com/race/tour-de-f...,RR,3:31:02,p2,MOHORIČ Matej,rider/matej-mohoric,...,60.0,1.75,0.011192,12.814238,Moirans-en-Montagne,Poligny,46.836429,5.709100,46.430899,5.723854
824,824,2023,race/tour-de-france/2023,Le Markstein,https://www.procyclingstats.com/race/tour-de-f...,RR,3:27:18,p4,POGAČAR Tadej,rider/tadej-pogacar,...,60.0,1.75,0.026000,10.733237,Belfort,Le Markstein,47.926232,7.028784,47.639674,6.863849


In [8]:
df = pd.read_csv("Data_edited.csv").iloc[:,1:]

In [22]:

# Riders strikes
df = df.drop(axis=0, index=248) # 1996 stage 9
df = df.drop(axis=0, index=234) # 1995 stage 16
# Calculate power for full tour with fixed grade
total_climbed = df.groupby("year").stage_vertical_meters.sum()
total_distance = df.groupby("year").stage_distance.sum()
gc_weights = df.groupby("year").gc_weight.mean()
gc_time = df.groupby("year").gc_stage_time.sum()
total_grades = total_climbed/(total_distance*1000)
velocity = (total_distance*1000)/gc_time
year_power = ph.cycling_power(total_grades, gc_weights, velocity)