In [None]:
import pandas as pd
import numpy as np
from iso3166 import countries

In [None]:
# Default results-file stem; the ranking export loop later in the notebook rebinds `wf`.
wf = 'wcm'

In [None]:
# Points-per-rank tables, one CSV per scoring era; get_points() chooses among them by season.
df_rank_67_79, df_rank_79, df_rank_80_91, df_rank_92, df_rank_93_now = (
    pd.read_csv(f'../../data/points{era}.csv')
    for era in ("67-79", "79", "80-91", "92", "93-now")
)

In [None]:
# Maps the three-letter nation codes used in the race data onto ISO 3166-1
# alpha-3 codes, for the cases where the two differ. Codes that are absent
# here are passed to the ISO lookup unchanged.
# NOTE(review): the dissolved states (URS, JUG, CKS) are each folded into a
# single present-day country -- confirm that attribution is intended.
country_translation = {
    "MON": "MCO",
    "MAD": "MDG",
    "CRO": "HRV",
    "BUL": "BGR",
    "ZIM": "ZWE",
    "CKS": "SVK",
    "DAN": "DNK",
    "KOS": "XKX",
    "URS": "RUS",
    "GRE": "GRC",
    "CHI": "CHL",
    "SUI": "CHE",
    "NED": "NLD",
    "IRA": "IRN",
    "LIB": "LBN",
    # SLO is Slovenia, whose ISO code is SVN (SVK is Slovakia).
    "SLO": "SVN",
    "LAT": "LVA",
    "GER": "DEU",
    "JUG": "SRB",
    "IRE": "IRL",
    "RSA": "ZAF",
    "SPA": "ESP",
}

In [None]:
def _lookup_country_field(code, field):
    """Return attribute ``field`` of the iso3166 record for ``code``, or "" if there is none.

    ``code`` is first passed through ``country_translation``; codes without an
    entry there are looked up as given.
    """
    iso_code = country_translation.get(code, code)
    # The second argument is the fallback for unknown codes: the raw input comes
    # back, it has no such attribute, and getattr then yields "".
    record = countries.get(iso_code, code)
    return getattr(record, field, "")


def get_alpha2_country(alpha3_name):
    """Return the ISO alpha-2 code for ``alpha3_name``, or "" when it cannot be resolved."""
    return _lookup_country_field(alpha3_name, "alpha2")


def get_name_country(alpha3_name):
    """Return the iso3166 country name for ``alpha3_name``, or "" when it cannot be resolved."""
    return _lookup_country_field(alpha3_name, "name")

# Refactor Race results

In [None]:
# Build one enriched race-results CSV per results file ("m" / "f" suffix).
# The loop variable is deliberately not called `type`: at notebook top level
# that would shadow the builtin for every cell executed afterwards.
for suffix in ["m", "f"]:

    df_photo = pd.read_csv(f'../../data/ath{suffix}.csv')[["name", "photo"]]
    df = pd.read_csv(f'../../data/wc{suffix}.csv')
    df = df.drop(columns=["ath_ski", "ath_id"])
    # Convert both country columns to alpha-2 codes first ...
    df.country = df.country.apply(get_alpha2_country)
    df.ath_country = df.ath_country.apply(get_alpha2_country)
    # ... so the name lookups below receive those alpha-2 codes as input.
    df["country_name"] = df.country.apply(get_name_country)
    df["ath_country_name"] = df.ath_country.apply(get_name_country)
    # Rows without an athlete name cannot be matched to a photo; drop them.
    df = df[df['ath_name'].notna()]
    # Left join keeps every result row; athletes without a photo get NaN.
    df = df.merge(df_photo, how="left", left_on="ath_name", right_on="name").drop(columns=["name"])
    # NOTE(review): the row index is written as an unnamed first column -- kept
    # because the consumer of this file is not visible here.
    df.to_csv(f'../../web/website/data/race_results_wc{suffix}.csv')

In [None]:
import json

# Write, per results file, a JSON object {season: {date: [venue, event]}}.
# The loop variable is not called `type` so the builtin stays usable in later
# cells, and the import is done once instead of on every iteration.
for suffix in ["m", "f"]:
    df = pd.read_csv(f'../../data/wc{suffix}.csv')
    events_by_season = {
        # JSON object keys must be strings, hence str(season).
        str(season): {
            # Keyed by date: a later row with the same date replaces an earlier one.
            date: [venue, event]
            for date, venue, event in (
                df.loc[df["season"] == season, ["date", "venue", "event"]]
                .drop_duplicates()
                .values
            )
        }
        for season in df["season"].unique()
    }
    with open(f'../../web/website/data/wc{suffix}_events.json', 'w') as fp:
        json.dump(events_by_season, fp)

# Find event locations

In [None]:
# Stack the wcm and wcf result files into one frame (original row labels are kept).
df = pd.concat([pd.read_csv(path) for path in ('../../data/wcm.csv', '../../data/wcf.csv')])

In [None]:
# Replace the venue country codes with the alpha-2 codes returned by get_alpha2_country.
df["country"] = df["country"].apply(get_alpha2_country)

In [None]:
# One (venue, "venue, country") tuple per distinct venue/country combination.
events_cities = [
    (venue, ", ".join((venue, country)))
    for venue, country in df[["venue", "country"]].drop_duplicates().values
]

In [None]:
import time

from geopy.geocoders import Nominatim

# One geocoder for the whole run instead of a new one per venue. Nominatim's
# usage policy asks for an application-specific user agent, and current geopy
# releases refuse to construct the geocoder without one.
geolocator = Nominatim(user_agent="ski-worldcup-event-locations")

# List of (venue name, best geocoding hit or None when nothing was found).
locations = []
for city_name, address in events_cities:
    candidates = geolocator.geocode(address, timeout=10, exactly_one=False)
    locations.append((city_name, candidates[0] if candidates else None))
    # The public Nominatim service allows at most one request per second.
    time.sleep(1)

In [None]:
# Hand-entered (latitude, longitude) per venue name. When the map is built these
# take precedence over whatever the geocoder returned for the same venue.
name_to_location = {
    "Borovez": (42.2673025, 23.6060001),
    "Yong Pyong": (37.644881, 128.681255),
    "Les Arcs": (45.572354, 6.829653),
    "La Mongie": (42.909253, 0.167278),
    "Campiglio": (46.229577, 10.826522),
    "Santa Caterina": (46.391973, 10.480116),
    "Cortina": (46.536306, 12.121626),
    "Kvitfjell": (61.485798, 10.136583),
    "Voss": (60.634741, 6.425773),
    "Naeba": (36.956099, 138.756491),
    "Mont St. Anne": (47.117210, -70.904111),
    "Nakiska": (50.942825, -115.151013),
    "Copper Mnt.": (39.500336, -106.155852),
    "Steamboat": (40.458830, -106.804668),
    "Jackson Hole": (43.587732, -110.827897),
    "Breckenridge": (39.476483, -106.047852),
    "Sun Valley": (43.671389, -114.367284),
    "Garmisch": (47.472021, 11.063838),
    "Beaver Creek": (39.604452, -106.516807),
    "Crystal Mnt.": (46.936169, -121.474640),
    "Sugarloaf": (45.054363, -70.308575),
    "Sunshine": (51.119870, -115.763976),
    "Waterville": (43.965456, -71.527769),
    "Panorama": (50.460435, -116.238179),
    "Les Contamines": (45.786378, 6.693766),
    "St. Gervais": (45.889079, 6.706937),
    "Limone": (44.201048, 7.577518),
    "Villars": (46.304778, 7.055345),
    "Fluehli": (46.822168, 8.030034),
    "Aare": (63.402575, 13.076255),
    "Reiteralm": (47.388662, 13.613442),
    "Sankt Anton": (47.130511, 10.268137),
    "Badgastein": (47.116867, 13.139601),
    "Val Zoldana": (46.391142, 12.100192),
    "Squaw Valley": (39.195895, -120.234871),
    "Mammoth Mnt.": (37.630436, -119.032434),
}

In [None]:
import folium

# venue name -> [latitude, longitude]; filled while the markers are placed.
lat_lon_dict = {}
mapit = folium.Map(location=[45, 10], zoom_start=6)
for city_name, location in locations:
    if city_name in name_to_location:
        # A manual entry wins over the geocoder; print it so overrides are visible.
        print(city_name)
        latitude, longitude = name_to_location[city_name]
    elif location is not None:
        latitude, longitude = location.latitude, location.longitude
    else:
        # Geocoding found nothing and there is no manual entry: report the venue
        # and move on instead of failing on `None.latitude`.
        print(f"skipped {city_name}: no geocoding result and no manual coordinates")
        continue
    lat_lon_dict[city_name] = [latitude, longitude]
    # Apostrophes are stripped from the popup text.
    folium.Marker(location=[latitude, longitude], popup=city_name.replace("'", "")).add_to(mapit)

mapit

In [None]:
import json

# Persist the venue -> [latitude, longitude] mapping for the website.
with open('../../web/website/data/event_location.json', mode='w') as out_file:
    json.dump(lat_lon_dict, out_file)

# Create data for Bar chart race

In [None]:
def give_parallel_points(position):
    """Return the points awarded for finishing ``position`` under the parallel-event scale.

    Only positions 1-5 and 9 score; every other position yields 0.
    """
    scale = dict(zip((1, 2, 3, 4, 5, 9), (100, 80, 60, 50, 40, 15)))
    if position in scale:
        return scale[position]
    return 0

In [None]:
def get_points(season):
    """Return the points table that applies to ``season``.

    Seasons before 1979 use ``df_rank_67_79``, 1979 uses ``df_rank_79``,
    1980-1991 use ``df_rank_80_91``, 1992 uses ``df_rank_92`` and everything
    from 1993 on uses ``df_rank_93_now``.
    """
    # (exclusive upper bound, table) pairs in ascending order; first match wins.
    eras = (
        (1979, df_rank_67_79),
        (1980, df_rank_79),
        (1992, df_rank_80_91),
        (1993, df_rank_92),
    )
    for first_season_after, table in eras:
        if season < first_season_after:
            return table
    return df_rank_93_now

In [None]:
# Columns identifying a race and a skier; together they are the join key
# between the race x skier grid and the actual results.
RACE_COLUMNS = ["season", "date", "venue", "country", "event"]
SKIER_COLUMNS = ["ath_name", "ath_country", "ath_id"]
RANKING_DIR = "../../web/website/data/rankings"


def _score_every_race(results, df_rank):
    """Return one row per (race, skier) pair of ``results`` with its points attached.

    Every skier appearing in ``results`` gets a row for every race in
    ``results``; races the skier has no result for count as rank 0. Ranks are
    then joined to ``df_rank`` (columns ``ath_rank`` and ``points``) with an
    inner merge, so ranks missing from ``df_rank`` drop out.
    """
    results = results.copy()  # never write into a slice of the caller's frame

    # Non-numeric ranks (anything that is not all digits) become rank 0.
    ranks = results["ath_rank"].astype(str)
    results["ath_rank"] = ranks.where(ranks.str.isdigit(), "0").astype(int)

    # Cartesian product races x skiers via a constant join key.
    races = results[RACE_COLUMNS].drop_duplicates().reset_index(drop=True)
    skiers = results[SKIER_COLUMNS].drop_duplicates().reset_index(drop=True)
    grid = races.assign(key=0).merge(skiers.assign(key=0), how="outer")

    scored = grid.merge(results, how="left", on=RACE_COLUMNS + SKIER_COLUMNS)
    # Only the rank feeds the output; the other result columns may stay NaN.
    scored["ath_rank"] = scored["ath_rank"].fillna(0).astype(int)
    # Ranks beyond the end of the points table are treated like rank 0.
    max_scoring_rank = df_rank["ath_rank"].max()
    scored["ath_rank"] = scored["ath_rank"].where(scored["ath_rank"] <= max_scoring_rank, 0)

    return scored.merge(df_rank, on="ath_rank")


def _specialty_per_skier(scored):
    """Return a frame (``ath_name``, ``specialty``): the event with each skier's highest point total."""
    per_event = scored.groupby(["ath_name", "event"])["points"].sum().reset_index()
    best = per_event.loc[per_event.groupby("ath_name")["points"].idxmax()]
    return (
        best.rename(columns={"event": "specialty"})[["ath_name", "specialty"]]
        .reset_index(drop=True)
    )


def _cumulative_ranking(scored, specialty):
    """Return the running point total per skier and date, ready to be written as CSV."""
    # Stable sort so rows sharing a date keep a reproducible order.
    ranking = scored.merge(specialty, on="ath_name").sort_values(by=["date"], kind="mergesort")
    ranking["value"] = ranking.groupby(["ath_name"])["points"].cumsum()
    # Several races can share a date. Keep the LAST row per (date, skier): it is
    # the running total after all of that day's races. Keeping the first row
    # would leave out the day's remaining points.
    ranking = ranking.drop_duplicates(subset=["date", "ath_name"], keep="last")
    return ranking.rename(columns={"ath_name": "name"})[
        ["date", "name", "ath_country", "value", "specialty"]
    ]


# For each results file and season, write the overall cumulative ranking plus
# one cumulative ranking per event.
for wf in ["wcf", "wcm"]:
    df = pd.read_csv(f'../../data/{wf}.csv').drop(['ath_ski'], axis=1)
    df.ath_country = df.ath_country.apply(get_alpha2_country)
    for season in range(1967, 2021):
        df_rank = get_points(season)

        # Rows with any missing value are discarded before scoring.
        df_season = df[df["season"] == season].dropna()
        if df_season.empty:
            # No usable results for this season in this file: nothing to rank.
            continue

        scored_season = _score_every_race(df_season, df_rank)
        # The specialty always comes from the whole season and is reused for
        # the per-event files below.
        specialty = _specialty_per_skier(scored_season)
        _cumulative_ranking(scored_season, specialty).to_csv(
            f"{RANKING_DIR}/{wf}_ranking_{season}.csv", index=False
        )

        for event in df_season["event"].unique():
            scored_event = _score_every_race(df_season[df_season["event"] == event], df_rank)
            _cumulative_ranking(scored_event, specialty).to_csv(
                f"{RANKING_DIR}/{wf}_{event}_ranking_{season}.csv", index=False
            )
            


# Find Flags

In [None]:
from io import BytesIO

import requests
from PIL import Image

# Download and display one flag image per distinct athlete country code in `df`.
# NOTE(review): `df` is whatever frame the previously executed cell left behind;
# confirm it covers every results file whose flags are needed.
# NOTE(review): confirm the flag service below is still online.
for country in df["ath_country"].dropna().unique():
    if country == "":
        # "" marks a country code that could not be resolved; there is no flag for it.
        continue
    try:
        # A timeout keeps one unresponsive request from hanging the notebook.
        response = requests.get(f"https://www.countryflags.io/{country}/flat/64.png", timeout=10)
    except requests.RequestException as error:
        print(f"flag for {country} could not be downloaded: {error}")
        continue
    if not response.ok:
        # An error page is not an image; report it instead of passing it to PIL.
        print(f"flag for {country} could not be downloaded: HTTP {response.status_code}")
        continue
    img = Image.open(BytesIO(response.content))
    display(img)