In [None]:
import numpy as np
import matplotlib.pyplot as plt 
import pandas as pd
import seaborn as sns
import networkx as nx
import json

In [None]:
def parseCSV(isMale=True, data_dir='../../data'):
    """Load one gender's World Cup results CSV into a DataFrame.

    Parameters
    ----------
    isMale : bool, default True
        Selects the file to read: ``wcm.csv`` when true, ``wcf.csv`` otherwise.
    data_dir : str, default '../../data'
        Directory containing the CSV files. The default reproduces the path
        that used to be hard-coded, so existing calls are unaffected.

    Returns
    -------
    pandas.DataFrame
        The file contents with missing values replaced by empty strings,
        the ``date`` column parsed with the ``%Y-%m-%d`` format, and an
        extra ``gender`` column holding ``'m'`` or ``'f'``.
    """
    gender = 'm' if isMale else 'f'
    df = pd.read_csv(f'{data_dir}/wc{gender}.csv')
    # Missing cells become '' so later string comparisons (e.g. on ath_name)
    # do not have to deal with NaN.
    df = df.replace(np.nan, '', regex=True)
    df['date'] = pd.to_datetime(df['date'], format='%Y-%m-%d')
    df['gender'] = gender
    return df

In [None]:
def remove_cancelled_race(df):
    """Return the rows of ``df`` whose ``ath_name`` is not the empty string.

    The input frame is not modified; the original row index is preserved.
    """
    has_athlete = df['ath_name'].ne('')
    return df.loc[has_athlete]

In [None]:
def compute_matrix(season, df):
    """Count, for one season, how often each pair of athletes shared a race.

    A race is identified by its distinct (``venue``, ``event``, ``date``)
    combination within the season's rows.

    Parameters
    ----------
    season : scalar
        Value matched against ``df['season']``.
    df : pandas.DataFrame
        Must provide the columns ``season``, ``ath_name``, ``venue``,
        ``event`` and ``date``.

    Returns
    -------
    matrix : numpy.ndarray
        Square ``int32`` array; ``matrix[i, j]`` is the number of
        (row of athlete i, row of athlete j) pairs found in the same race.
        The diagonal is always zero.
    reverse_index : dict
        Maps each row/column position to the athlete name, in order of
        first appearance in the season's rows.
    """
    season_data = df[df['season'] == season]
    athletes = season_data['ath_name'].unique()
    number_athletes = len(athletes)

    athlete_index = {athlete: position for position, athlete in enumerate(athletes)}
    reverse_index = dict(enumerate(athletes))

    matrix = np.zeros((number_athletes, number_athletes), dtype=np.int32)
    if number_athletes == 0:
        return matrix, reverse_index

    # One pass over the season: each group is exactly the set of rows the
    # original per-race boolean filter selected (rows with a missing key
    # never matched an equality filter and are likewise skipped by groupby).
    for _, race in season_data.groupby(['venue', 'event', 'date'], sort=False):
        participants = np.array(
            [athlete_index[athlete] for athlete in race['ath_name']],
            dtype=np.intp,
        )
        # Unbuffered add so repeated (i, j) pairs inside a race are each counted.
        np.add.at(matrix, (participants[:, None], participants[None, :]), 1)

    # An athlete is never paired with themself.
    np.fill_diagonal(matrix, 0)

    return matrix, reverse_index

In [None]:
def get_winner_ranking(ranking, year, isMale=True, rankings_dir='../website/data/rankings'):
    """Return the name leading one ranking at its latest recorded date.

    Parameters
    ----------
    ranking : str
        Ranking label. ``'Overall'`` maps to the file without a discipline
        part (``wc{g}_ranking_{year}.csv``); any other label is embedded as
        ``wc{g}_{ranking}_ranking_{year}.csv``.
    year : int or str
        Year used in the file name.
    isMale : bool, default True
        Selects the ``wcm`` (true) or ``wcf`` (false) file prefix.
    rankings_dir : str, default '../website/data/rankings'
        Directory holding the ranking CSVs. The default reproduces the path
        that used to be hard-coded.

    Returns
    -------
    The ``name`` of the row with the highest ``value`` among the rows whose
    ``date`` equals the maximum ``date`` in the file.

    Raises
    ------
    FileNotFoundError
        If the ranking file does not exist.
    IndexError
        If the file contains no rows.
    """
    gender = 'm' if isMale else 'f'
    suffix = '_' if ranking == 'Overall' else '_' + ranking + '_'
    rankings = pd.read_csv(f'{rankings_dir}/wc{gender}{suffix}ranking_{year}.csv')
    final_standings = rankings[rankings['date'] == rankings['date'].max()]
    return final_standings.sort_values('value', ascending=False).iloc[0]['name']

def get_winners(year, isMale=True, verbose=False):
    """Collect the winner of every ranking that can be read for ``year``.

    Parameters
    ----------
    year : int or str
        Year passed to ``get_winner_ranking``.
    isMale : bool, default True
        Gender flag passed to ``get_winner_ranking``.
    verbose : bool, default False
        When true, print a line for every ranking that could not be resolved.

    Returns
    -------
    dict
        Maps winner name -> ranking label. Rankings are processed in a fixed
        order ending with ``'Overall'``; when one name wins several rankings
        the last one processed is kept, so ``'Overall'`` takes precedence.
    """
    rankings = ['Parallel', 'Super G', 'Combined', 'Giant Slalom', 'Slalom', 'Downhill', 'Overall']

    winners = {}
    for ranking in rankings:
        try:
            winner = get_winner_ranking(ranking, year, isMale)
        # Best-effort lookup: a ranking that cannot be read is skipped.
        # `Exception` (rather than a bare except) lets KeyboardInterrupt and
        # SystemExit propagate so the loop can still be interrupted.
        except Exception:
            if verbose:
                print('not found : '+ranking+', '+str(year)+', '+str(isMale))
            continue
        winners[winner] = ranking
    return winners

In [None]:
def graph_to_json(graph, reverse_index,winners,filename):
    """Write ``graph`` to ``filename`` as a ``{"nodes": [...], "links": [...]}`` JSON document.

    Each node becomes ``{"id": str(node), "name": reverse_index[node], "group": g}``
    where ``g`` is the numeric code of the ranking the athlete won according to
    ``winners`` (name -> ranking label), or 0 when the name is not in ``winners``.
    Each edge becomes ``{"source": u, "target": v, "value": 1}`` with the
    endpoints written as they appear in ``graph.edges``.
    """
    group_codes = {'Overall':1,'Downhill':2,'Super G':3,'Giant Slalom':4,'Slalom':5,'Combined':6,'Parallel':7 }

    links_json = [
        {"source": edge[0], "target": edge[1], "value": 1}
        for edge in graph.edges
    ]

    nodes_json = []
    for node in graph.nodes:
        athlete = reverse_index[node]
        # Group 0 marks athletes that did not win any ranking.
        code = group_codes[winners[athlete]] if athlete in winners else 0
        nodes_json.append({"id": str(node), "name": athlete, "group": code})

    payload = {"nodes": nodes_json, "links": links_json}
    with open(filename, "w") as json_file:
        json.dump(payload, json_file)

In [None]:
def drop_node_degree_zero(graph):
    """Remove every node of degree 0 from ``graph`` in place and return the same graph."""
    isolated = []
    for node, degree in dict(graph.degree()).items():
        if degree == 0:
            isolated.append(node)
    graph.remove_nodes_from(isolated)
    return graph

In [None]:
# Export one thresholded co-participation graph per season as JSON.
#
# thresholds[k] is used for seasons that are >= exactly k of the years in
# threshold_change_years, i.e. thresholds[0] before 1979, thresholds[1] from
# 1979, thresholds[2] from 1992 and thresholds[3] from 2001.
threshold_change_years = [1979,1992,2001]
thresholds = [2,3,4,10]
MALE = False
PATH = "./data/wc" + ("m" if MALE else "f") + '_graph_'

df = remove_cancelled_race(parseCSV(MALE))
seasons = list(df['season'].unique())

for season in seasons:
    # Derive the threshold from the season itself instead of a running
    # counter, so the result does not depend on the order of `seasons` or on
    # every change year actually occurring in the data.
    threshold_index = int(sum(season >= change_year for change_year in threshold_change_years))
    threshold = thresholds[threshold_index]

    winners = get_winners(season, MALE)
    matrix, reverse_index = compute_matrix(season, df)

    # Keep an edge only where two athletes met in more than `threshold` races.
    adjacency = (matrix > threshold).astype(matrix.dtype)

    # from_numpy_matrix was removed in NetworkX 3.0; from_numpy_array is
    # available in both 2.x and 3.x.
    graph = nx.from_numpy_array(adjacency)
    graph = drop_node_degree_zero(graph)
    graph_to_json(graph, reverse_index, winners, PATH+str(season)+".json")

In [None]:
# Bar chart of the number of distinct athletes per season (men's file),
# shown inline and saved to threshold.png.
df = remove_cancelled_race(parseCSV(True))
seasons = list(df['season'].unique())

# One count per entry of `seasons`, in the same order.
num_athletes = []
for season in seasons:
    season_data = df.loc[df['season'] == season]
    athletes = season_data['ath_name'].unique()
    number_athletes = len(athletes)
    num_athletes.append(number_athletes)

fig, ax = plt.subplots(figsize=(25, 11))
ax = sns.barplot(x=seasons, y=num_athletes, color=[0, 134.0/255, 1], ax=ax)
ax.set(title="Athletes per season", xlabel="Seasons", ylabel="# athletes")
plt.show()
fig.savefig('threshold.png')

In [None]:
# Display the results frame currently bound to `df` (the cell's last
# expression is rendered as its output).
df

In [None]:
# Co-participation counts for the 2019 season.
# compute_matrix returns (matrix, reverse_index), so the tuple is unpacked here;
# binding it whole to `matrix` breaks the graph-building cells below.
# `df` is the men's frame loaded by the plotting cell above; the previously
# referenced `dfm` is never defined in this notebook.
matrix, reverse_index = compute_matrix(2019, df)

In [None]:
# Draw the full weighted graph built from the count matrix.
# from_numpy_matrix was removed in NetworkX 3.0; from_numpy_array exists in
# both 2.x and 3.x and builds the same graph from a 2-D array.
G = nx.from_numpy_array(matrix)
nx.draw_kamada_kawai(G, with_labels=True, alpha=0.8)

In [None]:
# Binarise the counts: 1 where a pair shared more than 10 races, 0 otherwise.
# `matrix` itself is left untouched.
adjacency = (matrix > 10).astype(matrix.dtype)

In [None]:
# Draw the thresholded (0/1) graph.
# from_numpy_matrix was removed in NetworkX 3.0; from_numpy_array exists in
# both 2.x and 3.x.
G2 = nx.from_numpy_array(adjacency)
nx.draw_kamada_kawai(G2, with_labels=False, alpha=0.5)

In [None]:
# Same pipeline for the 1980 season, keeping pairs that shared more than 2 races.
# compute_matrix returns (matrix, reverse_index), so the tuple is unpacked.
# `df` is the men's frame loaded by the plotting cell above; the previously
# referenced `dfm` is never defined in this notebook.
matrix_1980, reverse_index_1980 = compute_matrix(1980, df)
adjacency_1980 = (matrix_1980 > 2).astype(matrix_1980.dtype)
# from_numpy_matrix was removed in NetworkX 3.0; from_numpy_array exists in
# both 2.x and 3.x.
G3 = nx.from_numpy_array(adjacency_1980)
nx.draw_kamada_kawai(G3, with_labels=False, alpha=0.5)

In [None]:
 # Seasons present in `df`, in order of first appearance.
 # NOTE(review): this cell starts with a stray leading space; IPython appears to
 # tolerate it, but a plain .py export would raise IndentationError - confirm and strip.
 list(df['season'].unique())