In [1]:
import pandas as pd
import numpy as np
pd.options.mode.chained_assignment = None

import calendar, datetime, math
import seaborn as sns

# This file contains all the data processing needed for this model
import data_processing

from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.neural_network import MLPRegressor
from sklearn.tree import DecisionTreeRegressor
from sklearn.metrics import dcg_score, ndcg_score
from scipy.stats import kendalltau

In [2]:
"""
Dataset sourced from https://www.kaggle.com/brkurzawa/ifsc-sport-climbing-competition-results
"""
cr_raw = pd.read_csv('ifsc_climbing_data/combined_results.csv')
br_raw = pd.read_csv('ifsc_climbing_data/boulder_results.csv')
lr_raw = pd.read_csv('ifsc_climbing_data/lead_results.csv')
sr_raw = pd.read_csv('ifsc_climbing_data/speed_results.csv')

In [3]:
# The first dataset we will process is for all the individual disipline 
# events before the 2019 World Championship Combined Event in Hachioji.

comp_date = datetime.datetime(2019, 8, 11)
comp_name = 'IFSC Climbing World Championships Combined - Hachioji (JPN) 2019'

pred_aggs = data_processing.process_data(br_raw, lr_raw, sr_raw, cr_raw, comp_date, comp_name)

In [4]:
pred_aggs[pred_aggs.gender == 'M'][['first', 'last', 'rank', 'gender', 
                                    'lead_avg_rank', 'boulder_avg_rank',
                                    'speed_avg_rank'
                                   ]].head()

Unnamed: 0,first,last,rank,gender,lead_avg_rank,boulder_avg_rank,speed_avg_rank
20,Tomoa,NARASAKI,1,M,12.166667,4.166667,37.5
21,Jakob,SCHUBERT,2,M,4.6,14.0,48.727273
22,Rishat,KHAIBULLIN,3,M,57.75,70.333333,23.5
23,Kai,HARADA,4,M,21.666667,14.75,44.8
24,Meichi,NARASAKI,5,M,15.5,18.142857,35.166667


In [5]:
pred_aggs['rank_score'] = (20 - pred_aggs['rank'])/20

In [6]:
max_avg_rank = pred_aggs['avg_rank_multi'].max()
pred_aggs['avg_rank_multi_score'] = (max_avg_rank - pred_aggs['avg_rank_multi'])/max_avg_rank

In [7]:
pred_aggs[pred_aggs.gender == 'M'][['first', 'last', 'rank', 'gender', 
                                    'lead_avg_rank', 'boulder_avg_rank',
                                    'speed_avg_rank', 'avg_rank_multi', 
                                    'rank_score', 'avg_rank_multi_score'
                                   ]].sort_values('avg_rank_multi').head()

Unnamed: 0,first,last,rank,gender,lead_avg_rank,boulder_avg_rank,speed_avg_rank,avg_rank_multi,rank_score,avg_rank_multi_score
37,Adam,ONDRA,18,M,2.333333,6.833333,72.0,1148.0,0.1,0.991463
20,Tomoa,NARASAKI,1,M,12.166667,4.166667,37.5,1901.041667,0.95,0.985863
21,Jakob,SCHUBERT,2,M,4.6,14.0,48.727273,3138.036364,0.9,0.976664
25,Kokoro,FUJII,6,M,12.142857,8.428571,38.083333,3897.712585,0.7,0.971015
39,Jongwon,CHON,20,M,27.0,6.769231,53.714286,9817.318681,0.0,0.926994


In [8]:
pred_aggs[pred_aggs.gender == 'F'][['first', 'last', 'rank', 'gender', 
                                    'lead_avg_rank', 'boulder_avg_rank',
                                    'speed_avg_rank', 'avg_rank_multi', 
                                    'rank_score', 'avg_rank_multi_score'
                                   ]].sort_values('avg_rank_multi').head()

Unnamed: 0,first,last,rank,gender,lead_avg_rank,boulder_avg_rank,speed_avg_rank,avg_rank_multi,rank_score,avg_rank_multi_score
0,Janja,GARNBRET,1,F,2.181818,1.1,34.888889,83.733333,0.95,0.999377
1,Akiyo,NOGUCHI,2,F,5.375,2.307692,35.461538,439.859467,0.9,0.996729
4,Miho,NONAKA,5,F,14.142857,2.727273,24.111111,930.0,0.75,0.993084
9,Jessica,PILZ,10,F,3.0,10.0,41.777778,1253.333333,0.5,0.99068
5,Ai,MORI,6,F,3.5,17.0,63.666667,3788.166667,0.7,0.971829


In [9]:
mens_results = np.asarray([list(pred_aggs[pred_aggs.gender == 'M']['rank_score'].values)])
mens_avg_pred = np.asarray([list(pred_aggs[pred_aggs.gender == 'M']['avg_rank_multi_score'].values)])

print("Men's NDCG All: ", ndcg_score(mens_results, mens_avg_pred))
print("Men's NDCG Top 3: ", ndcg_score(mens_results, mens_avg_pred, 3))
print("Men's Kendall: ", kendalltau(mens_results, mens_avg_pred)[0])

Men's NDCG All:  0.8077067072045893
Men's NDCG Top 3:  0.59160052907239
Men's Kendall:  0.052631578947368425


In [10]:
womens_results = np.asarray([list(pred_aggs[pred_aggs.gender == 'F']['rank_score'].values)])
womens_avg_pred = np.asarray([list(pred_aggs[pred_aggs.gender == 'F']['avg_rank_multi_score'].values)])

print("Womens's NDCG All: ", ndcg_score(womens_results, womens_avg_pred))
print("Womens's NDCG Top 3: ", ndcg_score(womens_results, womens_avg_pred, 3))
print("Womens's Kendall: ", kendalltau(womens_results, womens_avg_pred)[0])

Womens's NDCG All:  0.9770159311401715
Womens's NDCG Top 3:  0.974264436127282
Womens's Kendall:  0.6105263157894737


In [11]:
feature_columns = ['lead_avg_rank', 'boulder_avg_rank', 'speed_avg_rank']
X = pred_aggs[feature_columns]
y = pred_aggs['rank_score']
X_train, X_test, y_train, y_test  = train_test_split(X, y, random_state = 1)

reg = LinearRegression().fit(X_train, y_train)

pred_LR = np.asarray([list(reg.predict(X_test))])

print("NDCG: ", ndcg_score(pred_LR , np.asarray([list(y_test.values)])))
print("Kendall: ", kendalltau(pred_LR, np.asarray([list(y_test.values)]))[0])
print("Number Test Examples: ", y_test.shape[0])

NDCG:  0.9789542166568036
Kendall:  0.4319297483312999
Number Test Examples:  10


In [12]:
X_train, X_test, y_train, y_test  = train_test_split(X, y, random_state = 10)

reg = LinearRegression().fit(X_train, y_train)

pred_LR = np.asarray([list(reg.predict(X_test))])

print("NDCG: ", ndcg_score(pred_LR , np.asarray([list(y_test.values)])))
print("Kendall: ", kendalltau(pred_LR, np.asarray([list(y_test.values)]))[0])
print("Number Test Examples: ", y_test.shape[0])

NDCG:  0.8378307274224305
Kendall:  0.08989331499509895
Number Test Examples:  10


In [13]:
date_train = datetime.datetime(2018, 9, 16)
comp_name_train = 'IFSC Climbing World Championships Combined - Innsbruck (AUT) 2018'

pred_aggs_train = data_processing.process_data(br_raw, lr_raw, sr_raw, cr_raw, date_train, comp_name)
pred_aggs_train['rank_score'] = (20 - pred_aggs_train['rank'])/20

pred_aggs_test = pred_aggs

In [14]:
reg_full_comp = LinearRegression().fit(pred_aggs_train[feature_columns], pred_aggs_train['rank_score'])

pred_aggs_test['pred'] = reg.predict(pred_aggs_test[feature_columns])

In [15]:
mens_results_LR = np.asarray([list(pred_aggs_test[pred_aggs_test.gender == 'M']['rank_score'].values)])
mens_pred_LR = np.asarray([list(pred_aggs_test[pred_aggs_test.gender == 'M']['pred'].values)])

print("Men's NDCG: ", ndcg_score(mens_results_LR, mens_pred_LR))
print("Men's NDCG Top 3: ", ndcg_score(mens_results_LR, mens_pred_LR, 3))
print("Men's Kendall: ", kendalltau(mens_results_LR, mens_pred_LR)[0])

Men's NDCG:  0.9422327714247841
Men's NDCG Top 3:  0.8422170657278147
Men's Kendall:  0.4631578947368421


In [16]:
womens_results_LR = np.asarray([list(pred_aggs_test[pred_aggs_test.gender == 'F']['rank_score'].values)])
womens_pred_LR = np.asarray([list(pred_aggs_test[pred_aggs_test.gender == 'F']['pred'].values)])

print("Womens's NDCG: ", ndcg_score(womens_results_LR, womens_pred_LR))
print("Womens's NDCG Top 3: ", ndcg_score(womens_results_LR, womens_pred_LR, 3))
print("Womens's Kendall: ", kendalltau(womens_results_LR, womens_pred_LR)[0])

Womens's NDCG:  0.854810270718025
Womens's NDCG Top 3:  0.5382981347055015
Womens's Kendall:  0.38947368421052636


In [17]:
full_comp_nn = MLPRegressor(random_state=1, max_iter=1000).fit(pred_aggs_train[feature_columns], pred_aggs_train['rank_score'])

pred_aggs_test['pred_nn'] = full_comp_nn.predict(pred_aggs_test[feature_columns])

In [18]:
mens_results_LR = np.asarray([list(pred_aggs_test[pred_aggs_test.gender == 'M']['rank_score'].values)])
mens_pred_LR = np.asarray([list(pred_aggs_test[pred_aggs_test.gender == 'M']['pred_nn'].values)])

print("Men's NDCG: ", ndcg_score(mens_results_LR, mens_pred_LR))
print("Men's NDCG Top 3: ", ndcg_score(mens_results_LR, mens_pred_LR, 3))
print("Men's Kendall: ", kendalltau(mens_results_LR, mens_pred_LR)[0])

womens_results_LR = np.asarray([list(pred_aggs_test[pred_aggs_test.gender == 'F']['rank_score'].values)])
womens_pred_LR = np.asarray([list(pred_aggs_test[pred_aggs_test.gender == 'F']['pred_nn'].values)])

print("Womens's NDCG: ", ndcg_score(womens_results_LR, womens_pred_LR))
print("Womens's NDCG Top 3: ", ndcg_score(womens_results_LR, womens_pred_LR, 3))
print("Womens's Kendall: ", kendalltau(womens_results_LR, womens_pred_LR)[0])

Men's NDCG:  0.6938132494686169
Men's NDCG Top 3:  0.21997313733240445
Men's Kendall:  -0.2947368421052632
Womens's NDCG:  0.9611316063642132
Womens's NDCG Top 3:  0.9518984232904415
Womens's Kendall:  0.5684210526315789


In [19]:
full_comp_t = DecisionTreeRegressor().fit(pred_aggs_train[feature_columns], pred_aggs_train['rank_score'])

pred_aggs_test['pred_t'] = full_comp_t.predict(pred_aggs_test[feature_columns])

In [20]:
mens_results_LR = np.asarray([list(pred_aggs_test[pred_aggs_test.gender == 'M']['rank_score'].values)])
mens_pred_LR = np.asarray([list(pred_aggs_test[pred_aggs_test.gender == 'M']['pred_t'].values)])

print("Men's NDCG: ", ndcg_score(mens_results_LR, mens_pred_LR))
print("Men's Kendall: ", kendalltau(mens_results_LR, mens_pred_LR)[0])

womens_results_LR = np.asarray([list(pred_aggs_test[pred_aggs_test.gender == 'F']['rank_score'].values)])
womens_pred_LR = np.asarray([list(pred_aggs_test[pred_aggs_test.gender == 'F']['pred_t'].values)])

print("Womens's NDCG: ", ndcg_score(womens_results_LR, womens_pred_LR))
print("Womens's Kendall: ", kendalltau(womens_results_LR, womens_pred_LR)[0])

Men's NDCG:  0.7543754165394441
Men's Kendall:  0.032626046344280464
Womens's NDCG:  0.954841916359237
Womens's Kendall:  0.33713581222423145
