In [54]:
import pandas as pd
import numpy as np

### load the CSV files with team information and all the scores

In [55]:
# read the team roster and the raw judge scores from the sample_data folder
teams_path = 'sample_data//teams.csv'
scores_path = 'sample_data//scores.csv'
df_teams = pd.read_csv(teams_path)
df_scores = pd.read_csv(scores_path)

# preview the first rows of the roster
df_teams.head(n=5)

Unnamed: 0,teamID,competitorID,competitor_name,competitor_school
0,1,1,a,x
1,1,2,b,x
2,2,3,c,xx
3,2,4,d,xx
4,3,5,e,xxx


In [36]:
# preview the first rows of the raw scores (one row per competitor/judge pair)
df_scores.head(n=5)

Unnamed: 0,competitorID,judgeID,score_raw
0,1,1,305
1,1,2,333
2,1,3,328
3,1,4,366
4,1,5,335


### get each judge's average score and standard deviation

In [37]:
# mean raw score awarded by each judge, one row per judgeID
judge_means = df_scores.groupby('judgeID')['score_raw'].mean()
df_judge_avgs = judge_means.rename('judge_avg').reset_index()

df_judge_avgs.head(10)

Unnamed: 0,judgeID,judge_avg
0,1,370.166667
1,2,353.857143
2,3,339.833333
3,4,368.8
4,5,347.8
5,6,337.0


In [38]:
# grand mean of every raw score, pooled across all judges
avg_score = df_scores.loc[:, 'score_raw'].mean()

print(avg_score)

353.0882352941176


In [39]:
# standard deviation of the raw scores awarded by each judge, one row per judgeID
judge_spreads = df_scores.groupby('judgeID')['score_raw'].std()
df_judge_stds = judge_spreads.rename('judge_std').reset_index()

df_judge_stds.head(10)

Unnamed: 0,judgeID,judge_std
0,1,33.090281
1,2,23.807762
2,3,35.085135
3,4,22.840753
4,5,35.926313
5,6,36.565011


In [40]:
# standard deviation of every raw score, pooled across all judges
std_score = df_scores.loc[:, 'score_raw'].std()

print(std_score)

31.672918981228168


### get z-score for each entry in df_scores (i.e. standardize all scores), then calculate final scores

In [41]:
# attach each judge's mean and standard deviation to every score row
df_scores_new = pd.merge(df_scores, df_judge_avgs, how='left', on='judgeID')
df_scores_new = pd.merge(df_scores_new, df_judge_stds, how='left', on='judgeID')
df_scores_new

Unnamed: 0,competitorID,judgeID,score_raw,judge_avg,judge_std
0,1,1,305,370.166667,33.090281
1,1,2,333,353.857143,23.807762
2,1,3,328,339.833333,35.085135
3,1,4,366,368.8,22.840753
4,1,5,335,347.8,35.926313
5,1,6,302,337.0,36.565011
6,1,1,376,370.166667,33.090281
7,1,2,361,353.857143,23.807762
8,2,3,361,339.833333,35.085135
9,2,4,331,368.8,22.840753


In [44]:
# standardize each score against the judge who gave it: (score - judge mean) / judge std
# NOTE(review): a judge with a single score or identical scores presumably yields NaN here — confirm
deviation_from_judge = df_scores_new['score_raw'] - df_scores_new['judge_avg']
df_scores_new['score_zscore'] = deviation_from_judge / df_scores_new['judge_std']

# map the z-scores back onto the overall scale (overall mean and standard deviation)
df_scores_new['score_final'] = avg_score + std_score * df_scores_new['score_zscore']

df_scores_new

Unnamed: 0,competitorID,judgeID,score_raw,judge_avg,judge_std,score_zscore,score_final
0,1,1,305,370.166667,33.090281,-1.96936,290.712863
1,1,2,333,353.857143,23.807762,-0.876065,325.340705
2,1,3,328,339.833333,35.085135,-0.337275,342.405756
3,1,4,366,368.8,22.840753,-0.122588,349.205519
4,1,5,335,347.8,35.926313,-0.356285,341.803655
5,1,6,302,337.0,36.565011,-0.957199,322.770942
6,1,1,376,370.166667,33.090281,0.176285,358.671708
7,1,2,361,353.857143,23.807762,0.300022,362.590814
8,2,3,361,339.833333,35.085135,0.603294,372.196331
9,2,4,331,368.8,22.840753,-1.654937,300.67156


### using transformed scores, get individual and team rankings

In [59]:
# get individual rankings: average raw and final (standardized) score per competitor
# Only the two score columns are aggregated, so the result has exactly
# competitorID, score_raw and score_final without dropping helper columns afterwards.
# The chain is parenthesized so no line ends in a dangling backslash.
df_participant_scores = (
    df_scores_new
    .groupby('competitorID')[['score_raw', 'score_final']]
    .mean()
    .reset_index()
)

df_participant_scores.head()

Unnamed: 0,competitorID,score_raw,score_final
0,1,338.25,336.687745
1,2,375.75,373.538366
2,3,350.2,349.631279
3,4,348.875,353.35979


In [102]:
# add competitor names and schools to the per-competitor averages, best final score first
participant_columns = ['competitorID', 'competitor_name', 'competitor_school', 'score_raw', 'score_final']
participants_named = df_participant_scores.merge(df_teams, on='competitorID', how='left')
df_participant_scores_final = participants_named[participant_columns]\
                                    .sort_values('score_final', ascending=False)

df_participant_scores_final

Unnamed: 0,competitorID,competitor_name,competitor_school,score_raw,score_final
1,2,b,x,375.75,373.538366
3,4,d,xx,348.875,353.35979
2,3,c,xx,350.2,349.631279
0,1,a,x,338.25,336.687745


In [73]:
# get team rankings: average raw and final score over every score row of a team
# Only competitorID/teamID are taken from df_teams and only the two score columns
# are averaged. Averaging the merged frame as a whole would include the text
# columns (competitor_name, competitor_school), which pandas >= 2.0 rejects with
# a TypeError instead of silently dropping them.
score_columns = ['score_raw', 'score_final']
df_team_scores = (
    df_scores_new[['competitorID'] + score_columns]
    .merge(df_teams[['competitorID', 'teamID']], on='competitorID', how='left')
    .groupby('teamID')[score_columns]
    .mean()
    .reset_index()
)

df_team_scores.head()

Unnamed: 0,teamID,score_raw,score_final
0,1,357.0,355.113056
1,2,349.611111,351.288395


In [101]:
# reshape df_teams to one row per team, with both members side by side, for easier merging later on
# NOTE(review): this appears to assume exactly two competitors per team — confirm
first_members = df_teams.drop_duplicates('teamID')
team_pairs = first_members.merge(df_teams, on='teamID', how='left', suffixes=('_1', '_2'))

# the self-merge also pairs each first member with themselves; keep only the real teammate rows
is_teammate = team_pairs['competitorID_1'] != team_pairs['competitorID_2']
df_teams2 = team_pairs[is_teammate].reset_index(drop=True)

# keep a single school column, taken from the second member's row
df_teams2 = df_teams2.drop(['competitor_school_1'], axis=1)
df_teams2 = df_teams2.rename(columns={'competitor_school_2':'competitor_school'})

df_teams2.head()

Unnamed: 0,teamID,competitorID_1,competitor_name_1,competitorID_2,competitor_name_2,competitor_school
0,1,1,a,2,b,x
1,2,3,c,4,d,xx
2,3,5,e,6,f,xxx
3,4,7,g,8,h,x
4,5,9,i,10,j,xx


In [103]:
# add member names and school to each team's averages, highest final score first
team_columns = ['teamID', 'competitorID_1', 'competitor_name_1', 'competitorID_2', 'competitor_name_2',
                'competitor_school', 'score_raw', 'score_final']
teams_with_members = df_team_scores.merge(df_teams2, on='teamID', how='left')
df_team_scores_final = teams_with_members[team_columns].sort_values('score_final', ascending=False)

df_team_scores_final

Unnamed: 0,teamID,competitorID_1,competitor_name_1,competitorID_2,competitor_name_2,competitor_school,score_raw,score_final
0,1,1,a,2,b,x,357.0,355.113056
1,2,3,c,4,d,xx,349.611111,351.288395


### output all dataframes to CSV files

In [140]:
# Build the timestamp inside this cell so the export works on a fresh kernel;
# previously `timestamp` was only assigned in a later cell, so running the
# notebook top to bottom raised a NameError here.
# NOTE(review): the ':' in the time part is not a legal filename character on Windows — confirm target platforms
timestamp = pd.Timestamp.now().strftime("%Y-%m-%d_%H:%M")

# NOTE(review): assumes the output folder already exists — confirm
# the full score table is written to a fixed path (no timestamp in the name)
df_scores_new.to_csv('output//scores_full.csv')
# individual and team rankings get a timestamped file name
df_participant_scores_final.to_csv('output//scores_individual_{}.csv'.format(timestamp))
df_team_scores_final.to_csv('output//scores_team_{}.csv'.format(timestamp))

output//scores_full_2018-12-21_19:37.csv


In [131]:
# `now` was never assigned anywhere in the notebook, so this cell raised a
# NameError on a fresh kernel; take the current local time explicitly.
now = pd.Timestamp.now()

# minute-resolution stamp used in the output file names
# NOTE(review): the ':' in the time part is not a legal filename character on Windows — confirm target platforms
timestamp = now.strftime("%Y-%m-%d_%H:%M")

# preview of a timestamped output path
'output//scores_full_{0}{1}'.format(timestamp, '.csv')

'output//scores_full_2018-12-21_19:27.csv'