# Normalize all panels at once

In [3]:
import numpy as np
import pandas as pd
import scipy.stats as stats

In [4]:
# Panel numbers whose marksheets are combined, and the five scoring criteria
# that every later cell aggregates over.
panels = [2, 3]
cols = ["technology", "design", "presentation",
        "collaboration", "implementation"]

# Read every marksheet into a list and concatenate once at the end, instead of
# growing a DataFrame inside the loop (quadratic copying, and concatenating
# with an empty frame is deprecated behaviour in recent pandas).
panel_frames = []
for panel in panels:
    panel_data = (
        pd.read_csv(f'marksheets/panel{panel:02}.csv')
        # Drop rows that are entirely empty.
        .dropna(how="all")
        # Forward-fill remaining gaps from the row above.
        # NOTE(review): presumably this repeats the team number across the
        # rows of each judge; it would also copy the previous row's value into
        # a genuinely missing score - confirm the sheets have no such gaps.
        .ffill()
    )
    # Remember which panel each row came from.
    panel_data['panel'] = panel
    panel_frames.append(panel_data)

# ignore_index=True yields a fresh 0..n-1 index, so no reset_index is needed.
all_panels_data = pd.concat(panel_frames, ignore_index=True)

# Normalize headers to snake_case without parentheses. regex=False makes the
# replacements literal: '(' and ')' are regex metacharacters, and the default
# of `regex` differs between pandas versions.
all_panels_data.columns = (
    all_panels_data.columns
    .str.strip()
    .str.lower()
    .str.replace(' ', '_', regex=False)
    .str.replace('(', '', regex=False)
    .str.replace(')', '', regex=False)
)
all_panels_data

Unnamed: 0,team_no,judges,technology,design,presentation,collaboration,implementation,total,collective_total,panel
0,110.0,Smt. Satya Kiranmai,2.0,2.0,2.0,2.0,2.0,2.0,1.7,2
1,110.0,Mr.Madhukar,2.0,1.0,2.0,1.0,1.0,1.4,1.7,2
2,67.0,Smt. Satya Kiranmai,4.0,5.0,4.0,4.0,4.0,4.2,4.2,2
3,67.0,Mr.Madhukar,4.0,4.0,4.0,5.0,4.0,4.2,4.2,2
4,123.0,Smt. Satya Kiranmai,2.0,1.0,1.0,1.0,1.0,1.2,1.2,2
5,123.0,Mr.Madhukar,2.0,1.0,1.0,1.0,1.0,1.2,1.2,2
6,147.0,Smt. Satya Kiranmai,3.0,2.0,2.0,3.0,3.0,2.6,2.6,2
7,147.0,Mr.Madhukar,3.0,2.0,3.0,2.0,3.0,2.6,2.6,2
8,186.0,Smt. Satya Kiranmai,4.0,4.0,4.0,4.0,5.0,4.2,4.4,2
9,186.0,Mr.Madhukar,4.0,5.0,4.0,5.0,5.0,4.6,4.4,2


In [5]:
# Raw (un-normalized) score per team: average each criterion over all of the
# team's rows, then average the criteria into a single "total".
all_teams_raw_average_scores = (
    all_panels_data
    .groupby('team_no')[cols]
    .mean()
    .reset_index()
    .assign(total=lambda teams: teams[cols].mean(axis=1))
)

# Highest raw total first.
all_teams_raw_average_scores_sorted = all_teams_raw_average_scores.sort_values(
    "total",
    ascending=False,
)
all_teams_raw_average_scores_sorted

Unnamed: 0,team_no,technology,design,presentation,collaboration,implementation,total
20,186.0,4.0,4.5,4.0,4.5,5.0,4.4
1,50.0,4.0,4.5,4.0,5.0,4.5,4.4
2,67.0,4.0,4.5,4.0,4.5,4.0,4.2
5,97.0,4.0,3.5,3.0,3.5,4.0,3.6
0,15.0,3.0,3.5,3.5,4.0,3.5,3.5
8,117.0,3.0,3.0,4.0,4.0,3.0,3.4
17,171.0,3.0,3.5,3.5,3.5,3.5,3.4
18,177.0,3.0,3.5,3.0,3.0,3.0,3.1
4,95.0,3.0,3.0,3.0,3.5,2.5,3.0
6,103.0,3.0,3.0,3.0,3.0,3.0,3.0


In [6]:
# Z-score every criterion column over ALL rows at once (all panels and judges
# pooled together). `assign` works on a copy, so `all_panels_data` itself is
# left untouched.
all_panels_data_normalized = all_panels_data.assign(
    **{criterion: stats.zscore(all_panels_data[criterion]) for criterion in cols}
)

# Overwrite "total" with the SUM of the five z-scores for each row.
# Note: the team-level tables use a mean for "total", and any other columns
# carried over from the marksheet keep their raw values.
all_panels_data_normalized = all_panels_data_normalized.assign(
    total=lambda rows: rows[cols].sum(axis=1)
)
all_panels_data_normalized

Unnamed: 0,team_no,judges,technology,design,presentation,collaboration,implementation,total,collective_total,panel
0,110.0,Smt. Satya Kiranmai,-0.497947,-0.542897,-0.61667,-0.571241,-0.501859,-2.730613,1.7,2
1,110.0,Mr.Madhukar,-0.497947,-1.357242,-0.61667,-1.345179,-1.312555,-5.129593,1.7,2
2,67.0,Smt. Satya Kiranmai,1.403304,1.900138,1.301859,0.976637,1.119533,6.701471,4.2,2
3,67.0,Mr.Madhukar,1.403304,1.085793,1.301859,1.750576,1.119533,6.661065,4.2,2
4,123.0,Smt. Satya Kiranmai,-0.497947,-1.357242,-1.575934,-1.345179,-1.312555,-6.088857,1.2,2
5,123.0,Mr.Madhukar,-0.497947,-1.357242,-1.575934,-1.345179,-1.312555,-6.088857,1.2,2
6,147.0,Smt. Satya Kiranmai,0.452679,-0.542897,-0.61667,0.202698,0.308837,-0.195353,2.6,2
7,147.0,Mr.Madhukar,0.452679,-0.542897,0.342594,-0.571241,0.308837,-0.010028,2.6,2
8,186.0,Smt. Satya Kiranmai,1.403304,1.085793,1.301859,0.976637,1.930229,6.697822,4.4,2
9,186.0,Mr.Madhukar,1.403304,1.900138,1.301859,1.750576,1.930229,8.286106,4.4,2


In [7]:
# Team-level normalized score:
#   1. average each z-scored criterion over the team's rows,
#   2. average the five criteria into "total",
#   3. pass "total" through the standard normal CDF (range 0..1) and multiply
#      by 5 (presumably to land on the same scale as the raw marks - confirm).
all_teams_scores_normalized = (
    all_panels_data_normalized
    .groupby("team_no")[cols]
    .mean()
    .reset_index()
    .assign(total=lambda teams: teams[cols].mean(axis=1))
    .assign(total_cdf_scaled=lambda teams: stats.norm.cdf(teams["total"]) * 5)
)
all_teams_scores_normalized

Unnamed: 0,team_no,technology,design,presentation,collaboration,implementation,total,total_cdf_scaled
0,15.0,0.452679,0.678621,0.822226,0.976637,0.714185,0.72887,3.834796
1,50.0,1.403304,1.492966,1.301859,1.750576,1.524881,1.494717,4.662529
2,67.0,1.403304,1.492966,1.301859,1.363607,1.119533,1.336254,4.546334
3,77.0,-0.497947,0.271448,-1.575934,-0.571241,-0.501859,-0.575106,1.413048
4,95.0,0.452679,0.271448,0.342594,0.589668,-0.096511,0.311976,3.112352
5,97.0,1.403304,0.678621,0.342594,0.589668,1.119533,0.826744,3.979044
6,103.0,0.452679,0.271448,0.342594,0.202698,0.308837,0.315651,3.119332
7,110.0,-0.497947,-0.950069,-0.61667,-0.95821,-0.907207,-0.786021,1.079639
8,117.0,0.452679,0.271448,1.301859,0.976637,0.308837,0.662292,3.73054
9,122.0,-1.448572,-1.357242,-0.61667,-0.95821,-1.312555,-1.13865,0.637123


In [8]:
# Rank teams by their CDF-scaled normalized score, best first.
# (Despite its name, this variable holds the team-level table.)
all_panels_data_normalized_sorted = all_teams_scores_normalized.sort_values(
    by="total_cdf_scaled",
    ascending=False,
)
all_panels_data_normalized_sorted

Unnamed: 0,team_no,technology,design,presentation,collaboration,implementation,total,total_cdf_scaled
20,186.0,1.403304,1.492966,1.301859,1.363607,1.930229,1.498393,4.664922
1,50.0,1.403304,1.492966,1.301859,1.750576,1.524881,1.494717,4.662529
2,67.0,1.403304,1.492966,1.301859,1.363607,1.119533,1.336254,4.546334
5,97.0,1.403304,0.678621,0.342594,0.589668,1.119533,0.826744,3.979044
0,15.0,0.452679,0.678621,0.822226,0.976637,0.714185,0.72887,3.834796
8,117.0,0.452679,0.271448,1.301859,0.976637,0.308837,0.662292,3.73054
17,171.0,0.452679,0.678621,0.822226,0.589668,0.714185,0.651476,3.713151
18,177.0,0.452679,0.678621,0.342594,0.202698,0.308837,0.397086,3.271739
6,103.0,0.452679,0.271448,0.342594,0.202698,0.308837,0.315651,3.119332
19,182.0,0.452679,0.271448,0.342594,0.202698,0.308837,0.315651,3.119332


In [9]:
# Raw-average ranking (highest total first), shown again for side-by-side
# comparison with the normalized ranking.
all_teams_raw_average_scores_sorted = all_teams_raw_average_scores.sort_values(
    by="total",
    ascending=False,
)
all_teams_raw_average_scores_sorted

Unnamed: 0,team_no,technology,design,presentation,collaboration,implementation,total
20,186.0,4.0,4.5,4.0,4.5,5.0,4.4
1,50.0,4.0,4.5,4.0,5.0,4.5,4.4
2,67.0,4.0,4.5,4.0,4.5,4.0,4.2
5,97.0,4.0,3.5,3.0,3.5,4.0,3.6
0,15.0,3.0,3.5,3.5,4.0,3.5,3.5
8,117.0,3.0,3.0,4.0,4.0,3.0,3.4
17,171.0,3.0,3.5,3.5,3.5,3.5,3.4
18,177.0,3.0,3.5,3.0,3.0,3.0,3.1
4,95.0,3.0,3.0,3.0,3.5,2.5,3.0
6,103.0,3.0,3.0,3.0,3.0,3.0,3.0


In [10]:
# Loading data from previously merged results
all_teams_scores_indiv_norm_avg_cdf_scaled = pd.read_csv("merged_results/merged_team_scores.csv")
all_teams_scores_indiv_norm_avg_cdf_scaled

Unnamed: 0,team no,raw_avg_norm,indiv_norm_avg,raw_avg_norm_cdf_scaled,indiv_norm_avg_cdf_scaled,raw_avg
0,15,1.295665,1.224383,4.512273,4.44798,3.5
1,50,1.163506,1.123437,4.38844,4.34687,4.4
2,171,1.17784,1.123382,4.402849,4.346811,3.4
3,117,1.185446,1.117854,4.410397,4.340926,3.4
4,186,1.149096,1.109178,4.373709,4.331616,4.4
5,67,0.973601,0.949209,4.174363,4.143715,4.2
6,182,0.767389,0.740876,3.892874,3.853078,3.0
7,95,0.763586,0.723058,3.887215,3.825889,3.0
8,97,0.4255,0.430124,3.323819,3.332236,3.6
9,137,0.345529,0.324149,3.175758,3.135437,2.6


In [11]:
# Raw-average ranking: team numbers (as ints) in rank order, then the
# matching totals in the same order.
print(
    all_teams_raw_average_scores_sorted["team_no"].astype(int).to_list()
)
print(
    all_teams_raw_average_scores_sorted["total"].to_list()
)

[186, 50, 67, 97, 15, 117, 171, 177, 95, 103, 182, 137, 147, 77, 110, 163, 166, 122, 123, 135, 145]
[4.4, 4.4, 4.2, 3.6, 3.5, 3.4, 3.4, 3.1, 3.0, 3.0, 3.0, 2.6, 2.6, 2.0, 1.7, 1.5, 1.5, 1.3, 1.2, 1.0, 1.0]


In [12]:
# Normalized ranking: team numbers (as ints) in rank order, then the
# matching CDF-scaled scores in the same order.
print(
    all_panels_data_normalized_sorted["team_no"].astype(int).to_list()
)
print(
    all_panels_data_normalized_sorted["total_cdf_scaled"].to_list()
)

[186, 50, 67, 97, 15, 117, 171, 177, 103, 182, 95, 147, 137, 77, 110, 166, 163, 122, 123, 135, 145]
[4.664921894921788, 4.66252922453363, 4.546333955624724, 3.9790444393499733, 3.8347963457490106, 3.7305395233601657, 3.7131513021766787, 3.2717394560195903, 3.1193319302856692, 3.1193319302856692, 3.1123522180891072, 2.459035376264875, 2.4343114804587787, 1.4130482538215934, 1.0796385119345142, 0.8270419893207898, 0.8179357075012614, 0.6371231228034385, 0.5582770872410281, 0.39790428362205865, 0.39790428362205865]


In [14]:
# Team numbers in the row order of the merged-results file; the column there
# is named "team no" (with a space).
print(
    all_teams_scores_indiv_norm_avg_cdf_scaled["team no"].astype(int).to_list()
)

[15, 50, 171, 117, 186, 67, 182, 95, 97, 137, 177, 103, 77, 147, 166, 163, 122, 135, 145, 110, 123]
