# Understanding how similarly the two round-2 reviewers ranked candidates

Code: Python
Packages: Pandas, numpy, Scipy
Stat techniques: Rankdata, Spearman
Visualization: Bokeh with Hover Tool

This code reads the selection data and takes the rank columns of reviewer 1 and reviewer 2. There are a couple of issues with the data:

a. The rank data has ties.
b. Candidates who were not selected by a reviewer have rank 0.

To measure ranking similarity, the Spearman rank correlation coefficient requires tied ranks to be handled. This is achieved with the `rankdata` function provided by SciPy. Once ties are handled and the missing (rank 0) entries have been manually assigned a rank, the ranks are passed to SciPy's Spearman rank correlation calculation.

Finally, the result (correlation against p-value, one point per position) is plotted using Bokeh.

In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline
from collections import Counter
from bokeh.io import output_notebook, show
#import bokeh.charts
from bokeh.plotting import figure
pd.set_option('display.max_columns',100)
from scipy.stats import rankdata, spearmanr
from bokeh.core.properties import field
#from bokeh.charts import Scatter, output_notebook, show
from bokeh.models import (
    ColumnDataSource, HoverTool, SingleIntervalTicker, Slider, Button, Label,
    CategoricalColorMapper,
)

In [2]:
def select_rows_required(path='input/Selection.xlsx'):
    """Load the selection workbook and keep the round-2 reviewer columns.

    Parameters
    ----------
    path : str, optional
        Location of the Excel workbook. Defaults to the path the notebook
        has always read, so existing zero-argument calls are unaffected.

    Returns
    -------
    pandas.DataFrame
        The eight selected columns under short names (``appl_id``,
        ``position``, ``reviewer_1_rank``, ``reviewer_1_score``,
        ``reviewer1_decn``, ``reviewer_2_score``, ``reviewer2_decn``,
        ``reviewer_2_rank``), without rows that have a missing value in any
        of those columns and without rows where both reviewers' rank is 0.
    """
    # Maps the workbook's column headers to the short names used downstream.
    # The key order is also the column order of the returned frame.
    column_names = {
        'UniqueAppID': 'appl_id',
        'Position': 'position',
        'Round2 GHC Semifinalist Reviewer1': 'reviewer_1_rank',
        'Round2 Total Score Reviewer1': 'reviewer_1_score',
        'Round2 Is this applicant moving on as a GHC Semi-Finalist Reviewer1': 'reviewer1_decn',
        'Round2 Total Score Reviewer2': 'reviewer_2_score',
        'Round2 Is this applicant moving on as a GHC Semi-Finalist Reviewer2': 'reviewer2_decn',
        'Round2 GHC Semifinalist Reviewer2': 'reviewer_2_rank',
    }

    data = pd.read_excel(path)
    rank_data = data[list(column_names)].rename(columns=column_names)
    rank_data = rank_data.dropna(axis=0, how='any')

    # A row with rank 0 from both reviewers is removed. A boolean mask is
    # used rather than dropping by index label so the filter acts on rows,
    # not on labels.
    unranked_by_both = (
        (rank_data['reviewer_1_rank'] == 0.0)
        & (rank_data['reviewer_2_rank'] == 0.0)
    )
    return rank_data[~unranked_by_both]

In [3]:
def find_unique_positions(rank_data):
    """Return the distinct values found in the ``position`` column.

    The result is whatever ``Series.unique`` produces for that column
    (distinct values in order of first appearance).
    """
    position_column = rank_data['position']
    return position_column.unique()

In [4]:
def assign_rank_to_unranked(rank_data, positions):
    """Give every rank-0 entry of one position a rank below the ranked ones.

    Parameters
    ----------
    rank_data : pandas.DataFrame
        Frame with at least ``position``, ``reviewer_1_rank`` and
        ``reviewer_2_rank`` columns.
    positions : scalar
        Despite the plural name, this is a single position value; only rows
        whose ``position`` equals it are kept.

    Returns
    -------
    pandas.DataFrame
        A copy of the matching rows in which each reviewer's 0 ranks are
        replaced by ``max + 1``, ``max + 2``, ... in row order, where
        ``max`` is that reviewer's largest rank within the position.
        ``rank_data`` itself is not modified.
    """
    # Work on an explicit copy: the filtered frame must not write through to
    # (or warn about) the caller's data.
    temp = rank_data[rank_data['position'] == positions].copy()

    for column in ('reviewer_1_rank', 'reviewer_2_rank'):
        unranked = (temp[column] == 0.0).to_numpy()
        n_unranked = int(unranked.sum())
        if not n_unranked:
            continue

        # The original row-by-row loop re-read the column maximum after each
        # assignment, so successive unranked rows received max+1, max+2, ...
        # NOTE(review): this gives unranked candidates distinct ranks in row
        # order rather than a shared (tied) last rank -- confirm intended.
        first_free_rank = temp[column].max() + 1.0
        ranks = temp[column].to_numpy(dtype=float, copy=True)
        ranks[unranked] = first_free_rank + np.arange(n_unranked)
        temp[column] = ranks

    return temp

In [5]:
def find_rank_corelation(temp):
    """Compute the Spearman correlation between the two reviewers' ranks.

    Both ``reviewer_1_rank`` and ``reviewer_2_rank`` are first re-ranked
    with ``rankdata(..., method='average')`` so that ties share their
    average rank; the two re-ranked arrays are then handed to ``spearmanr``,
    whose result object is returned unchanged.
    """
    tie_aware_ranks = [
        rankdata(temp[column], method='average')
        for column in ('reviewer_1_rank', 'reviewer_2_rank')
    ]
    return spearmanr(*tie_aware_ranks)

In [6]:
def build_dataframe_match_data(prev_data, position, match_degree_correlation, match_degree_pvalue):
    """Return ``prev_data`` with one extra row for ``position``.

    The new row holds the position, its correlation and its p-value under
    the columns ``position``, ``match_degree_correlation`` and
    ``match_degree_pvalue``. ``prev_data`` is not modified.
    """
    column_labels = ['position', 'match_degree_correlation', 'match_degree_pvalue']
    row_values = [position, match_degree_correlation, match_degree_pvalue]
    single_row = pd.DataFrame([row_values], columns=column_labels)
    # Row labels are not renumbered on concatenation, so the appended row
    # keeps its own label 0.
    return pd.concat([prev_data, single_row])

In [7]:
def final_data_prep(rank_data, positions):
    """Build the per-position table of reviewer rank agreement.

    For every entry of ``positions`` the unranked (0) entries are given a
    rank, the Spearman correlation between the two reviewers is computed,
    and a row is appended to the result.

    Parameters
    ----------
    rank_data : pandas.DataFrame
        Reviewer data passed through to ``assign_rank_to_unranked``.
    positions : iterable
        Position values to evaluate, one result row each.

    Returns
    -------
    pandas.DataFrame
        Columns ``position``, ``match_degree_correlation`` and
        ``match_degree_pvalue``. An empty frame with those columns is
        returned when ``positions`` is empty (previously this raised
        ``UnboundLocalError`` because the result was only bound inside the
        loop).
    """
    results = pd.DataFrame(columns=['position', 'match_degree_correlation', 'match_degree_pvalue'])

    for each_pos in positions:
        temp = assign_rank_to_unranked(rank_data, each_pos)
        # The spearmanr result unpacks as (correlation, p-value).
        correlation, pvalue = find_rank_corelation(temp)
        results = build_dataframe_match_data(results, each_pos, correlation, pvalue)

    return results

In [8]:
def visualize_matching_data(final_data):
    """Show a Bokeh plot of correlation (x) against p-value (y) in the notebook.

    One circle is drawn per row of ``final_data``, using its
    ``match_degree_correlation``, ``match_degree_pvalue`` and ``position``
    columns; the added hover tool displays the row's position.
    """
    point_source = ColumnDataSource(data={
        'x': final_data.match_degree_correlation,
        'y': final_data.match_degree_pvalue,
        'desc': final_data.position,
    })

    plot = figure(
        x_axis_label='Maching Degree Correlation ',
        y_axis_label='p-value',
        tools=["hover"],
    )
    plot.circle(x='x', y='y', source=point_source)

    # NOTE(review): the figure is already created with a "hover" tool, so
    # this adds a second hover tool alongside it -- confirm both are wanted.
    position_hover = HoverTool(
        tooltips=[('position', '@desc')],
        show_arrow=False,
        point_policy='follow_mouse',
    )
    plot.add_tools(position_hover)

    output_notebook()
    show(plot)

In [9]:
rank_data = select_rows_required();

In [10]:
positions = find_unique_positions(rank_data);

In [11]:
final_data = final_data_prep(rank_data, positions);

  if __name__ == '__main__':
  


In [12]:
visualize_matching_data(final_data)