In [2]:
import networkx as nx
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import scipy.stats as stats

In [9]:
def compare_lists(list1, list2):
    """Return the absolute Spearman correlation coefficient of two sequences.

    Parameters
    ----------
    list1, list2
        Sequences handed unchanged to ``scipy.stats.spearmanr``.

    Returns
    -------
    The absolute value of the coefficient, or ``0`` when the coefficient
    is NaN.

    NOTE(review): the callers in this notebook pass lists of names, so the
    coefficient is presumably based on how SciPy orders those values rather
    than on each name's position in the two lists -- confirm this is the
    intended metric.
    """
    result = stats.spearmanr(list1, list2)
    magnitude = abs(result[0])
    # An undefined coefficient is reported as 0 instead of NaN.
    return 0 if np.isnan(magnitude) else magnitude
# Load the pairwise matrix; the CSV header row supplies the node labels.
adj = pd.read_csv("./running_all_d3.csv", header=0)
labels = adj.columns.tolist()
A = adj.values

# Weighted directed graph: one edge u -> v (row label -> column label) for
# every non-zero entry A[i, j], carrying that entry as its 'weight'.
G = nx.DiGraph()
# Register every label before adding edges. A label whose row and column are
# entirely zero would otherwise never become a node, and
# nx.to_numpy_array(G, nodelist=labels) rejects nodelist entries that are
# missing from the graph.
G.add_nodes_from(labels)
for i, u in enumerate(labels):
    for j, v in enumerate(labels):
        w = A[i, j]
        if w != 0:
            G.add_edge(u, v, weight=w)
def springRank(G, nodelist, alpha=0.1, change_adj=False, change_minus_one=False):
    """Solve the regularised linear ranking system and return a score per node.

    With ``A`` the weighted adjacency matrix and ``k_in`` / ``k_out`` its
    column / row sums, the scores ``s`` solve::

        (diag(k_out) + diag(k_in) - (A + A.T) + alpha * I) s = k_in - k_out

    NOTE(review): the right-hand side is ``k_in - k_out``. The published
    SpringRank system uses ``d_out - d_in`` when ``A[i, j]`` counts wins of
    ``i`` over ``j`` -- confirm the edge-direction convention of the input.

    Parameters
    ----------
    G : networkx graph
        Source of the adjacency matrix (edge attribute ``'weight'``).
        Not consulted when ``change_adj`` is true.
    nodelist : sequence
        Row/column order of ``A``; its entries become the keys of the result.
    alpha : float, default 0.1
        Coefficient of the identity term added to the system matrix.
    change_adj : bool, default False
        When true, read the matrix from ``./running_all_d3_diff.csv``
        instead of deriving it from ``G``.
    change_minus_one : bool, default False
        Accepted for backward compatibility; it has no effect.

    Returns
    -------
    dict
        Maps each entry of ``nodelist`` to its solved score.

    Raises
    ------
    ValueError
        If the adjacency matrix is not square with one row per ``nodelist``
        entry.
    numpy.linalg.LinAlgError
        If the system matrix is singular.
    """
    if change_adj:
        # Alternate matrix kept on disk; G is ignored on this path.
        A = pd.read_csv("./running_all_d3_diff.csv", header=0).values
    else:
        A = nx.to_numpy_array(G, nodelist=nodelist, weight='weight')

    n = len(nodelist)
    if A.shape != (n, n):
        # Fail with a clear message instead of an opaque broadcasting or
        # indexing error further down.
        raise ValueError(
            f"adjacency matrix has shape {A.shape}, expected ({n}, {n}) "
            "to match nodelist"
        )

    k_in = np.sum(A, axis=0)
    k_out = np.sum(A, axis=1)
    # Entry i of the right-hand side is sum_j (A[j, i] - A[i, j]), which is
    # exactly k_in[i] - k_out[i]; no explicit double loop is needed.
    b = k_in - k_out
    M = np.diag(k_out) + np.diag(k_in) - (A + A.T) + alpha * np.identity(n)
    s = np.linalg.solve(M, b)
    return dict(zip(nodelist, s))
# Reference ordering that the computed rankings are scored against, best
# first. Presumably the actual final placings -- confirm the source.
final_rankings = [
    "MIT",
    "U. of Chicago",
    "Williams",
    "NYU",
    "Johns Hopkins",
    "Colorado College",
    "Emory",
    "Washington and Lee",
    "SUNY Geneseo",
    "Washington U.",
    "Claremont-Mudd-Scripps",
    "RPI",
    "Wis.-La Crosse",
    "Amherst",
    "Calvin",
    "Tufts",
    "St. Olaf",
    "Carleton",
    "UC Santa Cruz",
    "Vassar",
    "George Fox",
    "Middlebury",
    "Connecticut College",
    "Wesleyan",
    "Carnegie Mellon",
    "Wartburg",
    "Lynchburg",
    "Trine",
    "DePauw",
    "Pomona-Pitzer",
    "Coast Guard",
    "Rowan",
]
def ordered(scores):
    """Return labels sorted by descending score, cut to the reference length.

    Parameters
    ----------
    scores : iterable
        One score per entry of the module-level ``labels``, in the same order.

    Returns
    -------
    list
        At most ``len(final_rankings)`` labels, highest score first. Labels
        with equal scores keep their relative order from ``labels``.
    """
    score_by_label = dict(zip(labels, scores))
    ranked = sorted(score_by_label.items(), key=lambda item: item[1], reverse=True)
    # Slice instead of indexing position by position: when there are fewer
    # scored labels than reference entries this returns what is available
    # rather than raising IndexError.
    return [label for label, _score in ranked[:len(final_rankings)]]
# Passed to springRank as its `alpha`: the coefficient of the identity term
# added to the system matrix before solving.
alpha = 0.1
def do_springrank(G):
    """Rank the module-level ``labels`` by their ``springRank`` score on ``G``.

    Calls ``springRank`` with the module-level ``alpha``, lays the resulting
    scores out in ``labels`` order and returns whatever ``ordered`` produces
    for that score array.
    """
    score_of = springRank(G, labels, alpha, change_minus_one=True)
    # Re-align the per-node dict with `labels`, the order `ordered` relies on.
    aligned = [score_of[name] for name in labels]
    return ordered(np.array(aligned))
def top_k_mse(k, rankings, final_rankings):
    """
    Score the first ``k`` entries of ``rankings`` against the first ``k``
    entries of ``final_rankings``.

    Despite the name, no mean squared error is computed here: the two
    length-``k`` prefixes are handed to ``compare_lists`` and its return
    value is passed through unchanged.
    """
    head, reference_head = rankings[:k], final_rankings[:k]
    return compare_lists(head, reference_head)
# Score the SpringRank ordering against the reference list for every prefix
# length k = 1..30; each value is what top_k_mse returns for that k.
spring_rankings = do_springrank(G)
springrank_mses = []
for k in range(1, 31):
    springrank_mse = top_k_mse(k, spring_rankings, final_rankings)
    springrank_mses += [springrank_mse]
print("SpringRank MSEs for top k rankings:", springrank_mses, sep="\n")

SpringRank MSEs for top k rankings:
[0, 0.9999999999999999, 0.5, 0.39999999999999997, 0.7, 0.7714285714285715, 0.7857142857142859, 0.523809523809524, 0.5, 0.33333333333333326, 0.2545454545454546, 0.2797202797202798, 0.12637362637362637, 0.09010989010989012, 0.07857142857142857, 0.008823529411764706, 0.03921568627450981, 0.07739938080495357, 0.010526315789473684, 0.04360902255639097, 0.03506493506493507, 0.042348955392433656, 0.04545454545454545, 0.13304347826086957, 0.03461538461538462, 0.009230769230769232, 0.030525030525030524, 0.012588943623426383, 0.05714285714285713, 0.06963292547274749]
