In [1]:
import pandas as pd
import numpy as np

In [2]:
df = pd.read_csv('qs_data.csv', encoding='latin1')

In [3]:
df = df[df['year']==2026].pivot_table(index='institution', columns='metric', values='score').reset_index()

In [4]:
df.sample(3)

metric,institution,Academic Reputation,Citations per Faculty,Employer Reputation,Employment Outcomes,Faculty Student Ratio,International Faculty Ratio,International Research Network,International Student Diversity,International Student Ratio,Overall,Sustainability
86,Bandung Institute of Technology (ITB),58.5,5.4,85.3,68.6,54.7,94.7,39.8,13.7,8.0,49.9,59.2
763,Tamkang University,7.8,11.0,9.9,35.6,5.2,8.0,24.6,16.4,11.6,,45.7
683,Satbayev University (previously Kazakh Technic...,40.6,1.7,58.8,33.0,94.2,89.2,25.7,48.5,53.7,42.5,48.7


In [5]:
weights = {
'Academic Reputation': 0.3,
'Citations per Faculty': 0.2,
'Employer Reputation': 0.15,
'Faculty Student Ratio': 0.1,
'Employment Outcomes': 0.05,
'International Faculty Ratio': 0.05,
'International Research Network': 0.05,
'International Student Ratio': 0.05,
'Sustainability': 0.05,
'International Student Diversity': 0.0
}

metric_cols = [
    'Academic Reputation',
    'Citations per Faculty',
    'Employer Reputation',
    'Faculty Student Ratio',
    'Employment Outcomes',
    'International Faculty Ratio',
    'International Research Network',
    'International Student Ratio',
    'Sustainability',
    'International Student Diversity'
]

In [6]:
# We now need the weighted average of all the scores per university.

# Extract the relevant columns
metrics_df = df[metric_cols].copy()

# Create a weight vector in the same order as metric_cols
weight_vector = np.array([weights[col] for col in metric_cols])

# Compute weighted average for each row, handling missing values:
# Adjust weights to only include available values per row

def weighted_average(row, weights_dict):
    values = row.values.astype(float)
    metric_names = row.index
    mask = ~np.isnan(values)

    if mask.sum() == 0:
        return np.nan  # skip if all values are NaN

    # Get the corresponding weights for the non-NaN metrics
    used_weights = np.array([weights_dict[metric] for metric in metric_names[mask]])
    used_weights = used_weights / used_weights.sum()  # normalize weights

    return np.dot(values[mask], used_weights)

# Apply row-wise
df['Weighted Score'] = df[metric_cols].apply(lambda row: weighted_average(row, weights), axis=1)

# View result
print(df[['institution', 'Weighted Score']].head())


metric               institution  Weighted Score
0                 ADA University       15.394737
1       AGH University of Krakow       23.465000
2             Aalborg University       45.610000
3               Aalto University       66.270000
4              Aarhus University       64.360000


In [7]:
# Compute min and max of the 'Weighted Score'
min_score = df['Weighted Score'].min()
max_score = df['Weighted Score'].max()

# Apply min-max normalization to scale between 1 and 100
df['Normalized Score'] = ((df['Weighted Score'] - min_score) / (max_score - min_score)) * (100 - 1) + 1

# View result
print(df[['institution', 'Weighted Score', 'Normalized Score']].head())

metric               institution  Weighted Score  Normalized Score
0                 ADA University       15.394737          8.331623
1       AGH University of Krakow       23.465000         17.182850
2             Aalborg University       45.610000         41.470836
3               Aalto University       66.270000         64.130117
4              Aarhus University       64.360000         62.035285


In [8]:
df[df['Overall']==39.4]

metric,institution,Academic Reputation,Citations per Faculty,Employer Reputation,Employment Outcomes,Faculty Student Ratio,International Faculty Ratio,International Research Network,International Student Diversity,International Student Ratio,Overall,Sustainability,Weighted Score,Normalized Score
798,The American University in Cairo,40.8,12.2,46.9,88.6,39.0,98.0,46.8,11.7,7.1,39.4,51.6,40.22,35.559242
824,The University of East Anglia,24.7,55.5,15.0,31.8,24.0,81.9,86.8,54.0,49.3,39.4,91.1,40.205,35.542791
1208,University of Luxembourg,13.2,39.5,8.2,58.8,87.2,100.0,68.1,100.0,100.0,39.4,42.4,40.275,35.619565


In [9]:
df['new_rank'] = df['Weighted Score'].rank(method='min', ascending=False).astype(int)


In [10]:
df.sample(3)

metric,institution,Academic Reputation,Citations per Faculty,Employer Reputation,Employment Outcomes,Faculty Student Ratio,International Faculty Ratio,International Research Network,International Student Diversity,International Student Ratio,Overall,Sustainability,Weighted Score,Normalized Score,new_rank
352,Islamic University of Madinah,8.1,3.0,3.5,2.6,7.8,46.5,19.1,100.0,100.0,,7.6,13.125,5.842242,1294
183,Columbia University,99.9,44.5,99.9,100.0,100.0,48.5,98.4,72.9,98.1,85.6,77.9,85.0,84.672631,39
84,Baku State University,22.0,1.9,20.1,64.1,94.0,13.4,22.1,11.2,7.0,25.6,43.3,26.89,20.93929,693


In [12]:
df['Weighted Score'].rank(method='min', ascending=False).astype(int)

0       1182
1        825
2        308
3        115
4        131
        ... 
1496     599
1497      49
1498     570
1499     829
1500      22
Name: Weighted Score, Length: 1501, dtype: int32