In [1]:
import pandas as pd
import numpy as np

In [2]:
df = pd.read_csv('qs_data.csv', encoding='latin1')

In [3]:
df = df[df['year']==2026].pivot_table(index='institution', columns='metric', values='score').reset_index()

In [4]:
df.sample(3)

metric,institution,Academic Reputation,Citations per Faculty,Employer Reputation,Employment Outcomes,Faculty Student Ratio,International Faculty Ratio,International Research Network,International Student Diversity,International Student Ratio,Overall,Sustainability
1475,Wuhan University,53.6,98.0,46.7,58.9,21.3,47.1,77.8,8.2,10.7,57.3,60.8
216,Duy Tan University,13.7,76.0,27.7,6.4,6.3,43.4,87.7,6.3,2.1,33.0,58.7
1157,University of Engineering & Technology (UET) L...,11.1,15.1,49.1,54.1,20.4,,56.5,6.5,2.3,,40.7


In [5]:
weights = {
'Academic Reputation': 0.3,
'Citations per Faculty': 0.2,
'Employer Reputation': 0.15,
'Faculty Student Ratio': 0.1,
'Employment Outcomes': 0.05,
'International Faculty Ratio': 0.05,
'International Research Network': 0.05,
'International Student Ratio': 0.05,
'Sustainability': 0.05,
'International Student Diversity': 0.0
}

metric_cols = [
    'Academic Reputation',
    'Citations per Faculty',
    'Employer Reputation',
    'Faculty Student Ratio',
    'Employment Outcomes',
    'International Faculty Ratio',
    'International Research Network',
    'International Student Ratio',
    'Sustainability',
    'International Student Diversity'
]

In [6]:
# We now need the weighted average of all the scores per university.

# Extract the relevant columns
metrics_df = df[metric_cols].copy()

# Create a weight vector in the same order as metric_cols
weight_vector = np.array([weights[col] for col in metric_cols])

# Compute weighted average for each row, handling missing values:
# Adjust weights to only include available values per row

def weighted_average(row, weights_dict):
    values = row.values.astype(float)
    metric_names = row.index
    mask = ~np.isnan(values)

    if mask.sum() == 0:
        return np.nan  # skip if all values are NaN

    # Get the corresponding weights for the non-NaN metrics
    used_weights = np.array([weights_dict[metric] for metric in metric_names[mask]])
    used_weights = used_weights / used_weights.sum()  # normalize weights

    return np.dot(values[mask], used_weights)

# Apply row-wise
df['Weighted Score'] = df[metric_cols].apply(lambda row: weighted_average(row, weights), axis=1)

# View result
print(df[['institution', 'Weighted Score']].head())


metric               institution  Weighted Score
0                 ADA University       15.394737
1       AGH University of Krakow       23.465000
2             Aalborg University       45.610000
3               Aalto University       66.270000
4              Aarhus University       64.360000


In [17]:
df[['Weighted Score', 'Overall']]

metric,Weighted Score,Overall
0,15.394737,
1,23.465000,
2,45.610000,44.9
3,66.270000,66.3
4,64.360000,64.3
...,...,...
1496,29.610000,28.4
1497,83.430000,84.0
1498,30.468421,27.7
1499,23.375000,


In [14]:
df['Weighted Score']

0       15.394737
1       23.465000
2       45.610000
3       66.270000
4       64.360000
          ...    
1496    29.610000
1497    83.430000
1498    30.468421
1499    23.375000
1500    89.440000
Name: Weighted Score, Length: 1501, dtype: float64

In [7]:
# Compute min and max of the 'Weighted Score'
min_score = df['Weighted Score'].min()
max_score = df['Weighted Score'].max()

# Apply min-max normalization to scale between 1 and 100
df['Normalized Score'] = ((df['Weighted Score'] - min_score) / (max_score - min_score)) * (100 - 1) + 1

# View result
print(df[['institution', 'Weighted Score', 'Normalized Score']].head())

metric               institution  Weighted Score  Normalized Score
0                 ADA University       15.394737          8.331623
1       AGH University of Krakow       23.465000         17.182850
2             Aalborg University       45.610000         41.470836
3               Aalto University       66.270000         64.130117
4              Aarhus University       64.360000         62.035285


In [8]:
df[df['Overall']==39.4]

metric,institution,Academic Reputation,Citations per Faculty,Employer Reputation,Employment Outcomes,Faculty Student Ratio,International Faculty Ratio,International Research Network,International Student Diversity,International Student Ratio,Overall,Sustainability,Weighted Score,Normalized Score
798,The American University in Cairo,40.8,12.2,46.9,88.6,39.0,98.0,46.8,11.7,7.1,39.4,51.6,40.22,35.559242
824,The University of East Anglia,24.7,55.5,15.0,31.8,24.0,81.9,86.8,54.0,49.3,39.4,91.1,40.205,35.542791
1208,University of Luxembourg,13.2,39.5,8.2,58.8,87.2,100.0,68.1,100.0,100.0,39.4,42.4,40.275,35.619565


In [12]:
df['new_rank'] = df['Weighted Score'].rank(method='min', ascending=False).astype(int)


In [16]:
df['Weighted Score'].sample()

602    22.72
Name: Weighted Score, dtype: float64

In [26]:
df.institution.unique()

array(['ADA University', 'AGH University of Krakow', 'Aalborg University',
       ..., 'Zhengzhou University',
       'Zurich University of Applied Sciences (ZHAW)',
       'École Polytechnique Fédérale de Lausanne'], dtype=object)

In [33]:
new_var = df.loc[df["institution"] == 'Bogazici University', 'Sustainability'].iat[0]
new_var

68.5

In [13]:
df.sample(3)

metric,institution,Academic Reputation,Citations per Faculty,Employer Reputation,Employment Outcomes,Faculty Student Ratio,International Faculty Ratio,International Research Network,International Student Diversity,International Student Ratio,Overall,Sustainability,Weighted Score,Normalized Score,new_rank
1041,"Universita' degli Studi di Napoli ""Parthenope""",5.6,47.2,3.4,1.8,4.2,2.0,33.3,8.7,4.2,,41.2,16.175,9.187393,1146
116,Bogazici University,36.2,23.7,85.8,88.7,5.1,28.2,49.0,12.6,7.5,40.3,68.5,41.075,36.496981,372
837,The University of Leeds,81.8,48.8,82.1,62.9,39.7,89.7,99.2,96.5,96.3,72.0,76.4,71.81,70.206226,86


In [11]:
df['Weighted Score'].rank(method='min', ascending=False).astype(int)

0       1182
1        825
2        308
3        115
4        131
        ... 
1496     599
1497      49
1498     570
1499     829
1500      22
Name: Weighted Score, Length: 1501, dtype: int32