In [1]:
import requests
from bs4 import BeautifulSoup
import json
import pandas as pd

In [2]:
TOP_UNIV_BASE = "https://www.topuniversities.com"
TOP_UNIV_WEBSITE = "https://www.topuniversities.com/university-rankings/world-university-rankings/2018"
TOP_UNIV_DATA_INDICATOR = "https://www.topuniversities.com/sites/default/files/qs-rankings-data/357051_indicators.txt"

In [3]:
r = requests.get(TOP_UNIV_WEBSITE)
data_indicator = requests.get(TOP_UNIV_DATA_INDICATOR)

soup = BeautifulSoup(r.text, 'html.parser')
indicator_data = json.loads(data_indicator.text)

In [9]:
def parse_int(string):
    tmp = string.replace(',', '')
    return int(tmp)
    
def get_university_info_from_indicator(raw_university):
    overview_soup = BeautifulSoup(raw_university["uni"], 'html.parser')    
    
    name = overview_soup.a.text
    rank = raw_university["overall_rank"]
    region = raw_university["region"]
    country = raw_university["location"]
    
    details_request = requests.get(TOP_UNIV_BASE + "/" +  overview_soup.a.attrs['href'])
    details_soup = BeautifulSoup(details_request.text, 'html.parser')
        
    total_staff = parse_int(details_soup.find("div", {"class": "total faculty"}).find("div", {"class" : "number"}).text)
    inter_staff = parse_int(details_soup.find("div", {"class": "inter faculty"}).find("div", {"class" : "number"}).text)
    total_students = parse_int(details_soup.find("div", {"class": "total student"}).find("div", {"class": "number"}).text)
    inter_students = parse_int(details_soup.find("div", {"class": "total inter"}).find("div", {"class": "number"}).text)
    
    return [name, rank, country, region, inter_staff, total_staff, inter_students, total_students]


In [None]:
univ_list = []

for i, raw_university in enumerate(indicator_data["data"][0:200]):
    print(i, end="\r")
    try:
        univ_list.append(get_university_info_from_indicator(raw_university))
    except:
        pass
        
top_univ_df = pd.DataFrame(univ_list, columns=["name", "rank", "country", "region", "inter_staff", "total_staff", "inter_students", "total_students"])

109

In [None]:
top_univ_df

In [None]:
top_univ_df.to_csv("top_univ_df.csv")

In [18]:
top_univ_df = pd.read_csv("top_univ_df.csv", index_col=False, usecols=range(1, 9))

In [63]:
def print_best_university_with_ratio(ratio_df): 
    sorted_ratio = ratio_df.sort_values()
    best_entry = top_univ_df.loc[[sorted_ratio.index[0]]]#(tmp[0])
    best_entry.loc[:,'ratio'] = sorted_ratio[0]
    display(best_entry)

ratio_a = top_univ_df.total_staff / (top_univ_df.total_students + top_univ_df.total_staff)
ratio_b = top_univ_df.inter_students / top_univ_df.total_students

print("The best university in term of ratio between faculty members and students is")
print_best_university_with_ratio(ratio_a)
print("\n\nThe best university in term of  ratio of international students is")
print_best_university_with_ratio(ratio_b)

The best university in term of ratio between faculty members and students is


Unnamed: 0,name,rank,country,region,inter_staff,total_staff,inter_students,total_students,ratio
180,Vienna University of Technology,182,Austria,Europe,172,706,5199,17479,0.212257




The best university in term of  ratio of international students is


Unnamed: 0,name,rank,country,region,inter_staff,total_staff,inter_students,total_students,ratio
177,Indian Institute of Technology Bombay (IITB),179,India,Asia,19,876,93,9402,0.335863


In [74]:
ratio_d = top_univ_df.groupby(top_univ_df.region)

top_univ_df_with_ratio_a = top_univ_df.copy()
top_univ_df_with_ratio_a.loc[:, "ratio"] = ratio_a

top_univ_df_with_ratio_b = top_univ_df.copy()
top_univ_df_with_ratio_b.loc[:, "ratio"] = ratio_b

ratio_c_a = top_univ_df_with_ratio_a.groupby(top_univ_df_with_ratio_a.country)
ratio_c_b = top_univ_df_with_ratio_b.groupby(top_univ_df_with_ratio_b.region)

display(ratio_c_a.get_group("Austria"))
display(ratio_c_a.max())
#display(ratio_c_b.max("ratio"))

Unnamed: 0,name,rank,country,region,inter_staff,total_staff,inter_students,total_students,ratio
152,University of Vienna,154,Austria,Europe,1400,3411,14468,45967,0.069079
180,Vienna University of Technology,182,Austria,Europe,172,706,5199,17479,0.038823


Unnamed: 0_level_0,name,rank,region,inter_staff,total_staff,inter_students,total_students,ratio
country,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
Argentina,Universidad de Buenos Aires (UBA),75,Latin America,3165,16421,27109,122301,0.118373
Australia,University of Technology Sydney,176,Oceania,1870,3360,20578,57433,0.099738
Austria,Vienna University of Technology,182,Europe,1400,3411,14468,45967,0.069079
Belgium,Vrije Universiteit Brussel (VUB),182,Europe,784,2520,6926,45538,0.161791
Brazil,Universidade de São Paulo,182,Latin America,279,5582,2086,65711,0.078297
Canada,Université de Montréal,152,North America,3905,9581,21910,72207,0.119964
Chile,Pontificia Universidad Católica de Chile (UC),137,Latin America,198,2260,991,27003,0.077231
China,Zhejiang University,114,Asia,1913,5506,7090,42136,0.131704
Denmark,University of Copenhagen,119,Europe,2336,7483,3762,32119,0.192542
Finland,University of Helsinki,137,Europe,535,2645,1831,22419,0.10553
