In [29]:
from pathlib import Path

import pandas as pd
import requests

In [30]:
# Lift pandas' row cap so displayed DataFrames are never truncated.
pd.options.display.max_rows = None

In [31]:
# Download the country catalogue (one dict per country) from REST Countries.
countries_url = "https://restcountries.com/v3.1/all"
# A timeout stops the cell from hanging forever on a stalled connection.
countries_response = requests.get(countries_url, timeout=30)
# Fail loudly on an HTTP error instead of parsing an error payload as data.
countries_response.raise_for_status()
countries : list[dict] = countries_response.json()
# Preview only: each record is large, so rendering the whole list buries the notebook.
countries[:3]

[{'name': {'common': 'Christmas Island',
   'official': 'Territory of Christmas Island',
   'nativeName': {'eng': {'official': 'Territory of Christmas Island',
     'common': 'Christmas Island'}}},
  'tld': ['.cx'],
  'cca2': 'CX',
  'ccn3': '162',
  'cca3': 'CXR',
  'independent': False,
  'status': 'officially-assigned',
  'unMember': False,
  'currencies': {'AUD': {'name': 'Australian dollar', 'symbol': '$'}},
  'idd': {'root': '+6', 'suffixes': ['1']},
  'capital': ['Flying Fish Cove'],
  'altSpellings': ['CX', 'Territory of Christmas Island'],
  'region': 'Oceania',
  'subregion': 'Australia and New Zealand',
  'languages': {'eng': 'English'},
  'translations': {'ara': {'official': 'جزيرة كريسماس',
    'common': 'جزيرة كريسماس'},
   'bre': {'official': 'Tiriad Enez Christmas', 'common': 'Enez Christmas'},
   'ces': {'official': 'Teritorium Vánočního ostrova',
    'common': 'Vánoční ostrov'},
   'cym': {'official': 'Tiriogaeth yr Ynys y Nadolig',
    'common': 'Ynys y Nadolig'},
  

In [32]:
# Build a lookup table: any known spelling of a country -> its common name.
#
# Spellings are collected per source so precedence is explicit when two
# countries claim the same string: a common name always maps to itself, an
# official name beats an alternative spelling, and an alternative spelling is
# only used when nothing stronger claims that key.
by_common_name : dict = {}
by_official_name : dict = {}
by_alt_spelling : dict = {}
for country_item in countries:
    # `or {}` / `or []` also cover keys that are present but null.
    name : dict = country_item.get("name") or {}
    common_name : str = name.get("common", "")
    official_name : str = name.get("official", "")
    if not common_name:
        # Without a common name there is nothing meaningful to map to.
        continue
    by_common_name[common_name] = common_name
    if official_name:
        by_official_name[official_name] = common_name
    alt_spellings : list[str] = country_item.get("altSpellings") or []
    for alt_spelling in alt_spellings:
        # Skip empty strings so "" never becomes a lookup key.
        if alt_spelling:
            by_alt_spelling[alt_spelling] = common_name

# In a dict merge the right-most source wins, so order from weakest to strongest.
countries_dict : dict = {**by_alt_spelling, **by_official_name, **by_common_name}
countries_dict

{'Territory of Christmas Island': 'Christmas Island',
 'CX': 'Christmas Island',
 'State of Eritrea': 'Eritrea',
 'ER': 'Eritrea',
 'ሃገረ ኤርትራ': 'Eritrea',
 'Dawlat Iritriyá': 'Eritrea',
 'ʾErtrā': 'Eritrea',
 'Iritriyā': 'Eritrea',
 'Independent State of Samoa': 'Samoa',
 'WS': 'Samoa',
 'Malo Saʻoloto Tutoʻatasi o Sāmoa': 'Samoa',
 'Republic of North Macedonia': 'North Macedonia',
 'MK': 'North Macedonia',
 'The former Yugoslav Republic of Macedonia': 'North Macedonia',
 'Macedonia, The Former Yugoslav Republic of': 'North Macedonia',
 'Република Северна Македонија': 'North Macedonia',
 'Republic of Djibouti': 'Djibouti',
 'DJ': 'Djibouti',
 'Jabuuti': 'Djibouti',
 'Gabuuti': 'Djibouti',
 'République de Djibouti': 'Djibouti',
 'Gabuutih Ummuuno': 'Djibouti',
 'Jamhuuriyadda Jabuuti': 'Djibouti',
 'Hashemite Kingdom of Jordan': 'Jordan',
 'JO': 'Jordan',
 'al-Mamlakah al-Urdunīyah al-Hāshimīyah': 'Jordan',
 'Islamic Republic of Pakistan': 'Pakistan',
 'PK': 'Pakistan',
 'Pākistān': 'Pa

In [33]:
def trim_bracket(university : str) -> str:
    """Remove a parenthesised qualifier from a name.

    If the text contains an opening bracket that is followed by a closing
    bracket, everything from the opening bracket onwards is dropped and the
    remainder is stripped of surrounding whitespace. Otherwise the text is
    returned exactly as given.

    Args:
        university: The raw name (also used for country/location strings).

    Returns:
        The name without its bracketed part, or the input unchanged when it
        has no well-formed "(...)" section.
    """
    start = university.find("(")
    if start == -1:
        # No opening bracket: nothing to trim. Without this guard a lone ")"
        # would make the slice below `university[:-1]` and eat a character.
        return university
    # Only a ")" that comes after the "(" counts as closing it.
    end = university.find(")", start + 1)
    if end == -1:
        return university
    return university[:start].strip()

In [34]:
def map_university(university : str) -> str:
    """Normalise a university name to one canonical spelling.

    Names listed in the alias table are replaced by their mapped value and
    returned as-is. Any other name has a leading "The " removed, the
    typographic apostrophe replaced by a plain one, and " And " lower-cased
    to " and ".

    Args:
        university: The university name to normalise.

    Returns:
        The normalised university name.
    """
    university_dict = {
        "LMU Munich": "University of Munich",
        "Nanyang Technological University, Singapore": "Nanyang Technological University",
        "University of Minnesota": "University of Minnesota, Twin Cities",
        "Purdue University West Lafayette": "Purdue University - West Lafayette",
        "UNSW Sydney": "University of New South Wales",
        "UCL": "University College London",
        "Washington University in St Louis": "Washington University in St. Louis"
    }
    if university in university_dict:
        return university_dict[university]
    # Drop only the leading article; a "The " in the middle of the name
    # must survive, so slice the prefix off rather than replacing everywhere.
    if university.startswith("The "):
        university = university[len("The "):]
    # Apply the punctuation/casing clean-up to every name, including those
    # that started with "The ".
    return university.replace('’', "'").replace(" And ", " and ")

In [35]:
# Download the Times Higher Education 2023 ranking feed and reduce each entry
# to a normalised (university, country, rank_times) record.
times_url = "https://www.timeshighereducation.com/sites/default/files/the_data_rankings/world_university_rankings_2023_0__83be12210294c582db8740ee29673120.json"
times_headers = {
  'Cookie': 'geoCountry=VN; siteCountry=GB',
  'User-Agent': 'PostmanRuntime/7.32.3'
}
# A timeout stops the cell from hanging forever on a stalled connection.
times_response = requests.get(times_url, headers=times_headers, timeout=30)
# Fail loudly on an HTTP error instead of parsing an error page as JSON.
times_response.raise_for_status()
print(times_response)
times_data : dict = times_response.json()
# `or []` keeps the loop safe when the "data" key is missing or null.
times_items : list[dict] = times_data.get("data") or []
times_rankings = []
for times_index, times_item in enumerate(times_items):
    # Strip bracketed qualifiers first, then map the name to its canonical spelling.
    raw_university : str = (times_item.get("name") or "").strip()
    university : str = map_university(trim_bracket(raw_university))
    country_name : str = trim_bracket(times_item.get("location") or "")
    # Translate known spellings to the common name; unknown names pass through.
    country = countries_dict.get(country_name, country_name)
    # Drop the "SAR" marker from names that carry it.
    country = country.replace("SAR", "").strip()
    times_rankings.append({
        "university": university,
        "country": country,
        # 1-based position of the entry in the feed, not the feed's own "rank" field.
        "rank_times": times_index + 1
    })

<Response [200]>


In [36]:
# Tabulate the normalised records and persist them for later use.
times_rankings_data_frame = pd.DataFrame(times_rankings)
# to_csv() does not create missing folders, so make sure ./csv exists first;
# otherwise a fresh checkout fails on this cell.
times_csv_path = Path("./csv/times.csv")
times_csv_path.parent.mkdir(parents=True, exist_ok=True)
times_rankings_data_frame.to_csv(times_csv_path, index=False)
# Distinct values in order of first appearance. Series.unique() returns a
# NumPy array (not a list), hence no `list[str]` annotation here.
times_universities = times_rankings_data_frame['university'].unique()
times_countries = times_rankings_data_frame['country'].unique()
times_rankings_data_frame

Unnamed: 0,university,country,rank_times
0,University of Oxford,United Kingdom,1
1,Harvard University,United States,2
2,University of Cambridge,United Kingdom,3
3,Stanford University,United States,4
4,Massachusetts Institute of Technology,United States,5
5,California Institute of Technology,United States,6
6,Princeton University,United States,7
7,"University of California, Berkeley",United States,8
8,Yale University,United States,9
9,Imperial College London,United Kingdom,10


In [37]:
# Keep only the rows whose country is Vietnam.
vietnam_times_rankings_data_frame = times_rankings_data_frame.loc[
    times_rankings_data_frame["country"].eq("Vietnam")
]
# Renumber the rows for display; the previous row label is kept as an "index" column.
vietnam_times_rankings_data_frame.reset_index(drop=False)

Unnamed: 0,index,university,country,rank_times
0,422,Duy Tan University,Vietnam,423
1,489,Ton Duc Thang University,Vietnam,490
2,1190,"Vietnam National University, Hanoi",Vietnam,1191
3,1608,Hanoi University of Science and Technology,Vietnam,1609
4,1612,Hue University,Vietnam,1613
5,1785,Vietnam National University,Vietnam,1786


In [38]:
# Keep only the rows whose country is Australia.
australia_times_rankings_data_frame = times_rankings_data_frame.loc[
    times_rankings_data_frame["country"].eq("Australia")
]
# Renumber the rows for display; the previous row label is kept as an "index" column.
australia_times_rankings_data_frame.reset_index(drop=False)

Unnamed: 0,index,university,country,rank_times
0,33,University of Melbourne,Australia,34
1,43,Monash University,Australia,44
2,52,University of Queensland,Australia,53
3,54,University of Sydney,Australia,55
4,61,Australian National University,Australia,62
5,71,University of New South Wales,Australia,72
6,87,University of Adelaide,Australia,88
7,131,University of Western Australia,Australia,132
8,132,University of Technology Sydney,Australia,133
9,174,Macquarie University,Australia,175


In [39]:
# Keep only the rows whose country is the United Kingdom.
united_kingdom_times_rankings_data_frame = times_rankings_data_frame.loc[
    times_rankings_data_frame["country"].eq("United Kingdom")
]
# Renumber the rows for display; the previous row label is kept as an "index" column.
united_kingdom_times_rankings_data_frame.reset_index(drop=False)

Unnamed: 0,index,university,country,rank_times
0,0,University of Oxford,United Kingdom,1
1,2,University of Cambridge,United Kingdom,3
2,9,Imperial College London,United Kingdom,10
3,21,University College London,United Kingdom,22
4,28,University of Edinburgh,United Kingdom,29
5,34,King's College London,United Kingdom,35
6,36,London School of Economics and Political Science,United Kingdom,37
7,53,University of Manchester,United Kingdom,54
8,75,University of Bristol,United Kingdom,76
9,82,University of Glasgow,United Kingdom,83


In [40]:
# Keep only the rows whose country is the United States.
united_states_times_rankings_data_frame = times_rankings_data_frame.loc[
    times_rankings_data_frame["country"].eq("United States")
]
# Renumber the rows for display; the previous row label is kept as an "index" column.
united_states_times_rankings_data_frame.reset_index(drop=False)

Unnamed: 0,index,university,country,rank_times
0,1,Harvard University,United States,2
1,3,Stanford University,United States,4
2,4,Massachusetts Institute of Technology,United States,5
3,5,California Institute of Technology,United States,6
4,6,Princeton University,United States,7
5,7,"University of California, Berkeley",United States,8
6,8,Yale University,United States,9
7,10,Columbia University,United States,11
8,12,University of Chicago,United States,13
9,13,University of Pennsylvania,United States,14
