# CSV to JSON

Once data collection is complete, all collected files are converted into JSON format and combined into a single output file.

In [1]:
# Output: path of the combined JSON file that the last cell of this notebook writes.
OUTPUT_PATHNAME = './world-campus-220212-1.json'

In [2]:
import numpy as np
import pandas as pd
pd.options.display.max_columns = None

import json
import itertools

# Inputs: source files read by the cells below.
# Program table, exported under the 'program' key.
PRGM_PATHNAME = '../univ-program/program.csv'

# University table, already joined with the QS ranking columns (tot-*, cse-*, hum-*).
UNIV_PATHNAME = '../qs-rankings/univ-with-qs.csv'  # With QS Rankings

# University -> city lookup; supplies the city ID that is merged into the university table.
UNIV_CITY_PATHNAME = '../weather/univ-city.csv'    # University - City (for weather)
# Per-city weather records, indexed by 'cityId' when loaded.
WTHR_PATHNAME = '../weather/city-weather.json'

# NOTE(review): not read by any cell visible in this notebook - confirm whether it is still needed.
CTRY_PATHNAME = '../country/country-stat.csv'

# Country name -> country code lookup, merged into the university table.
CTRY_CODE_PATHNAME = '../covid19/country-code.csv' # Country - Code (for COVID-19 Graph)
# COVID-19 graph table, indexed by 'label' when loaded.
COVD_PATHNAME = '../covid19/covid19-graph.csv'

In [3]:
# Country spellings found in the source files -> the spelling used in the output.
ctry_mapper = {
    'UK': 'United Kingdom',
    'United States of America': 'United States'
}

def update_country(df, mapper=None):
    """Normalise country names in place in every country column of ``df``.

    A country column is any column whose label is a string ending in
    ``'country'`` (for example ``'country'`` or ``'univ-country'``).
    Labels that are not strings are skipped instead of raising.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame to update. It is modified in place.
    mapper : dict, optional
        ``{old_name: new_name}`` replacements. Defaults to the module-level
        ``ctry_mapper``.

    Returns
    -------
    None
    """
    if mapper is None:
        mapper = ctry_mapper

    # Non-string labels (e.g. integer column names) have no .endswith, so filter them out.
    country_cols = [
        label for label in df.columns
        if isinstance(label, str) and label.endswith('country')
    ]

    for col_name in country_cols:
        # Replacements are applied one after another, in mapper order.
        for old_name, new_name in mapper.items():
            df.loc[df[col_name] == old_name, col_name] = new_name

# Accumulator for the final JSON document; later cells add one top-level key each.
output_info = {}

## University (+ City ID + Country Code)

In [4]:
# University table
df_univ = pd.read_csv(UNIV_PATHNAME)
print('univ', df_univ.columns)

# University -> city lookup, with columns renamed to the hyphenated names used below
df_city = pd.read_csv(UNIV_CITY_PATHNAME).rename(
    columns={'country': 'univ-country', 'city': 'univ-city', 'CityId': 'city-id'}
)
print('city', df_city.columns)

# Country -> code lookup, with the code column renamed to 'country-code'
df_code = pd.read_csv(CTRY_CODE_PATHNAME).rename(columns={'code': 'country-code'})
print('code', df_code.columns)

univ Index(['sequence', 'name', 'region', 'country', 'city', 'official-link',
       'established', 'student-number', 'faculty-number', 'campus-size',
       'map-iframe', 'loc', 'tot-rank', 'tot-score',
       'tot-international-students-ratio', 'tot-international-faculty-ratio',
       'tot-faculty-student ratio', 'tot-citations-per-faculty',
       'tot-academic-reputation', 'tot-employer-reputation', 'cse-rank',
       'cse-overall-score', 'cse-h-index-citations', 'cse-citations-per paper',
       'cse-academic-reputation', 'cse-employer-reputation', 'hum-rank',
       'hum-overall-score', 'hum-h-index-citations', 'hum-citations-per-paper',
       'hum-academic-reputation', 'hum-employer-reputation'],
      dtype='object')
city Index(['sequence', 'name', 'region', 'univ-country', 'univ-city', 'city-id'], dtype='object')
code Index(['univ-country', 'country-code'], dtype='object')


In [5]:
# Attach the city ID to each university, matched on 'sequence'.
# Only 'city-id' is taken from df_city: df_univ already has its own 'city'
# column, so also bringing in 'univ-city' and renaming it to 'city' left the
# frame with two columns labelled 'city'. The 'city' column is dropped later
# in this notebook, so the exported data is unchanged.
df_univ = pd.merge(left=df_univ, right=df_city[['sequence', 'city-id']],
                   on='sequence')

In [6]:
# Left-join the country code on the country name; rows without a match keep NaN
df_univ = df_univ.merge(
    df_code,
    how='left',
    left_on='country',
    right_on='univ-country',
)

In [7]:
# Remove the join keys and the city name, which are not exported
df_univ = df_univ.drop(columns=['sequence', 'city', 'univ-country'])
df_univ.tail()

Unnamed: 0,name,region,country,official-link,established,student-number,faculty-number,campus-size,map-iframe,loc,tot-rank,tot-score,tot-international-students-ratio,tot-international-faculty-ratio,tot-faculty-student ratio,tot-citations-per-faculty,tot-academic-reputation,tot-employer-reputation,cse-rank,cse-overall-score,cse-h-index-citations,cse-citations-per paper,cse-academic-reputation,cse-employer-reputation,hum-rank,hum-overall-score,hum-h-index-citations,hum-citations-per-paper,hum-academic-reputation,hum-employer-reputation,city-id,country-code
83,Kuwait University,Middle East,Kuwait,,,,,,,"Kuwait City,Kuwait",1001-1200,-,28.4,19.3,5.1,3.4,11.8,6.9,501-550,,,,,,501-550,,,,,,1498.0,KWT
84,Queensland University of Technology,Oceania,Australia,,,,,,,"Brisbane,Australia",213,42.1,43.3,98.3,8.7,59.0,42.0,44.8,=132,66.4,80.8,85.6,57.1,62.1,=132,66.4,80.8,85.6,57.1,62.1,320.0,AUS
85,University of South Australia,Oceania,Australia,,,,,,,"Magill,Australia",=326,32.8,70.4,100.0,20.6,45.6,23.1,16.7,251-300,,,,,,251-300,,,,,,1676.0,AUS
86,University of New South Wales,Oceania,Australia,,,,,,,"Sydney,Australia",43,77.7,99.8,100.0,15.2,93.5,90.4,94.4,=59,73.4,87.0,83.4,61.9,76.8,=59,73.4,87.0,83.4,61.9,76.8,300.0,AUS
87,Deakin University,Oceania,Australia,,,,,,,"Geelong,Australia",283,36.1,72.4,98.7,4.4,57.1,28.1,38.0,201-250,,,85.4,,,201-250,,,85.4,,,402.0,AUS


In [8]:
# Drop fully duplicated rows, then index the table by university name
df_univ = df_univ.drop_duplicates(ignore_index=True).set_index('name')
df_univ.head()

Unnamed: 0_level_0,region,country,official-link,established,student-number,faculty-number,campus-size,map-iframe,loc,tot-rank,tot-score,tot-international-students-ratio,tot-international-faculty-ratio,tot-faculty-student ratio,tot-citations-per-faculty,tot-academic-reputation,tot-employer-reputation,cse-rank,cse-overall-score,cse-h-index-citations,cse-citations-per paper,cse-academic-reputation,cse-employer-reputation,hum-rank,hum-overall-score,hum-h-index-citations,hum-citations-per-paper,hum-academic-reputation,hum-employer-reputation,city-id,country-code
name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1,Unnamed: 24_level_1,Unnamed: 25_level_1,Unnamed: 26_level_1,Unnamed: 27_level_1,Unnamed: 28_level_1,Unnamed: 29_level_1,Unnamed: 30_level_1,Unnamed: 31_level_1
GE3(Global Engineering Education Exchange),Others,,https://globale3.studioabroad.com,,,,,,,,,,,,,,,,,,,,,,,,,,,,
ISEP(International Student Exchange Programs),Others,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
University of Twente,Europe,Netherlands,http://www.utwente.nl/,1961.0,12544.0,3150.0,146.0,https://www.google.com/maps/embed?pb=!1m18!1m1...,"Enschede,Netherlands",=189,45.5,89.1,98.8,37.1,79.3,21.0,42.3,151-200,,,,,,151-200,,,,,,145.0,NLD
Amsterdam University of Applied Sciences,Europe,Netherlands,,,,,,,,,,,,,,,,,,,,,,,,,,,,,NLD
Fontys University of Applied Sciences,Europe,Netherlands,,,,,,,,,,,,,,,,,,,,,,,,,,,,,NLD


In [9]:
# Normalise country names, then store the table keyed by its index (university name)
update_country(df_univ)
output_info.update({'university': json.loads(df_univ.to_json(orient='index'))})

## Program

In [10]:
# Load the program table, normalise country names and store it as a list of records
df_prgm = pd.read_csv(PRGM_PATHNAME)
update_country(df_prgm)
output_info.update({'program': json.loads(df_prgm.to_json(orient='records'))})

## Weather

In [11]:
# Load the weather records and index them by city ID
df_wthr = pd.read_json(WTHR_PATHNAME).set_index('cityId')
df_wthr.head()

Unnamed: 0_level_0,lang,cityName,cityLatitude,cityLongitude,isCapital,stationName,tourismURL,tourismBoardName,isDep,timeZone,isDST,member,forecast,climate
cityId,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1
145,en,Twenthe,52.27,6.87,False,Enschede,www.holland.com,Netherlands Board of Tourism and Conventions,False,100,N,"{'memId': 37, 'memName': 'Netherlands', 'short...","{'issueDate': '2022-02-09 09:30:00', 'timeZone...","{'raintype': 'PPT', 'raindef': '0.1', 'rainuni..."
312,en,Zurich,47.38,8.57,False,Zurich,,,False,100,N,"{'memId': 87, 'memName': 'Switzerland', 'short...","{'issueDate': '2022-02-08 12:00:00', 'timeZone...","{'raintype': 'PPT', 'raindef': '1', 'rainunit'..."
192,en,Luxembourg,49.616667,6.216667,True,Luxembourg,,,False,100,N,"{'memId': 98, 'memName': 'Luxembourg', 'shortM...","{'issueDate': '2022-02-09 14:30:01', 'timeZone...","{'raintype': 'PPT', 'raindef': '0.1', 'rainuni..."
607,en,Kaunas,54.88396,23.83588,False,Kaunas,www.tourism.lt,Lithuanian State Department of Tourism,False,200,N,"{'memId': 105, 'memName': 'Lithuania', 'shortM...","{'issueDate': '2022-02-09 12:37:00', 'timeZone...","{'raintype': 'PPT', 'raindef': '', 'rainunit':..."
204,en,Vilnius,54.636389,25.290833,True,Vilnius,www.tourism.lt,Lithuanian State Department of Tourism,False,200,N,"{'memId': 105, 'memName': 'Lithuania', 'shortM...","{'issueDate': '2022-02-09 12:37:00', 'timeZone...","{'raintype': 'PPT', 'raindef': '', 'rainunit':..."


In [12]:
# Store the weather table keyed by its index (city ID)
output_info.update({'weather': json.loads(df_wthr.to_json(orient='index'))})

## COVID-19

In [13]:
# Load the COVID-19 graph table and index it by graph label
df_covd = pd.read_csv(COVD_PATHNAME).set_index('label')
df_covd.head()

Unnamed: 0_level_0,no,title,url
label,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
case,1,Biweekly confirmed COVID-19 cases per million ...,https://ourworldindata.org/grapher/biweekly-co...
death,2,Biweekly confirmed COVID-19 deaths per million...,https://ourworldindata.org/grapher/biweekly-co...
hospitalization,3,Weekly new ICU admissions for COVID-19,https://ourworldindata.org/grapher/weekly-icu-...
vaccination,4,Share of people vaccinated against COVID-19,https://ourworldindata.org/explorers/coronavir...


In [14]:
# Store the graph table keyed by its index (label)
output_info.update({'covid19': json.loads(df_covd.to_json(orient='index'))})

In [15]:
# Serialise the collected sections and write them to the output file
with open(OUTPUT_PATHNAME, 'w') as out_file:
    out_file.write(json.dumps(output_info))