In [83]:
# Deze cell importeert een aantal functies en modules die je nodig hebt om deze opdracht te maken.

from plotly.offline import init_notebook_mode, iplot
import plotly.graph_objs as go

# Start notebook mode
init_notebook_mode(connected=True)

import pandas as pd # we gebruiken pandas om de CSV data te laden en te visualiseren


# Load the three university ranking datasets, in the order times, shanghai, cwur.
times, shanghai, cwur = map(
    pd.read_csv,
    ("timesData.csv", "shanghaiData.csv", "cwurData.csv"),
)

# Quick sanity check: show the world_rank entries of Harvard in the Shanghai data.
is_harvard = shanghai['university_name'] == 'Harvard University'
print(shanghai.loc[is_harvard, 'world_rank'])

def conv_ranking(x):
    """Convert a world-rank label into a single number.

    Three label shapes are understood:

    * a plain rank such as ``"7"`` (or an integer ``7``),
    * a tied rank with a leading equals sign such as ``"=7"``,
    * a rank band such as ``"201-225"``, which becomes the midpoint of its
      two bounds.

    Args:
        x: The rank label. Non-string values are converted with ``str``
            first, so integer rank columns work as well.

    Returns:
        float: The rank as a number. The result is always a float, so callers
        do not have to deal with a mix of strings and floats.

    Raises:
        ValueError: If the label (or one bound of a band) is not numeric,
            e.g. an empty string.
    """
    label = str(x).strip()
    if label.startswith('='):
        # "=7" marks a shared position; the number itself is what matters.
        label = label[1:]

    low, dash, high = label.partition('-')
    if dash:
        return (float(low) + float(high)) / 2
    return float(label)

# Average Times world rank per country.
#
# For every (university, country) row of the lookup table, the university's
# world_rank values in `times` are averaged; those per-university averages are
# then averaged again per country.
countries = pd.read_csv("school_and_country_table.csv")

# country -> [sum of per-university average ranks, number of universities counted]
country_universities = {}
for _, (university, country) in countries.iterrows():
    uni_ranks = (
        times.loc[times['university_name'] == university, 'world_rank']
        .map(conv_ranking)
        .astype(float)
    )
    if uni_ranks.empty:
        # The university has no rows in `times`; averaging would divide by
        # zero, so it simply does not contribute to its country.
        continue

    avg_rank = uni_ranks.mean()
    # The entry is created only once a university actually matched, so every
    # country in the dict has a count of at least one.
    totals = country_universities.setdefault(country, [0, 0])
    totals[0] += avg_rank
    totals[1] += 1

country_universities = {
    country: rank_sum / count
    for country, (rank_sum, count) in country_universities.items()
}

# The single column is called 'n' (a later cell reads univ_amounts['n']);
# it holds the average rank computed above.
univ_amounts = pd.DataFrame.from_dict(country_universities, orient='index', columns=['n'])
print(univ_amounts)
print(univ_amounts.index)

0       1
500     1
1000    1
1510    1
2013    1
2514    1
3014    1
3514    1
3798    1
3897    1
4397    1
Name: world_rank, dtype: object
                                   n
United States of America  246.913561
United Kingdom            330.060684
Switzerland               174.498333
Canada                    267.344000
Hong Kong                 157.508333
Japan                     535.998984
South Korea               479.750000
Singapore                  66.750000
Australia                 311.956452
China                     519.575658
France                    355.539583
Sweden                    196.922727
Germany                   229.002991
Republic of Ireland       369.623148
Finland                   338.916667
Taiwan                    541.835069
South Africa              386.333333
Turkey                    468.300000
Netherlands               122.823077
Belgium                   205.642857
Denmark                   210.750000
Norway                    253.458333
Spain  

In [85]:

# Choropleth world map: one entry per row of univ_amounts, coloured by its 'n' value.
country_labels = univ_amounts.index  # used as locations; interpreted as country names

border_line = go.choropleth.marker.Line(
    color='rgb(0,0,0)',
    width=0.25,
)

rank_trace = go.Choropleth(
    colorscale=scl,
    autocolorscale=False,
    locations=country_labels,
    z=univ_amounts['n'].astype(float),
    locationmode='country names',
    text=country_labels,
    marker=go.choropleth.Marker(line=border_line),
    colorbar=go.choropleth.ColorBar(title="Average Ranking (high i)"),
)
data = [rank_trace]

# Flat (equirectangular) world projection with lakes drawn in white.
world_geo = go.layout.Geo(
    scope='world',
    projection=go.layout.geo.Projection(type='equirectangular'),
    showlakes=True,
    lakecolor='rgb(255, 255, 255)',
)
layout = go.Layout(
    title=go.layout.Title(text='Average ranking of universities by country'),
    geo=world_geo,
)

fig = go.Figure(data=data, layout=layout)
iplot(fig, filename='d3-cloropleth-map')



rankings = {} # intended layout: rankings[year][university] = [times, shanghai, cwur]

def rename(university_name):
    """Shorten a university name to its leading part.

    Everything from the first comma onwards is dropped, then everything from
    the first opening parenthesis onwards; surrounding whitespace is stripped
    after each cut.

    Args:
        university_name: The name as it appears in a ranking table.

    Returns:
        str: The shortened, whitespace-trimmed name.
    """
    before_comma = university_name.partition(',')[0].strip()
    return before_comma.partition('(')[0].strip()

# Fill `rankings` so that rankings[year][university] is a three-element list
# [times rank, shanghai rank, cwur rank]. A slot stays None when that source
# has no entry for the university in that year. Only years that occur in the
# Times data are kept; rows of the other two sources for other years are
# skipped.
ranking_sources = (
    # (label printed as progress marker, data frame, column holding the name)
    ('times', times, 'university_name'),
    ('shanghai', shanghai, 'university_name'),
    ('cwur', cwur, 'institution'),
)

for slot, (source, frame, name_column) in enumerate(ranking_sources):
    print(source)
    for world_rank, year, university_name in zip(frame['world_rank'], frame['year'], frame[name_column]):
        if slot == 0:
            # The Times data decides which years exist at all.
            rankings.setdefault(year, {})
        elif year not in rankings:
            continue

        if not isinstance(university_name, str):
            # A missing name (e.g. NaN from the CSV) cannot be normalised.
            continue

        # Names are normalised with rename() so the sources can line up; the
        # entry is created on first sight, whichever source sees it first.
        ranks = rankings[year].setdefault(rename(university_name), [None, None, None])
        ranks[slot] = world_rank

for year, universities in rankings.items():
    print(year)
    for univ, ranks in universities.items():
        print(univ, ranks)

# print(times['world_rank'], shanghai['world_rank'], cwur['world_rank'])

# mashed = pd.DataFrame()
