In [3]:

from plotly.offline import init_notebook_mode, iplot
import plotly.graph_objs as go
import math

# Start plotly's offline notebook mode so that iplot() can render figures inline.
# NOTE(review): connected=True should make plotly.js load from a CDN (so viewing
# the figures needs internet access) - confirm against the plotly.offline docs.
init_notebook_mode(connected=True)

import pandas as pd # we gebruiken pandas om de CSV data te laden en te visualiseren


# Load the three ranking tables from CSV files; the relative paths are resolved
# against the current working directory. The files are read in the listed order.
times, shanghai, cwur = (
    pd.read_csv(csv_name)
    for csv_name in ("timesData.csv", "shanghaiData.csv", "cwurData.csv")
)

# print(shanghai.loc[shanghai['university_name'] == 'Harvard University']['world_rank'])

def conv_ranking(x):
    """Convert one ``world_rank`` entry to a single float.

    Accepted forms:

    * a plain rank such as ``"12"``            -> ``12.0``
    * a tied rank with a leading ``=`` (``"=9"``) -> ``9.0``
    * a rank band such as ``"201-225"``        -> the midpoint, ``213.0``
      (a leading ``=`` on a band is tolerated as well)
    * a value that is not a string (e.g. already a number) -> ``float(x)``,
      so mapping this function over an already-converted column is harmless.

    Raises:
        ValueError: if the text cannot be parsed as a rank or a two-part band.
    """
    # Already numeric (or at least float-convertible): nothing to parse.
    if not isinstance(x, str):
        return float(x)

    text = x.strip()
    # Strip the tie marker before splitting so "=201-225" parses like "201-225".
    if text.startswith('='):
        text = text[1:]

    bounds = text.split('-')
    if len(bounds) == 2:
        # A band "low-high" is represented by its midpoint.
        return (float(bounds[0]) + float(bounds[1])) / 2
    return float(text)

def conv_other(x, default=50):
    """Convert ``x`` to ``float``, returning ``default`` when that is impossible.

    Args:
        x: value to convert (typically a string or a number).
        default: value returned when ``float(x)`` fails. It defaults to ``50``,
            the fallback this notebook has always used.

    Returns:
        ``float(x)`` on success, otherwise ``default`` unchanged.

    NOTE(review): substituting 50 for unparseable entries pulls any average
    computed afterwards towards 50; passing ``default=float('nan')`` would let
    pandas skip those entries instead - confirm which is wanted.
    """
    try:
        return float(x)
    except (TypeError, ValueError, OverflowError):
        # Only the errors float() raises for unconvertible input are treated
        # as "missing"; anything else is a real bug and should surface.
        return default

# countries = pd.read_csv("school_and_country_table.csv")

# df = times.set_index('university_name').join(countries.set_index('school_name'))
# print(df)

# Turn the textual rank/score columns into floats. The columns are overwritten
# in place, so `times` no longer holds the raw strings after this cell.
times['world_rank'] = times['world_rank'].map(conv_ranking)
times['total_score'] = times['total_score'].map(conv_other)

# Average every numeric column per (year, country).
# numeric_only=True is required on pandas >= 2.0, where mean() raises TypeError
# on non-numeric columns instead of silently dropping them as older versions did.
country_stats = times.groupby(['year', 'country']).mean(numeric_only=True)
# print(list(x for x in country_stats.index.levels[0]))
# print(country_stats)

In [2]:

def choro(raw, year, statistic):
    """Build one choropleth trace that colours countries by ``raw[statistic]``.

    Args:
        raw: table whose index holds the country names (used as ``locations``
            with ``locationmode='country names'``) and which has a
            ``statistic`` column that can be cast to float.
        year: shown in the colour-bar title and stored as the only element of
            the trace's ``customdata``.
        statistic: name of the column of ``raw`` to plot.

    Returns:
        A ``go.Choropleth`` trace with a fixed blue-to-red colour scale.
    """
    blue_to_red = [[0, 'rgb(50,50,255)'], [1, 'rgb(255,50,50)']]
    values = raw[statistic].astype(float)

    # Thin black outline around every country.
    border = go.choropleth.marker.Line(color = 'rgb(0,0,0)', width = .25)
    legend = go.choropleth.ColorBar(title = "Average (%s)" % str(year))

    return go.Choropleth(
        locations = raw.index,
        locationmode = 'country names',
        z = values,
        colorscale = blue_to_red,
        autocolorscale = False,
        marker = go.choropleth.Marker(line = border),
        colorbar = legend,
        customdata = [year]
    )

def worldmap(statistic):
    """Display an interactive world map of ``statistic`` with a year slider.

    One choropleth trace (see ``choro``) is built for every year in the first
    index level of the module-level ``country_stats``. A slider with one step
    per year toggles which trace is visible; it starts on the last year.

    Args:
        statistic: name of the ``country_stats`` column to plot.

    Returns:
        None. The figure is rendered with ``iplot`` as a side effect.
    """
    years = list(country_stats.index.levels[0])
    data = [choro(country_stats.loc[year], year, statistic) for year in years]

    # Index of the step the slider starts on (the last year).
    active = max(len(data) - 1, 0)

    # Traces are visible by default, so without this every year's map and
    # colour bar would be drawn on top of each other until the slider is first
    # moved. Show only the trace that matches the slider's initial position.
    for position, trace in enumerate(data):
        trace.visible = (position == active)

    # One slider step per year: make exactly that year's trace visible.
    steps = [
        dict(method='restyle',
             args=['visible', [position == shown for position in range(len(data))]],
             label='Year {}'.format(year))
        for shown, year in enumerate(years)
    ]

    sliders = [dict(active=active,
                    pad={"t": 1},
                    steps=steps)]

    layout = go.Layout(
        title = go.layout.Title(
            text = 'Average of universities by country (%s)' % statistic
        ),
        geo = go.layout.Geo(
            scope = 'world',
            projection = go.layout.geo.Projection(type = 'equirectangular'),
            showlakes = True,
            lakecolor = 'rgb(255, 255, 255)'),
        sliders = sliders
    )

    fig = go.Figure(data = data, layout = layout)
    iplot(fig)

# Render one slider-driven world map for every column of country_stats.
for stat in country_stats.columns:
    worldmap(stat)

In [4]:
# Per-country slices of the Times table: each name below holds only the rows
# whose 'country' column equals the given string.
country_col = times['country']

Japan = times.loc[country_col == 'Japan']
HONGKONG = times.loc[country_col == 'Hong Kong']
South_Korea = times.loc[country_col == 'South Korea']
China = times.loc[country_col == 'China']
Singapore = times.loc[country_col == 'Singapore']
Taiwan = times.loc[country_col == 'Taiwan']
Thailand = times.loc[country_col == 'Thailand']
USA = times.loc[country_col == 'United States of America']

# Stack the seven Asian selections into one table with a fresh 0..n-1 index.
asias = pd.concat(
    [Japan, HONGKONG, South_Korea, China, Singapore, Taiwan, Thailand],
    ignore_index=True,
)

# Mean world rank per year, for the Asian group and for the USA.
rankasia = asias.groupby(['year'])[['world_rank']].mean()
rankUSA = USA.groupby(['year'])[['world_rank']].mean()

In [5]:
# Grouped bar chart: mean Times world rank per year, Asia next to the USA.
trace1 = go.Bar(x=rankasia.index, y=rankasia['world_rank'], name='Azië')
trace2 = go.Bar(x=rankUSA.index, y=rankUSA['world_rank'], name='USA')
data = [trace1, trace2]

layout = go.Layout(
    barmode='group',
    title=go.layout.Title(
        # Spelling of the displayed title fixed: "Vergeljking" -> "Vergelijking".
        text='Vergelijking tussen Amerikaanse en Aziatische universiteiten over de jaren'
    ),
    xaxis=go.layout.XAxis(
        title=go.layout.xaxis.Title(text='Jaar'),
        type='category'  # the x-axis is categorical: one slot per year
    ),
    yaxis=go.layout.YAxis(
        title=go.layout.yaxis.Title(text='Gemiddelde world ranking')
    )
)

fig = go.Figure(data=data, layout=layout)
iplot(fig)


In [6]:
# Number of ranked universities per year, as a one-column DataFrame indexed by
# year with the count stored in a column named 'year' (the name the plotting
# cell reads).
#
# sort_index() puts the years in chronological order; reversing value_counts()
# only ordered them by ascending count. The explicit rename()/rename_axis()
# keeps the column label 'year' and an unnamed index on every pandas version
# (pandas >= 2.0 names the value_counts() result 'count' instead).
count_asia = (
    asias['year']
    .value_counts()
    .sort_index()
    .rename('year')
    .rename_axis(None)
    .to_frame()
)
count_USA = (
    USA['year']
    .value_counts()
    .sort_index()
    .rename('year')
    .rename_axis(None)
    .to_frame()
)


In [7]:
# Grouped bar chart: number of ranked universities per year, Asia next to the USA.
# The counts live in the 'year' column of count_asia / count_USA.
asia_bars = dict(x=count_asia.index, y=count_asia['year'], name='Azië')
usa_bars = dict(x=count_USA.index, y=count_USA['year'], name='USA')

trace1 = go.Bar(**asia_bars)
trace2 = go.Bar(**usa_bars)
data = [trace1, trace2]

layout = go.Layout(
    barmode='group',
    title=go.layout.Title(
        # Spelling of the displayed title fixed: "Vergeljking" -> "Vergelijking".
        text='Vergelijking tussen de hoeveelheid Amerikaanse en Aziatische universiteiten over de jaren'
    ),
    xaxis=go.layout.XAxis(
        title=go.layout.xaxis.Title(text='Jaar'),
        type='category'  # the x-axis is categorical: one slot per year
    ),
    yaxis=go.layout.YAxis(
        title=go.layout.yaxis.Title(text='Hoeveelheid universiteiten')
    )
)

fig = go.Figure(data=data, layout=layout)
iplot(fig)
