# Imports and Setup

In [274]:
import numpy as np
import pandas as pd
from re import sub

import matplotlib.pyplot as plt
import seaborn as sns

import requests
from bs4 import BeautifulSoup

In [2]:
import plotly.plotly as py
import plotly.graph_objs as go 
from plotly.offline import download_plotlyjs, init_notebook_mode, plot, iplot

In [3]:
# Switch plotly into offline notebook mode so figures can be rendered inline.
# NOTE(review): connected=True presumably loads plotly.js from the CDN instead of
# embedding it in the notebook — confirm against the plotly offline docs.
init_notebook_mode(connected=True)

# Scraping Partner Universities and Data Wrangling

In [4]:
# Download the partner-universities page.
# The timeout keeps the cell from hanging indefinitely on a stalled connection,
# and raise_for_status() fails loudly on an HTTP error instead of letting an
# error page be handed to the parser in the next cell.
page = requests.get(
    'http://www.international.wiso.uni-koeln.de/international-profile/partner-universities/',
    timeout=30,
)
page.raise_for_status()

In [8]:
# Parse the raw response bytes into a navigable tree using the 'html.parser' backend.
soup = BeautifulSoup(markup=page.content, features='html.parser')

In [132]:
# Section numbers 1..5 as strings, ready for concatenation into element IDs.
numbers = list(map(str, range(1, 6)))

# Constant ID prefix and title suffix, repeated once per section number so the
# three lists can be zipped together.
continent_id_1 = ['c189440_akkordeon_'] * len(numbers)
continent_id_2 = ['-titel1'] * len(numbers)

In [156]:
# Concatenate prefix + section number + title suffix into one element ID per section.
continent_ids = [
    ''.join(id_parts)
    for id_parts in zip(continent_id_1, numbers, continent_id_2)
]

In [157]:
continent_ids

['c189440_akkordeon_1-titel1',
 'c189440_akkordeon_2-titel1',
 'c189440_akkordeon_3-titel1',
 'c189440_akkordeon_4-titel1',
 'c189440_akkordeon_5-titel1']

In [158]:
# Same prefix and section number as the title IDs, but with the '-1' suffix.
continent_input_ids = [
    ''.join([prefix, number, '-1'])
    for prefix, number in zip(continent_id_1, numbers)
]

In [159]:
continent_input_ids

['c189440_akkordeon_1-1',
 'c189440_akkordeon_2-1',
 'c189440_akkordeon_3-1',
 'c189440_akkordeon_4-1',
 'c189440_akkordeon_5-1']

In [164]:
# Look up each title element by its ID and keep its whitespace-trimmed text.
continent_names = []
for title_id in continent_ids:
    title_element = soup.find(id=title_id)
    continent_names.append(title_element.get_text().strip())

In [319]:
continent_names

['Africa', 'Americas', 'Asia', 'Europe', 'Middle East']

In [379]:
# One single-column frame per list. All three get the same default integer
# index, so joining on the index lines the values up row by row.
df1 = pd.DataFrame(continent_names, columns=['Continent Names'])
df2 = pd.DataFrame(continent_ids, columns=['Continent IDs'])
df3 = pd.DataFrame(continent_input_ids, columns=['Continent Input IDs'])

# Lookup table: continent name alongside both of its element IDs.
df4 = df1.join(df2)
df5 = df4.join(df3)

In [380]:
df5

Unnamed: 0,Continent Names,Continent IDs,Continent Input IDs
0,Africa,c189440_akkordeon_1-titel1,c189440_akkordeon_1-1
1,Americas,c189440_akkordeon_2-titel1,c189440_akkordeon_2-1
2,Asia,c189440_akkordeon_3-titel1,c189440_akkordeon_3-1
3,Europe,c189440_akkordeon_4-titel1,c189440_akkordeon_4-1
4,Middle East,c189440_akkordeon_5-titel1,c189440_akkordeon_5-1


In [410]:
# Exploratory pass over a single section (the first one, shown as Africa in the
# lookup table) before looping over all of them.
section_id = 'c189440_akkordeon_1-1'

# Country names are the bold (<b>) elements inside the section's content element.
# get_text() returns only the text, dropping nested tags such as <br/> and
# decoding entities; regex-stripping the serialized HTML would leave escaped
# entities (e.g. '&amp;') and any tag carrying attributes in the names.
country_names = [tag.get_text() for tag in soup.find(id=section_id).find_all('b')]

# Tag each country with the section ID it was found in.
country_names = [name + ' ' + section_id for name in country_names]
country_names

['Egypt c189440_akkordeon_1-1',
 'Ghana c189440_akkordeon_1-1',
 'Senegal c189440_akkordeon_1-1',
 'South Africa c189440_akkordeon_1-1']

In [514]:
# Collect one [country, continent input ID] pair for every country in every section.
countries = []

# The loop variable is `section_id`, not `id`: a module-level variable named
# `id` would shadow the builtin id() for the rest of the kernel session.
for section_id in continent_input_ids:
    bold_tags = soup.find(id=section_id).find_all('b')

    # Build the pairs directly rather than gluing name and ID together with a
    # sentinel string and splitting them apart again. get_text() yields the
    # element text with nested tags (e.g. <br/>) removed and entities decoded.
    country_names = [[tag.get_text(), section_id] for tag in bold_tags]

    countries.extend(country_names)

In [519]:
# Turn the [country, section ID] pairs into a two-column frame and preview it.
df6 = pd.DataFrame(countries, columns=['Country', 'Continent Input IDs'])
df6.head()

Unnamed: 0,Country,Continent Input IDs
0,Egypt,c189440_akkordeon_1-1
1,Ghana,c189440_akkordeon_1-1
2,Senegal,c189440_akkordeon_1-1
3,South Africa,c189440_akkordeon_1-1
4,Argentina,c189440_akkordeon_2-1


In [520]:
# Attach the continent name and title ID to every country through the shared
# 'Continent Input IDs' column; the left join keeps every country row.
# The stray trailing .drop() was removed: called without any labels it raises
# instead of returning the merged frame.
pd.merge(df6, df5, how='left', on='Continent Input IDs')

Unnamed: 0,Country,Continent Input IDs,Continent Names,Continent IDs
0,Egypt,c189440_akkordeon_1-1,Africa,c189440_akkordeon_1-titel1
1,Ghana,c189440_akkordeon_1-1,Africa,c189440_akkordeon_1-titel1
2,Senegal,c189440_akkordeon_1-1,Africa,c189440_akkordeon_1-titel1
3,South Africa,c189440_akkordeon_1-1,Africa,c189440_akkordeon_1-titel1
4,Argentina,c189440_akkordeon_2-1,Americas,c189440_akkordeon_2-titel1
5,Brazil,c189440_akkordeon_2-1,Americas,c189440_akkordeon_2-titel1
6,Canada,c189440_akkordeon_2-1,Americas,c189440_akkordeon_2-titel1
7,Chile,c189440_akkordeon_2-1,Americas,c189440_akkordeon_2-titel1
8,Mexico,c189440_akkordeon_2-1,Americas,c189440_akkordeon_2-titel1
9,Peru,c189440_akkordeon_2-1,Americas,c189440_akkordeon_2-titel1


# Exploratory Data Analysis

# Visualizations