In [47]:
# Select the interactive widget backend for matplotlib figures.
# NOTE(review): the legend is added from a later cell than the plot itself;
# that presumably relies on the figure staying live between cells -- confirm
# before switching to another backend.
%matplotlib widget

In [48]:
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt

# Population table: read the tab-separated export, discard the breakdown
# columns (civilstånd / ålder / kön) and sum the remaining columns per region.
# Built as one non-mutating chain so re-running the cell always starts from
# the file; the built-in .sum() replaces apply(np.sum).
df_pop = (
    pd.read_csv('population.csv', sep='\t', encoding='ISO-8859-1')
    .drop(columns=['civilstånd', 'ålder', 'kön'])
    .groupby('region')
    .sum()
)
# Extra row holding the column-wise total over all regions.
df_pop.loc['Total population'] = df_pop.sum(axis=0)

In [49]:
# Taxation table: the header is taken from the second line of the file and
# only the first 30 columns are read ('region' plus 29 value columns).
df_tax = pd.read_csv(
    'taxation.csv',
    header=1,
    sep=',',
    encoding='ISO-8859-1',
    usecols=list(range(30)),
).set_index('region')
# Relabel the 29 value columns positionally as the integer years 1992-2020.
years = list(range(1992, 2021))
df_tax.columns = years
# Extra row holding the column-wise total over all regions.
df_tax.loc['Total taxation'] = df_tax.sum(axis=0)
# Keep 1994-2019 by label (inclusive on both ends) instead of the positional
# columns[2:-1], so the intended year window is explicit.
df_tax = df_tax.loc[:, 1994:2019]

In [50]:
# Swedish education-level labels -> English labels used in the rest of the
# notebook.
replacement = {
    'eftergymnasial utbildning, 3 år eller mer': 'College degree, more than 3 years',
    'eftergymnasial utbildning, mindre än 3 år': 'College degree, less than 3 years',
    'forskarutbildning': 'Researcher',
    'förgymnasial utbildning kortare än 9 år': 'Pre-high school education, less than 9 years',
    'förgymnasial utbildning, 9 (10) år': 'Pre-high school education, 9 years',
    'gymnasial utbildning, 3 år': '3 years high school',
    'gymnasial utbildning, högst 2 år': 'Up to 2 years high school',
    'uppgift om utbildningsnivå saknas': 'No info',
}

# Education table: one row per education level, summed over every region,
# age and sex.
# groupby().sum() leaves the grouping key out of the result, so there is no
# string-concatenated 'education level' column to drop afterwards. The former
# groupby().apply(np.sum) + drop() pattern depends on apply() passing the
# grouping column through, which newer pandas versions deprecate.
df_study = (
    pd.read_csv('studies.csv', sep='\t', encoding='ISO-8859-1')
    .drop(columns=['region', 'ålder', 'kön'])
    .rename(columns={'utbildningsnivå': 'education level'})
    .groupby('education level')
    .sum()
    .rename(index=replacement)
    # Skip the first four value columns -- presumably the years before 1994,
    # since the combined frame is later relabelled 1994-2019; verify against
    # the CSV header.
    .iloc[:, 4:]
)

In [51]:
# National population totals as a one-column frame; transposed below so it
# can be stacked as one more row under the education levels.
df_conc = df_pop.loc['Total population'].to_frame()
df = pd.concat([df_study, df_conc.T], axis=0)

# Relabel the columns positionally as the integer years 1994-2019
# (presumably so they line up with the integer year labels of df_tax).
years = list(range(1994, 2020))
df.columns = years

In [52]:
# Fetch the taxation total by its row label rather than by position, the same
# way the population total is looked up, so the cell does not depend on the
# total being the last row of df_tax.
df_conc = df_tax.loc['Total taxation'].to_frame()
df = pd.concat([df, df_conc.T], axis=0)
# Transpose so that years become the index and each measure a column.
# NOTE: this cell overwrites df with its own transpose, so it must be run
# exactly once after the previous cell.
df = df.T
df

Unnamed: 0,"College degree, more than 3 years","College degree, less than 3 years",Researcher,"Pre-high school education, less than 9 years","Pre-high school education, 9 years",3 years high school,Up to 2 years high school,No info,Total population,Total taxation
1994,558884,738639,33965,1151246,931912,847166,1942388,125713,8548733,1025881878
1995,573676,763950,34995,1088759,962841,889211,1899835,111524,8571485,1094690428
1996,589743,786492,36083,1027655,969492,931466,1864989,116429,8580179,1136634171
1997,611036,808380,37377,965522,996521,962020,1835186,106549,8585308,1170078511
1998,631803,826139,38741,912349,1011262,994116,1798957,113692,8593849,1241425072
1999,660142,846876,41619,863332,1025322,1024001,1768378,105466,8603332,1292617035
2000,807121,791186,44868,801088,940118,1107494,1763748,99960,8626554,1383679413
2001,847781,802160,47650,754533,938641,1142297,1746902,106051,8654395,1519683554
2002,891655,819107,49658,713858,949488,1173464,1728115,98651,8687156,1512096653
2003,930142,837377,51550,675213,953411,1205500,1700932,107994,8722796,1533441225


In [53]:
# Columns counted as "High education": both college categories plus
# researchers.
high_education_columns = [
    'College degree, more than 3 years',
    'College degree, less than 3 years',
    'Researcher',
]
# Row-wise (per-year) total of those columns.
df['High education'] = df[high_education_columns].sum(axis=1)

In [54]:
# Columns counted as "Low education". These are the columns that sat at
# positions 3..6 of the combined frame; naming them makes the selection
# independent of column order and makes the cell fail loudly (KeyError)
# instead of silently summing the wrong columns if the frame has already
# been trimmed. The 'No info' column is deliberately left out of both groups.
low_education_columns = [
    'Pre-high school education, less than 9 years',
    'Pre-high school education, 9 years',
    '3 years high school',
    'Up to 2 years high school',
]
# Row-wise (per-year) total of those columns.
df['Low education'] = df[low_education_columns].sum(axis=1)

In [55]:
# Keep only the four columns used from here on, selected by name (same
# columns and order as the former "last four columns" slice) so the result
# does not depend on the order in which columns were added.
df = df[['Total population', 'Total taxation', 'High education', 'Low education']]

In [56]:
# Preview the first rows of the reduced frame (totals and the two education
# groups per year).
df.head()

Unnamed: 0,Total population,Total taxation,High education,Low education
1994,8548733,1025881878,1331488,4872712
1995,8571485,1094690428,1372621,4840646
1996,8580179,1136634171,1412318,4793602
1997,8585308,1170078511,1456793,4759249
1998,8593849,1241425072,1496683,4716684


In [57]:
# Reference year against which every series is indexed.
base_year = 1994
base_pop = df.loc[base_year, 'Total population']
# Per-capita values of each measure in the reference year.
base_tax, base_lower, base_higher = (
    df.loc[base_year, column] / base_pop
    for column in ('Total taxation', 'Low education', 'High education')
)

In [58]:
def _indexed_per_capita(column, base):
    """Return df[column] per capita, divided by its reference-year value `base`.

    The result equals 1.0 in the reference year.
    """
    return df[column] / df['Total population'] / base


series1 = _indexed_per_capita('Total taxation', base_tax)
series2 = _indexed_per_capita('High education', base_higher)
series3 = _indexed_per_capita('Low education', base_lower)

In [59]:
# Draw the three indexed per-capita series on one explicit Axes.
fig, ax = plt.subplots(figsize=(10, 10))
ax.plot(series1, ':', linewidth=2, color='black')  # taxation per capita
ax.plot(series2)                                   # high education share
ax.plot(series3, '-', color='red')                 # low education share
sns.despine()
ax.margins(0.0)  # stretch to the ends
ax.set_xlabel('Year')
ax.set_xticks(list(range(1994, 2020, 2)))
ax.tick_params(axis='x', labelrotation=45)
# The series are ratios to their 1994 per-capita value (1.0 in 1994), not
# percentages, so the axis label says so.
ax.set_ylabel('Per-capita level relative to 1994 (1994 = 1.0)')
# Pink band from a fixed floor of 0.75 up to the low-education curve.
ax.fill_between(series3.index, 0.75, series3, alpha=.2, color='pink')
# Green band between the high- and low-education curves.
ax.fill_between(series3.index, series2, series3, alpha=.2, color='green')
# Trailing semicolon keeps the Text(...) repr out of the cell output.
ax.set_title('Increased taxation effects on education in Sweden', fontweight=500, fontsize='xx-large');


Canvas(toolbar=Toolbar(toolitems=[('Home', 'Reset original view', 'home', 'home'), ('Back', 'Back to previous …

Text(0.5, 1.0, 'Increased taxation effects on education in Sweden')

In [60]:
from matplotlib.font_manager import FontProperties

# Legend font: default properties at 'medium' size.
fontP = FontProperties()
fontP.set_size('medium')

# Pair each label with its line explicitly. Passing labels alone makes
# matplotlib match them to artists by implicit drawing order, which is easy to
# get wrong once more artists (e.g. the shaded bands) are on the axes.
# get_lines() returns only the Line2D artists, in the order they were plotted:
# taxation, high education, low education.
plt.legend(
    handles=plt.gca().get_lines(),
    labels=['Taxation per capita', '% Graduated people', '% Undergraduated people'],
    prop=fontP,
    bbox_to_anchor=(0.05, 0.95),
    loc='upper left',
);


<matplotlib.legend.Legend at 0x25e818470a0>