In [3]:
%matplotlib notebook
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.linear_model import LinearRegression
slash = '/'
df = pd.read_csv('..'+slash+'data'+slash+'df.csv',sep=';',index_col=0)

In [4]:
f, (ax2) = plt.subplots(1, 1)#, sharey=True)
#f.figsize=(6,4)
plt.subplots_adjust(top=0.85,bottom=0.2)
language = 'german'# 'eng' or german
if language == 'eng':
    title_left = 'Overindebtedness 2019\nvs. Voteshare'
    ylabel_left = 'Overindebtedness'
    xlabel_left = 'AfD result in %'
    title_right = 'Change in Overindebtedness\nvs. Voteshare'
    ylabel_right = 'Change of Overindebtedness'
    xlabel_right = 'Change of AfD result in %'
    figname = 'figures\\Overindebtedness and AfD-electoral results.png'
    txt = """Change of percentage of overindebted adults in Germany from 2013 to 2018 
    (source: creditreform) against change in AfD's electoral results European 
    Elections 2014 and 2019 in percent (source: Bundeswahlleiter)"""
if language == 'german':
    title_left = 'Ãœberschuldung \nvs. EU-Wahlergebnis 2019'
    ylabel_left = 'Ãœberschuldung'
    xlabel_left = 'AfD-Ergebnis in %'
    title_right = 'VerÃ¤nderung Ãœberschuldung \nvs. EU-Wahlergebnis 2019'
    ylabel_right = 'VerÃ¤nderung Ãœberschuldung in %'
    xlabel_right = 'VerÃ¤nderung des AfD-Ergebnisses in %'
    figname = 'figures\\Ãœberschuldung und AfD Wahlergebnis Europawahl 2019.png'
    txt = """VerÃ¤nderung des Anteils Ã¼berschuldeter Erwachsener in Deutschland von 2013 
    bis 2018 (Quelle: creditreform) vs. VerÃ¤nderung des AfD Stimmanteils bei den
    Europawahlen 2013 und 2019 (Quelle: Bundeswahlleiter) auf Kreisebene. 
    Die Linien sind Kleinste-Quadrate SchÃ¤tzungen separat fÃ¼r Ost und West"""

#ax1.set_title(title_left)
#ax1.scatter( df.afd_val19,df['2018'],color='black',alpha=0.5)
#ax1.set_ylabel(ylabel_left)
#ax1.set_xlabel(xlabel_left)
fontsize_labels = 12

ax2.set_title(title_right, fontsize = (fontsize_labels+2))
ax2.scatter(df[df.east==1].vot19_14,
            df[df.east==1].ove18_13, label=('West'),alpha=0.5 )
ax2.scatter(df[df.east==0].vot19_14, 
            df[df.east==0].ove18_13, label=('Ost'),alpha=0.5 )
ax2.axhline(color='black',lw=0.5)
ax2.set_ylabel(ylabel_right, fontsize=fontsize_labels)
ax2.set_xlabel(xlabel_right, fontsize=fontsize_labels)
ax2.legend()
#plt.subplots_adjust(wspace=0.4)

f.text(.5, .05, txt, ha='center', fontsize=fontsize_labels)

# resize the figure to match the aspect ratio of the Axes    
f.set_size_inches(7, 8, forward=True)


# estimate slopes
y_west = df[df.east==1].vot19_14
y_east = df[df.east==0].vot19_14
x_west = np.array(df[df.east==1].ove18_13.values).reshape(-1, 1)
x_east = np.array(df[df.east==0].ove18_13.values).reshape(-1, 1)

reg_west = LinearRegression().fit(x_west, y_west)


y_hat_west = reg_west.predict(x_west)

ax2.plot(y_hat_west,x_west,color='grey')

reg_east = LinearRegression().fit(x_east, y_east)

y_hat_east = reg_east.predict(x_east)

ax2.plot(y_hat_east,x_east,color='grey')

<IPython.core.display.Javascript object>

AttributeError: 'DataFrame' object has no attribute 'east'

In [92]:
print('Score East-Germany: ',reg_east.score(x_east, y_east))
print('Score West-Germany: ',reg_west.score(x_west, y_west))

Score East-Germany:  0.10402299668939365
Score West-Germany:  0.3831140133080897


# Plotting maps

There is some useful information on changes of regions over time (in German) https://www.destatis.de/DE/Themen/Laender-Regionen/Regionales/Gemeindeverzeichnis/_inhalt.html.

In [8]:
# installing geopandas on a 32-bit, windows machine and python 3.7 was unexpectedly cumbersome. 
# downloading and installing spacely, fiona, gdal, rtree and pyproj wheels and pip installing 
# it worked. As 3.01 didn't work, using 2.4.1, pip install --upgrade setuptools and Microsoft 
# Build Tools for Visual Studio 2019 (https://wiki.python.org/moin/WindowsCompilers)
# the did the trick.
import geopandas
import descartes
from mpl_toolkits.axes_grid1 import make_axes_locatable


df_geo = geopandas.read_file('..'+slash+'data'+slash+'geodata'+slash+'vg2500_krs.shx')
# key for merge with df is RS
df_geo.RS = df_geo.RS.str.replace(r'^0','').astype(int)
df_geo =df_geo.to_crs({'init':'epsg:31467'})
df_geo.plot(column='RS')

<IPython.core.display.Javascript object>

<matplotlib.axes._subplots.AxesSubplot at 0x1a23a05ac8>

In [9]:
# gÃ¶ttingen (3152) and osterode (3156) are now one region
df_geo.RS[df_geo.RS==3156] = 3159
df_geo.RS[df_geo.RS==3152] = 3159
df_geo = df_geo.dissolve(by='RS')

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  This is separate from the ipykernel package so we can avoid doing imports until


In [11]:
df_geo = df_geo.merge(df, left_index=True, right_on='Nr',how='left')
# change projection for "usual" aspect of maps of Germany
df_geo =df_geo.to_crs({'init':'epsg:31467'})

df_geo.head()

Unnamed: 0_level_0,geometry,USE,RS_ALT,GEN,SHAPE_LENG,SHAPE_AREA,region,subregion,state,vot19_14,...,f_crime_2015,total_suspects_2014,foreign_suspects_2014,f_crime_2014,total_suspects_2013,foreign_suspects_2013,f_crime_2013,total_suspects_2012,foreign_suspects_2012,f_crime_2012
Nr,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
1001,"POLYGON ((3531551.016 6077191.723, 3532515.909...",4,10010000000,Flensburg,32467.034276,53248990.0,"Flensburg, Stadt",1.0,Schleswig-Holstein,-0.003027,...,32.5,233.0,66.0,28.3,207.0,45.0,21.7,277.0,42.0,15.2
1002,"POLYGON ((3577409.674 6029439.547, 3576962.967...",4,10020000000,Kiel,73027.302434,121865400.0,"Kiel, Landeshauptstadt",1.0,Schleswig-Holstein,0.060316,...,26.4,463.0,80.0,17.3,439.0,113.0,25.7,490.0,95.0,19.4
1003,"POLYGON ((3624323.897 5982346.682, 3625006.427...",4,10030000000,LÃ¼beck,104861.09422,221138600.0,"LÃ¼beck, Hansestadt",1.0,Schleswig-Holstein,0.551817,...,23.4,512.0,109.0,21.3,509.0,85.0,16.7,526.0,101.0,19.2
1004,"POLYGON ((3567698.625 6000346.812, 3566961.127...",4,10040000000,NeumÃ¼nster,39858.818376,80923630.0,"NeumÃ¼nster, Stadt",1.0,Schleswig-Holstein,2.392481,...,31.6,241.0,59.0,24.5,241.0,54.0,22.4,247.0,45.0,18.2
1051,"MULTIPOLYGON (((3479613.059 5992136.472, 34799...",4,10510000000,Dithmarschen,202206.675707,1450311000.0,Dithmarschen,1.0,Schleswig-Holstein,3.374651,...,12.2,167.0,19.0,11.4,151.0,25.0,16.6,227.0,23.0,10.1


In [14]:
fig, (ax1) = plt.subplots(1, 1)

#plot.title()
ax1.title.set_text('Prozentuale VerÃ¤nderung der Hartz IV EmpfÃ¤nger')
divider = make_axes_locatable(ax1)
cax = divider.append_axes("right", size="5%", pad=0.1)
df_geo.plot(column='hartz_total_2018_2013', ax=ax1, legend=True, cax=cax)
ax1.axes.get_xaxis().set_ticks([])
ax1.axes.get_yaxis().set_ticks([])
ax1.axis('off')


fig, (ax2) = plt.subplots(1, 1)
ax2.title.set_text('% Ã¼berschuldung 2018')
divider = make_axes_locatable(ax2)
cax = divider.append_axes("right", size="5%", pad=0.1)
df_geo.plot(column='debt_2018', ax=ax2, legend=True, cax=cax)
ax2.axes.get_xaxis().set_ticks([])
ax2.axes.get_yaxis().set_ticks([])
ax2.axis('off')

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

(3248294.5408406756, 3953310.382441411, 5194242.485414251, 6146624.286540029)

In [112]:
fig, (ax1) = plt.subplots(1, 1)
plt.subplots_adjust(top=0.85,bottom=0.2)
txt="""Quelle: Statistische Ã„mter des Bundes und der LÃ¤nder, Regionaldatenbank 
Deutschland. GÃ¶ttingen ist aufgrund einer Gebietszusammenlegung mit 
Osterrode nicht direkt vergleichbar und 0 gesetzt. Grafik: Dirk Ulbricht
"""
#plot.title()
ax1.title.set_text('% of overindebted adults\nChange from 2014 to 2018')
ax1.title.set_text('VerÃ¤nderung der Anzahl der Hartz IV EmpfÃ¤nger\n 2013 auf 2018 je tsd. Einwohner')
divider = make_axes_locatable(ax1)
cax = divider.append_axes("right", size="5%", pad=0.1)
df_geo.plot(column='hartz_total_2018', ax=ax1, legend=True, cax=cax,cmap='RdYlGn_r')
ax1.axes.get_xaxis().set_ticks([])
ax1.axes.get_yaxis().set_ticks([])
ax1.axis('off')
fig.text(0.5, .06, txt, ha='center', fontsize=8)
# resize the figure to match the aspect ratio of the Axes    
#fig.set_size_inches(7, 8, forward=True)

<IPython.core.display.Javascript object>

Text(0.5, 0.06, 'Quelle: Statistische Ã„mter des Bundes und der LÃ¤nder, Regionaldatenbank \nDeutschland. GÃ¶ttingen ist aufgrund einer Gebietszusammenlegung mit \nOsterrode nicht direkt vergleichbar und 0 gesetzt. Grafik: Dirk Ulbricht\n')

In [113]:
df.iloc[:,df.columns.str.contains('hartz')].sort_values(by='hartz_total_2018_2013')

Unnamed: 0_level_0,hartz_total_2018,hartz_no_empl_2018,hartz_foreign_2018,hartz_total_2018_2013,hartz_no_empl_2018_2013,hartz_total_2013,hartz_no_empl_2013
Nr,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
9471,18.9,25.1,34.2,-80.434783,-12.543554,17.6,29.9
9371,22.6,26.7,37.2,-74.776786,-9.797297,21.9,28.9
12069,36.1,23.1,15.2,-70.769231,-0.858369,54.0,23.8
9571,15.0,27.4,33.9,-69.939880,-7.744108,19.5,33.2
8235,32.0,27.4,41.6,-63.636364,-17.220544,29.9,28.8
16063,39.1,23.6,14.7,-62.973485,-10.943396,48.2,25.3
16066,41.7,23.8,20.7,-62.398557,-5.179283,56.1,24.7
8225,33.1,26.4,35.7,-61.645423,-8.333333,30.5,28.0
9271,25.5,26.1,26.9,-61.128049,-6.451613,31.3,28.6
16061,40.3,24.5,24.4,-60.412574,-13.732394,50.3,25.6


In [114]:
fig, (ax1) = plt.subplots(1, 1)

#plot.title()
ax1.title.set_text('% of overindebted adults\nChange from 2013 to 2018')
ax1.title.set_text('VerÃ¤nderung der Anzahl der Hartz IV EmpfÃ¤nger\n 2013 auf 2018 je tsd. Einwohner')
divider = make_axes_locatable(ax1)
cax = divider.append_axes("right", size="5%", pad=0.1)
df_geo['Hartz_VerÃ¤nderung']=(df_geo.hartz_total_2018 - df_geo.hartz_total_2013)
df_geo.plot(column='Hartz_VerÃ¤nderung', ax=ax1, legend=True, cax=cax,cmap='RdYlGn_r')
ax1.axes.get_xaxis().set_ticks([])
ax1.axes.get_yaxis().set_ticks([])
ax1.axis('off')


<IPython.core.display.Javascript object>

(3248292.344351501, 3953309.37174341, 5194242.9478586465, 6146626.145265488)