In [117]:
import pandas as pd
import numpy as np

import matplotlib.pyplot as plt
import seaborn as sns; sns.set()

from pathlib import Path
from functools import reduce

from sklearn.decomposition import PCA
from sklearn.cluster import KMeans

In [118]:
# Root folder holding the raw CSV exports.
path = Path('./corona_data_sets') # use your path
# Collect every CSV below `path` (recursively). Sorting makes the load order,
# and therefore the insertion order of anything built from this list,
# independent of the filesystem's directory ordering.
all_files = sorted(path.rglob("*.csv"))

In [119]:
# Load each CSV into a DataFrame and store it in `d`, keyed by the file name
# without its extension (e.g. "firmenveraenderungen").
d = {}

for p in all_files:
    # Parse the AGS codes as strings so values such as "01" / "01001" keep
    # their leading zeros instead of being read as integers.
    df = pd.read_csv(p, index_col=None, header=0, converters={'ags2': str, 'ags5': str})
    # Use the `columns=` keyword: passing the axis positionally (`drop(labels, 1)`)
    # is no longer accepted by pandas >= 2.0.
    df = df.drop(columns=['bundesland', '_id'])
    d[p.stem] = df

In [120]:
# Inspect the raw company-changes table: identifier columns (ags2, ags5, kreis),
# a `variable` column, and one d<YYYYMM> column per month.
d["firmenveraenderungen"]

Unnamed: 0,ags2,ags5,kreis,variable,d201901,d201902,d201903,d201904,d201905,d201906,...,d202007,d202008,d202009,d202010,d202011,d202012,d202101,d202102,d202103,d202104
0,01,01001,"Flensburg, Stadt",kr_firm_br_a_m,14,-99,12,9,12,12,...,15,15,15,15,15,15,15,15,15,15
1,01,01002,"Kiel, Landeshauptstadt",kr_firm_br_a_m,36,-99,23,31,34,33,...,34,35,35,35,35,35,35,34,34,35
2,01,01003,"Lübeck, Hansestadt",kr_firm_br_a_m,100,-99,80,80,88,88,...,74,75,74,74,75,73,73,73,74,74
3,01,01004,"Neumünster, Stadt",kr_firm_br_a_m,30,-99,22,23,28,28,...,28,28,28,28,28,29,29,29,29,28
4,01,01051,Dithmarschen,kr_firm_br_a_m,410,-99,362,383,411,413,...,385,383,380,383,388,387,387,387,386,387
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
10023,16,16074,Saale-Holzland-Kreis,kr_firm_liq_m,4,1,3,2,2,2,...,1,0,2,2,0,0,8,0,3,3
10024,16,16075,Saale-Orla-Kreis,kr_firm_liq_m,4,2,0,2,0,2,...,1,1,1,1,1,0,5,2,1,3
10025,16,16076,Greiz,kr_firm_liq_m,6,4,1,3,1,0,...,2,3,4,3,0,3,2,2,3,2
10026,16,16077,Altenburger Land,kr_firm_liq_m,4,1,1,0,0,0,...,1,1,0,0,1,1,3,0,0,1


## firmenveraenderungen - company changes

In [121]:
# Collapse the monthly d<YYYYMM> columns of the company-changes table into a
# single per-row mean stored in "value".
company_changes_raw = d["firmenveraenderungen"]

# All monthly observation columns share the "d20" prefix (d201901 ... d202104).
filter_col = [col for col in company_changes_raw if col.startswith('d20')]

# NOTE(review): the raw data contains -99 entries (e.g. in d201902). They look
# like a missing-value marker and are currently included in the mean - confirm
# and, if so, mask them before averaging.
# The "value" column is also added to the frame stored in `d` (same object).
company_changes_raw["value"] = company_changes_raw[filter_col].mean(axis=1).astype(float)

# `columns=` keyword instead of a positional axis, which pandas >= 2.0 rejects.
company_changes = company_changes_raw.drop(columns=filter_col)
company_changes


Unnamed: 0,ags2,ags5,kreis,variable,value
0,01,01001,"Flensburg, Stadt",kr_firm_br_a_m,5.821429
1,01,01002,"Kiel, Landeshauptstadt",kr_firm_br_a_m,24.464286
2,01,01003,"Lübeck, Hansestadt",kr_firm_br_a_m,65.535714
3,01,01004,"Neumünster, Stadt",kr_firm_br_a_m,18.428571
4,01,01051,Dithmarschen,kr_firm_br_a_m,359.250000
...,...,...,...,...,...
10023,16,16074,Saale-Holzland-Kreis,kr_firm_liq_m,1.750000
10024,16,16075,Saale-Orla-Kreis,kr_firm_liq_m,1.285714
10025,16,16076,Greiz,kr_firm_liq_m,1.964286
10026,16,16077,Altenburger Land,kr_firm_liq_m,1.214286


In [122]:
#company_changes.pivot_table(index=['ags2','ags5'], columns='variable', values='value')


In [123]:
#company_changes.to_csv('./corona_data_sets_modified/firmenveraenderungen.csv')


## ausbildungssituation - training situation

In [124]:
# Reduce the training-situation table to the region identifiers plus the
# student count, under a readable column name.
students_raw = d["ausbildungssituation"]

# The rename happens in place, so the frame stored in `d` is renamed as well.
students_raw.rename(columns={"kr_schuel": "number_of_students"}, inplace=True)

training_columns = ["ags2", "ags5", "kreis", "number_of_students"]
training_situation = students_raw[training_columns]
training_situation

Unnamed: 0,ags2,ags5,kreis,number_of_students
0,01,01001,"Flensburg, Stadt",11610
1,01,01002,"Kiel, Landeshauptstadt",23898
2,01,01003,"Lübeck, Hansestadt",20478
3,01,01004,"Neumünster, Stadt",10380
4,01,01051,Dithmarschen,13158
...,...,...,...,...
396,16,16073,Saalfeld-Rudolstadt,8772
397,16,16074,Saale-Holzland-Kreis,7190
398,16,16075,Saale-Orla-Kreis,7441
399,16,16076,Greiz,8403


In [125]:
#training_situation.to_csv('./corona_data_sets_modified/training_situation.csv')


## krankenhausdaten - hospital data


In [126]:
# Give the hospital indicators readable names and keep only the region
# identifiers plus those indicators.
hospital_renames = {
    "kr_kh": "number_of_hospitals",
    "kr_kh_bett": "number_of_hospital_beds",
    "kr_kh_bett_ew": "number_of_hospital_beds_adj",
    # NOTE(review): "patiants" is misspelled; left unchanged because other
    # cells may refer to this exact column name.
    "kr_khp": "hospital_patiants",
}

hospital_raw = d["krankenhausdaten"]
# The rename happens in place, so the frame stored in `d` is renamed as well.
hospital_raw.rename(columns=hospital_renames, inplace=True)

# Identifier columns first, then the renamed indicators in mapping order.
hospital_columns = ["ags2", "ags5", "kreis"] + list(hospital_renames.values())
hospital_data = hospital_raw[hospital_columns]
hospital_data

Unnamed: 0,ags2,ags5,kreis,number_of_hospitals,number_of_hospital_beds,number_of_hospital_beds_adj,hospital_patiants
0,01,01001,"Flensburg, Stadt",3,860,9.6,18796
1,01,01002,"Kiel, Landeshauptstadt",16,2498,10.1,53016
2,01,01003,"Lübeck, Hansestadt",13,1975,9.1,53406
3,01,01004,"Neumünster, Stadt",5,693,8.7,19792
4,01,01051,Dithmarschen,3,656,4.9,32195
...,...,...,...,...,...,...,...
396,16,16073,Saalfeld-Rudolstadt,3,994,9.3,30316
397,16,16074,Saale-Holzland-Kreis,3,654,7.9,20517
398,16,16075,Saale-Orla-Kreis,1,120,1.5,22511
399,16,16076,Greiz,2,403,4.1,29220


In [127]:
#hospital_data.to_csv('./corona_data_sets_modified/hospital_data.csv')

## private_finanzen - private finance

In [128]:
# Inspect the raw private-finance table and its original kr_* column names.
d["private_finanzen"]

Unnamed: 0,ags2,ags5,kreis,kr_ein_md,kr_schu_qu,kr_hh_eink_kl1,kr_hh_eink_kl2,kr_hh_eink_kl3,kr_hh_eink_kl4,kr_hh_eink_kl5,kr_hh_eink_kl6,kr_kk_hh,kr_kk_ew
0,01,01001,"Flensburg, Stadt",2986,16.0,13209,12385,13092,5441,3096,3497,34496,19556
1,01,01002,"Kiel, Landeshauptstadt",3304,12.1,22126,38523,39483,18567,11162,7841,35246,19612
2,01,01003,"Lübeck, Hansestadt",3036,15.1,22881,30159,32571,15805,10999,9039,37219,20820
3,01,01004,"Neumünster, Stadt",2842,17.9,7208,13291,10435,3812,3201,2805,38141,19561
4,01,01051,Dithmarschen,2914,12.8,11806,11575,17816,9873,7834,7676,46945,23486
...,...,...,...,...,...,...,...,...,...,...,...,...,...
396,16,16073,Saalfeld-Rudolstadt,2466,8.3,12237,13267,19810,5653,3271,2547,39242,20959
397,16,16074,Saale-Holzland-Kreis,2288,7.6,5131,9216,14445,5254,3446,2379,42956,20631
398,16,16075,Saale-Orla-Kreis,2253,7.6,7388,7994,17082,4532,2501,1632,40511,20610
399,16,16076,Greiz,2233,7.8,11098,10354,18760,5324,3370,2287,40784,21275


In [129]:
# Give the private-finance indicators readable names and keep only the region
# identifiers plus those indicators.
finance_renames = {
    "kr_ein_md": "median_income",
    "kr_kk_hh": "purchasing_power_per_household",
    "kr_kk_ew": "purchasing_power_per_person",
    "kr_schu_qu": "debtor_quota",
}
# kr_hh_eink_kl1 ... kr_hh_eink_kl6 -> household_in_income_calss_1 ... _6
# (the "calss" spelling is the established column name and is kept as is).
income_class_renames = {
    f"kr_hh_eink_kl{k}": f"household_in_income_calss_{k}" for k in range(1, 7)
}

finance_raw = d["private_finanzen"]
# The rename happens in place, so the frame stored in `d` is renamed as well.
finance_raw.rename(columns={**finance_renames, **income_class_renames}, inplace=True)

finance_columns = (
    ["ags2", "ags5", "kreis"]
    + list(finance_renames.values())
    + list(income_class_renames.values())
)
private_finance = finance_raw[finance_columns]
private_finance

Unnamed: 0,ags2,ags5,kreis,median_income,purchasing_power_per_household,purchasing_power_per_person,debtor_quota,household_in_income_calss_1,household_in_income_calss_2,household_in_income_calss_3,household_in_income_calss_4,household_in_income_calss_5,household_in_income_calss_6
0,01,01001,"Flensburg, Stadt",2986,34496,19556,16.0,13209,12385,13092,5441,3096,3497
1,01,01002,"Kiel, Landeshauptstadt",3304,35246,19612,12.1,22126,38523,39483,18567,11162,7841
2,01,01003,"Lübeck, Hansestadt",3036,37219,20820,15.1,22881,30159,32571,15805,10999,9039
3,01,01004,"Neumünster, Stadt",2842,38141,19561,17.9,7208,13291,10435,3812,3201,2805
4,01,01051,Dithmarschen,2914,46945,23486,12.8,11806,11575,17816,9873,7834,7676
...,...,...,...,...,...,...,...,...,...,...,...,...,...
396,16,16073,Saalfeld-Rudolstadt,2466,39242,20959,8.3,12237,13267,19810,5653,3271,2547
397,16,16074,Saale-Holzland-Kreis,2288,42956,20631,7.6,5131,9216,14445,5254,3446,2379
398,16,16075,Saale-Orla-Kreis,2253,40511,20610,7.6,7388,7994,17082,4532,2501,1632
399,16,16076,Greiz,2233,40784,21275,7.8,11098,10354,18760,5324,3370,2287


In [130]:
#private_finance.to_csv('./corona_data_sets_modified/private_finance.csv')

## arbeitsmarktentwicklung - labor market development

In [None]:
# Select the region identifiers and the labor-market indicator columns.
#
# The labor-market table has no private-finance columns (median_income,
# debtor_quota, ...); its indicators are monthly series named
# kr_<indicator>_<YYYYMM> (e.g. kr_ka_au_202001, kr_rlb_202104). Selecting the
# private-finance names here would raise a KeyError, so the selection is built
# from the columns that are actually present.
labor_market_raw = d["arbeitsmarktentwicklung"]

labor_id_columns = ["ags2", "ags5", "kreis"]
labor_indicator_columns = [col for col in labor_market_raw.columns if col.startswith("kr_")]

# TODO: decide on readable names / an aggregation for the monthly series
# (for instance a per-indicator mean, as done for the company-changes table).
labor_market_development = labor_market_raw[labor_id_columns + labor_indicator_columns]
labor_market_development.head()

In [131]:
# Inspect the raw labor-market table: identifier columns (ags2, ags5, kreis)
# followed by monthly columns such as kr_ka_au_202001 ... kr_rlb_202104.
d["arbeitsmarktentwicklung"]

Unnamed: 0,ags2,ags5,kreis,kr_ka_au_202001,kr_ka_au_202002,kr_ka_au_202003,kr_ka_au_202004,kr_ka_au_202005,kr_ka_au_202006,kr_ka_au_202007,...,kr_rlb_202007,kr_rlb_202008,kr_rlb_202009,kr_rlb_202010,kr_rlb_202011,kr_rlb_202012,kr_rlb_202101,kr_rlb_202102,kr_rlb_202103,kr_rlb_202104
0,01,01001,"Flensburg, Stadt",0,0,185,758,70,21,8,...,11528,11401,11211,11018,10941,10988,11066,11165.082749,11193.617211,11111.999397
1,01,01002,"Kiel, Landeshauptstadt",5,4,344,1812,169,49,22,...,32153,31937,31493,31075,31018,30842,30738,30950.568347,30985.965972,30847.303052
2,01,01003,"Lübeck, Hansestadt",0,0,370,1722,177,41,20,...,24463,24097,23613,23157,23179,23354,23491,23601.855952,23703.798710,23599.499787
3,01,01004,"Neumünster, Stadt",0,0,142,642,55,15,5,...,8997,8893,8847,8730,8707,8667,8838,8870.375277,8954.945551,8825.197032
4,01,01051,Dithmarschen,0,0,181,1046,80,34,15,...,10188,10002,9781,9613,9614,9706,9775,9817.517458,9890.821675,9906.267154
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
396,16,16073,Saalfeld-Rudolstadt,0,5,704,510,81,18,12,...,4985,4934,4824,4750,4751,4721,4727,4793.027079,4938.773207,4762.884313
397,16,16074,Saale-Holzland-Kreis,0,3,441,432,45,14,10,...,3324,3255,3181,3077,3022,2978,2994,3032.964802,3040.304609,3036.686847
398,16,16075,Saale-Orla-Kreis,5,7,348,611,75,17,9,...,3445,3327,3270,3102,3060,3042,3060,3089.952200,3178.729884,3102.771109
399,16,16076,Greiz,3,0,471,738,84,31,20,...,4126,4025,3949,3829,3826,3826,3891,3881.375161,3890.171929,3874.523927
