In [45]:
import pandas as pd

In [46]:
# Raw string literal: the Windows path contains backslash sequences (\D, \M, \C)
# that are not valid escapes in a normal string literal.
personen=pd.read_csv(r"C:\Daten\Daten\MiD2023_B1_Datensatzpaket\CSV\MiD2023_Personen.csv")

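Read the person-level data from the MiD 2023 dataset and apply the following filters:
- RegioStaR17: keep only urban regions (drops codes ending in 5, i.e. very small municipalities)
- Age: keep persons aged 18 and above
- Employment: keep employed persons only
- Home office: keep persons who answered the home-office-days question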


In [47]:
# Each step reports how many rows it removes and then applies the filter to
# `personen`. The steps are sequential: every count refers to the rows that
# survived the previous filters.

# Filter for RegioStaR17: drop rows whose code ends in the digit 5
ends_with_five = personen['RegioStaR17'].astype(str).str.endswith('5')
print("Dropped {} Personen living in very small municipalities.".format(
    ends_with_five.sum()
))
personen = personen[~ends_with_five]

# Filter for Alter
# The count (< 18) and the kept rows (>= 18) are evaluated separately so that
# rows with a missing age are dropped without being counted, as before.
print("Dropped {} Personen younger than 18.".format(
    (personen['HP_ALTER'] < 18).sum()
))
personen = personen[personen['HP_ALTER'] >= 18]

# Filter for employment status: only `taet` codes 1 and 2 are kept
is_employed = personen['taet'].isin([1, 2])
print("Dropped {} Personen not employed.".format(
    (~is_employed).sum()
))
personen = personen[is_employed]

# Filter for home office question: keep only the listed P_HOFF2 codes
has_home_office_code = personen['P_HOFF2'].isin([0, 1, 2, 3, 4, 5, 6, 7, 96, 99, 412])
print("Dropped {} Personen with no home office information.".format(
    (~has_home_office_code).sum()
))
# .copy() detaches the result from the frame it was sliced from, so that later
# column assignments on `personen` do not raise SettingWithCopyWarning.
personen = personen[has_home_office_code].copy()

Dropped 60773 Personen living in very small municipalities.
Dropped 44515 Personen younger than 18.
Dropped 122690 Personen not employed.
Dropped 133268 Personen with no home office information.


Mapping to Korean Metrics
- home_province dropped
- home_administrative dropped
- sex = sex (Minor Mappings)
- age = age (Mapping)
- housetype = haustyp (!! not equivalent, but currently not important/used)
- driver_license = P_FS_PKW
- drive_regularly = P_NUTZ_AUTO mapped
- fixed_workplace = P_ARB_VM1 mapped
- occupation = does not have a direct equivalent, which is a big problem! (P_TAET is exported under this name below)
- home_office_days = P_HOFF2 mapped
- car_group = P_VAUTO
- bicycle_group = vpedrad
- other group = dropped as not relevant


In [48]:
# Recode HP_SEX: code 1 stays 1, codes 2, 3 and 4 all become 2.
# Codes not listed in the dictionary turn into NaN (Series.map semantics).
personen['HP_SEX'] = personen['HP_SEX'].map({1: 1, 2: 2, 3: 2, 4: 2})
def map_age(age):
    """Return the age-group code (1-6) for an age.

    Groups: below 25 -> 1, below 35 -> 2, below 45 -> 3, below 55 -> 4,
    below 65 -> 5, everything else -> 6.
    """
    # Walk the exclusive upper bounds in ascending order; the first bound the
    # age falls below determines the group.
    for group, upper_bound in enumerate((25, 35, 45, 55, 65), start=1):
        if age < upper_bound:
            return group
    return 6
# Replace each age with its age-group code from map_age.
personen['HP_ALTER'] = personen['HP_ALTER'].map(map_age)
# Recode P_FS_PKW: 1 stays 1, 2 and 3 both become 2.
# Codes not listed in the dictionary turn into NaN (Series.map semantics).
personen['P_FS_PKW'] = personen['P_FS_PKW'].map({1: 1, 2: 2, 3: 2})
def map_drive_regularly(value):
    """Return 1 for codes below 3 and 2 for every other value."""
    return 1 if value < 3 else 2
# Collapse P_NUTZ_AUTO to the two codes produced by map_drive_regularly.
personen['P_NUTZ_AUTO'] = personen['P_NUTZ_AUTO'].map(map_drive_regularly)
# P_ARB_VM1: the codes 96, 99 and 404 become 2, every other value becomes 1.
personen['P_ARB_VM1'] = personen['P_ARB_VM1'].apply(
    lambda code: 2 if code in [96, 99, 404] else 1
)
def map_home_office(value):
    """Map a P_HOFF2 code to a home-office category (1-5).

    Mapping:
        96        -> 4
        99, 412   -> 5
        above 5   -> 1   (any remaining value greater than 5, e.g. 6 and 7)
        5         -> 2
        3, 4      -> 3
        1, 2      -> 4

    Returns:
        The category as an int, or None for a value with no mapping
        (this includes code 0).
    """
    # The special codes are all greater than 5, so they have to be handled
    # before the `value > 5` range check; otherwise they would all be
    # swallowed by that check and mapped to 1.
    if value == 96:
        return 4
    if value in (99, 412):
        return 5
    if value > 5:
        return 1
    if value == 5:
        return 2
    if value in (3, 4):
        return 3
    if value in (1, 2):
        return 4
    # NOTE(review): code 0 passes the earlier P_HOFF2 filter but has no
    # category here, so it ends up as a missing value -- confirm this is intended.
    return None
# Convert the P_HOFF2 codes to categories via map_home_office.
personen['P_HOFF2'] = personen['P_HOFF2'].apply(map_home_office)
# P_VAUTO: code 9 is folded into 2; all other values are left unchanged.
personen['P_VAUTO'] = personen['P_VAUTO'].replace({9: 2})
# vpedrad: 2 and 3 become 1; 4, 9 and 402 become 2; other values are left unchanged.
personen['vpedrad'] = personen['vpedrad'].replace({
    2: 1,
    3: 1,
    4: 2,
    9: 2,
    402: 2,
})



In [49]:
# Source column in `personen` -> column name in the exported frame.
# The dictionary order defines the column order of `personen_synth`.
synth_column_names = {
    'HP_SEX': 'sex',
    'HP_ALTER': 'age',
    'haustyp': 'housetype',
    'P_FS_PKW': 'driver_license',
    'P_NUTZ_AUTO': 'drive_regularly',
    'P_ARB_VM1': 'commute_to_fixed_workplace',
    'P_TAET': 'occupation',
    'P_HOFF2': 'home_office_days',
    'P_VAUTO': 'car_group',
    'vpedrad': 'bicycle_group',
    'arb_vm2': 'mode_choice',
}
personen_synth = (
    personen
    .loc[:, list(synth_column_names)]
    .rename(columns=synth_column_names)
)

In [50]:
# Write the renamed frame to CSV in the working directory, without the index column.
personen_synth.to_csv(
    path_or_buf='personen_synth_germany.csv',
    index=False,
)