In [22]:
import datetime
import json

import geopandas as gpd
import numpy as np
import pandas as pd
import plotly
from plotly import graph_objects as go
pd.set_option('display.max_rows', 100)

%load_ext autoreload

from charts import plot_line, plot_bar, plot_peak, print_charts_country
from features import add_variables_covid, add_variables_apple, join_series_day_since, join_series_date

%matplotlib inline

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [667]:
# Date stamp that is interpolated into the Apple mobility CSV file name when the file is read.
date_apple = '2020-05-14'

In [488]:
def apple_filter_region(df, region):
    """Reshape the rows of a single region into a date-indexed frame.

    The rows of ``df`` whose ``region`` equals ``region`` are transposed, the
    ``transportation_type`` row becomes the column labels, and the descriptive
    rows are removed so that only the per-date rows remain.

    Parameters
    ----------
    df : pandas.DataFrame
        Wide table with ``region`` and ``transportation_type`` columns; every
        column other than the descriptive ones is parsed as a date.
    region : str
        Value of the ``region`` column to select.

    Returns
    -------
    pandas.DataFrame
        Float-valued frame indexed by ``dates`` with one column per
        transportation type plus a constant ``region`` column.
    """
    meta_rows = ['geo_type', 'region', 'transportation_type', 'alternative_name']

    transposed = df.loc[df.region == region].T
    transposed.columns = transposed.loc['transportation_type', :]

    # Drop the descriptive rows; what is left is one row per date column.
    is_meta = transposed.index.isin(meta_rows)
    series_by_date = transposed[~is_meta]

    series_by_date['dates'] = pd.to_datetime(series_by_date.index)
    series_by_date = series_by_date.set_index('dates').astype('float')
    series_by_date['region'] = region

    return series_by_date

In [489]:
def add_forecast_prophet(df_in, column, window=60, ):
    """Fit a Prophet model on one column and attach its forecast to the frame.

    Parameters
    ----------
    df_in : pandas.DataFrame
        Input frame. Its index is used as Prophet's ``ds`` column and must
        support adding a ``timedelta`` (e.g. a ``DatetimeIndex``).
    column : str
        Name of the column to forecast; missing values are dropped before fitting.
    window : int, default 60
        Number of daily periods to forecast beyond the last index value.

    Returns
    -------
    pandas.DataFrame
        ``df_in`` extended by ``window`` empty daily rows and joined column-wise
        with Prophet's ``yhat``, ``yhat_lower`` and ``yhat_upper``. The extra
        column ``{column}_pred`` holds ``yhat`` only on rows where ``column``
        is missing.
    """
    from fbprophet import Prophet

    # Prophet is fitted on a two-column frame: target in ``y``, timestamps in ``ds``.
    history = df_in.loc[:, [column]].dropna()
    history['ds'] = history.index
    history.columns = ['y', 'ds']

    m = Prophet()
    m.fit(history)
    future = m.make_future_dataframe(periods=window)
    forecast = m.predict(future)
    forecast = forecast.set_index(forecast.ds)
    forecast = forecast.loc[:, ['yhat', 'yhat_lower', 'yhat_upper']]

    # One empty row per forecast day. Building the range directly (instead of
    # two end points expanded with ``asfreq``) also works for ``window=1``.
    horizon_start = max(df_in.index) + datetime.timedelta(1)
    df_extra_dates = pd.DataFrame(
        index=pd.date_range(horizon_start, periods=window, freq='D', name='day')
    )

    # ``DataFrame.append`` was removed in pandas 2.0; ``pd.concat`` is the replacement.
    df_extended = pd.concat([df_in, df_extra_dates])
    df_result = pd.concat([df_extended, forecast], axis=1)

    # Expose the prediction only where no observed value exists.
    df_result[f'{column}_pred'] = df_result.loc[df_result[column].isnull(), 'yhat']

    return df_result


In [490]:
def prepare_df_country(df_confirmed, df_dead, country, date_cutoff='2020-03-15'):
    """Build the per-country frame of confirmed and dead counts with derived variables.

    The population is looked up in the module-level ``df_population`` frame
    (columns ``country`` and ``population``).

    Parameters
    ----------
    df_confirmed : pandas.DataFrame
        Date-indexed frame with one column of confirmed counts per country.
    df_dead : pandas.DataFrame
        Date-indexed frame with one column of death counts per country.
    country : str
        Column to select from both frames and key for the population lookup.
    date_cutoff : str, default '2020-03-15'
        Only rows whose index is greater than or equal to this value are kept.

    Returns
    -------
    pandas.DataFrame or bool
        The prepared frame, or ``False`` when the population lookup fails.
    """
    try:
        pop = df_population.loc[df_population.country == country, 'population'].values[0]
    except Exception:
        # Best-effort lookup: any failure is reported and signalled with False.
        # Unlike a bare ``except:``, this lets KeyboardInterrupt/SystemExit through.
        print('No population data for :', country)
        return False

    df = df_confirmed.loc[df_confirmed.index >= date_cutoff, [country]]
    df.columns = ['confirmed']
    df = add_variables_covid(df, population=pop)

    df_d = df_dead.loc[df_dead.index >= date_cutoff, [country]]
    df_d.columns = ['dead']
    # Index-on-index inner merge keeps only dates present in both series.
    df = df.merge(df_d, left_index=True, right_index=True)
    df = add_variables_covid(df, column='dead', population=pop)

    return df

In [684]:
def melt_apple_df(dfapple):
    """Stack the per-region feature frames of the Apple mobility table.

    Every distinct value of ``dfapple.region`` is reshaped with
    ``apple_filter_region`` and passed through ``add_variables_apple``; the
    resulting frames are concatenated row-wise in order of first appearance.
    """
    per_region = [
        add_variables_apple(apple_filter_region(dfapple, region_name))
        for region_name in dfapple.region.unique()
    ]
    return pd.concat(per_region)

In [492]:
def melt_rki_df(df_rki_germany):
    """Add the derived covid variables to every land and stack the results.

    Parameters
    ----------
    df_rki_germany : pandas.DataFrame
        Frame with at least the columns ``land``, ``population``, ``confirmed``
        and ``dead``.

    Returns
    -------
    pandas.DataFrame
        Row-wise concatenation of the per-land frames after
        ``add_variables_covid`` was applied for ``confirmed`` and ``dead``.
    """
    frames = []
    for land in df_rki_germany.land.unique():
        df = df_rki_germany.loc[df_rki_germany.land == land, :].copy()
        # Take the first row by position. The former ``[...][0]`` relied on
        # integer keys falling back to positions on a non-integer (date) index,
        # which pandas has deprecated.
        pop = int(df['population'].iloc[0])
        df = add_variables_covid(df, 'confirmed', population=pop)
        df = add_variables_covid(df, 'dead', population=pop)
        frames.append(df)
    return pd.concat(frames)

In [494]:
def melt_jhu_df(df_confirmed, df_dead):
    """Prepare every country column and stack the results into one frame.

    Parameters
    ----------
    df_confirmed : pandas.DataFrame
        Date-indexed frame with one column of confirmed counts per country.
    df_dead : pandas.DataFrame
        Date-indexed frame with one column of death counts per country.

    Returns
    -------
    pandas.DataFrame
        Row-wise concatenation of the frames returned by ``prepare_df_country``;
        an empty frame when no country could be prepared.
    """
    frames = []
    for country in df_confirmed.columns:
        df = prepare_df_country(df_confirmed, df_dead, country, date_cutoff='2020-03-15')
        # prepare_df_country returns False when the population lookup fails.
        if isinstance(df, pd.DataFrame):
            frames.append(df)

    # pd.concat raises on an empty list, so hand back an empty frame instead.
    if not frames:
        return pd.DataFrame()

    # The original computed this concatenation but never returned it.
    return pd.concat(frames)

# World Bank Data

In [28]:
import world_bank_data as wb

# Country metadata as returned by the world_bank_data client.
countries = wb.get_countries()

# population = wb.get_series('SP.POP.TOTL', mrv=1)  # Most recent value
# Same series, requested with id-based labels and a simplified index (mrv=1: most recent value).
population = wb.get_series('SP.POP.TOTL', id_or_value='id', simplify_index=True, mrv=1)

# Aggregate region, country and population; rows whose region is 'Aggregates' are left out.
df_population = countries[['region', 'name']].rename(columns={'name': 'country_wb'}).loc[countries.region != 'Aggregates']
# Assignment aligns on the index (presumably the ISO country id - see the output below).
df_population['population'] = population
df_population['country_wb'] = df_population['country_wb'].astype('string')
# Expose the index as a regular column; a later cell merges on 'iso_code'.
df_population['iso_code'] = df_population.index
df_population.head(1)

Unnamed: 0_level_0,region,country_wb,population,iso_code
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
ABW,Latin America & Caribbean,Aruba,105845.0,ABW


In [724]:
# Current UTC date as 'YYYY-MM-DD' (not referenced again in the visible cells).
date_now = datetime.datetime.strftime(datetime.datetime.utcnow(), "%Y-%m-%d")

# Apple mobility export for the date configured in `date_apple`.
dfapple = pd.read_csv(f"../AppleMobilty/applemobilitytrends-{date_apple}.csv")

# JHU CSSE global time series (confirmed / deaths) and the UID-ISO lookup table.
dfcovid_conf = pd.read_csv("../covid-19-JHU CSSE/csse_covid_19_data/csse_covid_19_time_series/time_series_covid19_confirmed_global.csv")
dfcovid_dead = pd.read_csv("../covid-19-JHU CSSE/csse_covid_19_data/csse_covid_19_time_series/time_series_covid19_deaths_global.csv")
df_uid = pd.read_csv("../covid-19-JHU CSSE/csse_covid_19_data/UID_ISO_FIPS_LookUp_Table.csv")

# Keep only lookup rows without a province/state and reduce them to (iso_code, country).
df_uid = df_uid.loc[df_uid['Province_State'].isnull() == True, ['iso3', 'Country_Region']]
df_uid.columns = ['iso_code', 'country']

# Rename the first four columns; the remaining columns keep their original labels.
dfcovid_conf.columns = ['state', 'country', 'lat', 'lng'] + list(dfcovid_conf.columns[4:])
dfcovid_dead.columns = ['state', 'country', 'lat', 'lng'] + list(dfcovid_dead.columns[4:])
# Attach iso_code by country name. The outer join also keeps lookup rows with no matching time series.
dfcovid_conf = dfcovid_conf.merge(df_uid, how='outer', on='country', suffixes=('_x', '_y'))
dfcovid_dead = dfcovid_dead.merge(df_uid, how='outer', on='country', suffixes=('_x', '_y'))

# RKI reports for Germany; the 'data' column is not needed further on.
df_rki_germany = pd.read_csv("data-RKI-parse/RKI-reports.csv")
df_rki_germany.drop('data', axis=1, inplace=True)

# Population table that is later joined to the RKI reports on the land name.
df_population_de = pd.read_csv('german_lander_population.csv')

## Load and write Germany GeoJSON

In [622]:
# Read the GeoJSON of the German lands, reduce it to (iso_code, geometry) and
# write it back out keyed by the bare land code.
geojson_path = "../deutschlandGeoJSON/2_bundeslaender/3_mittel.geo.json"
df_geojson = gpd.read_file(geojson_path)
df_geojson.columns = ["iso_code", 'name', 'type', 'geometry']
df_geojson = df_geojson.loc[:, ["iso_code", 'geometry']]
# Strip the literal 'DE-' prefix so the codes match the bare codes ('HE', 'BW', ...) in the RKI table.
df_geojson['iso_code'] = df_geojson.iso_code.str.replace('DE-', '', regex=False)
geojson = json.loads(df_geojson.set_index('iso_code').to_json())
# A context manager makes sure the output file is flushed and closed.
with open('data_geo_de.json', 'w') as geojson_file:
    json.dump(geojson, geojson_file)

In [548]:
# Keep only the rows that have no state/province value.
dfcovid_conf = dfcovid_conf.loc[dfcovid_conf['state'].isnull() == True, :]
dfcovid_dead = dfcovid_dead.loc[dfcovid_dead['state'].isnull() == True, :]

In [725]:
# Attach the JHU country name to each World Bank row via iso_code (inner join).
# NOTE(review): the cell reassigns df_population from itself, so each re-run adds further
# suffixed country columns - the country_x/country_y columns in the output below look like that.
df_population = df_population.merge(dfcovid_conf.loc[:, ['iso_code', 'country']].drop_duplicates(), how='inner',  on='iso_code',  suffixes=('_x', '_y'), left_index=False, right_index=False,)

In [726]:
# Preview the first rows of the merged population table.
df_population.head()

Unnamed: 0,region,country_wb,population,iso_code,country_x,country_y,country_x.1,country_y.1,country
0,South Asia,Afghanistan,37172386.0,AFG,Afghanistan,Afghanistan,Afghanistan,Afghanistan,Afghanistan
1,Sub-Saharan Africa,Angola,30809762.0,AGO,Angola,Angola,Angola,Angola,Angola
2,Europe & Central Asia,Albania,2866376.0,ALB,Albania,Albania,Albania,Albania,Albania
3,Europe & Central Asia,Andorra,77006.0,AND,Andorra,Andorra,Andorra,Andorra,Andorra
4,Middle East & North Africa,United Arab Emirates,9630959.0,ARE,United Arab Emirates,United Arab Emirates,United Arab Emirates,United Arab Emirates,United Arab Emirates


### Transpose Confirmed Cases

In [553]:
# Rows without a state/province, transposed so that every column is one country.
dfcovid_conf_t = dfcovid_conf.loc[dfcovid_conf['state'].isnull() == True, :].T
# Use the 'country' row as column labels, then remove that row.
dfcovid_conf_t.columns = dfcovid_conf_t.loc['country', :].astype(str)
dfcovid_conf_t.drop(['country'], axis=0, inplace=True)
# Keep the coordinates aside before the non-date rows are dropped.
df_country_coords = dfcovid_conf_t.loc[['lat', 'lng'], :]
dfcovid_conf_t.drop(['lat', 'lng', 'state', 'iso_code'], axis=0, inplace=True)
# The remaining row labels are expected to be date strings: parse, sort and index by them.
dfcovid_conf_t['dates'] = dfcovid_conf_t.index
dfcovid_conf_t['dates'] = dfcovid_conf_t['dates'].astype('datetime64[ns]')
dfcovid_conf_t = dfcovid_conf_t.sort_values('dates', ascending=True)
dfcovid_conf_t.set_index('dates', inplace=True)
dfcovid_conf_t = dfcovid_conf_t.astype(float)

### Transpose Dead Cases

In [554]:
# Same reshaping as for the confirmed cases, applied to the deaths table
# (the coordinates are not extracted a second time).
dfcovid_dead_t = dfcovid_dead.loc[dfcovid_dead['state'].isnull() == True, :].T
# Use the 'country' row as column labels, then remove that row.
dfcovid_dead_t.columns = dfcovid_dead_t.loc['country', :].astype(str)
dfcovid_dead_t.drop(['country'], axis=0, inplace=True)


dfcovid_dead_t.drop(['lat', 'lng', 'state', 'iso_code'], axis=0, inplace=True)
# The remaining row labels are expected to be date strings: parse, sort and index by them.
dfcovid_dead_t['dates'] = dfcovid_dead_t.index
dfcovid_dead_t['dates'] = dfcovid_dead_t['dates'].astype('datetime64[ns]')
dfcovid_dead_t = dfcovid_dead_t.sort_values('dates', ascending=True)
dfcovid_dead_t.set_index('dates', inplace=True)
dfcovid_dead_t = dfcovid_dead_t.astype(float)


# Prepare Datasets

### RKI

In [727]:
# Join the population table to the RKI reports: 'land' on the left matches 'name' on the right.
# The inner join drops reports whose land has no population row.
df_rki_germany = df_rki_germany.merge(df_population_de, 
                                            how='inner',
                                            left_on='land',
                                            right_on='name',
                                            left_index=False,
                                            right_index=False,
                                            suffixes=('_x', '_y'),)

# Parse the dates, sort chronologically and index by date; drop=False keeps 'date' as a column too.
df_rki_germany['date'] = df_rki_germany['date'].astype('datetime64[ns]')
df_rki_germany = df_rki_germany.sort_values('date', ascending=True)
df_rki_germany.set_index('date', inplace=True, drop=False)

# Derived variables per land, stacked into one frame.
df_rki_germany_processed = melt_rki_df(df_rki_germany)

In [728]:
# List the columns available after processing.
df_rki_germany_processed.columns

Index(['land', 'confirmed', 'dead', 'date', 'name', 'iso_code', 'capital',
       'area', 'population', 'dow', 'weekend', 'confirmed_avg3',
       'confirmed_change', 'confirmed_change_avg3', 'confirmed_change_3w',
       'confirmed_change_pct', 'confirmed_change_pct_avg3',
       'confirmed_change_pct_3w', 'confirmed_change_acceleration',
       'confirmed_change_acceleration_avg3', 'confirmed_doubling_days',
       'confirmed_doubling_days_3w', 'confirmed_doubling_days_avg3',
       'confirmed_doubling_days_3w_avg3', 'confirmed_active_cases',
       'confirmed_peak', 'confirmed_day_since_10', 'confirmed_per_100k',
       'confirmed_change_per_100k', 'dead_avg3', 'dead_change',
       'dead_change_avg3', 'dead_change_3w', 'dead_change_pct',
       'dead_change_pct_avg3', 'dead_change_pct_3w',
       'dead_change_acceleration', 'dead_change_acceleration_avg3',
       'dead_doubling_days', 'dead_doubling_days_3w',
       'dead_doubling_days_avg3', 'dead_doubling_days_3w_avg3',
       'd

### Apple

In [698]:
# Per-region mobility features, stacked into one frame.
df_apple_processed = melt_apple_df(dfapple)

# Region names to replace in the Apple data; every other name is kept unchanged.
apple_lands = {
    'Baden-Württemberg': 'Baden-Wuerttemberg',
    'The Free Hanseatic City of Bremen': 'Bremen',
    'Mecklenburg-Vorpommern': 'Mecklenburg-Western Pomerania',
}
df_apple_processed['region'] = df_apple_processed['region'].map(
    lambda name: apple_lands.get(name, name)
)

In [738]:
# Restrict the mobility data to regions that occur as a land in the RKI frame
# and keep only the three mobility series.
df_apple_processed_de = df_apple_processed.loc[df_apple_processed.region.isin(df_rki_germany_processed.land.unique()), ['region', 'driving', 'walking', 'transit']]
# Copy the index into a 'date' column and rename 'region' to 'land';
# a later cell merges with the RKI frame on these two columns.
df_apple_processed_de['date'] = df_apple_processed_de.index
df_apple_processed_de = df_apple_processed_de.rename(columns={'region': 'land'})

In [739]:
# Show the first row of the full mobility frame.
df_apple_processed.head(1)

Unnamed: 0_level_0,driving,walking,region,walking_avg3,walking_l1,walking_l6,change_walking_l6,change_walking,driving_avg3,driving_l1,driving_l6,change_pct_driving_l6,change_driving,transit,transit_avg3,transit_l1,transit_l6,change_transit_l6,change_transit
dates,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1
2020-01-13,100.0,100.0,Albania,,,,,,,,,,,,,,,,


In [740]:
# Show the first row of the Germany-only mobility frame.
df_apple_processed_de.head(1)

Unnamed: 0_level_0,land,driving,walking,transit,date
dates,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
2020-01-13,Berlin,100.0,100.0,100.0,2020-01-13


### JHU

In [671]:
# Build the JHU frame from the transposed confirmed and deaths tables.
df_jhu_processed = melt_jhu_df(dfcovid_conf_t, dfcovid_dead_t)

In [672]:
# Show the first row of the JHU frame.
df_jhu_processed.head(1)

In [515]:
# Latest date in each data source.
# NOTE(review): the last two labels are identical; the first reports the confirmed table, the second the deaths table.
print("Mobility max date:", max(df_apple_processed.index))
print("RKI max date", max(df_rki_germany_processed.index))
print("Covi world max date", max(dfcovid_conf_t.index))
print("Covi world max date", max(dfcovid_dead_t.index))

Mobility max date: 2020-05-10 00:00:00
RKI max date 2020-05-14 00:00:00
Covi world max date 2020-05-12 00:00:00
Covi world max date 2020-05-12 00:00:00


# Save processed data

In [731]:
# Write the full processed tables.
df_rki_germany_processed.to_csv('data_rki_prepared.csv')
df_apple_processed.to_csv('data_apple_prepared.csv')

# Column subset of the RKI frame that goes into the '*_dash' file.
DASH_COLUMNS = ['land', 'date','iso_code', 'confirmed_change', 'confirmed', 'confirmed_active_cases', 'confirmed_change_per_100k', 
                'confirmed_change_pct_3w', 'confirmed_doubling_days_3w_avg3', 'dead_change', 'dead', 'dead_change_per_100k', 'dead_doubling_days']
df_rki_germany_processed.loc[:, DASH_COLUMNS].to_csv('data_rki_prepared_dash.csv')
# Germany-only mobility series.
df_apple_processed_de.to_csv('data_apple_prepared_de.csv')

----

----

----

In [730]:
# List the columns of the processed RKI frame.
df_rki_germany_processed.columns

Index(['land', 'confirmed', 'dead', 'date', 'name', 'iso_code', 'capital',
       'area', 'population', 'dow', 'weekend', 'confirmed_avg3',
       'confirmed_change', 'confirmed_change_avg3', 'confirmed_change_3w',
       'confirmed_change_pct', 'confirmed_change_pct_avg3',
       'confirmed_change_pct_3w', 'confirmed_change_acceleration',
       'confirmed_change_acceleration_avg3', 'confirmed_doubling_days',
       'confirmed_doubling_days_3w', 'confirmed_doubling_days_avg3',
       'confirmed_doubling_days_3w_avg3', 'confirmed_active_cases',
       'confirmed_peak', 'confirmed_day_since_10', 'confirmed_per_100k',
       'confirmed_change_per_100k', 'dead_avg3', 'dead_change',
       'dead_change_avg3', 'dead_change_3w', 'dead_change_pct',
       'dead_change_pct_avg3', 'dead_change_pct_3w',
       'dead_change_acceleration', 'dead_change_acceleration_avg3',
       'dead_doubling_days', 'dead_doubling_days_3w',
       'dead_doubling_days_avg3', 'dead_doubling_days_3w_avg3',
       'd

In [712]:
# Earliest date in the Germany-only mobility frame.
df_apple_processed_de.index.min()

Timestamp('2020-01-13 00:00:00')

In [743]:
# Display the processed RKI frame (the notebook shows a truncated view).
df_rki_germany_processed

Unnamed: 0_level_0,land,confirmed,dead,date,name,iso_code,capital,area,population,dow,...,dead_change_pct_3w,dead_change_acceleration,dead_change_acceleration_avg3,dead_doubling_days,dead_doubling_days_3w,dead_doubling_days_avg3,dead_doubling_days_3w_avg3,dead_day_since_10,dead_per_100k,dead_change_per_100k
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2020-03-18,Hesse,432,0,2020-03-18,Hesse,HE,Wiesbaden,21115,6265809,2,...,,,,0.000,0.000,,,0,0.000,
2020-03-19,Hesse,682,0,2020-03-19,Hesse,HE,Wiesbaden,21115,6265809,3,...,,,,0.000,0.000,,,0,0.000,0.000
2020-03-20,Hesse,813,1,2020-03-20,Hesse,HE,Wiesbaden,21115,6265809,4,...,,-inf,,0.000,0.000,0.0,0.0,0,0.016,0.016
2020-03-21,Hesse,1080,2,2020-03-21,Hesse,HE,Wiesbaden,21115,6265809,5,...,,0.000,,1.000,0.000,0.0,0.0,0,0.032,0.016
2020-03-22,Hesse,1175,2,2020-03-22,Hesse,HE,Wiesbaden,21115,6265809,6,...,,1.000,0.000,100.000,0.000,26.0,0.0,0,0.032,0.000
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2020-05-11,Baden-Wuerttemberg,33190,1545,2020-05-11,Baden-Wuerttemberg,BW,Stuttgart,35677,11069533,0,...,0.005,0.625,0.375,100.000,100.000,89.0,58.0,51,13.957,0.027
2020-05-12,Baden-Wuerttemberg,33359,1568,2020-05-12,Baden-Wuerttemberg,BW,Stuttgart,35677,11069533,1,...,0.041,-6.667,0.100,46.907,17.311,87.0,68.0,52,14.165,0.208
2020-05-13,Baden-Wuerttemberg,33518,1594,2020-05-13,Baden-Wuerttemberg,BW,Stuttgart,35677,11069533,2,...,0.048,-0.130,-1.111,42.148,14.660,59.0,37.0,53,14.400,0.235
2020-05-14,Baden-Wuerttemberg,33670,1608,2020-05-14,Baden-Wuerttemberg,BW,Stuttgart,35677,11069533,3,...,0.026,0.462,-0.158,79.266,26.635,53.0,18.0,54,14.526,0.126


In [744]:
# The RKI frame carries 'date' both as index name and as column; clearing the index name
# presumably avoids an ambiguous 'date' key in the merge below.
df_rki_germany_processed.index.name = None
# The right join keeps every RKI row; mobility values stay empty where Apple has no matching (date, land).
df_apple_processed_de.merge(df_rki_germany_processed, on=['date', 'land'], how='right')

Unnamed: 0,land,driving,walking,transit,date,confirmed,dead,name,iso_code,capital,...,dead_change_pct_3w,dead_change_acceleration,dead_change_acceleration_avg3,dead_doubling_days,dead_doubling_days_3w,dead_doubling_days_avg3,dead_doubling_days_3w_avg3,dead_day_since_10,dead_per_100k,dead_change_per_100k
0,Berlin,61.48,49.84,37.04,2020-03-18,391,0,Berlin,BE,Berlin,...,,,,0.000,0.000,,,0,0.000,
1,Berlin,54.14,38.72,29.56,2020-03-19,573,0,Berlin,BE,Berlin,...,,,,0.000,0.000,,,0,0.000,0.000
2,Berlin,47.77,33.53,24.92,2020-03-20,731,0,Berlin,BE,Berlin,...,,,,0.000,0.000,0.0,0.0,0,0.000,0.000
3,Berlin,39.78,33.29,21.68,2020-03-21,866,1,Berlin,BE,Berlin,...,,-inf,,0.000,0.000,0.0,0.0,0,0.027,0.027
4,Berlin,39.65,33.19,21.06,2020-03-22,1024,1,Berlin,BE,Berlin,...,,1.000,,100.000,0.000,25.0,0.0,0,0.027,0.000
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
939,Berlin,,,,2020-05-15,6397,177,Berlin,BE,Berlin,...,0.059,-0.333,-0.333,30.324,12.127,33.0,13.0,47,4.856,0.110
940,Bavaria,,,,2020-05-15,45143,2260,Bavaria,BY,München,...,0.041,-0.550,0.040,50.185,17.181,65.0,24.0,56,17.283,0.237
941,Mecklenburg-Western Pomerania,,,,2020-05-15,740,20,Mecklenburg-Western Pomerania,MV,Schwerin,...,0.000,,,100.000,100.000,100.0,100.0,38,1.242,0.000
942,Thuringia,,,,2020-05-15,2681,135,Thuringia,TH,Erfurt,...,0.057,0.200,-0.200,23.045,12.473,17.0,9.0,40,6.299,0.187


In [3]:
import pandas as pd
import geopandas as gpd
import json
from features import add_variables_covid, add_variables_apple

# Date stamp that is interpolated into the Apple mobility CSV file name when the file is read.
date_apple = '2020-05-14'


def apple_filter_region(df, region):
    """Reshape the rows of a single region into a date-indexed frame.

    The rows of ``df`` whose ``region`` equals ``region`` are transposed, the
    ``transportation_type`` row becomes the column labels, and the descriptive
    rows are removed so that only the per-date rows remain.

    Parameters
    ----------
    df : pandas.DataFrame
        Wide table with ``region`` and ``transportation_type`` columns; every
        column other than the descriptive ones is parsed as a date.
    region : str
        Value of the ``region`` column to select.

    Returns
    -------
    pandas.DataFrame
        Float-valued frame indexed by ``dates`` with one column per
        transportation type plus a constant ``region`` column.
    """
    meta_rows = ['geo_type', 'region', 'transportation_type', 'alternative_name']

    transposed = df.loc[df.region == region].T
    transposed.columns = transposed.loc['transportation_type', :]

    # Drop the descriptive rows; what is left is one row per date column.
    is_meta = transposed.index.isin(meta_rows)
    series_by_date = transposed[~is_meta]

    series_by_date['dates'] = pd.to_datetime(series_by_date.index)
    series_by_date = series_by_date.set_index('dates').astype('float')
    series_by_date['region'] = region

    return series_by_date


def melt_apple_df(dfapple):
    """Stack the per-region feature frames of the Apple mobility table.

    Every distinct value of ``dfapple.region`` is reshaped with
    ``apple_filter_region`` and passed through ``add_variables_apple``; the
    resulting frames are concatenated row-wise in order of first appearance.
    """
    per_region = [
        add_variables_apple(apple_filter_region(dfapple, region_name))
        for region_name in dfapple.region.unique()
    ]
    return pd.concat(per_region)


def melt_rki_df(df_rki_germany):
    """Add the derived covid variables to every land and stack the results.

    Parameters
    ----------
    df_rki_germany : pandas.DataFrame
        Frame with at least the columns ``land``, ``population``, ``confirmed``
        and ``dead``.

    Returns
    -------
    pandas.DataFrame
        Row-wise concatenation of the per-land frames after
        ``add_variables_covid`` was applied for ``confirmed`` and ``dead``.
    """
    frames = []
    for land in df_rki_germany.land.unique():
        df = df_rki_germany.loc[df_rki_germany.land == land, :].copy()
        # Take the first row by position. The former ``[...][0]`` relied on
        # integer keys falling back to positions on a non-integer (date) index,
        # which pandas has deprecated.
        pop = int(df['population'].iloc[0])
        df = add_variables_covid(df, 'confirmed', population=pop)
        df = add_variables_covid(df, 'dead', population=pop)
        frames.append(df)
    return pd.concat(frames)


# ============================== LOAD DATA ==============================
# RKI reports and the population table that is joined to them on the land name.
df_rki_germany = pd.read_csv("data-RKI-parse/RKI-reports.csv")
df_population_de = pd.read_csv('german_lander_population.csv')
# GeoJSON of the German lands; read with geopandas in the next section.
geojson_path = "../deutschlandGeoJSON/2_bundeslaender/3_mittel.geo.json"
# Apple mobility export for the date configured in `date_apple`.
dfapple = pd.read_csv(f"../AppleMobilty/applemobilitytrends-{date_apple}.csv")

# ============================== PREPARE LOADED DATA ==============================
# The 'data' column of the RKI reports is not needed further on.
df_rki_germany.drop('data', axis=1, inplace=True)

# Reduce the GeoJSON to (iso_code, geometry) and write it back out keyed by the bare land code.
df_geojson = gpd.read_file(geojson_path)
df_geojson.columns = ["iso_code", 'name', 'type', 'geometry']
df_geojson = df_geojson.loc[:, ["iso_code", 'geometry']]
# Strip the literal 'DE-' prefix from the codes.
df_geojson['iso_code'] = df_geojson.iso_code.str.replace('DE-', '', regex=False)
geojson = json.loads(df_geojson.set_index('iso_code').to_json())
# A context manager makes sure the output file is flushed and closed.
with open('data_geo_de.json', 'w') as geojson_file:
    json.dump(geojson, geojson_file)

# ============================== JOIN RKI REPORT AND POPULATION ==============================

# Join the population table to the RKI reports: 'land' on the left matches 'name' on the right.
# The inner join drops reports whose land has no population row.
df_rki_germany = df_rki_germany.merge(df_population_de,
                                        how='inner',
                                        left_on='land',
                                        right_on='name',
                                        left_index=False,
                                        right_index=False,
                                        suffixes=('_x', '_y'),)

# Parse the dates, sort chronologically and index by date; drop=False keeps 'date' as a column too.
df_rki_germany['date'] = df_rki_germany['date'].astype('datetime64[ns]')
df_rki_germany = df_rki_germany.sort_values('date', ascending=True)
df_rki_germany.set_index('date', inplace=True, drop=False)

# ============================== PROCESS RKI REPORT FOR EACH COUNTRY ==============================
# Derived variables per land, stacked into one frame; the latest date is printed as a freshness check.
df_rki_germany_processed = melt_rki_df(df_rki_germany)
print("RKI max date", max(df_rki_germany_processed.index))

# ============================== PROCESS APPLE DATA FOR EACH REGION ==============================
# Per-region mobility features, stacked into one frame.
df_apple_processed = melt_apple_df(dfapple)

# Region names to replace in the Apple data; every other name is kept unchanged.
apple_lands = {
    'Baden-Württemberg': 'Baden-Wuerttemberg',
    'The Free Hanseatic City of Bremen': 'Bremen',
    'Mecklenburg-Vorpommern': 'Mecklenburg-Western Pomerania',
}
df_apple_processed['region'] = df_apple_processed['region'].map(
    lambda name: apple_lands.get(name, name)
)

# Restrict the mobility data to regions that occur as a land in the RKI frame
# and keep only the three mobility series.
df_apple_processed_de = df_apple_processed.loc[df_apple_processed.region.isin(df_rki_germany_processed.land.unique()), ['region', 'driving', 'walking', 'transit']]
# Copy the index into a 'date' column and rename 'region' to 'land';
# a later cell merges with the RKI frame on these two columns.
df_apple_processed_de['date'] = df_apple_processed_de.index
df_apple_processed_de = df_apple_processed_de.rename(columns={'region': 'land'})

# ============================== SAVE DATA ==============================
# # RKI
# df_rki_germany_processed.to_csv('data_rki_prepared.csv')


RKI max date 2020-05-15 00:00:00


In [22]:
# Column subset of the RKI frame that is kept for the merged RKI/Apple table.
DASH_COLUMNS = ['land', 'date', 'iso_code', 'confirmed_change', 'confirmed', 'confirmed_active_cases', 'confirmed_change_per_100k',
                'confirmed_change_pct_3w', 'confirmed_doubling_days_3w_avg3', 'dead_change', 'dead', 'dead_change_per_100k', 'dead_doubling_days']


df_rki_germany_processed_dash = df_rki_germany_processed.loc[:, DASH_COLUMNS]
# df_rki_germany_processed_dash.to_csv('data_rki_prepared_dash.csv')

# # APPLE
# df_apple_processed.to_csv('data_apple_prepared.csv')
# df_apple_processed_de.to_csv('data_apple_prepared_de.csv')

# RKI & APPLE
# The index name is cleared before merging on the 'date' column (presumably to avoid an
# ambiguous 'date' key). The right join keeps every RKI row.
df_rki_germany_processed_dash.index.name = None
df_rki_de_apple = df_apple_processed_de.merge(df_rki_germany_processed_dash, on=['date', 'land'], how='right')
# df_rki_de_apple.loc[:, DASH_COLUMNS].to_csv('data_rki_apple_prepared_dash.csv')



In [23]:
# Index the merged frame by date (drop=False keeps the 'date' column), then look up the
# index label of the Hamburg row for 2020-05-12 as a spot check.
df_rki_de_apple.set_index('date', inplace=True, drop=False)
df_rki_de_apple.loc[(df_rki_de_apple.date == '2020-05-12') & (df_rki_de_apple.land == 'Hamburg'), ['driving']].index[0]

Timestamp('2020-05-12 00:00:00')

In [40]:
# Non-null count per column.
# NOTE(review): in the visible cells `df` is only assigned further down
# (`df = df_rki_de_apple.copy()`), so this cell fails on a fresh top-to-bottom run.
df.count()

land                               944
driving                            944
walking                            118
transit                            118
date                               944
iso_code                           944
confirmed_change                   928
confirmed                          944
confirmed_active_cases             752
confirmed_change_per_100k          928
confirmed_change_pct_3w            592
confirmed_doubling_days_3w_avg3    912
dead_change                        928
dead                               944
dead_change_per_100k               928
dead_doubling_days                 944
dtype: int64

In [36]:
# Work on a copy so the merged frame itself stays untouched by the fill step.
df = df_rki_de_apple.copy()

In [38]:
# Forward-fill gaps in the mobility columns separately for every land, so that a value
# is never carried over from one land into the next.
# The groups come from `df` itself (the original loop built its mask from `df_rki_de_apple`),
# and `.ffill()` replaces the deprecated `fillna(method='ffill')`.
mobility_columns = ['driving', 'walking', 'transit']
df[mobility_columns] = df.groupby('land')[mobility_columns].ffill()

In [39]:
# Inspect the driving and transit series for Bremen after the forward fill.
df.loc[ (df.land == 'Bremen'), ['driving', 'transit']]

Unnamed: 0_level_0,driving,transit
date,Unnamed: 1_level_1,Unnamed: 2_level_1
2020-03-18,57.14,
2020-03-19,54.01,
2020-03-20,49.99,
2020-03-21,40.06,
2020-03-22,39.18,
2020-03-23,49.15,
2020-03-24,47.4,
2020-03-25,47.49,
2020-03-26,50.28,
2020-03-27,51.82,
