In [130]:
import pandas as pd
df = pd.read_excel('../../data/interim/iva_kumulativ.xlsx')
df = df[df['Region'] != 'Hela riket']
befolkning = pd.read_excel('../../data/raw/befolkning.xlsx', skiprows=9)
befolkning = befolkning[['Hela riket', 10327589, 41.313715]]
befolkning.dropna(inplace=True)
befolkning.columns = ['Region', 'Befolkning', 'Medelålder']
befolkning = befolkning[befolkning['Region'] != 'Gotlands län']
befolkning.reset_index(inplace=True)
befolkning.drop('index', axis=1, inplace=True)
befolkning

Unnamed: 0,Region,Befolkning,Medelålder
0,Stockholms län,2377081.0,39.378654
1,Uppsala län,383713.0,40.103728
2,Södermanlands län,297540.0,42.370851
3,Östergötlands län,465495.0,41.404762
4,Jönköpings län,363599.0,41.432354
5,Kronobergs län,201469.0,41.480067
6,Kalmar län,245446.0,44.083072
7,Blekinge län,159606.0,43.358574
8,Skåne län,1377827.0,40.937559
9,Hallands län,333848.0,42.233528


### Change from län to region

In [131]:
lan_to_region = {'Stockholms län': 'Region Stockholm', 'Södermanlands län': 'Region Sörmland', 
                 'Östergötlands län': 'Region Östergötland', 'Jönköpings län': 'Region Jönköpings län',
                 'Kronobergs län': 'Region Kronoberg', 'Kalmar län': 'Region Kalmar län',
                 'Blekinge län': 'Region Blekinge', 'Skåne län': 'Region Skåne',
                 'Hallands län': 'Region Halland', 'Västra Götalands län': 'Västra Götalandsregionen',
                 'Värmlands län': 'Region Värmland', 'Örebro län': 'Region Örebro län', 
                 'Västmanlands län': 'Region Västmanland', 'Dalarnas län': 'Region Dalarna',
                 'Gävleborgs län': 'Region Gävleborg', 'Västernorrlands län': 'Region Västernorrland',
                 'Jämtlands län': 'Region Jämtland Härjedalen', 'Västerbottens län': 'Region Västerbotten',
                 'Norrbottens län': 'Region Norrbotten', 'Uppsala län': 'Region Uppsala'
                }

befolkning['Region'] = [lan_to_region[val] for val in befolkning['Region']]


In [132]:
df = df.merge(befolkning, on='Region', how='inner')

## Prediktera totala antalet inläggningar den 31 mars. Använd data fram till 28 mars.

In [144]:
# Skapa data per region
def create_dataframe_per_region(df, region):
    tmp_df = df[df['Region'] == region]
    #tmp_df.drop(['2020-04-01', '2020-03-31', '2020-03-30', '2020-03-29'], axis=1, inplace=True)
    dates = tmp_df.drop(['Region', 'Befolkning', 'Medelålder'], axis=1).columns
    values = tmp_df.drop(['Region', 'Befolkning', 'Medelålder'], axis=1).iloc[0].values
    befolkning = tmp_df['Befolkning'].values[0]
    mean_age = tmp_df['Medelålder'].values[0]
    tmp_df = pd.DataFrame({'date': dates, 'iva': values})
    tmp_df['befolkning'] = befolkning 
    tmp_df['mean_age'] =  mean_age
    tmp_df = add_change_coming_x_days(tmp_df, 3)
    tmp_df = add_change_since_x_days_ago(tmp_df, 1)
    tmp_df = add_change_since_x_days_ago(tmp_df, 3)
    tmp_df = add_change_since_x_days_ago(tmp_df, 7)
    return tmp_df

In [145]:
def add_change_since_x_days_ago(df, x):
    change = []
    for i in range(len(df)):
        if i < x:
            change.append(None)
        else:
            change.append(df['iva'].iloc[i]/df['iva'].iloc[i-x])
    df['change_since_'+str(x)+'_days'] = change
    return df


def add_change_coming_x_days(df, x):
    change = []
    for i in range(len(df)):
        if len(df) - i - 1 < x:
            change.append(None)
        else:
            change.append(df['iva'].iloc[i+x]/df['iva'].iloc[i])
    df['change_coming_'+str(x)+'_days'] = change
    return df

In [147]:
# Create dataframe with all regions

regions = list(df['Region'].unique())

for i, region in enumerate(regions):
    if i == 0:
        data = create_dataframe_per_region(df, region)
        
    else:
        tmp_data = create_dataframe_per_region(df, region)
        data = pd.concat([data, tmp_data])
        
data
    

  import sys
  import sys


Unnamed: 0,date,iva,befolkning,mean_age,change_coming_3_days,change_since_1_days,change_since_3_days,change_since_7_days
0,2020-03-06,,159606.0,43.358574,,,,
1,2020-03-07,,159606.0,43.358574,,,,
2,2020-03-08,0.0,159606.0,43.358574,,,,
3,2020-03-09,0.0,159606.0,43.358574,,,,
4,2020-03-10,0.0,159606.0,43.358574,,,,
...,...,...,...,...,...,...,...,...
22,2020-03-28,49.0,1725881.0,41.079673,1.428571,1.113636,1.484848,2.722222
23,2020-03-29,60.0,1725881.0,41.079673,1.166667,1.224490,1.500000,2.857143
24,2020-03-30,65.0,1725881.0,41.079673,,1.083333,1.477273,2.500000
25,2020-03-31,70.0,1725881.0,41.079673,,1.076923,1.428571,2.333333


In [150]:
data['iva_per_1000'] = data['iva']/data['befolkning']*1000
data

Unnamed: 0,date,iva,befolkning,mean_age,change_coming_3_days,change_since_1_days,change_since_3_days,change_since_7_days,iva_per_1000
0,2020-03-06,,159606.0,43.358574,,,,,
1,2020-03-07,,159606.0,43.358574,,,,,
2,2020-03-08,0.0,159606.0,43.358574,,,,,0.000000
3,2020-03-09,0.0,159606.0,43.358574,,,,,0.000000
4,2020-03-10,0.0,159606.0,43.358574,,,,,0.000000
...,...,...,...,...,...,...,...,...,...
22,2020-03-28,49.0,1725881.0,41.079673,1.428571,1.113636,1.484848,2.722222,0.028391
23,2020-03-29,60.0,1725881.0,41.079673,1.166667,1.224490,1.500000,2.857143,0.034765
24,2020-03-30,65.0,1725881.0,41.079673,,1.083333,1.477273,2.500000,0.037662
25,2020-03-31,70.0,1725881.0,41.079673,,1.076923,1.428571,2.333333,0.040559


In [152]:
from sklearn.linear_models import LinearRegression

ModuleNotFoundError: No module named 'sklearn.linear_models'