In [1]:
import pandas as pd
import math
import numpy as np
from bokeh.io import show, output_notebook
from bokeh.plotting import figure
from bokeh.models import ColumnDataSource, FactorRange, Legend, HoverTool, Label
from bokeh.transform import dodge
import bokeh.palettes
output_notebook()
# Standard plotly imports
import chart_studio.plotly as py
import plotly.graph_objs as go
from plotly.offline import iplot, init_notebook_mode
import os
from sklearn.linear_model import LinearRegression

In [2]:
path = '/Users/tuethomsen28/Google Drev/SocialDataVizz/Data'

focus_municipality = ["Albertslund","Ballerup","Brøndby","Dragør","Frederiksberg","Gentofte","Gladsaxe",
                           "Glostrup","Herlev","Hvidovre","Høje-Taastrup","Ishøj","København",
                           "Rødovre","Tårnby","Vallensbæk"]

df_speed_feb = pd.read_csv(os.path.join(path,'Fremkommelighedsanalyse_med_fraktil_og_potentialeHast_Feb.csv'),sep=";",
                 header= 2,index_col=False,decimal=",")

df_speed_mar_apr = pd.read_csv(os.path.join(path,'Fremkommelighedsanalyse_med_fraktil_og_potentiale_Hast_MarApr.csv'),sep=";",
                 header= 2,index_col=False,decimal=",")

df_passenger = pd.read_csv(os.path.join(path,"passenger_decrease.csv"))

df_age_distribution = pd.read_excel(os.path.join(path,'Data_aldersfordeling.xlsx'))

df_unemployment = pd.read_excel(os.path.join(path,'Data_fuldtidsledige.xlsx'))
df_unemployment = df_unemployment[df_unemployment.Municipality.isin(focus_municipality)].set_index("Municipality").loc[focus_municipality]

df_income = pd.read_excel(os.path.join(path,'Income2.xlsx'))
df_income.columns = ["Kommune","Income"]
df_income = df_income.set_index("Kommune")
df_income = df_income.loc[focus_municipality]

df_covid = pd.read_csv(os.path.join(path,"Corona_hosp_1603_2204.csv"),index_col=0)
df_covid["Total"] = df_covid.sum(1)
df_covid = df_covid.rename(columns={"Taarnby":"Tårnby"})

df_mean_hosp = pd.read_csv(os.path.join(path,"mean_hosp_per_mun.csv"),index_col=0)
df_mean_hosp = df_mean_hosp.rename(index={"Taarnby":"Tårnby"})

df_passenger = pd.read_csv(os.path.join(path,"passenger_decrease.csv"),index_col=0)


In [3]:
# extract the data for average speed for the covid periode
cols = ['StopPointSectionMunicipalityDisplayName','Hastighed_observeret__netto__km_t_','Hastighed_observeret__brutto__km_t_']
df_speed_mar_apr_avg_hast = df_speed_mar_apr[df_speed_mar_apr['StopPointSectionMunicipalityDisplayName'].isin(focus_municipality)][cols]

df_speed_mar_apr_avg_hast = df_speed_mar_apr_avg_hast.groupby('StopPointSectionMunicipalityDisplayName', as_index=False).agg(['sum', 'count'])

df_speed_mar_apr_avg_hast['avg_speed_netto'] = df_speed_mar_apr_avg_hast['Hastighed_observeret__netto__km_t_']['sum'].values/df_speed_mar_apr_avg_hast['Hastighed_observeret__netto__km_t_']['count'].values
df_speed_mar_apr_avg_hast['avg_speed_brutto'] = df_speed_mar_apr_avg_hast['Hastighed_observeret__brutto__km_t_']['sum'].values/df_speed_mar_apr_avg_hast['Hastighed_observeret__brutto__km_t_']['count'].values

df_speed_mar_apr_avg_hast = df_speed_mar_apr_avg_hast[['avg_speed_netto','avg_speed_brutto']]

# extract the data for average speed febuar (prior to covid)
df_speed_feb_avg_hast = df_speed_feb[df_speed_feb['StopPointSectionMunicipalityDisplayName'].isin(focus_municipality)][cols]

df_speed_feb_avg_hast = df_speed_feb_avg_hast.groupby('StopPointSectionMunicipalityDisplayName', as_index=False).agg(['sum', 'count'])

df_speed_feb_avg_hast['avg_speed_netto'] = df_speed_feb_avg_hast['Hastighed_observeret__netto__km_t_']['sum'].values/df_speed_feb_avg_hast['Hastighed_observeret__netto__km_t_']['count'].values
df_speed_feb_avg_hast['avg_speed_brutto'] = df_speed_feb_avg_hast['Hastighed_observeret__brutto__km_t_']['sum'].values/df_speed_feb_avg_hast['Hastighed_observeret__brutto__km_t_']['count'].values

df_speed_feb_avg_hast = df_speed_feb_avg_hast[['avg_speed_netto','avg_speed_brutto']]

# extract average length of stopping time for covid periode
cols = ['StopPointSectionMunicipalityDisplayName','Opholdstid_observeret_ved_stop']
df_speed_mar_apr_stop_t = df_speed_mar_apr[df_speed_mar_apr['StopPointSectionMunicipalityDisplayName'].isin(focus_municipality)][cols]

df_speed_mar_apr_stop_t = df_speed_mar_apr_stop_t.groupby('StopPointSectionMunicipalityDisplayName', as_index=False).agg(['sum', 'count'])

df_speed_mar_apr_stop_t['avg_stop_t'] = df_speed_mar_apr_stop_t['Opholdstid_observeret_ved_stop']['sum'].values/df_speed_mar_apr_stop_t['Opholdstid_observeret_ved_stop']['count'].values

# extract average length of stopping time for febuar (prior to covid)
df_speed_feb_stop_t = df_speed_feb[df_speed_feb['StopPointSectionMunicipalityDisplayName'].isin(focus_municipality)][cols]

df_speed_feb_stop_t = df_speed_feb_stop_t.groupby('StopPointSectionMunicipalityDisplayName', as_index=False).agg(['sum', 'count'])

df_speed_feb_stop_t['avg_stop_t'] = df_speed_feb_stop_t['Opholdstid_observeret_ved_stop']['sum'].values/df_speed_feb_stop_t['Opholdstid_observeret_ved_stop']['count'].values

# joining avarage speed for febuar and covid periode
df_speed_feb_avg_hast.columns = ['avg_speed_netto_feb', 'avg_speed_brutto_feb']
df_speed_mar_apr_avg_hast.columns = ['avg_speed_netto_mar_apr', 'avg_speed_brutto_mar_apr']
df_avg_hast = df_speed_feb_avg_hast.merge(df_speed_mar_apr_avg_hast, how = 'inner',left_index = True, right_index = True)
df_avg_hast = df_avg_hast[['avg_speed_brutto_feb','avg_speed_brutto_mar_apr']]
df_avg_hast.reset_index(inplace=True)

def ageInt(x):
    return (int(x.split()[0]))

df_age_distribution['Age'] = df_age_distribution['Age'].apply(ageInt)

df_age_distribution = df_age_distribution[df_age_distribution['Municipality'].isin(focus_municipality)]
df_age_distribution = df_age_distribution.sort_values(['Municipality','Age'])
focus_municipality.sort()

median_ages = []
for m in focus_municipality:
    df_tmp = df_age_distribution[df_age_distribution['Municipality']== m]
    median_num = np.round(sum(df_tmp['Number'].values)/2)
    count = 0
    for a in np.unique(df_tmp['Age']):
        count += df_tmp.iloc[a]['Number']
        if count >= median_num:
            median_ages.append(a)
            break

data = {'Municipality': focus_municipality,
        'Median age': median_ages
        }

df_median_age = pd.DataFrame(data, columns = ['Municipality', 'Median age'])



# Plotting of covid-19 vs. location

In [4]:
#################################################################################################################################################
#################################################################################################################################################
#################### Jitter plot to spot correlation between covid-19 and distance from center of Copenhagen. #############################
#################################################################################################################################################
#################################################################################################################################################

####################################################### set up data frame for the plot ######################################################
# coordinates for city halls
coor_copehagen = [55.675579, 12.570196]
coor_gentofte = [55.749205, 12.557447]
coor_Dragør = [55.596778, 12.637316]
coor_Frederiksberg = [55.678651, 12.532559]
coor_HøjeTaastrup = [55.652174,12.264740]
coor_Tårnby = [55.634091, 12.619329]
coor_Gladsaxe = [55.743183, 12.497172]
coor_Herlev = [55.725144, 12.432609]
coor_Ballerup = [55.734340, 12.364389]
coor_Glostrup = [55.665082, 12.394101]
coor_Hvidovre = [55.643127, 12.477458]
coor_Rødovre = [55.682674, 12.451599]
coor_Brøndby = [55.652989, 12.419191]
coor_Albertslund = [55.657088, 12.357865]
coor_Ishøj = [55.614225, 12.355672]
coor_Vallensbæk = [55.624284,12.386697]

# create a dict with coordinates
coor_town_halls = {'Albertslund':coor_Albertslund,
                  'Ballerup':coor_Ballerup,
                  'Brøndby': coor_Brøndby,
                  'Dragør': coor_Dragør,
                  'Frederiksberg': coor_Frederiksberg,
                  'Gentofte': coor_gentofte,
                  'Gladsaxe': coor_Gladsaxe,
                  'Glostrup': coor_Glostrup,
                  'Herlev': coor_Herlev,
                  'Hvidovre': coor_Hvidovre,
                  'Høje-Taastrup': coor_HøjeTaastrup,
                  'Ishøj': coor_Ishøj,
                  'København': coor_copehagen,
                  'Rødovre': coor_Rødovre,
                  'Tårnby':coor_Tårnby,
                  'Vallensbæk':coor_Vallensbæk}

# sort df and list of municipality to make the addition of the new col easy
df_hast_dist = df_avg_hast.sort_values('StopPointSectionMunicipalityDisplayName')
focus_municipality.sort()

# calculate the distance between copenhagen city hall and the other city halls
# list to store distances
distances = []

# radium of the earth
R = 6373.0

# coordinates for copenhagen city hall 
lat_center, lon_center = coor_town_halls['København']

# convert to radians:
lat_center = math.radians(lat_center)
lon_center = math.radians(lon_center)

for m in focus_municipality:
  # get coordinates for curent Municipality
    lat, lon = coor_town_halls[m]
  
  # convert to radians
    lat = math.radians(lat)
    lon = math.radians(lon)

  # difference i the coordinates
    dlon = lon - lon_center
    
    dlat = lat - lat_center
    
    a = math.sin(dlat / 2)**2 + math.cos(lat_center) *math.cos(lat) * math.sin(dlon / 2)**2
    
    c = 2 * math.atan2(math.sqrt(a), math.sqrt(1 - a))
    
    d = R*c
    
    distances.append(d)

# prepare the data frame
df_covid_dist = pd.DataFrame(index=focus_municipality)
df_covid_dist['distance'] = distances
df_covid_dist["covid"] = df_mean_hosp["mean_hosp"].to_list()
df_covid_dist = df_covid_dist[['covid','distance']]
df_covid_dist = df_covid_dist.reset_index().rename(columns={"index":"Kommune"})

#### Linear regression ###########################################################################################
X = df_covid_dist.distance.to_numpy().reshape(-1,1)
Y = df_covid_dist.covid.to_numpy().reshape(-1,1)

linear_regressor = LinearRegression()

linear_regressor.fit(X, Y)  # perform linear regression
Y_pred = linear_regressor.predict(X)

Y_pred = list(Y_pred.squeeze())

df_covid_dist["prediction"] = Y_pred


############################################ create the plot ##################################################################################
# define source
source = ColumnDataSource(data = df_covid_dist)
# define tooltip
TOOLTIPS = [("Municipality: ", "@Kommune"),
            ("Distance", "@distance{0.1f} km"),
            ("Average hospitalisations per 100.000: ","@covid")]

# create figure
f = figure(title = 'Distances to city center vs covid-19',
           x_axis_label='Distance in km',
           y_axis_label='Covid-19 hospitalisations per 100.000',      
           background_fill_color = "beige",background_fill_alpha = 0.8, 
           border_fill_color = "beige", border_fill_alpha = 0.8, tooltips=TOOLTIPS)

## Apply R^2 label ##############################################################################################

cor = df_covid_dist[["distance","covid"]].corr()

R_sqrt = round(cor.iloc[0,1]**2,3)


R_sqrt_Label = Label(x=0, y=495, x_units='screen', y_units='screen',
                 text=f'  R^2 value: {R_sqrt}  ', render_mode='canvas',
                 border_line_color='black', border_line_alpha=1.0,
                 background_fill_color='white', background_fill_alpha=1.0,
                 text_font_size='14pt',text_font="times")
f.add_layout(R_sqrt_Label)


## plotting #######################################################################################################

# center title
f.title.align = 'center'
# set title font size
f.title.text_font_size = '20px'
# set title font type
f.title.text_font = 'times'
# set axes labels font size
f.axis.axis_label_text_font_size = '16px'
# set axes labels font type
f.axis.axis_label_text_font = 'times'

# Create plot
f.circle(x = 'distance', y = 'covid', size=10,source=source)
#Create linear regression line
f.line(x="distance", y = "prediction",source=source,color="red")
# show plot
show(f)

In [5]:
############ prepare data ##############################################################################################
covid_mean = df_mean_hosp.mean_hosp.to_list()
df_median_age_covid = df_median_age.copy()
df_median_age_covid['covid'] = covid_mean
df_median_age_covid.columns = ['Municipality','Median_age','mean_hosp']

#### Linear regression ###########################################################################################
X = df_median_age_covid.Median_age.to_numpy().reshape(-1,1)
Y = df_median_age_covid.mean_hosp.to_numpy().reshape(-1,1)

linear_regressor = LinearRegression()

linear_regressor.fit(X, Y)  # perform linear regression
Y_pred = linear_regressor.predict(X)

Y_pred = list(Y_pred.squeeze())

df_median_age_covid["prediction"] = Y_pred


########### plot #########################################################################################################
source = ColumnDataSource(data = df_median_age_covid)

TOOLTIPS = [("Municipality: ", "@Municipality"),
            ("Median age", "@Median_age"),
            ("Average hospitalisations per 100.000: ","@mean_hosp")]
# create figure
f = figure(title = 'Median age vs covid-19 hospitalisations',
           x_axis_label='Median age',
           y_axis_label='covid 19 hospitalisations per 100.000',      
           background_fill_color = "beige",background_fill_alpha = 0.8, 
           border_fill_color = "beige", border_fill_alpha = 0.8, tooltips=TOOLTIPS)

## Apply R^2 label ##############################################################################################

cor = df_median_age_covid[["Median_age","mean_hosp"]].corr()

R_sqrt = round(cor.iloc[0,1]**2,3)


R_sqrt_Label = Label(x=0, y=495, x_units='screen', y_units='screen',
                 text=f'  R^2 value: {R_sqrt}  ', render_mode='canvas',
                 border_line_color='black', border_line_alpha=1.0,
                 background_fill_color='white', background_fill_alpha=1.0,
                 text_font_size='14pt',text_font="times")
f.add_layout(R_sqrt_Label)

# center title
f.title.align = 'center'
# set title font size
f.title.text_font_size = '20px'
# set title font type
f.title.text_font = 'times'
# set axes labels font size
f.axis.axis_label_text_font_size = '16px'
# set axes labels font type
f.axis.axis_label_text_font = 'times'

# Create plot
f.circle(x = 'Median_age', y = 'mean_hosp', size=10,source=source)
#Create linear regression line
f.line(x="Median_age", y = "prediction",source=source,color="red")
# show plot
show(f)

In [6]:
############ prepare data ##############################################################################################

pct_over_65 = []

for m in focus_municipality:
    df_temp = df_age_distribution[df_age_distribution.Municipality.isin([m])]
    ages = [int(age) for age in df_temp.Age]
    df_temp["Age"] = ages
    
    pct_over_65 += [round(df_temp[df_temp.Age >= 65].Number.sum()/df_temp.Number.sum(),3)*100]
    
df_over_65 = pd.DataFrame({"Kommune":focus_municipality,
                          "pct_over_65":pct_over_65})

covid_mean = df_mean_hosp.mean_hosp.to_list()
df_over_65_covid = df_over_65.copy()
df_over_65_covid['covid'] = covid_mean
df_over_65_covid.columns = ['Kommune','Over_65','mean_hosp']

#### Linear regression ###########################################################################################
X = df_over_65_covid.Over_65.to_numpy().reshape(-1,1)
Y = df_over_65_covid.mean_hosp.to_numpy().reshape(-1,1)

linear_regressor = LinearRegression()

linear_regressor.fit(X, Y)  # perform linear regression
Y_pred = linear_regressor.predict(X)

Y_pred = list(Y_pred.squeeze())

df_over_65_covid["prediction"] = Y_pred


######### Create figure #########################################################################################################
source = ColumnDataSource(data = df_over_65_covid)

TOOLTIPS = [("Municipality: ", "@Kommune"),
            ("Over 65 years", "@Over_65{0.2f} %"),
            ("Average hospitalisations per 100.000: ","@mean_hosp")]
# create figure
f = figure(title = 'Median age vs covid-19 hospitalisations per 100.000',
           x_axis_label='% over 65',
           y_axis_label='covid 19 hospitalisations per 100.000',      
           background_fill_color = "beige",background_fill_alpha = 0.8, 
           border_fill_color = "beige", border_fill_alpha = 0.8, tooltips=TOOLTIPS)


## Apply R^2 label ##############################################################################################

cor = df_over_65_covid[["Over_65","mean_hosp"]].corr()

R_sqrt = round(cor.iloc[0,1]**2,3)


R_sqrt_Label = Label(x=0, y=495, x_units='screen', y_units='screen',
                 text=f'  R^2 value: {R_sqrt}  ', render_mode='canvas',
                 border_line_color='black', border_line_alpha=1.0,
                 background_fill_color='white', background_fill_alpha=1.0,
                 text_font_size='14pt',text_font="times")
f.add_layout(R_sqrt_Label)

# center title
f.title.align = 'center'
# set title font size
f.title.text_font_size = '20px'
# set title font type
f.title.text_font = 'times'
# set axes labels font size
f.axis.axis_label_text_font_size = '16px'
# set axes labels font type
f.axis.axis_label_text_font = 'times'

# Create plot
f.circle(x = 'Over_65', y = 'mean_hosp', size=10,source=source)
#Create linear regression line
f.line(x="Over_65", y = "prediction",source=source,color="red")

# show plot
show(f)




A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy



# Covid-19 vs. Disposable income

In [20]:
############ prepare data ##############################################################################################
covid_mean = df_mean_hosp.mean_hosp.to_list()
df_income_covid = df_income.copy()
df_income_covid['covid'] = covid_mean
df_income_covid = df_income_covid.reset_index()
df_income_covid.columns = ['Kommune','Income','mean_hosp']


#### Linear regression ###########################################################################################

X = df_income_covid.Income.to_numpy().reshape(-1,1)

Y = df_income_covid.mean_hosp.to_numpy().reshape(-1,1)
#transform Y to fit linear regression 
Y = np.log(Y)

linear_regressor = LinearRegression()

linear_regressor.fit(X, Y)  # perform linear regression
Y_pred = linear_regressor.predict(X)

#Transform Y back
Y_pred = np.exp(list(Y_pred.squeeze()))


df_income_covid["prediction"] = Y_pred




########### Create figure #########################################################################################################
source = ColumnDataSource(data = df_income_covid)

TOOLTIPS = [("Municipality: ", "@Kommune"),
            ("Disposable", "@Income"),
            ("Average hospitalisations per 100.000: ","@mean_hosp")]

# create figure
f = figure(title = 'Disposable Income vs covid-19 hospitalisations per 100.000',
           x_axis_label='Disposable Income',
           y_axis_label='Log covid 19 hospitalisations per 100.000',
           background_fill_color = "beige",background_fill_alpha = 0.8, 
           border_fill_color = "beige", border_fill_alpha = 0.8, tooltips=TOOLTIPS,
           y_axis_type="log"
          )

## Apply R^2 label ##############################################################################################

df_income_covid["log_mean_hosp"] = np.log(df_income_covid["mean_hosp"])

cor = df_income_covid[["Income","log_mean_hosp"]].corr()

R_sqrt = round(cor.iloc[0,1]**2,3)


R_sqrt_Label = Label(x=0, y=495, x_units='screen', y_units='screen',
                 text=f'  R^2 value: {R_sqrt}  ', render_mode='canvas',
                 border_line_color='black', border_line_alpha=1.0,
                 background_fill_color='white', background_fill_alpha=1.0,
                 text_font_size='14pt',text_font="times")

f.add_layout(R_sqrt_Label)

# center title
f.title.align = 'center'
# set title font size
f.title.text_font_size = '20px'
# set title font type
f.title.text_font = 'times'
# set axes labels font size
f.axis.axis_label_text_font_size = '16px'
# set axes labels font type
f.axis.axis_label_text_font = 'times'

# Create plot
f.circle(x = 'Income', y = 'mean_hosp', size=10,source=source)
#Create linear regression line
f.line(x="Income", y = "prediction",source=source,color="red")
# show plot
show(f)

Another method: https://kite.com/python/answers/how-to-do-exponential-and-logarithmic-curve-fitting-in-python

# Covid-19 vs. Unemployment

In [None]:
############ prepare data ##############################################################################################
people_per_mun = pd.read_csv(os.path.join(path,"aldersfordeling_total.csv"),index_col=0)

df_pct_unemployed = df_unemployment["2020M02"] / people_per_mun.Indbyggertal

covid_mean = df_mean_hosp.mean_hosp.to_list()
df_unemployment_covid = pd.DataFrame(df_pct_unemployed*100)
df_unemployment_covid['covid'] = covid_mean
df_unemployment_covid = df_unemployment_covid.reset_index()
df_unemployment_covid.columns = ['Kommune','Unemployment','mean_hosp']


#### Linear regression ###########################################################################################
X = df_unemployment_covid.Unemployment.to_numpy().reshape(-1,1)
Y = df_unemployment_covid.mean_hosp.to_numpy().reshape(-1,1)

linear_regressor = LinearRegression()

linear_regressor.fit(X, Y)  # perform linear regression
Y_pred = linear_regressor.predict(X)

Y_pred = list(Y_pred.squeeze())

df_unemployment_covid["predicted_hosp"] = Y_pred


########### Create figure #########################################################################################################
source = ColumnDataSource(data = df_unemployment_covid)

TOOLTIPS = [("Municipality: ", "@Kommune"),
            ("Unemployment rate", "@Unemployment{0.2f}%"),
            ("Average hospitalisations per 100.000: ","@mean_hosp")]

# create figure
f = figure(title = '% unempoyment vs covid-19 hospitalisations per 100.000',
           x_axis_label='Unemployment rate (%)',
           y_axis_label='Covid 19 hospitalisations per 100.000',
           background_fill_color = "beige",background_fill_alpha = 0.8, 
           border_fill_color = "beige", border_fill_alpha = 0.8, tooltips=TOOLTIPS,
           #y_axis_type="log"
          )


## Apply R^2 label ##############################################################################################

cor = df_unemployment_covid[["Unemployment","mean_hosp"]].corr()

R_sqrt = round(cor.iloc[0,1]**2,3)


R_sqrt_Label = Label(x=0, y=495, x_units='screen', y_units='screen',
                 text=f'  R^2 value: {R_sqrt}  ', render_mode='canvas',
                 border_line_color='black', border_line_alpha=1.0,
                 background_fill_color='white', background_fill_alpha=1.0,
                 text_font_size='14pt',text_font="times")
f.add_layout(R_sqrt_Label)

## Plot ####################################################################################################

# center title
f.title.align = 'center'
# set title font size
f.title.text_font_size = '20px'
# set title font type
f.title.text_font = 'times'
# set axes labels font size
f.axis.axis_label_text_font_size = '16px'
# set axes labels font type
f.axis.axis_label_text_font = 'times'

# Create plot
f.circle(x = 'Unemployment', y = 'mean_hosp', size=10,source=source)

#Create linear regression line
f.line(x="Unemployment", y = "predicted_hosp",source=source,color="red")

# show plot
show(f)

# Covid-19 vs. Speed increase

In [None]:
############ prepare data ##############################################################################################
df_avg_hast["pct increase"] = (df_avg_hast["avg_speed_brutto_mar_apr"] - df_avg_hast["avg_speed_brutto_feb"])/df_avg_hast["avg_speed_brutto_feb"]

df_temp = df_avg_hast.set_index("StopPointSectionMunicipalityDisplayName")

df_covid_max = df_covid[df_covid.index < "2020-04-01"].max()

df_covid_avg_after = df_covid[df_covid.index > "2020-04-01"].mean(0)

df_covid_decrease = pd.DataFrame((df_covid_max - df_covid_avg_after) / df_covid_max,columns=["covid_decrease"])

df_covid_decrease["speed_increase"] = df_temp["pct increase"]

df_covid_decrease = df_covid_decrease.iloc[:-1]

df_covid_decrease = df_covid_decrease.reset_index()

df_covid_decrease.columns = ['Kommune','Covid_decrease','Speed_increase']

#### Linear regression ###########################################################################################
X = df_covid_decrease.Speed_increase.to_numpy().reshape(-1,1)
Y = df_covid_decrease.Covid_decrease.to_numpy().reshape(-1,1)

linear_regressor = LinearRegression()

linear_regressor.fit(X, Y)  # perform linear regression
Y_pred = linear_regressor.predict(X)

Y_pred = list(Y_pred.squeeze())

df_covid_decrease["predicted_decrease"] = Y_pred


########### Create figure #########################################################################################################
source = ColumnDataSource(data = df_covid_decrease)

TOOLTIPS = [("Municipality: ", "@Kommune"),
            ("Increase in speed", "@Speed_increase{%0.2f}"),
            ("Decrease in hospilitations: ","@Covid_decrease{%0.2f}")]

# create figure
f = figure(title = '% increase in speed vs. % decrease in hospitalisations',
           x_axis_label='Increase in speed (%)',
           y_axis_label='Decrease in hospitalisations (%)',
           background_fill_color = "beige",background_fill_alpha = 0.8, 
           border_fill_color = "beige", border_fill_alpha = 0.8, tooltips=TOOLTIPS,
           #y_axis_type="log"
          )

## Apply R^2 label ##############################################################################################

cor = df_covid_decrease[["Speed_increase","Covid_decrease"]].corr()

R_sqrt = round(cor.iloc[0,1]**2,3)


R_sqrt_Label = Label(x=0, y=495, x_units='screen', y_units='screen',
                 text=f'  R^2 value: {R_sqrt}  ', render_mode='canvas',
                 border_line_color='black', border_line_alpha=1.0,
                 background_fill_color='white', background_fill_alpha=1.0,
                 text_font_size='14pt',text_font="times")
f.add_layout(R_sqrt_Label)


## Create style and plot circles and line ###################################################################################

# center title
f.title.align = 'center'
# set title font size
f.title.text_font_size = '20px'
# set title font type
f.title.text_font = 'times'
# set axes labels font size
f.axis.axis_label_text_font_size = '16px'
# set axes labels font type
f.axis.axis_label_text_font = 'times'

# Create circle
f.circle(x = 'Speed_increase', y = 'Covid_decrease', size=10,source=source)

#Create linear regression line
f.line(x="Speed_increase", y = "predicted_decrease",source=source,color="red")

# show plot
show(f)

# Covid-19 vs. passenger decrease

In [None]:
############ prepare data ##############################################################################################
df_covid_passenger = df_passenger/100

df_temp = df_covid_decrease.copy().set_index("Kommune")["Covid_decrease"]

df_covid_passenger["covid_decrease"] = df_temp

df_covid_passenger = df_covid_passenger.reset_index()

#### Linear regression ###########################################################################################
X = df_covid_passenger.per_2020.to_numpy().reshape(-1,1)
Y = df_covid_passenger.covid_decrease.to_numpy().reshape(-1,1)

linear_regressor = LinearRegression()

linear_regressor.fit(X, Y)  # perform linear regression
Y_pred = linear_regressor.predict(X)

Y_pred = list(Y_pred.squeeze())

df_covid_passenger["predicted_decrease"] = Y_pred



########### Create figure #########################################################################################################
source = ColumnDataSource(data = df_covid_passenger)

TOOLTIPS = [("Municipality: ", "@Kommune"),
            ("Decrease in passengers", "@per_mar{%0.2f}"),
            ("Decrease in hospilitations: ","@covid_decrease{%0.2f}")]

# create figure
f = figure(title = '% decrease in passengers vs. % decrease in hospitalisations',
           x_axis_label='Decrease in passengers (%)',
           y_axis_label='Decrease in hospitalisations (%)',
           background_fill_color = "beige",background_fill_alpha = 0.8, 
           border_fill_color = "beige", border_fill_alpha = 0.8, tooltips=TOOLTIPS,
           #y_axis_type="log"
          )

## Apply R^2 label ##############################################################################################

cor = df_covid_passenger[["per_2020","covid_decrease"]].corr()

R_sqrt = round(cor.iloc[0,1]**2,3)


R_sqrt_Label = Label(x=50, y=495, x_units='screen', y_units='screen',
                 text=f'  R^2 value: {R_sqrt}  ', render_mode='canvas',
                 border_line_color='black', border_line_alpha=1.0,
                 background_fill_color='white', background_fill_alpha=1.0,
                 text_font_size='14pt',text_font="times")

f.add_layout(R_sqrt_Label)

## plot #########################################################################################################

# center title
f.title.align = 'center'
# set title font size
f.title.text_font_size = '20px'
# set title font type
f.title.text_font = 'times'
# set axes labels font size
f.axis.axis_label_text_font_size = '16px'
# set axes labels font type
f.axis.axis_label_text_font = 'times'

# Create plot
f.circle(x = 'per_2020', y = 'covid_decrease', size=10,source=source)

#Create linear regression line
f.line(x="per_2020", y = "predicted_decrease",source=source,color="red")


# show plot
show(f)

In [9]:
df_covid_passenger[["per_mar","covid_decrease"]].corr()**2

NameError: name 'df_covid_passenger' is not defined

In [10]:
df_covid_decrease[["Speed_increase","Covid_decrease"]].corr()**2

NameError: name 'df_covid_decrease' is not defined

In [11]:
df_unemployment_covid.corr()**2

NameError: name 'df_unemployment_covid' is not defined

In [14]:
df_income_covid["mean_hosp"] = np.log(df_income_covid["mean_hosp"])

df_income_covid.corr()**2

Unnamed: 0,Income,mean_hosp,prediction,log_mean_hosp
Income,1.0,0.722956,0.961663,0.384577
mean_hosp,0.722956,1.0,0.579876,0.733754
prediction,0.961663,0.579876,1.0,0.332305
log_mean_hosp,0.384577,0.733754,0.332305,1.0


In [None]:
R_sqrt

In [None]:
############ prepare data ##############################################################################################
df_avg_hast["pct increase"] = (df_avg_hast["avg_speed_brutto_mar_apr"] - df_avg_hast["avg_speed_brutto_feb"])/df_avg_hast["avg_speed_brutto_feb"]

df_temp = df_avg_hast.set_index("StopPointSectionMunicipalityDisplayName")

df_covid_max = df_covid[df_covid.index < "2020-04-01"].max()

df_covid_avg_after = df_covid[df_covid.index > "2020-04-01"].mean(0)

df_covid_decrease = pd.DataFrame((df_covid_max - df_covid_avg_after) / df_covid_max,columns=["covid_decrease"])

df_covid_decrease["speed_increase"] = df_temp["pct increase"]

df_covid_decrease = df_covid_decrease.iloc[:-1]

df_covid_decrease = df_covid_decrease.reset_index()

df_covid_decrease.columns = ['Kommune','Covid_decrease','Speed_increase']

#### Linear regression ###########################################################################################
X = df_covid_decrease.Speed_increase.to_numpy().reshape(-1,1)
Y = df_covid_decrease.Covid_decrease.to_numpy().reshape(-1,1)

linear_regressor = LinearRegression()

linear_regressor.fit(X, Y)  # perform linear regression
Y_pred = linear_regressor.predict(X)

Y_pred = list(Y_pred.squeeze())

df_covid_decrease["predicted_decrease"] = Y_pred

df_covid_decrease


########### plot #########################################################################################################
source = ColumnDataSource(data = df_covid_decrease)

TOOLTIPS = [("Municipality: ", "@Kommune"),
            ("Increase in speed", "@Speed_increase{%0.2f}"),
            ("Decrease in hospilitations: ","@Covid_decrease{%0.2f}")]

# create figure
f = figure(title = '% increase in speed vs. % decrease in hospitalisations',
           x_axis_label='Increase in speed (%)',
           y_axis_label='Decrease in hospitalisations (%)',
           background_fill_color = "beige",background_fill_alpha = 0.8, 
           border_fill_color = "beige", border_fill_alpha = 0.8, tooltips=TOOLTIPS,
           #y_axis_type="log"
          )

## Apply R^2 label ##############################################################################################

cor = df_covid_decrease[["Speed_increase","Covid_decrease"]].corr()

R_sqrt = round(cor.iloc[0,1]**2,2)


R_sqrt_Label = Label(x=0, y=495, x_units='screen', y_units='screen',
                 text=f'  R^2 value: {R_sqrt}  ', render_mode='canvas',
                 border_line_color='black', border_line_alpha=1.0,
                 background_fill_color='white', background_fill_alpha=1.0,
                 text_font_size='14pt',text_font="times")
f.add_layout(R_sqrt_Label)

# center title
f.title.align = 'center'
# set title font size
f.title.text_font_size = '20px'
# set title font type
f.title.text_font = 'times'
# set axes labels font size
f.axis.axis_label_text_font_size = '16px'
# set axes labels font type
f.axis.axis_label_text_font = 'times'

# Create circle
f.circle(x = 'Speed_increase', y = 'Covid_decrease', size=10,source=source)

#Create linear regression line
f.line(x="Speed_increase", y = "predicted_decrease",source=source,color="red")

# show plot
show(f)

In [None]:
############ prepare data ##############################################################################################

#df_covid_decrease = pd.DataFrame((df_covid_max - df_covid_avg_after) / df_covid_max,columns=["covid_decrease"])

#df_covid_decrease = df_covid_decrease.iloc[:-1]

#df_covid_decrease["Population"] = popu

#df_covid_decrease = df_covid_decrease.reset_index()

#df_covid_decrease.columns = ['Kommune','Covid_decrease','Population']


########### plot #########################################################################################################
#source = ColumnDataSource(data = df_covid_decrease)

#TOOLTIPS = [("Municipality: ", "@Kommune"),
#            ("Increase in speed", "@Population"),
#            ("Decrease in hospilitations: ","@Covid_decrease{%0.2f}")]

# create figure
#f = figure(title = '% increase in speed vs. % decrease in hospitalisations',
#           x_axis_label='Decrease in hospitalisations (%)',
#           y_axis_label='Increase in speed (%)',
#           background_fill_color = "beige",background_fill_alpha = 0.8, 
#           border_fill_color = "beige", border_fill_alpha = 0.8, tooltips=TOOLTIPS,
#           x_axis_type="log"
#          )

# center title
#f.title.align = 'center'
# set title font size
#f.title.text_font_size = '20px'
# set title font type
#f.title.text_font = 'times'
# set axes labels font size
#f.axis.axis_label_text_font_size = '16px'
# set axes labels font type
#f.axis.axis_label_text_font = 'times'

# Create plot
#f.circle(x = 'Population', y = 'Covid_decrease', size=10,source=source)
# show plot
#show(f)