# Statistics plotted on a map of municipalities in Norway
This notebook is a first attempt to display municipality statistics as a color-coded map.
At the moment it shows COVID-19 cases for 2021 for the area around Eidskog.

## Installing requirements
If you are on Windows and not using Anaconda, you need to run the following commands before installing packages from
requirements.txt:
```shell
pip install wheel pipwin
pipwin install gdal
pipwin install fiona
```
Install dependencies from requirements.txt:
```shell
pip install -r requirements.txt
```

In [None]:
# Imports and common functions
import ast
import json

import geopandas as gpd
import pandas as pd
from bokeh.io import output_notebook, show
from bokeh.models import GeoJSONDataSource, LinearColorMapper, ColorBar, HoverTool
from bokeh.palettes import brewer
from bokeh.plotting import figure

%matplotlib inline


def get_municipality(municipality_string):
    """Return the name of the first municipality in a stringified list.

    ``municipality_string`` is the text form of a Python literal: a sequence
    whose first element is a mapping with a ``'navn'`` key. The text is parsed
    with ``ast.literal_eval`` and the ``'navn'`` value of the first element is
    returned.
    """
    entries = ast.literal_eval(municipality_string)
    first_entry = entries[0]
    return first_entry['navn']

## Get municipality borders

The geometry of the Norwegian municipalities in GeoJSON format can be found on
[this blog](https://www.eriksmistad.no/norges-fylker-og-kommuner-i-geojson-format/).
The source is [GeoNorge](https://www.geonorge.no/), but it is using the EPSG:3857-projection (instead of UTM zone 33),
and has a reduced number of coordinates.

In [None]:
# Load the municipality borders.
# Alternative input with fewer coordinates:
#df_geo = gpd.read_file('kommuner_komprimert.json')[['kommunenummer', 'navn', 'geometry']]
df_geo = gpd.read_file('Kommuner-large.json')

# Convert to UTM zone 33 North (ETRS89 / UTM zone 33N, EPSG:25833).
# The code used here before, EPSG:3395, is World Mercator and not a UTM zone,
# so it did not perform the conversion this step describes.
df_geo = df_geo.to_crs('EPSG:25833')

# Use the municipality name as 'kommune_geo' so it can be told apart from the
# name that comes from the population data.
# NOTE(review): the line below appears to be needed only when 'navn' holds a
# stringified list (presumably the alternative input file above) - confirm.
#df_geo['navn'] = df_geo['navn'].apply(lambda x: get_municipality(x))
df_geo = df_geo.rename(columns={'navn': 'kommune_geo'})
df_geo.tail()

In [None]:
# Overview: draw every municipality in the data set.
df_geo.plot()

# Names of the municipalities that make up the area around Eidskog.
eidskog_area = [
    'Eidskog', 'Kongsvinger', 'Grue', 'Aurskog-Høland',
    'Sør-Odal', 'Nord-Odal', 'Nes',
]

# Keep only the rows whose name is in the list above, then draw that subset.
in_eidskog_area = df_geo['kommune_geo'].isin(eidskog_area)
filtered_df_geo = df_geo[in_eidskog_area]
# Not rendered: only the last expression of a cell is displayed.
filtered_df_geo.head()
filtered_df_geo.plot()

## Get COVID-19 cases

The official numbers of COVID-19 cases for Norway can be found at https://statistikk.fhi.no/.
I haven't found an API, but you can download CSV files.

Select "Smittsomme sykdommer (MSIS)" and "Lag din egen tabell". In the left selection box,
select "Sykdom", "Alvorlige, systemiske sykdommer", "Koronavirus med utbruddspotensial".
In the right selection box, select "Geografi".

It is not possible to download data for all municipalities at once. Select municipalities in
two to three counties and push the "Last ned"-button. The downloaded csv files must be merged manually.

In [None]:
# Column headers of the downloaded file, mapped to the short names used in
# the rest of this notebook.
new_column_names = {
    'Unnamed: 0': 'kommune',
    'Koronavirus med utbruddspotensial, Januar': 'Januar',
    'Koronavirus med utbruddspotensial, Februar': 'Februar',
    'Koronavirus med utbruddspotensial, Mars': 'Mars',
}

# The file is semicolon-separated; its first line is skipped so that the
# second line is used as the header row.
msis_df = pd.read_csv(
    '2021-04-02.Tilfeller-etter-sykdom-fordelt-paa-geografi-2021-Totalt.csv',
    sep=';',
    skiprows=1,
).rename(columns=new_column_names)
msis_df.head()

## Get population

The population numbers for each municipality are retrieved from Statistics Norway, using the API for
predefined datasets: https://data.ssb.no/api/v0/dataset/, table id 26975.

In [None]:
# Get latest municipality population.
# header=0 combined with names= replaces the file's own header row with the
# column names below; usecols then leaves out the 'contents' column.
population = pd.read_csv("https://data.ssb.no/api/v0/dataset/26975.csv?lang=en", encoding="ISO-8859-1", header=0,
                         names=['region', 'year', 'contents', 'population'], usecols=['region', 'year', 'population'])

# Only keep data for latest year
latest_year = population['year'].max()
population = population[population['year'] == latest_year]

# Drop non-mainland municipalities and categories
for excluded_prefix in ('K-21-22 ', 'K-23 ', 'K-Rest '):
    population = population[~population['region'].str.startswith(excluded_prefix)]

# Work on an independent copy: the frame above is a filtered selection, and
# adding columns to it directly triggers pandas' SettingWithCopyWarning.
population = population.copy()

# Extract kommunenummer and kommune, and clean up.
# 'region' is split at the first space only. n must be passed by keyword:
# newer pandas versions no longer accept it positionally.
region_parts = population['region'].str.split(' ', n=1, expand=True)
# The first part starts with a two-character prefix ('K-', as in the
# categories dropped above) that is cut off before converting to an integer.
population['kommunenummer'] = region_parts[0].str[2:].astype('int64')
population['kommune_ssb'] = region_parts[1]

# Fix bug in municipality Steinkjer: repair the name wherever it arrives as
# 'teinkj', instead of relying on it being at a fixed row position.
population['kommune_ssb'] = population['kommune_ssb'].replace('teinkj', 'Steinkjer')

# Final column selection and order; this also drops 'region'.
population = population[['kommunenummer', 'kommune_ssb', 'year', 'population']]

In [None]:
# Municipality border and population (mbp): join the border geometries with
# the population rows on the municipality number they share.
mbp = df_geo.merge(population, on='kommunenummer')
# To list rows where the two sources disagree on the name:
#mbp[mbp['kommune_ssb'] != mbp['kommune_geo']]

# Prefer the municipality name from the geo data (without Sami names), except
# for municipalities that share a name (marked by a parenthesis in the SSB name).
def alter(kommune_ssb, kommune_geo):
    """Choose which of the two municipality names to keep.

    Returns ``kommune_ssb`` when it contains an opening parenthesis,
    otherwise ``kommune_geo``.
    """
    return kommune_ssb if '(' in kommune_ssb else kommune_geo

# Decide row by row which name to keep, then reduce the two name columns to a
# single 'kommune' column.
mbp['kommune_ssb'] = mbp.apply(lambda row: alter(row['kommune_ssb'], row['kommune_geo']), axis=1)
mbp = mbp.drop(columns=['kommune_geo']).rename(columns={'kommune_ssb': 'kommune'})
# Not rendered: only the last expression of a cell is displayed.
mbp.tail()
# Save the combined border and population table next to the notebook.
mbp.to_csv('municipality_border_and_population.csv')


In [None]:
# Attach the MSIS case counts to every municipality. The left join keeps
# municipalities that have no matching row in the MSIS data.
covid_df = mbp.merge(msis_df, on='kommune', how='left')

# For MSIS data: replace NaN with zero and convert the monthly counts to int.
covid_df = covid_df.fillna(0)
month_columns = ['Januar', 'Februar', 'Mars']
covid_df = covid_df.astype(dict.fromkeys(month_columns, int))

# smittetrykk: March cases per inhabitant, scaled by 100000 * 14 / 31.
# NOTE(review): presumably this rescales the 31-day March total to a 14-day
# figure per 100 000 inhabitants - confirm.
cases_per_inhabitant = covid_df['Mars'] / covid_df['population']
covid_df['smittetrykk'] = cases_per_inhabitant * 100000 * 14 / 31


Convert to GeoJSON:

In [None]:
# Serialise the frame to a JSON string; this string is what gets handed to Bokeh.
json_data = covid_df.to_json()
# Parsed form of the same document, kept under its original name.
covid_df_json = json.loads(json_data)

Render our choropleth map using Bokeh:

In [None]:

# Data source for the map: the GeoJSON string holding one feature per municipality.
geosource = GeoJSONDataSource(geojson=json_data)

# Sequential multi-hue palette with 8 colors, reversed so that dark blue is
# the highest smittetrykk.
palette = brewer['YlGnBu'][8][::-1]

# Map values linearly onto the palette, using the fixed range 0 to 300.
color_mapper = LinearColorMapper(palette=palette, low=0, high=300)

# Custom tick labels for the color bar.
# Not passed to the ColorBar below, so they currently have no effect.
tick_labels = {'0': '0', '5': '5', '10':'10', '15':'15', '20':'20', '25':'25', '30':'30','35':'35', '40': '>40'}

# Tooltip shown when hovering over a municipality.
hover = HoverTool(tooltips=[
    ('Kommune', '@kommune'),
    ('Smittede', '@Mars'),
    ('Smittetrykk', '@smittetrykk'),
])

# Horizontal color bar that explains the color scale.
color_bar = ColorBar(
    color_mapper=color_mapper,
    label_standoff=8,
    width=500,
    height=20,
    border_line_color=None,
    location=(0, 0),
    orientation='horizontal',
)

# The figure itself: no grid lines and no axes, only the map.
p = figure(
    title='Smittetrykk mars 2021 (antall tilfeller pr. 100 000)',
    sizing_mode='scale_width',
    toolbar_location='left',
    tools='pan, wheel_zoom, reset',
)
p.add_tools(hover)
for grid in (p.xgrid, p.ygrid):
    grid.grid_line_color = None
p.axis.visible = False

# One patch per municipality, filled according to its smittetrykk.
p.patches(
    'xs', 'ys',
    source=geosource,
    fill_color={'field': 'smittetrykk', 'transform': color_mapper},
    fill_alpha=1,
    line_color='black',
    line_width=0.25,
)

# Place the color bar underneath the map.
p.add_layout(color_bar, 'below')

# Render inline in the notebook and display the figure.
output_notebook()
show(p)