In [1]:
# Importing the libraries we need
import folium, json
import pandas as pd

In [2]:
# Locations of the input files, relative to this notebook.
# Remember to change these if your folder structure is different.
geo_json_path = "../data/raw/shapefiles/be.geojson"

# The corona figures are stored as a tab-separated file
corona_df = pd.read_csv("../data/raw/corona/be_corona.csv", sep="\t")

# Loading the country metadata directly, since it is a JSON file.
# The encoding is named explicitly: without it, open() falls back to the
# platform default, which can garble any non-ASCII text in the metadata
# on systems where that default is not UTF-8.
with open("../data/raw/metadata/be_metadata.json", "r", encoding="utf-8") as f:
    country_metadata = json.load(f)

In [3]:
# First task: replace the region names used in the corona data with iso3166-2 codes.
# Every metadata entry carries both identifiers, so we iterate over the entries
# directly and build a dictionary mapping the covid region code to the iso code.
regions_metadata = country_metadata["country_metadata"]
region_map = {entry["covid_region_code"]: entry["iso3166-2_code"] for entry in regions_metadata}

# Series.map looks up every PROVINCE in the dictionary; values that are not a
# key of the dictionary (including missing values) come back as NaN.
corona_df["region"] = corona_df["PROVINCE"].map(region_map)

# groupby drops rows whose key is NaN, so report what is about to be left out
# instead of letting those cases disappear from the totals unnoticed.
unmapped_rows = corona_df[corona_df["region"].isna()]
if not unmapped_rows.empty:
    unmapped_names = sorted(unmapped_rows["PROVINCE"].dropna().astype(str).unique())
    unmapped_cases = unmapped_rows["CASES"].sum()
    print(
        f"Warning: {len(unmapped_rows)} rows ({unmapped_cases} cases) have no "
        f"iso3166-2 code and are left out of the totals. "
        f"Unmapped PROVINCE values: {unmapped_names}"
    )

# Second task: total number of cases per region
corona_df_by_region = corona_df.groupby(by="region")["CASES"].sum().reset_index()

# To add population data we need a second dictionary from the country metadata,
# this time mapping the region iso3166-2 code to the region's population.
population_map = {entry["iso3166-2_code"]: entry["population"] for entry in regions_metadata}

# Then we can map, just like before
corona_df_by_region["population"] = corona_df_by_region["region"].map(population_map)

corona_df_by_region

Unnamed: 0,region,CASES,population
0,BE-BRU,92197,1208542
1,BE-VAN,94773,1857986
2,BE-VBR,58153,1146175
3,BE-VLI,37661,874048
4,BE-VOV,80867,1515064
5,BE-VWV,67490,1197000
6,BE-WBR,29320,403599
7,BE-WHT,114394,1344241
8,BE-WLG,104381,1106992
9,BE-WLX,21428,284638


In [4]:
# Third task: show the number of cases per region on a choropleth map.
# The folium base map comes first; its initial view is set by a centre point
# and a zoom level.
m_cases = folium.Map(location=[50.5, 4.7], zoom_start=8)

# Ingredients of the choropleth layer:
#   geo_data     - path to the geoJSON holding the region shapes
#   name         - a name for the layer
#   data/columns - the dataframe, plus the [key, value] columns to read from it
#   key_on       - the geoJSON attribute matched against the key column
#                  (here the iso code)
#   fill_color   - the colour map
#   *_opacity    - kept below 1 so the map underneath stays visible
#   legend_name  - the title shown on the legend
cases_layer = folium.Choropleth(
    geo_data=geo_json_path,
    name="cases",
    data=corona_df_by_region,
    columns=["region", "CASES"],
    key_on="properties.iso_3166_2",
    fill_color="OrRd",
    fill_opacity=0.7,
    line_opacity=0.2,
    legend_name="Number of Cases",
)
cases_layer.add_to(m_cases)

m_cases

In [5]:
# Fourth task: are more cases a bad sign, or simply the result of a larger population?
# To find out we draw a second map with the same Choropleth call, reading the
# population column instead of the case counts and using another colour map.
m_pop = folium.Map(location=[50.5, 4.7], zoom_start=8)

population_layer = folium.Choropleth(
    geo_data=geo_json_path,
    name="population",
    data=corona_df_by_region,
    columns=["region", "population"],
    key_on="properties.iso_3166_2",
    fill_color="YlGn",
    fill_opacity=0.7,
    line_opacity=0.2,
    legend_name="Population",
)
population_layer.add_to(m_pop)

m_pop

In [7]:
# Fifth task: add a column with the cases per capita (total cases divided by
# the region's population), so regions of different size can be compared.
corona_df_by_region["cases_pc"] = corona_df_by_region["CASES"] / corona_df_by_region["population"]

# And we plot it just like we did before
m_cases_pc = folium.Map(location=[50.5, 4.7], zoom_start=8)

# The layer name and legend title describe the plotted column: this map shows
# cases per capita, not the absolute number of cases.
folium.Choropleth(
    geo_data=geo_json_path,
    name="cases_pc",
    data=corona_df_by_region,
    columns=["region", "cases_pc"],
    key_on="properties.iso_3166_2",
    fill_color="OrRd",
    fill_opacity=0.7,
    line_opacity=0.2,
    legend_name="Cases per capita",
).add_to(m_cases_pc)

m_cases_pc