# Aggregation of population by region

In this notebook, we create a new dataset `population_by_region.pkl` from the two datasets `mapping.pkl` and `population_by_brick.pkl`. Brick-level populations are summed within each region, so the new dataset gives us the population in 2022 for all BC regions (`sweden_bc`) and melanoma regions (`sweden_me`).

In [1]:
import pandas as pd 
import matplotlib.pyplot as plt
import seaborn as sns
import os

#pd.set_option('display.max_rows', None)
#pd.set_option('display.max_columns', None)

## Define helper function

In [2]:
def aggregate_population_by_region(data, column, region_columns = ("sweden_bc", "sweden_me")):
    """
    Aggregate the population per brick for each cancer region.

    For every column named in `region_columns`, the rows of `data` are grouped
    by that column and the numeric columns are summed. The per-column results
    are then stacked on top of each other, with the region-name column renamed
    to `column` so that all regions share one label column.

    Parameters
    ----------
    data : pandas.DataFrame
        Brick-level table holding the columns listed in `region_columns` and
        the numeric column(s) to be summed (e.g. 'population').
    column : str
        Name of the region-label column in the returned frame.
    region_columns : sequence of str, optional
        Columns to group by, in the order their results are stacked.
        Defaults to ("sweden_bc", "sweden_me").

    Returns
    -------
    pandas.DataFrame
        One row per distinct value of each region column, with a fresh
        0..n-1 index, the label column `column` and the summed numeric columns.
    """
    per_region_totals = []
    for region_column in region_columns:
        totals = (
            data
            .groupby(by = region_column)
            # numeric_only=True keeps text columns (brick names, the other
            # region column) out of the sum. Older pandas dropped them
            # silently; since pandas 2.0 the default tries to sum them too,
            # which would add string-concatenated columns to the result.
            .sum(numeric_only = True)
            .reset_index()
            # Give every region type the same label column name
            .rename(columns = {region_column: column})
        )
        per_region_totals.append(totals)

    # Stack the per-region-type results into one table
    return pd.concat(per_region_totals, ignore_index = True)

## Load data

In [3]:
# Load both input tables from the processed-data folder.
# NOTE: unpickling can execute arbitrary code, so only load files from a trusted source.
mapping, population_by_brick = (
    pd.read_pickle(pickle_path)
    for pickle_path in (
        "../processed_data/mapping.pkl",
        "../processed_data/population_by_brick.pkl",
    )
)

In [4]:
# Preview the mapping table: each row pairs a brick with its `sweden_bc` and `sweden_me` region.
mapping

Unnamed: 0,brick,sweden_bc,sweden_me
0,02 Norrtälje,Stockholm,Stockholm ONCO
1,04 Uppsala,Uppsala,Uppsala ONCO
2,03 Enköping,Uppsala,Uppsala ONCO
3,05 Nyköping,Sörmland-Eskilstuna,Sörmland-Eskilstuna ONCO
4,06 Katrineholm,Sörmland-Eskilstuna,Sörmland-Eskilstuna ONCO
...,...,...,...
73,85 Kungälv,Västra Götaland-Göteborg,Västra Götaland-Göteborg ONCO
74,86 Lerum/Alingsås,Västra Götaland-Alingsås,Västra Götaland-SÄS ONCO
75,91 Malmö,Skåne-Lund,Skåne ONCO
76,92 Lund,Skåne-Lund,Skåne ONCO


In [5]:
# Preview the population table: one `population` value per `brick` row.
population_by_brick

Unnamed: 0,brick,population
0,73 Stockholm-V,457418.0
1,72 Stockholm-NV,404184.0
2,75 Stockholm-S,641403.0
3,76 Stockholm-SV,414460.0
4,71 Stockholm-NO,281467.0
...,...,...
73,15 Ljungby,38790.0
74,22 Karlskrona,96295.0
75,23 Karlshamn,63256.0
76,63 Östersund,130191.0


## Aggregation steps

In [6]:
# Attach each brick's population to its region assignments.
# The left join keeps every row of `mapping`; a brick with no match in
# `population_by_brick` would end up with a missing population value.
population_bc_me = mapping.merge(population_by_brick, how = 'left', on = 'brick')

population_bc_me

Unnamed: 0,brick,sweden_bc,sweden_me,population
0,02 Norrtälje,Stockholm,Stockholm ONCO,61689.0
1,04 Uppsala,Uppsala,Uppsala ONCO,287170.0
2,03 Enköping,Uppsala,Uppsala ONCO,58281.0
3,05 Nyköping,Sörmland-Eskilstuna,Sörmland-Eskilstuna ONCO,92517.0
4,06 Katrineholm,Sörmland-Eskilstuna,Sörmland-Eskilstuna ONCO,60313.0
...,...,...,...,...
73,85 Kungälv,Västra Götaland-Göteborg,Västra Götaland-Göteborg ONCO,88901.0
74,86 Lerum/Alingsås,Västra Götaland-Alingsås,Västra Götaland-SÄS ONCO,125696.0
75,91 Malmö,Skåne-Lund,Skåne ONCO,338230.0
76,92 Lund,Skåne-Lund,Skåne ONCO,222013.0


In [7]:
# Collapse the brick-level rows into one row per region, labelled by 'territory'.
# NOTE: the name is rebound to the aggregated result, so this cell cannot be
# re-run on its own -- re-run the merge cell first to restore the brick-level frame.
population_bc_me = aggregate_population_by_region(
    data = population_bc_me,
    column = 'territory',
)
population_bc_me

Unnamed: 0,territory,population
0,Blekinge,159551.0
1,Dalarna,286928.0
2,Gävleborg-Gävle,130274.0
3,Halland-Halmstad,137517.0
4,Halland-Varberg-Falkenberg,108276.0
5,Jämtland,130191.0
6,Jönköping-Jönköping,198325.0
7,Jönköping-Nässjö-Eksjö,88216.0
8,Jönköping-Värnamo,74005.0
9,Kalmar,244488.0


In [8]:
# Save the new dataset
route0 = "../processed_data"

# makedirs(exist_ok=True) also creates missing parent directories and does not
# fail if the directory already exists, so no separate existence check is needed.
os.makedirs(route0, exist_ok = True)

print("saving file corresponding to population_by_region.pkl")
population_bc_me.to_pickle(f"{route0}/population_by_region.pkl")
# Read the file back so the cell output shows what was actually written to disk.
pd.read_pickle(f"{route0}/population_by_region.pkl")

saving file corresponding to population_by_region.pkl


Unnamed: 0,territory,population
0,Blekinge,159551.0
1,Dalarna,286928.0
2,Gävleborg-Gävle,130274.0
3,Halland-Halmstad,137517.0
4,Halland-Varberg-Falkenberg,108276.0
5,Jämtland,130191.0
6,Jönköping-Jönköping,198325.0
7,Jönköping-Nässjö-Eksjö,88216.0
8,Jönköping-Värnamo,74005.0
9,Kalmar,244488.0
