# Config

In [23]:
# --- Data locations (relative to the notebook's working directory) ---
INPUT_DATA_PATH = "../data/external/"              # raw inputs are read from here
OUTPUT_DATA_PATH = "../data/interim/population"    # processed output is written here

# --- Run parameters ---
YEAR = 2024            # reference year; used in the input and output file names
SAVE_OUTPUT = True     # set to False to skip writing the result to disk

# --- Other settings ---
SIZE_PLOTS = (12, 10)  # presumably a (width, height) figure size — TODO confirm
# NOTE(review): 402.336 equals a quarter of 1609.344, so this is presumably
# a 0.25-mile buffer expressed in metres — TODO confirm the intended unit.
BUFFER_SIZE = 402.336

In [7]:
import numpy as np
import pandas as pd
import geopandas as gpd
from pathlib import Path


# Load data

In [20]:
# Read the population-by-sex table for YEAR. The reshaping below relies on
# the columns 'Seccio_Censal', 'SEXE' and 'Valor' being present.
gender_raw = pd.read_csv(Path(INPUT_DATA_PATH) / f'population/{YEAR}_pad_mdbas_sexe.csv')

# Reshape to one row per census section and one column per SEXE value.
# aggfunc='sum' is explicit: pivot_table defaults to the mean, which would
# silently average (rather than add up) repeated section/sex rows.
gender_wide = gender_raw.pivot_table(
    index='Seccio_Censal', columns='SEXE', values='Valor', aggfunc='sum'
)

# The FEMALE/MALE labels are assigned by position, so fail with a clear
# message when the file does not contain exactly two SEXE categories.
if gender_wide.shape[1] != 2:
    raise ValueError(
        f"Expected exactly 2 distinct SEXE values, found {list(gender_wide.columns)}"
    )

# pivot_table orders the SEXE values ascending; the first is labelled FEMALE
# and the second MALE.
# NOTE(review): confirm this order against the dataset's code list.
gender_wide.columns = ['FEMALE', 'MALE']

# Back to a flat table with 'Seccio_Censal' as a regular column.
gender = gender_wide.reset_index()
gender['TOTAL'] = gender['MALE'] + gender['FEMALE']
gender['YEAR'] = YEAR

In [10]:
# Read the census-section polygons from the administrative-units shapefile.
shapefile_path = Path(INPUT_DATA_PATH) / 'bcn_unitats_adm/0301040100_SecCens_UNITATS_ADM.shp'
sections = gpd.read_file(shapefile_path)

# Build the integer join key from DISTRICTE followed by SEC_CENS
# (presumably both are string codes, so `+` concatenates them — TODO confirm).
section_code = (sections['DISTRICTE'] + sections['SEC_CENS']).astype(int)

# Keep only the key and the geometry, then reproject to EPSG:4326.
censal_areas = (
    sections
    .assign(Seccio_Censal=section_code)
    [['Seccio_Censal', 'geometry']]
    .to_crs('EPSG:4326')
)

# Data management

## Explore data

In [21]:
# Quick sanity check of the pivoted table: dimensions, column labels, first rows.
print(gender.shape, gender.columns, sep='\n')
gender.head()

(1068, 5)
Index(['Seccio_Censal', 'FEMALE', 'MALE', 'TOTAL', 'YEAR'], dtype='object')


Unnamed: 0,Seccio_Censal,FEMALE,MALE,TOTAL,YEAR
0,1001,628,681,1309,2024
1,1002,625,657,1282,2024
2,1003,1663,1837,3500,2024
3,1004,1396,1603,2999,2024
4,1005,1109,1217,2326,2024


## Join data

In [22]:
# Attach the population figures to the census-section geometries using the
# shared 'Seccio_Censal' key (pandas' default inner join).
# NOTE(review): an inner join silently drops sections present in only one of
# the two tables — consider checking the row count after the merge.
merged = pd.merge(left=censal_areas, right=gender, on='Seccio_Censal')

# Make sure the result is a GeoDataFrame whose active geometry is 'geometry'.
g_gender = gpd.GeoDataFrame(merged, geometry='geometry')


## Save output

In [24]:
if SAVE_OUTPUT:
    # Create the output folder first: to_parquet does not create missing
    # parent directories, so a fresh checkout would otherwise fail here.
    Path(OUTPUT_DATA_PATH).mkdir(parents=True, exist_ok=True)
    # NOTE(review): the file name ends in '_' before the extension; it is kept
    # unchanged because other code may read this exact path.
    g_gender.to_parquet(f'{OUTPUT_DATA_PATH}/population{YEAR}_.parquet')

## Watermark

In [None]:
# Install with %pip so the package lands in the running kernel's environment;
# `!python -m pip` uses whichever `python` the shell finds first on PATH.
%pip install --quiet watermark

In [None]:
# Load the watermark IPython extension so the %watermark magic is available.
%load_ext watermark

In [None]:
# Record when and where the notebook was run (timestamp, Python/IPython, OS, hardware).
%watermark

Last updated: 2024-08-23T15:55:33.641180+00:00

Python implementation: CPython
Python version       : 3.10.12
IPython version      : 7.34.0

Compiler    : GCC 11.4.0
OS          : Linux
Release     : 6.1.85+
Machine     : x86_64
Processor   : x86_64
CPU cores   : 2
Architecture: 64bit



In [None]:
# Record the versions of the packages imported in this kernel session.
%watermark --iversions

json  : 2.0.9
pandas: 2.1.4
google: 2.0.3
numpy : 1.26.4



In [None]:
# Record the Linux distribution details (shell command; only works where lsb_release is installed).
!lsb_release -a

No LSB modules are available.
Distributor ID:	Ubuntu
Description:	Ubuntu 22.04.3 LTS
Release:	22.04
Codename:	jammy
