In [12]:
import geopandas as gpd
import pandas as pd
from glob import glob

# Dataset description

Details of the administrative units of the city of Barcelona: districts, neighbourhoods, and census tracts.

# Columns description

1) CONJ_DESCR: type of administrative unit; the values are the Catalan labels `Districtes` (district), `Barris` (neighbourhood), and `Secció censal` (census tract).
2) DISTRICTE: district code.
3) BARRI: neighbourhood code.
4) SEC_CENS: census tract code.
5) NOM: name of the district or neighbourhood (census tracts do not have a name).
6) PERIMETRE: perimeter of the administrative unit polygon (meters).
7) AREA: area of the administrative unit polygon (square meters).
8) RANGESCALA: scale of the polygons.
9) geometry: administrative unit polygon.

# Loading the data

In [7]:
# Lift pandas' display limits so no rows or columns are elided in outputs
pd.options.display.max_rows = None
pd.options.display.max_columns = None

In [None]:
# Read the districts shapefile into a GeoDataFrame.
# The path uses forward slashes so it resolves on Windows, Linux and macOS
# alike (a backslash-separated path only works on Windows).
file_path = "../data/administrative_units/0301040100_Districtes_UNITATS_ADM.shp"
# The attribute table is decoded as latin-1 so accented names are read correctly.
gdf = gpd.read_file(file_path, encoding="latin-1")

# Display the first few rows
gdf.head(2)

Unnamed: 0,ID_ANNEX,ANNEXDESCR,ID_TEMA,TEMA_DESCR,ID_CONJUNT,CONJ_DESCR,ID_SUBCONJ,SCONJ_DESC,ID_ELEMENT,ELEM_DESCR,NIVELL,NDESCR_CA,NDESCR_ES,NDESCR_EN,TERME,DISTRICTE,BARRI,AEB,SEC_CENS,GRANBARRI,ZUA,AREA_I,LITERAL,PERIMETRE,AREA,ORD_REPRES,CODI_UA,TIPUS_UA,NOM,WEB1,WEB2,WEB3,DOCUMENTA,RANGESCALA,TIPUS_POL,GRUIX_ID,GRUIXDIMEN,ESTIL_ID,ESTIL_QGIS,VALOR1QGIS,VALOR2QGIS,COL_FARCIT,FCOL_DESCR,FHEX_COLOR,COL_DESCR,HEX_COLOR7,geometry
0,1,Grup - I,104,Unitats Administratives,10412,Districtes,1041201,Districte,104120101,Límit de districte,ADM_02_PL,Límit de districte (polígon),Límite de distrito (polígono),District boundary (polygon),80193,1,-,-,-,-,-,-,1,21366.962,4204930.807,5,1,DISTRICTE,Ciutat Vella,http://www.bcn.cat/ciutatvella,http://www.bcn.cat/estadistica/catala/dades/gu...,http://www.bcn.cat/estadistica/catala/document...,,1-150000,,6,70.0,0,Sòlid,0,0,1,Negre,#000000,Negre,#000000,"POLYGON ((431733.736 4582441.816, 431827.673 4..."
1,1,Grup - I,104,Unitats Administratives,10412,Districtes,1041201,Districte,104120101,Límit de districte,ADM_02_PL,Límit de districte (polígon),Límite de distrito (polígono),District boundary (polygon),80193,2,-,-,-,-,-,-,2,13931.644,7464303.216,5,2,DISTRICTE,Eixample,http://www.bcn.cat/eixample,http://www.bcn.cat/estadistica/catala/dades/gu...,http://www.bcn.cat/estadistica/catala/document...,,1-150000,,6,70.0,0,Sòlid,0,0,1,Negre,#000000,Negre,#000000,"POLYGON ((432033.184 4583665.032, 432032.069 4..."


In [46]:
# Folder containing the shapefiles
folder_path = "../data/administrative_units"

# List all shapefiles in the folder. glob() returns matches in an arbitrary,
# filesystem-dependent order, so sort them to keep the row order of the
# concatenated frame reproducible across machines.
shp_files = sorted(glob(f"{folder_path}/*.shp"))
if not shp_files:
    # Fail with an actionable message instead of pd.concat's
    # "No objects to concatenate" further down.
    raise FileNotFoundError(f"No shapefiles found in {folder_path!r}")

# Columns to keep
cols_to_keep = [
    "CONJ_DESCR",
    "DISTRICTE",
    "BARRI",
    "SEC_CENS",
    "NOM",
    "PERIMETRE",
    "AREA",
    "RANGESCALA",
    "geometry"
]


def read_admin_units(shp_path, columns):
    """Read one shapefile and keep only the requested columns.

    Parameters
    ----------
    shp_path : str
        Path to the ``.shp`` file.
    columns : list of str
        Columns to keep, in this order. Columns that are absent from the
        file are skipped rather than raising.

    Returns
    -------
    GeoDataFrame
        The subset of the file's attribute table (plus geometry, if listed).
    """
    layer = gpd.read_file(shp_path, encoding="latin-1")
    return layer[[col for col in columns if col in layer.columns]]


# Read each shapefile and subset its columns. A helper plus comprehension is
# used so the `gdf` variable from the earlier cell is not overwritten.
gdf_list = [read_admin_units(shp, cols_to_keep) for shp in shp_files]

# Concatenate all GeoDataFrames into one
full_gdf = gpd.GeoDataFrame(pd.concat(gdf_list, ignore_index=True))

print(len(full_gdf))

# Display the first few rows of the concatenated GeoDataFrame
full_gdf.head()

1151


Unnamed: 0,CONJ_DESCR,DISTRICTE,BARRI,SEC_CENS,NOM,PERIMETRE,AREA,RANGESCALA,geometry
0,Barris,1,1,-,el Raval,5521.647,1100286.137,1-50000,"POLYGON ((430162.188 4581936.984, 430550.104 4..."
1,Barris,1,2,-,el Barri Gòtic,5198.0,815593.938,1-50000,"POLYGON ((431189.907 4581851.448, 431332.429 4..."
2,Barris,1,3,-,la Barceloneta,13853.129,1179381.956,1-50000,"POLYGON ((432798.734 4582081.26, 432805.852 45..."
3,Barris,1,4,-,"Sant Pere, Santa Caterina i la Ribera",4664.483,1109668.777,1-50000,"POLYGON ((431733.736 4582441.816, 431827.673 4..."
4,Barris,2,5,-,el Fort Pienc,4137.329,929355.787,1-50000,"POLYGON ((431741.815 4582625.649, 431645.093 4..."


# Data Types

In [17]:
# Data type of each column in the combined GeoDataFrame
full_gdf.dtypes

CONJ_DESCR      object
DISTRICTE       object
BARRI           object
SEC_CENS        object
PERIMETRE      float64
AREA           float64
RANGESCALA      object
geometry      geometry
dtype: object

# EDA

In [18]:
# Column names, non-null counts, dtypes and memory usage of the combined frame
full_gdf.info()

<class 'geopandas.geodataframe.GeoDataFrame'>
RangeIndex: 1151 entries, 0 to 1150
Data columns (total 8 columns):
 #   Column      Non-Null Count  Dtype   
---  ------      --------------  -----   
 0   CONJ_DESCR  1151 non-null   object  
 1   DISTRICTE   1151 non-null   object  
 2   BARRI       1151 non-null   object  
 3   SEC_CENS    1151 non-null   object  
 4   PERIMETRE   1151 non-null   float64 
 5   AREA        1151 non-null   float64 
 6   RANGESCALA  1151 non-null   object  
 7   geometry    1151 non-null   geometry
dtypes: float64(2), geometry(1), object(5)
memory usage: 72.1+ KB


## Summary statistics for numerical variables

In [19]:
# Summary statistics (count, mean, std, min, quartiles, max) of the numeric columns
full_gdf.describe()

Unnamed: 0,PERIMETRE,AREA
count,1151.0,1151.0
mean,1666.549462,265079.5
std,3308.390847,1349198.0
min,426.279,10458.69
25%,744.3275,26950.25
50%,898.0,37748.16
75%,1253.5945,66854.53
max,46711.857,22879850.0


## Missing values

In [20]:
# Number of missing values in each column
full_gdf.isna().sum()

CONJ_DESCR    0
DISTRICTE     0
BARRI         0
SEC_CENS      0
PERIMETRE     0
AREA          0
RANGESCALA    0
geometry      0
dtype: int64

In [25]:
# Count how many rows belong to each administrative level (CONJ_DESCR)
full_gdf["CONJ_DESCR"].value_counts()

CONJ_DESCR
Secció censal    1068
Barris             73
Districtes         10
Name: count, dtype: int64

In [27]:
import sys
!{sys.executable} -m pip install -U folium

Collecting folium
  Using cached folium-0.19.5-py2.py3-none-any.whl.metadata (4.1 kB)
Collecting branca>=0.6.0 (from folium)
  Using cached branca-0.8.1-py3-none-any.whl.metadata (1.5 kB)
Using cached folium-0.19.5-py2.py3-none-any.whl (110 kB)
Using cached branca-0.8.1-py3-none-any.whl (26 kB)
Installing collected packages: branca, folium
Successfully installed branca-0.8.1 folium-0.19.5



[notice] A new release of pip is available: 24.3.1 -> 25.0.1
[notice] To update, run: python.exe -m pip install --upgrade pip


In [29]:
# Upgrade pip itself, using the interpreter that runs this kernel.
# Relies on `sys` having been imported by an earlier cell.
!{sys.executable} -m pip install --upgrade pip

Collecting pip
  Using cached pip-25.0.1-py3-none-any.whl.metadata (3.7 kB)
Using cached pip-25.0.1-py3-none-any.whl (1.8 MB)
Installing collected packages: pip
  Attempting uninstall: pip
    Found existing installation: pip 24.3.1
    Uninstalling pip-24.3.1:
      Successfully uninstalled pip-24.3.1
Successfully installed pip-25.0.1


In [36]:
# Distinct administrative-level labels; the map cell filters on these exact strings
full_gdf['CONJ_DESCR'].unique()

array(['Barris', 'Districtes', 'Secció censal'], dtype=object)

In [None]:
import folium
from pathlib import Path

# Ensure the GeoDataFrame is in EPSG:4326 (latitude/longitude), which is the
# coordinate system the folium map is positioned in.
full_gdf = full_gdf.to_crs(epsg=4326)


def select_level(gdf, level):
    """Return a copy of the rows of ``gdf`` for one administrative level.

    Parameters
    ----------
    gdf : GeoDataFrame
        Frame with ``CONJ_DESCR``, ``AREA`` and ``PERIMETRE`` columns.
    level : str
        Value of ``CONJ_DESCR`` to keep (e.g. ``"Districtes"``).

    Returns
    -------
    GeoDataFrame
        Independent copy of the matching rows with two extra string columns,
        ``AREA_fmt`` and ``PERIMETRE_fmt``, holding the numeric values
        formatted with thousands separators for use in tooltips.
    """
    # .copy() so adding columns does not trigger SettingWithCopyWarning.
    subset = gdf[gdf["CONJ_DESCR"] == level].copy()
    for column in ("AREA", "PERIMETRE"):
        subset[f"{column}_fmt"] = subset[column].map("{:,}".format)
    return subset


def make_style(color, weight):
    """Build a folium style function with a low-opacity fill.

    Parameters
    ----------
    color : str
        Hex colour used for both the border and the fill.
    weight : int
        Border thickness.

    Returns
    -------
    callable
        Function taking a GeoJSON feature and returning its style dict.
    """
    def style(feature):
        return {
            'fillColor': color,
            'fillOpacity': 0.2,
            'color': color,
            'weight': weight,
        }
    return style


def make_layer(gdf, name, style_function, fields, aliases):
    """Wrap ``gdf`` in a toggleable feature group with a hover tooltip.

    Parameters
    ----------
    gdf : GeoDataFrame
        Geometries (in EPSG:4326) and the attributes shown in the tooltip.
    name : str
        Layer name shown in the layer control.
    style_function : callable
        Style function applied to every feature.
    fields : list of str
        Columns of ``gdf`` to show in the tooltip.
    aliases : list of str
        Labels displayed for ``fields``, in the same order.

    Returns
    -------
    folium.FeatureGroup
        Group containing a single GeoJson layer.
    """
    group = folium.FeatureGroup(name=name)
    group.add_child(
        folium.GeoJson(
            gdf,
            name=name,
            style_function=style_function,
            tooltip=folium.GeoJsonTooltip(fields=fields, aliases=aliases),
        )
    )
    return group


# One frame per administrative level, each with formatted AREA / PERIMETRE
# columns for the tooltips.
districts = select_level(full_gdf, "Districtes")
neighbourhoods = select_level(full_gdf, "Barris")
censustracts = select_level(full_gdf, "Secció censal")

# Compute the union of all geometries and get the centroid for centering the map.
union_geom = full_gdf.geometry.union_all()
centroid = union_geom.centroid
# folium expects [latitude, longitude], i.e. [y, x].
map_center = [centroid.y, centroid.x]

# Create a folium map
m = folium.Map(location=map_center, zoom_start=12)

# Distinct border colours per level; coarser levels get thicker borders so
# they stay visible when several layers are shown together.
style_districts = make_style('#7570b3', weight=6)
style_neighbourhoods = make_style('#e7298a', weight=3)
style_censustracts = make_style('#a6761d', weight=2)

# Districts and neighbourhoods have a name; census tracts do not, so their
# tooltip only shows area and perimeter.
named_fields = ['NOM', 'AREA_fmt', 'PERIMETRE_fmt']
named_aliases = ['Name:', 'Area:', 'Perimeter:']
unnamed_fields = ['AREA_fmt', 'PERIMETRE_fmt']
unnamed_aliases = ['Area:', 'Perimeter:']

# Create one feature group per administrative level.
fg_districts = make_layer(
    districts, "Districts", style_districts, named_fields, named_aliases
)
fg_neighbourhoods = make_layer(
    neighbourhoods, "Neighbourhoods", style_neighbourhoods, named_fields, named_aliases
)
fg_censustracts = make_layer(
    censustracts, "Census Tracts", style_censustracts, unnamed_fields, unnamed_aliases
)

# Add the feature groups to the map.
for layer in (fg_districts, fg_neighbourhoods, fg_censustracts):
    m.add_child(layer)

# Add a layer control to toggle between layers.
m.add_child(folium.LayerControl())

# Save the interactive map to an HTML file, creating the output folder first
# so the save does not fail on a fresh checkout.
output_path = Path("../notebooks/outputs/administrative_units_map.html")
output_path.parent.mkdir(parents=True, exist_ok=True)
m.save(str(output_path))