In [1]:
# LISBON GEOSPATIAL ANALYSIS
import geopandas as gpd
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import plotly.express as px
import plotly.graph_objects as go
import os
import numpy as np
import json
import warnings
warnings.filterwarnings('ignore')

# Set Plotly default theme
import plotly.io as pio
pio.templates.default = "plotly_white"

def normalize_freguesia(df, col_name):
    if col_name in df.columns:
        df['Freguesia_Norm'] = df[col_name].str.strip().str.upper()
    return df

def plot_choropleth(gdf, column, title, cmap='viridis', vmin=None, vmax=None, categorical=False):
    """
    Consistent plotting function using Plotly for interactivity.
    """
    # Check if column exists
    if column not in gdf.columns:
        print(f"Column {column} not found. Skipping.")
        return

    # Reproject to WGS84 for Mapbox
    gdf_4326 = gdf.to_crs("EPSG:4326")
    
    # Create Hover Data
    hover_data = {'Freguesia_Norm': True, column: True}
    
    # Handle Colormap mapping (Matplotlib names to Plotly names/lists)
    color_scale = cmap
    if cmap == 'RdBu': color_scale = 'RdBu'
    elif cmap == 'OrRd': color_scale = 'OrRd'
    elif cmap == 'YlGn': color_scale = 'YlGn'
    elif cmap == 'Purples': color_scale = 'Purples'
    elif cmap == 'Blues': color_scale = 'Blues'
    elif cmap == 'Greens': color_scale = 'Greens'
    elif cmap == 'magma': color_scale = 'Magma'
    elif cmap == 'viridis': color_scale = 'Viridis'
    
    # Determine range
    range_color = [vmin, vmax] if vmin is not None and vmax is not None else None

    if categorical:
        fig = px.choropleth_mapbox(
            gdf_4326,
            geojson=gdf_4326.geometry,
            locations=gdf_4326.index,
            color=column,
            hover_name='Freguesia_Norm',
            hover_data={column: True},
            title=title,
            mapbox_style="carto-positron",
            center={"lat": 38.7223, "lon": -9.1393},
            zoom=11,
            opacity=0.7,
            color_discrete_sequence=px.colors.qualitative.Bold
        )
    else:
        fig = px.choropleth_mapbox(
            gdf_4326,
            geojson=gdf_4326.geometry,
            locations=gdf_4326.index,
            color=column,
            hover_name='Freguesia_Norm',
            hover_data={column: True},
            title=title,
            mapbox_style="carto-positron",
            center={"lat": 38.7223, "lon": -9.1393},
            zoom=11,
            opacity=0.7,
            color_continuous_scale=color_scale,
            range_color=range_color
        )
        
    fig.update_layout(margin={"r":0,"t":40,"l":0,"b":0})
    fig.show()


In [2]:
# 0. LOAD BASE GEOMETRY
from shapely.geometry import Polygon

freguesias_path = "../data/boundaries/lisboa_freguesias_oficial.geojson"
freguesias_gdf = gpd.read_file(freguesias_path).to_crs("EPSG:3763")
name_col = 'Des_Simpli' if 'Des_Simpli' in freguesias_gdf.columns else 'Freguesia'
freguesias_gdf = normalize_freguesia(freguesias_gdf, name_col)

# --- MANUAL CLIP WATER (Tagus River) ---
# Since we lack a precise land mask, we approximate the river boundary to fix densities.
# Coordinates are approximate trace of the coastline.
coast_points = [
    (-9.300, 38.690), # West limit
    (-9.235, 38.691), # Belém Tower area
    (-9.180, 38.695), # Alcântara
    (-9.150, 38.703), # Terreiro do Paço
    (-9.120, 38.710), # Santa Apolónia
    (-9.100, 38.735), # Beato/Marvila
    (-9.090, 38.750), # Braço de Prata
    (-9.085, 38.790), # Parque das Nações
    (-9.085, 38.850), # North limit (river side)
    (-8.900, 38.850), # East
    (-8.900, 38.500), # South East
    (-9.300, 38.500)  # South West
]

water_poly = Polygon(coast_points)
water_gdf = gpd.GeoDataFrame({'geometry': [water_poly]}, crs="EPSG:4326").to_crs("EPSG:3763")

# Clip (Difference)
freguesias_gdf = gpd.overlay(freguesias_gdf, water_gdf, how='difference')

# Recalculate Area
freguesias_gdf['Area_km2'] = freguesias_gdf.geometry.area / 10**6

# Master DataFrame for aggregations
master_stats = freguesias_gdf[['Freguesia_Norm', 'geometry', 'Area_km2', name_col]].copy()

print("Map clipped to remove water areas. Areas recalculated.")


Map clipped to remove water areas. Areas recalculated.


In [3]:
# 9. CLUSTER ANALYSIS (The "Coolest" Neighborhood)
print("--- 9. Cluster Analysis ---")
from sklearn.preprocessing import StandardScaler
from sklearn.cluster import KMeans

# Select features
features = ['Pop_Density', 'Tourism_Density', 'Transport_Score']
if 'Avg_Noise' in master_stats.columns: features.append('Avg_Noise')
if 'Culture_Density' in master_stats.columns: features.append('Culture_Density')
if 'Green_Score' in master_stats.columns: features.append('Green_Score')
if 'Service_Density' in master_stats.columns: features.append('Service_Density')
if 'Rent_Price_m2' in master_stats.columns: features.append('Rent_Price_m2')

print(f"Clustering with features: {features}")
data_for_clustering = master_stats.set_index('Des_Simpli')[features].dropna()

if not data_for_clustering.empty:
    scaler = StandardScaler()
    scaled_data = scaler.fit_transform(data_for_clustering)

    # Simple K-Means
    kmeans = KMeans(n_clusters=4, random_state=42)
    data_for_clustering['Cluster'] = kmeans.fit_predict(scaled_data)

    # Join back
    master_stats = master_stats.merge(data_for_clustering[['Cluster']], left_on='Des_Simpli', right_index=True, how='left')

    # Map Clusters
    master_stats['Cluster'] = master_stats['Cluster'].fillna(-1).astype(str) # -1 for missing
    
    fig_clus = px.choropleth_mapbox(
        master_stats, geojson=master_stats.geometry, locations=master_stats.index,
        color='Cluster',
        center={"lat": 38.7223, "lon": -9.1393},
        mapbox_style="carto-positron", zoom=11,
        opacity=0.7,
        hover_name='Des_Simpli',
        hover_data=features,
        title="9.1 Neighborhood Clusters"
    )
    fig_clus.update_layout(margin={"r":0,"t":40,"l":0,"b":0})
    fig_clus.show()

    # Profiling Clusters
    numeric_cols = features
    cluster_profile = data_for_clustering.groupby('Cluster')[numeric_cols].mean()
    print("Cluster Profiles (Mean Values):")
    print(cluster_profile)

    # Identify "Coolest" (e.g., High Culture, High Green, Moderate Tourism)
    # This is subjective, but we can define a score
    # Score = Culture + Green + Transport - Noise - (Tourism * 0.5)
    
    # Normalize again for scoring
    norm_df = data_for_clustering.copy()
    for c in numeric_cols:
        norm_df[c] = (norm_df[c] - norm_df[c].min()) / (norm_df[c].max() - norm_df[c].min())
    
    norm_df['Cool_Score'] = (
        norm_df.get('Culture_Density', 0) + 
        norm_df.get('Green_Score', 0) + 
        norm_df.get('Transport_Score', 0) +
        norm_df.get('Service_Density', 0) -
        norm_df.get('Avg_Noise', 0)
    )
    
    top_hoods = norm_df.sort_values('Cool_Score', ascending=False).head(5)
    print(" Top 5 'Coolest' Neighborhoods based on composite score:")
    print(top_hoods[['Cool_Score']])
else:
    print("Not enough data for clustering.")


--- 9. Cluster Analysis ---
Clustering with features: ['Pop_Density', 'Tourism_Density', 'Transport_Score']


KeyError: "None of [Index(['Pop_Density', 'Tourism_Density', 'Transport_Score'], dtype='object')] are in the [columns]"

# Conclusion
This notebook integrates multiple geospatial datasets to analyze Lisbon's neighborhoods. 
We explored:
- **Demographics**: Where people live and how it's changing.
- **Tourism**: The impact of short-term rentals.
- **Mobility**: Accessibility via public transport.
- **Culture & Leisure**: Distribution of cultural venues and green spaces.
- **Services**: Availability of essential services.

The **Cluster Analysis** helps group similar neighborhoods, identifying areas that might be gentrified (high tourism, high rent), residential havens (high green, low noise), or cultural hotspots.
The "Coolest Neighborhood" score attempts to quantify livability based on a mix of positive amenities and negative externalities like noise.
