In [1]:
# LISBON GEOSPATIAL ANALYSIS
import geopandas as gpd
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import plotly.express as px
import plotly.graph_objects as go
import os
import numpy as np
import json
import warnings
# Suppresses every warning from here on, whatever its category or source module.
# NOTE(review): this also hides any CRS / deprecation warnings the geo stack may emit.
warnings.filterwarnings('ignore')

# Set Plotly default theme
import plotly.io as pio
# Figures created after this assignment use the "plotly_white" template by default.
pio.templates.default = "plotly_white"

def normalize_freguesia(df, col_name):
    """Add a ``Freguesia_Norm`` column holding the trimmed, upper-cased names.

    The frame is modified in place and also returned, so the call can be used
    on the right-hand side of an assignment. When ``col_name`` is not one of
    the frame's columns, the frame is returned untouched.

    Args:
        df: Frame containing a string column with parish names.
        col_name: Name of that string column.

    Returns:
        The same ``df`` object that was passed in.
    """
    if col_name not in df.columns:
        return df

    trimmed_names = df[col_name].str.strip()
    df['Freguesia_Norm'] = trimmed_names.str.upper()
    return df

def _resolve_color_range(values, vmin, vmax):
    """Build the ``range_color`` pair for a continuous colour axis.

    Returns ``None`` (Plotly auto-scales) when neither bound is given. When
    only one bound is given, the other one is taken from the data so that the
    supplied bound is honoured instead of being silently discarded.
    """
    if vmin is None and vmax is None:
        return None
    if vmin is not None and vmax is not None:
        return [vmin, vmax]

    # Non-numeric entries become NaN, which min()/max() skip.
    numeric = pd.to_numeric(values, errors='coerce')
    lower = numeric.min() if vmin is None else vmin
    upper = numeric.max() if vmax is None else vmax
    if pd.isna(lower) or pd.isna(upper):
        # Nothing usable to infer the missing bound from: fall back to auto-scaling.
        return None
    return [lower, upper]


def plot_choropleth(gdf, column, title, cmap='viridis', vmin=None, vmax=None, categorical=False):
    """
    Consistent plotting function using Plotly for interactivity.

    Draws ``column`` of ``gdf`` as a Mapbox choropleth centred on fixed
    coordinates (lat 38.7223, lon -9.1393) and displays it with ``fig.show()``.

    Args:
        gdf: GeoDataFrame with a CRS set; it is reprojected to EPSG:4326
            before plotting. Its index is used as the feature id.
        column: Name of the column to colour by. If it is missing, a message
            is printed and nothing is drawn.
        title: Figure title.
        cmap: Colour scale for the continuous case. The lower-case Matplotlib
            names ``'magma'`` and ``'viridis'`` are translated to Plotly's
            capitalised names; any other value is passed through unchanged.
        vmin, vmax: Optional colour-axis bounds (continuous case only). If
            only one is supplied, the other is taken from the column's
            min/max.
        categorical: When true, colour with the qualitative "Bold" palette
            instead of a continuous scale; ``cmap``/``vmin``/``vmax`` are
            ignored.

    Returns:
        None. The figure is shown as a side effect.
    """
    # Check if column exists
    if column not in gdf.columns:
        print(f"Column {column} not found. Skipping.")
        return

    # Reproject to WGS84 for Mapbox
    gdf_4326 = gdf.to_crs("EPSG:4326")

    # Only use the normalised name as hover title when the frame actually has it.
    hover_name = 'Freguesia_Norm' if 'Freguesia_Norm' in gdf_4326.columns else None

    # Arguments shared by the categorical and the continuous variant.
    map_kwargs = dict(
        geojson=gdf_4326.geometry,
        locations=gdf_4326.index,
        color=column,
        hover_name=hover_name,
        hover_data={column: True},
        title=title,
        mapbox_style="carto-positron",
        center={"lat": 38.7223, "lon": -9.1393},
        zoom=11,
        opacity=0.7,
    )

    if categorical:
        map_kwargs['color_discrete_sequence'] = px.colors.qualitative.Bold
    else:
        # Only these two Matplotlib names are rewritten; everything else
        # (other names, explicit colour lists) is forwarded as given.
        if isinstance(cmap, str) and cmap in ('magma', 'viridis'):
            color_scale = cmap.capitalize()
        else:
            color_scale = cmap
        map_kwargs['color_continuous_scale'] = color_scale
        map_kwargs['range_color'] = _resolve_color_range(gdf_4326[column], vmin, vmax)

    fig = px.choropleth_mapbox(gdf_4326, **map_kwargs)
    fig.update_layout(margin={"r":0,"t":40,"l":0,"b":0})
    fig.show()


In [2]:
# 0. LOAD BASE GEOMETRY
from shapely.geometry import Polygon

# Parish boundaries, projected to EPSG:3763 so that .area below is in square metres.
freguesias_path = "../data/boundaries/lisboa_freguesias_oficial.geojson"
freguesias_gdf = gpd.read_file(freguesias_path).to_crs("EPSG:3763")
name_col = 'Des_Simpli' if 'Des_Simpli' in freguesias_gdf.columns else 'Freguesia'
if name_col not in freguesias_gdf.columns:
    # normalize_freguesia() silently does nothing for a missing column, which
    # would only surface later as an opaque KeyError on 'Freguesia_Norm'.
    raise KeyError(
        f"Expected a 'Des_Simpli' or 'Freguesia' column in {freguesias_path}; "
        f"found {list(freguesias_gdf.columns)}"
    )
freguesias_gdf = normalize_freguesia(freguesias_gdf, name_col)

# --- MANUAL CLIP WATER (Tagus River) ---
# Since we lack a precise land mask, we approximate the river boundary to fix densities.
# Coordinates are (lon, lat) in EPSG:4326 and are an approximate trace of the coastline.
coast_points = [
    (-9.300, 38.690), # West limit
    (-9.235, 38.691), # Belém Tower area
    (-9.180, 38.695), # Alcântara
    (-9.150, 38.703), # Terreiro do Paço
    (-9.120, 38.710), # Santa Apolónia
    (-9.100, 38.735), # Beato/Marvila
    (-9.090, 38.750), # Braço de Prata
    (-9.085, 38.790), # Parque das Nações
    (-9.085, 38.850), # North limit (river side)
    (-8.900, 38.850), # East
    (-8.900, 38.500), # South East
    (-9.300, 38.500)  # South West
]

water_poly = Polygon(coast_points)
water_gdf = gpd.GeoDataFrame({'geometry': [water_poly]}, crs="EPSG:4326").to_crs("EPSG:3763")

# Clip (Difference)
parishes_before_clip = set(freguesias_gdf['Freguesia_Norm'])
freguesias_gdf = gpd.overlay(freguesias_gdf, water_gdf, how='difference')

# A parish lying entirely inside the hand-drawn water polygon disappears from
# the overlay result; report it instead of letting it vanish from every map.
parishes_lost = sorted(str(p) for p in parishes_before_clip - set(freguesias_gdf['Freguesia_Norm']))
if parishes_lost:
    print(f"WARNING: parishes removed entirely by the water clip: {parishes_lost}")

# Recalculate Area (m² -> km²)
freguesias_gdf['Area_km2'] = freguesias_gdf.geometry.area / 10**6

# Master DataFrame for aggregations
master_stats = freguesias_gdf[['Freguesia_Norm', 'geometry', 'Area_km2', name_col]].copy()

print("Map clipped to remove water areas. Areas recalculated.")


Map clipped to remove water areas. Areas recalculated.


In [4]:
# 2. TOURISM ANALYSIS
print("--- 2. Tourism Analysis ---")

# Load Data (everything is projected to EPSG:3763, the CRS of freguesias_gdf)
al_gdf = gpd.read_file("../data/tourism/AL.geojson").to_crs("EPSG:3763")
hotels_df = pd.read_json("../data/tourism/hotels lisbon.json")
# Both coordinate columns are needed to build points; otherwise let geopandas
# try to parse the file as a geospatial source.
if {'lat', 'lon'}.issubset(hotels_df.columns):
    hotels_gdf = gpd.GeoDataFrame(hotels_df, geometry=gpd.points_from_xy(hotels_df.lon, hotels_df.lat), crs="EPSG:4326").to_crs("EPSG:3763")
else:
    hotels_gdf = gpd.read_file("../data/tourism/hotels lisbon.json").to_crs("EPSG:3763")

# ignore_index avoids duplicate index labels coming from the two sources.
tourism_points = pd.concat([al_gdf[['geometry']], hotels_gdf[['geometry']]], ignore_index=True)
joined_tourism = gpd.sjoin(tourism_points, freguesias_gdf, how="inner", predicate="within")
tourism_counts = joined_tourism.groupby('Freguesia_Norm').size().reset_index(name='Tourism_Count')

# Drop results of a previous run so re-executing the cell does not produce
# Tourism_Count_x / Tourism_Count_y columns.
master_stats = master_stats.drop(columns=['Tourism_Count', 'Tourism_Density'], errors='ignore')
master_stats = master_stats.merge(tourism_counts, on='Freguesia_Norm', how='left')
# Parishes without any point get a count of 0. Only this column is filled:
# a frame-wide fillna(0) would also overwrite missing values in unrelated columns.
master_stats['Tourism_Count'] = master_stats['Tourism_Count'].fillna(0)
master_stats['Tourism_Density'] = master_stats['Tourism_Count'] / master_stats['Area_km2']

# 2.1 Density Map
plot_choropleth(master_stats, 'Tourism_Density', "2.1 Density of AL + Hotels per km²", cmap='OrRd')

# 2.3 Relationship with Population (Viz)
# Use Plotly Scatter
# Pop_Growth_Pct and Pop_Density are not created in this cell, so verify they
# exist before plotting instead of failing inside Plotly.
scatter_cols = ['Tourism_Density', 'Pop_Growth_Pct', 'Pop_Density']
missing_scatter_cols = [c for c in scatter_cols if c not in master_stats.columns]
if missing_scatter_cols:
    print(f"Skipping 2.3 scatter: column(s) {missing_scatter_cols} not found in master_stats.")
else:
    # Rows with missing values are left out rather than plotted as zeros.
    fig_rel = px.scatter(
        master_stats.dropna(subset=scatter_cols), x='Tourism_Density', y='Pop_Growth_Pct',
        hover_name=name_col, size='Pop_Density',
        title="2.3 Tourism Density vs Population Growth",
        trendline="ols"
    )
    fig_rel.show()
 
# 2.4 Restaurants
rest_path = "../data/tourism/lisboa_restaurants.geojson"
if not os.path.exists(rest_path):
    # Fall back to a file in the working directory.
    rest_path = "lisboa_restaurantes_master_v5.geojson"

if os.path.exists(rest_path):
    rest_gdf = gpd.read_file(rest_path).to_crs("EPSG:3763")
    joined_rest = gpd.sjoin(rest_gdf, freguesias_gdf, how="inner", predicate="within")
    rest_counts = joined_rest.groupby('Freguesia_Norm').size().reset_index(name='Rest_Count')

    # Drop results of a previous run so the cell can be re-executed safely.
    master_stats = master_stats.drop(columns=['Rest_Count', 'Rest_Density'], errors='ignore')
    master_stats = master_stats.merge(rest_counts, on='Freguesia_Norm', how='left')
    # Fill only the new count column; a frame-wide fillna(0) would also
    # overwrite missing values in unrelated columns.
    master_stats['Rest_Count'] = master_stats['Rest_Count'].fillna(0)
    master_stats['Rest_Density'] = master_stats['Rest_Count'] / master_stats['Area_km2']

    # 2.4.1 Restaurant Density Map
    plot_choropleth(master_stats, 'Rest_Density', "2.4.1 Restaurant Density (units/km²)", cmap='YlGn')

    # 2.4.2 Relationship AL+Hotels vs Restaurants
    fig_rest = px.scatter(
        master_stats, x='Tourism_Count', y='Rest_Count',
        hover_name=name_col,
        title="2.4.2 Relationship: Tourism Units vs Restaurants",
        trendline="ols"
    )
    fig_rest.show()

    # 2.5 Kind of Restaurant Analysis
    # First of these candidate columns that the restaurant file actually has.
    type_col = next((c for c in ['cuisine', 'amenity', 'type', 'categoria_mapa'] if c in rest_gdf.columns), None)
    if type_col:
        # Bar Plot (Top 10 types overall)
        top_types = rest_gdf[type_col].value_counts().head(10).reset_index()
        top_types.columns = ['Type', 'Count']
        fig_types = px.bar(
            top_types, x='Count', y='Type',
            orientation='h',
            title=f"2.5 Top 10 Restaurant Types ({type_col})",
            color='Count', color_continuous_scale='Viridis'
        )
        fig_types.update_layout(yaxis={'categoryorder':'total ascending'})
        fig_types.show()

        # Map Dominant Type: after sorting by count (descending) within each
        # parish, the first row per parish is its most frequent type.
        dominant = joined_rest.groupby(['Freguesia_Norm', type_col]).size().reset_index(name='count')
        dominant = dominant.sort_values(['Freguesia_Norm', 'count'], ascending=[True, False]).drop_duplicates('Freguesia_Norm')

        # Remove a column of the same name left over from a previous run.
        master_stats = master_stats.drop(columns=[type_col], errors='ignore')

        # Merge dominant type info
        master_stats = master_stats.merge(dominant[['Freguesia_Norm', type_col]], on='Freguesia_Norm', how='left')

        # Create Map with Labels
        master_stats_4326 = master_stats.to_crs("EPSG:4326")
        # Centroids are computed in the projected CRS and only then converted
        # to lon/lat; centroids taken directly on EPSG:4326 geometries are
        # flagged by geopandas as likely incorrect.
        label_points = master_stats.geometry.to_crs("EPSG:3763").centroid.to_crs("EPSG:4326")
        master_stats_4326['centroid_lon'] = label_points.x
        master_stats_4326['centroid_lat'] = label_points.y

        fig_dom = px.choropleth_mapbox(
            master_stats_4326,
            geojson=master_stats_4326.geometry,
            locations=master_stats_4326.index,
            color=type_col,
            hover_name='Freguesia_Norm',
            hover_data={'Rest_Density': True, type_col: True},
            title="2.5 Dominant Restaurant Type per Freguesia (with Density)",
            mapbox_style="carto-positron",
            center={"lat": 38.7223, "lon": -9.1393},
            zoom=11,
            opacity=0.6
        )

        # Add Text Labels (parishes without a dominant type get an empty label)
        fig_dom.add_trace(go.Scattermapbox(
            lat=master_stats_4326['centroid_lat'],
            lon=master_stats_4326['centroid_lon'],
            mode='text',
            text=master_stats_4326[type_col].astype(object).fillna(''),
            textfont=dict(size=10, color='black'),
            showlegend=False,
            hoverinfo='skip'
        ))

        fig_dom.update_layout(margin={"r":0,"t":40,"l":0,"b":0})
        fig_dom.show()

else:
    print(f"Restaurant data not found at {rest_path}")
 
# 2.6 Nightlife
night_path = "../data/tourism/nightlife_open_street_maps.geojson"
if os.path.exists(night_path):
    night_gdf = gpd.read_file(night_path).to_crs("EPSG:3763")
    joined_night = gpd.sjoin(night_gdf, freguesias_gdf, how="inner", predicate="within")
    night_counts = joined_night.groupby('Freguesia_Norm').size().reset_index(name='Nightlife_Count')

    # Drop results of a previous run so the cell can be re-executed safely.
    master_stats = master_stats.drop(columns=['Nightlife_Count', 'Nightlife_Density'], errors='ignore')
    master_stats = master_stats.merge(night_counts, on='Freguesia_Norm', how='left')
    # Fill only the new count column: a frame-wide fillna(0) would also turn
    # missing values in other columns (e.g. a text column merged earlier) into 0.
    master_stats['Nightlife_Count'] = master_stats['Nightlife_Count'].fillna(0)
    master_stats['Nightlife_Density'] = master_stats['Nightlife_Count'] / master_stats['Area_km2']

    plot_choropleth(master_stats, 'Nightlife_Density', "2.6 Nightlife Density", cmap='Purples')
else:
    # Report the skip, as the restaurant section does for its own file.
    print(f"Nightlife data not found at {night_path}")

--- 2. Tourism Analysis ---
