# ✨ Interactive Geospatial Dashboard ✨

Welcome to this Jupyter Notebook where data meets geography and interactivity! In this project, we will:

- **Load and Explore** your dataset (read from the project's `data` folder).
- **Link Geospatial Data** by merging it with the *zone d'emploi* shapefile.
- **Visualize Interactively** using Dash and Plotly to create dynamic maps and insightful charts.

---

## Project Overview

This notebook demonstrates a complete workflow that integrates spatial and non-spatial data:

1. **Data Ingestion & Preparation:**  
   Import your dataset, perform necessary cleaning, and get it ready for analysis.

2. **Geospatial Integration:**  
   Merge your data with the *zone d'emploi* shapefile to incorporate geographical coordinates and boundaries.

3. **Interactive Dashboard Development:**  
   Build an interactive dashboard using Dash and Plotly, featuring:
   - A **main map** displaying spatial data dynamically.
   - Additional visual elements for deeper data insights.

> **Note:**  
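> Make sure you have the following Python packages installed: `pandas`, `numpy`, `geopandas`, `dash`, and `plotly`, plus `matplotlib`, `contextily`, and `unidecode`, which the import cell below also loads. Also, verify that your shapefile and dataset are correctly formatted and accessible at the paths defined in the second cell.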

---



In [1]:
# ---------------------------------------------------
# Standard imports (assumes all packages are installed)
# ---------------------------------------------------

# System and standard library
import os
import re
import sys
import json
import math
import webbrowser
import subprocess
import importlib
import base64
from pathlib import Path

# Data manipulation
import pandas as pd
import numpy as np
import unidecode

# Plotting
import matplotlib.pyplot as plt
import matplotlib.colors as mcolors
import matplotlib.patches as mpatches
import plotly.express as px
import plotly.graph_objects as go

# Geospatial
import geopandas as gpd
import contextily as ctx

# Dash app (for interactive dashboard)
import dash
from dash import html, dcc, Input, Output


In [2]:
# ---------------------------------------------------
# Define the project's root directory
# ---------------------------------------------------
# The notebook is expected to run from a sub-folder of the project, so the
# project root is taken to be the parent of the current working directory.
project_root = Path().resolve().parent

# ---------------------------------------------------
# Construct file paths for each dataset
# ---------------------------------------------------
final_data = project_root / "data" / "3- Final Data" / "ratio_supply_demand.csv"
shapefile = project_root / "data" / "shapefiles" / "Zones d'Emploi" / "ze2020_2022.shp"
# The two header logos are embedded as base64 data URIs so the dashboard can
# display them without serving static files.
img_src1 = f"data:image/png;base64,{base64.b64encode((project_root / 'app' / 'Pictures' / 'image1.png').read_bytes()).decode('ascii')}"
img_src2 = f"data:image/jpeg;base64,{base64.b64encode((project_root / 'app' / 'Pictures' / 'image2.jpg').read_bytes()).decode('ascii')}"

# ---------------------------------------------------
# Import data from files
# ---------------------------------------------------
df = pd.read_csv(final_data)
ze_shp = gpd.read_file(shapefile)

# ---------------------------------------------------
# Make sure the shapes are in WGS84 (EPSG:4326) BEFORE simplifying.
# The tolerance below is expressed in CRS units (degrees here) and the
# GeoDataFrame built at the end of this cell is labelled EPSG:4326, so a
# shapefile delivered in a projected CRS must be reprojected first rather
# than merely relabelled. A shapefile without CRS metadata is left as is.
# ---------------------------------------------------
if ze_shp.crs is not None and ze_shp.crs.to_epsg() != 4326:
    ze_shp = ze_shp.to_crs(epsg=4326)

# ---------------------------------------------------
# Simplify the geometries from the shapefile for maximum speed.
# Adjust the tolerance value as needed.
# A higher tolerance yields simpler geometries (but may lose detail).
# ---------------------------------------------------
ze_shp["geometry"] = ze_shp["geometry"].simplify(tolerance=0.01, preserve_topology=True)


# ---------------------------------------------------
# Merge the simplified shapefile with your CSV data.
# Assume that 'zone_emploi' in df corresponds to 'libze2020' in ze_shp.
# ---------------------------------------------------
merged_df = df.merge(ze_shp[["geometry", "libze2020"]],
                     left_on="zone_emploi",
                     right_on="libze2020",
                     how="left").drop(columns="libze2020")

# A left merge keeps CSV rows whose zone name has no counterpart in the
# shapefile, but with a missing geometry: such zones cannot be drawn.
# Report them instead of letting them disappear from the map silently.
unmatched_zones = merged_df.loc[merged_df["geometry"].isna(), "zone_emploi"].nunique()
if unmatched_zones:
    print(f"Warning: {unmatched_zones} zone(s) in the CSV have no matching "
          "shape in the shapefile and will not appear on the map.")

# ---------------------------------------------------
# Convert the merged DataFrame into a GeoDataFrame,
# specifying EPSG:4326 as the CRS (latitude/longitude).
# ---------------------------------------------------
gdf = gpd.GeoDataFrame(merged_df, geometry="geometry", crs="EPSG:4326")



In [3]:
# ---------------------------
# 1) INITIALIZE THE DASH APP
# ---------------------------
app = dash.Dash(__name__)

# Typeface shared by the page title and every filter caption.
_FONT_FAMILY = "'Roboto', sans-serif"

# Style applied to each radio option label. Style dictionaries in Dash use
# camelCased keys (marginRight, not margin-right), like every other style
# in this layout.
_RADIO_LABEL_STYLE = {'display': 'inline-block', 'marginRight': '10px'}

# Families listed in the dropdown when the layout is built (FAP 22 is the
# default granularity). Computed once, and guarded so that an empty dataset
# produces an empty dropdown instead of an IndexError.
_initial_families = gdf["famille_pro22"].unique()
_initial_family = _initial_families[0] if len(_initial_families) > 0 else None


def _filter_caption(text, margin_top):
    """Return the bold caption displayed above a filter control.

    text: caption shown to the user.
    margin_top: CSS length (e.g. "20px") placed above the caption.
    """
    return html.Div(text,
                    style={"marginTop": margin_top, "fontWeight": "bold",
                           "fontFamily": _FONT_FAMILY})


app.layout = html.Div([
    # Top header with two logos in one line
    html.Div([
        html.Img(src=img_src1, style={"height": "80px"}),
        html.Img(src=img_src2, style={"height": "120px"})
    ], style={"display": "flex", "justifyContent": "space-between", "alignItems": "center", "padding": "10px"}),

    # Title in a banner with a thin light-grey border
    html.H1(
        "Visualising Labour Market Tightness In France",
        style={
            "textAlign": "center",
            "border": "1px solid #ccc",
            "padding": "12px",
            "backgroundColor": "#f4f3ee",
            "fontFamily": _FONT_FAMILY,
            "fontWeight": "400",
            "letterSpacing": "0.5px"
        }
    ),

    # Main container: left filters and right map
    html.Div([
        # Left filter box
        html.Div([
            # Filter 1: Granularity of Job Category
            _filter_caption("Choose the granularity of Job Category", "10px"),
            dcc.RadioItems(
                id='fap-granularity',
                options=[
                    {'label': 'FAP 22', 'value': 'FAP22'},
                    {'label': 'FAP 87', 'value': 'FAP87'}
                ],
                value='FAP22',
                labelStyle=_RADIO_LABEL_STYLE
            ),
            html.Br(),

            # Filter 2: Type of jobseekers
            _filter_caption("Which type of jobseekers to consider", "20px"),
            dcc.RadioItems(
                id='jobseekers-type',
                options=[
                    {'label': 'Category A', 'value': 'A'},
                    {'label': 'Category A, B and C', 'value': 'ABC'}
                ],
                value='A',
                labelStyle=_RADIO_LABEL_STYLE
            ),
            html.Br(),

            # Filter 3: Specific Professional Family (FAP)
            _filter_caption("Choose a specific Professional Family", "20px"),
            dcc.Dropdown(
                id='fap-family',
                options=[{'label': fam, 'value': fam} for fam in _initial_families],
                value=_initial_family,
                clearable=False
            ),
            html.Br(),

            # Filter 4: Zoom on a region (optional: cleared means whole country)
            _filter_caption("Zoom on a region", "20px"),
            dcc.Dropdown(
                id='region',
                options=[{'label': reg, 'value': reg} for reg in gdf["region"].unique()],
                placeholder="Select a region",
                clearable=True
            )
        ], style={"width": "20%", "padding": "20px"}),

        # Right map area
        html.Div([
            dcc.Graph(id='map-graph', style={"height": "600px"})
        ], style={"width": "60%", "padding": "20px"})
    ], style={
        "display": "flex",
        "flexWrap": "wrap",
        "justifyContent": "center",
        "alignItems": "center"
    })
])

# ---------------------------
# 2) CALLBACKS
# ---------------------------

# A) Update Professional Family dropdown based on granularity
@app.callback(
    [Output('fap-family', 'options'),
     Output('fap-family', 'value')],
    [Input('fap-granularity', 'value')]
)
def update_fap_family(granularity):
    """Refresh the Professional Family dropdown when the granularity changes.

    Parameters
    ----------
    granularity : str
        Value of the 'fap-granularity' radio buttons. "FAP22" selects the
        "famille_pro22" column; any other value selects "famille_pro87".

    Returns
    -------
    tuple
        ``(options, value)``: one ``{'label', 'value'}`` entry per distinct
        family in the selected column, and the first of those families as
        the new selection (``None`` when the column has no values).
    """
    column = "famille_pro22" if granularity == "FAP22" else "famille_pro87"
    family_names = gdf[column].unique()

    # Select the first family so the dropdown never holds a value that
    # belongs to the other granularity.
    first_family = None if len(family_names) == 0 else family_names[0]
    dropdown_options = [{'label': name, 'value': name} for name in family_names]
    return dropdown_options, first_family

# B) Update map based on selected filters

# Legend label used for scores that are missing or outside 1-5.
_NO_DATA_LABEL = "No Data"

# Default view, centred on metropolitan France, used when no region is selected.
_DEFAULT_CENTER = {"lat": 46.2276, "lon": 2.2137}
_DEFAULT_ZOOM = 4.65


def _score_to_label(scores):
    """Convert raw scores to the legend labels "1"-"5" or "No Data".

    The conversion goes through numbers rather than ``astype(str)``: a score
    column that contains missing values is stored as float, so a plain string
    cast yields "1.0", "2.0", ... and no value would ever match "1"-"5".
    Anything that is not exactly 1, 2, 3, 4 or 5 becomes "No Data".
    """
    numeric = pd.to_numeric(scores, errors="coerce")
    is_valid = numeric.isin([1, 2, 3, 4, 5]).to_numpy()
    labels = pd.Series(_NO_DATA_LABEL, index=scores.index, dtype=object)
    # Positional (NumPy) assignment: no index alignment involved.
    labels[is_valid] = numeric[is_valid].astype(int).astype(str).to_numpy()
    return labels


def _region_view(bounds):
    """Return ``(center, zoom)`` framing the bounding box ``bounds``.

    ``bounds`` is ``(minx, miny, maxx, maxy)`` in degrees. Falls back to the
    default country-wide view when the bounds are undefined (NaN), which
    happens when none of the selected rows has a geometry.
    """
    minx, miny, maxx, maxy = (float(v) for v in bounds)
    if any(math.isnan(v) for v in (minx, miny, maxx, maxy)):
        return _DEFAULT_CENTER, _DEFAULT_ZOOM

    center = {"lat": (miny + maxy) / 2, "lon": (minx + maxx) / 2}
    # Longitude extent with 10% padding; a degenerate (zero-width) box is
    # treated as the whole globe so the logarithm below stays defined.
    width = (maxx - minx) * 1.1 if (maxx - minx) > 0 else 360
    # Heuristic: each zoom level halves the visible longitude span.
    zoom = min(math.log2(360 / width), 15)  # Cap the zoom level at 15
    return center, zoom


@app.callback(
    Output('map-graph', 'figure'),
    [Input('fap-granularity', 'value'),
     Input('jobseekers-type', 'value'),
     Input('fap-family', 'value'),
     Input('region', 'value')]
)
def update_map(granularity, jobseekers, fap_family, region):
    """Build the choropleth of labour-tightness scores for the chosen filters.

    Parameters
    ----------
    granularity : str
        "FAP87" selects the ``*_fap87`` score and "famille_pro87" family
        columns; any other value selects the FAP 22 equivalents.
    jobseekers : str
        "A" selects the ``lt_score_a_*`` column; any other value selects
        ``lt_score_abc_*``.
    fap_family : str or None
        Professional family to keep; a falsy value disables this filter.
    region : str or None
        Region to keep and zoom on; a falsy value shows the default view.

    Returns
    -------
    plotly.graph_objects.Figure
        The choropleth map, or an empty titled map when the filters leave no
        rows or the score column is absent.
    """
    # 1. Determine which score and family column to use
    if granularity == "FAP87":
        score_col = 'lt_score_a_fap87' if jobseekers == 'A' else 'lt_score_abc_fap87'
        fam_col = "famille_pro87"
    else:
        score_col = 'lt_score_a_fap22' if jobseekers == 'A' else 'lt_score_abc_fap22'
        fam_col = "famille_pro22"

    # 2. Filter the GeoDataFrame based on the chosen family and region.
    #    Filtering first and copying the (small) result afterwards avoids
    #    duplicating the whole dataset on every callback and guarantees the
    #    shared `gdf` is never modified.
    filtered_gdf = gdf
    if fap_family:
        filtered_gdf = filtered_gdf[filtered_gdf[fam_col] == fap_family]
    if region:
        filtered_gdf = filtered_gdf[filtered_gdf["region"] == region]
    # Keep one row per shape so each zone is drawn once.
    filtered_gdf = filtered_gdf.drop_duplicates(subset=["geometry"]).copy()

    # Ensure the GeoDataFrame is in EPSG:4326 (WGS84) for Mapbox
    if filtered_gdf.crs is not None and filtered_gdf.crs.to_string() != 'EPSG:4326':
        filtered_gdf = filtered_gdf.to_crs(epsg=4326)

    # Check for valid data
    if filtered_gdf.empty or score_col not in filtered_gdf.columns:
        print("Filtered data is empty or missing score column.")
        return px.scatter_mapbox(
            lat=[], lon=[], mapbox_style="carto-positron",
            title="No data available for the selected filters"
        )

    # 3. Convert the score values to string labels ("1" to "5", else "No Data")
    filtered_gdf["lt_score_cat"] = _score_to_label(filtered_gdf[score_col])

    # 4. Obtain a GeoJSON-like dict using the __geo_interface__
    geojson = filtered_gdf.__geo_interface__

    # 5. Choose the map view: frame the selected region, or show the
    #    default country-wide view when no region is selected.
    if region:
        center, zoom = _region_view(filtered_gdf.total_bounds)
    else:
        center, zoom = _DEFAULT_CENTER, _DEFAULT_ZOOM

    # 6. Build the choropleth map using Plotly Express
    color_discrete_map = {
        "1": "#a6d96a",  # Light green
        "2": "#d9ef8b",
        "3": "#ffffbf",
        "4": "#fdae61",
        "5": "#f46d43",  # Red
        _NO_DATA_LABEL: "#d9d9d9"
    }

    fig = px.choropleth_mapbox(
        filtered_gdf,
        geojson=geojson,
        locations="zone_emploi",                  # Must match the column in your GeoDataFrame
        featureidkey="properties.zone_emploi",    # Must match the property in the geojson features
        color="lt_score_cat",
        color_discrete_map=color_discrete_map,
        mapbox_style="carto-positron",
        center=center,
        zoom=zoom,
        opacity=0.8,
        category_orders={"lt_score_cat": ["1", "2", "3", "4", "5", _NO_DATA_LABEL]}
    )

    fig.update_layout(
        margin={"r": 0, "t": 0, "l": 0, "b": 0},
        legend_title_text="Labour Tightness Score"
    )

    return fig

# ---------------------------
# 3) RUN THE APP
# ---------------------------
if __name__ == '__main__':
    import threading  # local import: only needed by this entry point

    dashboard_url = "http://127.0.0.1:8050"

    # With debug=True the development server may re-execute this module in a
    # reloader child process, flagged by WERKZEUG_RUN_MAIN="true". Opening the
    # browser only outside that child prevents a second tab on start-up and a
    # new tab on every hot reload.
    if os.environ.get("WERKZEUG_RUN_MAIN") != "true":
        # Open the page shortly AFTER app.run() has started, instead of
        # before the server exists, so the first request is not refused.
        threading.Timer(1.0, webbrowser.open, args=(dashboard_url,)).start()

    app.run(debug=True)
