# Phase 4 â€“ Dashboard interactif (Dash/Plotly)

Objectif : visualiser lâ€™ensemble des donnÃ©es enrichies (Phase 3) avec des filtres dynamiques et 5 graphiques clÃ©s (secteurs, salaires, clusters, entreprises, temporalitÃ©).

## Ã‰tape 1 â€“ Importer les bibliothÃ¨ques

Dash/Plotly pour lâ€™application, pandas pour les donnÃ©es, Path pour les fichiers, logging pour le suivi.

## Ã‰tape 2 â€“ Charger le dataset enrichi

On privilÃ©gie `data/enriched/hellowork_ml_enriched.csv` (Phase 3). Si absent, on retombe sur le dataset nettoyÃ© `data/processed/hellowork_cleaned.csv`.

In [2]:
# --- Chargement des donnÃ©es ---
ENRICHED = Path("data/enriched/hellowork_ml_enriched.csv")
CLEAN = Path("data/processed/hellowork_cleaned.csv")


def load_data():
    if ENRICHED.exists():
        use_path = ENRICHED
    else:
        use_path = CLEAN
    df = pd.read_csv(use_path, encoding="utf-8")

    # Harmoniser les noms attendus
    rename_map = {
        "Publication_Date": "publication_date",
        "Salary_Monthly": "salary_monthly",
        "Sector": "sector",
        "Location": "location",
        "Contract": "contract_type",
        "Job_Cluster": "job_cluster",
    }
    df = df.rename(columns={k: v for k, v in rename_map.items() if k in df.columns})

    # Dates en datetime pour la tendance temporelle
    if "publication_date" in df.columns:
        df["publication_date"] = pd.to_datetime(df["publication_date"], errors="coerce")

    print(f"ðŸ“‚ ChargÃ© depuis {use_path} | {len(df)} lignes")
    return df


df = load_data()
df.head(3)

ðŸ“‚ ChargÃ© depuis data\enriched\hellowork_ml_enriched.csv | 1219 lignes


Unnamed: 0,sector,Job_Title,Company,location,contract_type,Salary,Description,publication_date,URL,Top_Keywords,sector_enc,location_enc,contract_enc,company_enc,description_clean,salary_monthly,job_cluster
0,Agriculture â€¢ PÃªche,Alternance - ChargÃ©Â·e de Formation H/F,Remy Cointreau,Paris - 75,Alternance,"486,49 - 1â€¯801,80 â‚¬ / mois",Nous recherchons unÂ·e candidatÂ·e : Alternance...,NaT,https://www.hellowork.com/fr-fr/emplois/642118...,"formation,formations,des,de,groupe,aider,crÃ©at...",0,0,0,0,Nous recherchons unÂ·e candidatÂ·e : Alternance...,"486,49 - 1â€¯801,80 â‚¬ / mois",4
1,BTP,Alternance-Gestionnaire Paie H/F,Lafarge France,Issy-les-Moulineaux - 92,Alternance,"486,49 - 1â€¯801,80 â‚¬ / mois",Pourquoi nous rejoindre ? > Participer Ã la t...,NaT,https://www.hellowork.com/fr-fr/emplois/729761...,"paie,de,et,la,des,groupe,processus,ses",1,1,0,1,Pourquoi nous rejoindre ? > Participer Ã la t...,"486,49 - 1â€¯801,80 â‚¬ / mois",4
2,BTP,Ouvrier Polyvalent en Menuiserie H/F,Groupe Actual,Auterive - 31,IntÃ©rim,"Estimation â†’ 12,36 - 13,50 â‚¬ / heure",Nous recherchons un(e) menuisier(e) expÃ©riment...,NaT,https://www.hellowork.com/fr-fr/emplois/735245...,"recherchons,ayant,un,nous,avons,connaissance,c...",1,2,1,2,Nous recherchons un(e) menuisier(e) expÃ©riment...,"Estimation â†’ 12,36 - 13,50 â‚¬ / heure",4


## Ã‰tape 3 â€“ Construire le layout + filtres

On crÃ©e lâ€™app Dash avec 3 filtres principaux (secteur, lieu, contrat), un filtre cluster, un slider salaire, et 5 graphiques :
1) RÃ©partition par secteur
2) Distribution des salaires
3) Scatter salaire vs cluster
4) Top entreprises
5) Tendance temporelle (hebdomadaire)

In [3]:
# --- CrÃ©ation de lâ€™app Dash ---

def create_app(df: pd.DataFrame) -> Dash:
    app = Dash(__name__)

    sectors = sorted(df["sector"].dropna().unique().tolist()) if "sector" in df.columns else []
    locations = sorted(df["location"].dropna().unique().tolist()) if "location" in df.columns else []
    contracts = sorted(df["contract_type"].dropna().unique().tolist()) if "contract_type" in df.columns else []

    app.layout = html.Div([
        html.H1("LEBI - Job Offers Explorer", style={"textAlign": "center", "marginBottom": "20px"}),

        html.Div([
            html.H3("Filtres"),
            html.Label("Secteur"),
            dcc.Dropdown(id="sector-filter", options=[{"label": s, "value": s} for s in sectors], multi=True),
            html.Br(),
            html.Label("Lieu"),
            dcc.Dropdown(id="location-filter", options=[{"label": l, "value": l} for l in locations], multi=True),
            html.Br(),
            html.Label("Type de contrat"),
            dcc.Dropdown(id="contract-filter", options=[{"label": c, "value": c} for c in contracts], multi=True),
            html.Br(),
            html.Label("Cluster"),
            dcc.Input(id="cluster-filter", type="number", placeholder="ID de cluster", style={"width": "100%"}),
            html.Br(),
            html.Br(),
            html.Label("Salaire mensuel (â‚¬)"),
            dcc.RangeSlider(
                id="salary-range",
                min=0, max=20000, step=100,
                value=[0, 20000],
                marks={0: "0â‚¬", 5000: "5kâ‚¬", 10000: "10kâ‚¬", 15000: "15kâ‚¬", 20000: "20kâ‚¬"},
                tooltip={"placement": "bottom", "always_visible": True},
            ),
        ], style={"width": "25%", "display": "inline-block", "verticalAlign": "top", "padding": "15px", "backgroundColor": "#f8f9fa", "borderRadius": "6px"}),

        html.Div([
            dcc.Graph(id="jobs-by-sector"),
            dcc.Graph(id="salary-dist"),
            dcc.Graph(id="cluster-viz"),
            dcc.Graph(id="top-companies"),
            dcc.Graph(id="temporal-trend"),
        ], style={"width": "70%", "display": "inline-block", "padding": "15px"}),
    ])

    @app.callback(
        Output("jobs-by-sector", "figure"),
        Output("salary-dist", "figure"),
        Output("cluster-viz", "figure"),
        Output("top-companies", "figure"),
        Output("temporal-trend", "figure"),
        Input("sector-filter", "value"),
        Input("location-filter", "value"),
        Input("contract-filter", "value"),
        Input("cluster-filter", "value"),
        Input("salary-range", "value"),
    )
    def update(sectors_sel, locations_sel, contracts_sel, cluster_sel, salary_range):
        dff = df.copy()

        # Filtres
        if sectors_sel:
            dff = dff[dff["sector"].isin(sectors_sel)]
        if locations_sel:
            dff = dff[dff["location"].isin(locations_sel)]
        if contracts_sel:
            dff = dff[dff["contract_type"].isin(contracts_sel)]
        if cluster_sel is not None:
            if "job_cluster" in dff.columns:
                dff = dff[dff["job_cluster"] == int(cluster_sel)]
            elif "cluster" in dff.columns:
                dff = dff[dff["cluster"] == int(cluster_sel)]
        if salary_range and "salary_monthly" in dff.columns:
            dff = dff[(dff["salary_monthly"] >= salary_range[0]) & (dff["salary_monthly"] <= salary_range[1])]

        # Graph 1: Secteurs
        if "sector" in dff.columns and not dff.empty:
            sector_counts = dff["sector"].value_counts().reset_index()
            sector_counts.columns = ["sector", "count"]
            fig_sector = px.bar(sector_counts.head(15), x="sector", y="count", title="Offres par secteur (Top 15)")
            fig_sector.update_xaxes(tickangle=45)
        else:
            fig_sector = px.bar(title="Offres par secteur (aucune donnÃ©e)")

        # Graph 2: Salaires
        if "salary_monthly" in dff.columns and dff["salary_monthly"].notna().any():
            fig_salary = px.histogram(dff[dff["salary_monthly"].notna()], x="salary_monthly", nbins=50, title="Distribution des salaires (â‚¬/mois)")
        else:
            fig_salary = px.histogram(title="Distribution des salaires (aucune donnÃ©e)")

        # Graph 3: Clusters
        cluster_col = "job_cluster" if "job_cluster" in dff.columns else ("cluster" if "cluster" in dff.columns else None)
        if cluster_col and "salary_monthly" in dff.columns:
            cluster_data = dff[dff[cluster_col].notna() & dff["salary_monthly"].notna()].copy()
            if not cluster_data.empty:
                cluster_data[cluster_col] = cluster_data[cluster_col].astype(str)
                fig_cluster = px.scatter(cluster_data, x="salary_monthly", y=cluster_col, color=cluster_col, title="Clusters vs Salaire")
            else:
                fig_cluster = px.scatter(title="Clusters vs Salaire (aucune donnÃ©e)")
        else:
            fig_cluster = px.scatter(title="Clusters vs Salaire (non disponible)")

        # Graph 4: Entreprises
        if "company" in dff.columns and not dff.empty:
            top = dff["company"].value_counts().nlargest(10).reset_index()
            top.columns = ["company", "count"]
            fig_companies = px.bar(top, x="company", y="count", title="Top 10 entreprises")
            fig_companies.update_xaxes(tickangle=45)
        else:
            fig_companies = px.bar(title="Top entreprises (aucune donnÃ©e)")

        # Graph 5: Tendance temporelle (hebdo)
        if "publication_date" in dff.columns and dff["publication_date"].notna().any():
            dff_temporal = dff[dff["publication_date"].notna()].copy().set_index("publication_date")
            df_trend = dff_temporal.resample('W').size().reset_index(name='count')
            fig_temporal = px.line(df_trend, x="publication_date", y="count", title="Offres dans le temps (hebdo)", markers=True)
        else:
            fig_temporal = px.line(title="Tendance temporelle (aucune donnÃ©e)")

        return fig_sector, fig_salary, fig_cluster, fig_companies, fig_temporal

    return app


app = create_app(df)
app

<dash.dash.Dash at 0x262bdc53770>

In [1]:
# --- Imports ---
import logging
from pathlib import Path

import pandas as pd
import plotly.express as px
from dash import Dash, dcc, html, Input, Output

print("âœ… Imports OK")

âœ… Imports OK


## Ã‰tape 4 â€“ Lancer le serveur Dash

ExÃ©cute `run_server()` pour dÃ©marrer lâ€™app sur http://127.0.0.1:8050/.

In [None]:
# --- Lancer le serveur ---

def run_server():
    print("ðŸš€ Dashboard en cours de dÃ©marrage sur http://127.0.0.1:8050/")
    app.run(debug=True, host="127.0.0.1", port=8050)

# run_server()  # DÃ©commente pour lancer depuis le notebook

: 