In [7]:
# === Imports ===
import re
from pathlib import Path

import pandas as pd
import plotly.express as px
import plotly.graph_objects as go
from dash import Dash, dcc, html, Input, Output

print("‚úÖ Imports OK")

# Shared UI palette: hex colours keyed by role, read by the layout and the charts.
COLORS = dict(
    primary='#2C3E50',
    secondary='#3498DB',
    accent='#E74C3C',
    success='#27AE60',
    warning='#F39C12',
    background='#ECF0F1',
    card='#FFFFFF',
    text='#2C3E50',
    border='#BDC3C7',
)

# Plotly template name passed to every figure.
CHART_TEMPLATE = "plotly_white"

‚úÖ Imports OK


# Phase 4 – Dashboard interactif (Dash/Plotly)

**Objectif** : visualiser l'ensemble des données enrichies (Phase 3) avec des filtres dynamiques, 5 graphiques clés et une interface moderne :
1. Distribution par secteurs (top 15)
2. Distribution des salaires avec médiane/moyenne
3. Distribution géographique (top 20)
4. Visualisation des clusters vs salaire (box plot)
5. Top 10 entreprises
6. Interface moderne avec KPIs et filtres interactifs

## Étape 1 – Importer les bibliothèques

Dash/Plotly pour l’application, pandas pour les données, Path pour les fichiers, re pour normaliser les libellés de lieux.

## Étape 2 – Charger le dataset enrichi

On privilégie `data/enriched/hellowork_ml_enriched.csv` (Phase 3). Si ce fichier est absent, on retombe sur le dataset nettoyé `data/processed/hellowork_cleaned.csv`.

In [8]:
# --- Data loading ---
# ENRICHED is the preferred input; CLEAN is the fallback used when it is missing.
ENRICHED, CLEAN = map(
    Path,
    ("data/enriched/hellowork_ml_enriched.csv", "data/processed/hellowork_cleaned.csv"),
)


def normalize_location(value: str) -> str:
    """Return a cleaned location label, folding arrondissements into their city.

    Missing values and blank strings yield ``""``. A Paris, Lyon or Marseille
    label, with or without an arrondissement number (e.g. ``"Paris 8e - 75"`` or
    ``"lyon-69"``), is rewritten as ``"<City> - <dept>"``. Every other label is
    returned with surrounding whitespace removed.
    """
    label = "" if pd.isna(value) else str(value).strip()
    if label == "":
        return label

    # Tried in order: city + arrondissement number first, then the bare city form.
    city_patterns = (
        r"^(?P<city>Paris|Lyon|Marseille)\s+\d+(?:er|e)?\s*-\s*(?P<dept>\d{2})$",
        r"^(?P<city>Paris|Lyon|Marseille)\s*-\s*(?P<dept>\d{2})$",
    )
    for pattern in city_patterns:
        found = re.match(pattern, label, flags=re.IGNORECASE)
        if found is not None:
            return "{} - {}".format(found.group("city").title(), found.group("dept"))

    return label


def load_data() -> pd.DataFrame:
    """Load the job-offer dataset and standardise the columns the dashboard reads.

    The file at ``ENRICHED`` is used when it exists, otherwise the one at
    ``CLEAN``. Known capitalised column names are renamed to snake_case,
    ``publication_date`` is parsed as a datetime, ``salary_monthly`` is coerced
    to numeric (unparseable values become NaN) and ``location`` is passed
    through ``normalize_location``. Each step is skipped when its column is
    absent, and a short progress line is printed for each step that runs.

    Returns:
        The prepared DataFrame.

    Raises:
        FileNotFoundError: if neither ``ENRICHED`` nor ``CLEAN`` exists.
    """
    use_path = next((path for path in (ENRICHED, CLEAN) if path.exists()), None)
    if use_path is None:
        # Fail with a message naming both candidates instead of a bare read error.
        raise FileNotFoundError(f"No dataset found: expected {ENRICHED} or {CLEAN}")
    df = pd.read_csv(use_path, encoding="utf-8")

    # Standardize column names
    rename_map = {
        "Publication_Date": "publication_date",
        "Salary_Monthly": "salary_monthly",
        "Sector": "sector",
        "Location": "location",
        "Contract": "contract_type",
        "Company": "company",
        "Job_Cluster": "job_cluster",
    }
    # Rename only when the source column exists AND the target name is still free:
    # renaming onto an existing column would create duplicate labels, after which
    # df["location"] returns a DataFrame and the steps below break.
    df = df.rename(columns={
        old: new for old, new in rename_map.items()
        if old in df.columns and new not in df.columns
    })

    # Parse dates (invalid values become NaT)
    if "publication_date" in df.columns:
        df["publication_date"] = pd.to_datetime(df["publication_date"], errors='coerce')
        print(f"‚úì Parsed publication_date: {df['publication_date'].notna().sum()} valid dates")

    # Convert salary to numeric (invalid values become NaN)
    if "salary_monthly" in df.columns:
        df["salary_monthly"] = pd.to_numeric(df["salary_monthly"], errors='coerce')
        print(f"‚úì Converted salary_monthly: {df['salary_monthly'].notna().sum()} valid values")

    # Normalize locations
    if "location" in df.columns:
        df["location"] = df["location"].apply(normalize_location)
        print(f"‚úì Normalized locations: {df['location'].nunique()} unique")

    print(f"üìä Loaded dataset from {use_path} ({len(df)} rows)")
    return df


# Load the dataset once at module level; create_app(df) and the dashboard
# callbacks in later cells read this frame.
df = load_data()
# Last expression of the cell: preview the first three rows.
df.head(3)

‚úì Converted salary_monthly: 1073 valid values
‚úì Normalized locations: 635 unique
üìä Loaded dataset from data\enriched\hellowork_ml_enriched.csv (1239 rows)


Unnamed: 0,sector,Job_Title,company,location,contract_type,Salary,Description,URL,salary_monthly,description_clean,Top_Keywords,sector_enc,location_enc,contract_enc,company_enc,high_salary,job_cluster,predicted_high_salary,pred_proba_high_salary
0,Agriculture ‚Ä¢ P√™che,Alternance - Charg√©¬∑e de Formation H/F,Remy Cointreau,Paris - 75,Alternance,"486,49 - 1‚ÄØ801,80 ‚Ç¨ / mois",Nous recherchons un¬∑e candidat¬∑e : Alternance...,https://www.hellowork.com/fr-fr/emplois/642118...,1144.145,recherchons un¬∑e candidat¬∑e alternance charg√©¬∑...,"formation,formations,groupe,aider,plan,cr√©atio...",0,0,0,0,0,4,0,0.018902
1,BTP,Electricien H/F,Samsic Emploi,Rennes - 35,Int√©rim,12 - 15 ‚Ç¨ / heure,Nous recherchons activement un/une electricien...,https://www.hellowork.com/fr-fr/emplois/729658...,2160.0,recherchons activement unune electriciennne ca...,"travail,samsic,sengage,lun,passion,lexp√©rience...",1,1,1,1,1,4,0,0.212084
2,BTP,Ouvrier Polyvalent en Menuiserie H/F,Groupe Actual,Auterive - 31,Int√©rim,"Estimation ‚Üí 12,36 - 13,50 ‚Ç¨ / heure",Nous recherchons un(e) menuisier(e) exp√©riment...,https://www.hellowork.com/fr-fr/emplois/732798...,2068.8,recherchons menuisiere exp√©riment√©e rejoindre ...,"recherchons,dexp√©rience,connaissance,candidats...",1,2,1,2,0,4,0,0.039824


## Étape 3 – Construire le layout + filtres

On crée l’app Dash avec 3 filtres principaux (secteur, lieu, contrat), un filtre cluster, un slider salaire, et 5 graphiques :
1) Répartition par secteur
2) Distribution des salaires
3) Top 20 des lieux
4) Box plot salaire vs cluster
5) Top entreprises

In [9]:
# --- Create the Dash app ---

def create_kpi_card(title, value, icon="üìä", color=COLORS['secondary']):
    """Build one KPI tile: an icon, an upper-cased caption and a coloured value.

    Args:
        title: Caption shown under the icon.
        value: Figure to highlight; rendered as the content of an H2.
        icon: Text (typically an emoji) displayed at the top of the tile.
        color: CSS colour applied to the value.

    Returns:
        An ``html.Div`` holding the tile.
    """
    caption_style = {
        'margin': '10px 0', 'fontSize': '0.9rem', 'color': COLORS['text'],
        'fontWeight': '500', 'textTransform': 'uppercase', 'letterSpacing': '0.5px'
    }
    value_style = {'margin': '5px 0', 'fontSize': '2rem', 'color': color, 'fontWeight': 'bold'}
    tile_style = {
        'backgroundColor': COLORS['card'], 'padding': '25px', 'borderRadius': '12px',
        'boxShadow': '0 2px 8px rgba(0,0,0,0.1)', 'transition': 'transform 0.2s, box-shadow 0.2s',
        'cursor': 'pointer', 'border': f'1px solid {COLORS["border"]}', 'flex': '1',
        'minWidth': '200px', 'margin': '10px'
    }

    # Inner block keeps the three elements centred; the outer Div is the card itself.
    content = html.Div(
        [
            html.Div(icon, style={'fontSize': '2.5rem', 'marginBottom': '10px'}),
            html.H3(title, style=caption_style),
            html.H2(value, style=value_style),
        ],
        style={'textAlign': 'center'},
    )
    return html.Div([content], style=tile_style)


def _filter_label(text, margin_bottom='8px'):
    """Return the bold, block-level label displayed above a filter control."""
    return html.Label(text, style={
        'fontWeight': '600', 'color': COLORS['text'],
        'marginBottom': margin_bottom, 'display': 'block'
    })


def _dropdown_options(df, column):
    """Return sorted dropdown options for the usable values of ``column``.

    Missing values and empty strings are skipped (normalize_location maps
    missing locations to "", which would otherwise show up as a blank option).
    Returns an empty list when the column is absent.
    """
    if column not in df.columns:
        return []
    values = sorted(v for v in df[column].dropna().unique().tolist() if v != "")
    return [{"label": v, "value": v} for v in values]


def _dropdown_filter(label, component_id, options, placeholder):
    """Return a labelled multi-select dropdown for the filters sidebar."""
    return html.Div([
        _filter_label(label),
        dcc.Dropdown(id=component_id, options=options, multi=True,
                     placeholder=placeholder, style={'marginBottom': '20px'}),
    ])


def _graph_card(graph_id, **extra_style):
    """Wrap a ``dcc.Graph`` with id ``graph_id`` in the shared card container.

    ``extra_style`` entries are merged on top of the base card style.
    """
    card_style = {
        'backgroundColor': COLORS['card'], 'padding': '20px', 'borderRadius': '12px',
        'boxShadow': '0 2px 8px rgba(0,0,0,0.1)', 'border': f'1px solid {COLORS["border"]}',
    }
    card_style.update(extra_style)
    graph = dcc.Graph(id=graph_id, config={'displayModeBar': True, 'displaylogo': False})
    return html.Div([graph], style=card_style)


def create_app(df: pd.DataFrame) -> Dash:
    """Create the Dash application and build its static layout from ``df``.

    The layout contains a header, four KPI cards, a filters sidebar (sector,
    location and contract dropdowns, a numeric cluster input, a salary range
    slider and a reset button), five graph placeholders and a footer. Dropdown
    options and KPI values are computed once from ``df``. Columns that are
    absent yield empty option lists, a count of 0, or "N/A" for the salary.

    Args:
        df: Job-offer dataset; the columns read here are ``sector``,
            ``location``, ``contract_type``, ``company`` and ``salary_monthly``.

    Returns:
        The configured ``Dash`` app. No callback is registered here.
    """
    app = Dash(__name__, suppress_callback_exceptions=True)

    # KPIs, computed on the full (unfiltered) frame
    total_jobs = len(df)
    has_salary = 'salary_monthly' in df.columns and df['salary_monthly'].notna().any()
    avg_salary = f"‚Ç¨{df['salary_monthly'].mean():,.0f}" if has_salary else "N/A"
    total_sectors = df['sector'].nunique() if 'sector' in df.columns else 0
    total_companies = df['company'].nunique() if 'company' in df.columns else 0

    # Header
    header = html.Div([
        html.Div([
            html.H1("üéØ LEBI Job Market Dashboard", style={
                'color': COLORS['card'], 'margin': '0', 'fontSize': '2.5rem',
                'fontWeight': '700', 'letterSpacing': '-0.5px'
            }),
            html.P("Real-time insights into job market trends and opportunities", style={
                'color': COLORS['card'], 'margin': '10px 0 0 0', 'fontSize': '1.1rem', 'opacity': '0.9'
            })
        ], style={'textAlign': 'center'})
    ], style={
        'backgroundColor': COLORS['primary'], 'padding': '40px 20px',
        'marginBottom': '30px', 'boxShadow': '0 4px 6px rgba(0,0,0,0.1)'
    })

    # KPI cards
    kpi_row = html.Div([
        create_kpi_card("Total Jobs", f"{total_jobs:,}", "üíº", COLORS['secondary']),
        create_kpi_card("Avg Salary", avg_salary, "üí∞", COLORS['success']),
        create_kpi_card("Sectors", total_sectors, "üè¢", COLORS['warning']),
        create_kpi_card("Companies", total_companies, "üè≠", COLORS['accent']),
    ], style={
        'display': 'flex', 'flexWrap': 'wrap', 'justifyContent': 'center',
        'margin': '0 auto 30px auto', 'maxWidth': '1400px', 'padding': '0 20px'
    })

    # Filters sidebar
    filters_sidebar = html.Div([
        html.Div([
            html.H3("üîç Filters", style={
                'color': COLORS['primary'], 'marginBottom': '25px', 'fontSize': '1.5rem',
                'fontWeight': '600', 'borderBottom': f'3px solid {COLORS["secondary"]}',
                'paddingBottom': '10px'
            }),

            _dropdown_filter("üè¢ Sector", "sector-filter",
                             _dropdown_options(df, "sector"), "All sectors..."),
            _dropdown_filter("üìç Location", "location-filter",
                             _dropdown_options(df, "location"), "All locations..."),
            _dropdown_filter("üìù Contract Type", "contract-filter",
                             _dropdown_options(df, "contract_type"), "All contract types..."),

            html.Div([
                _filter_label("üî¢ Cluster ID"),
                dcc.Input(id="cluster-filter", type="number", placeholder="Enter cluster...",
                          style={'width': '100%', 'padding': '10px', 'borderRadius': '5px',
                                 'border': f'1px solid {COLORS["border"]}', 'marginBottom': '20px'}),
            ]),

            html.Div([
                _filter_label("üíµ Salary Range (monthly)", margin_bottom='15px'),
                dcc.RangeSlider(id="salary-range", min=0, max=5000, step=100, value=[0, 5000],
                                marks={0: {'label': '0‚Ç¨'}, 1000: {'label': '1k‚Ç¨'}, 2000: {'label': '2k‚Ç¨'},
                                       3000: {'label': '3k‚Ç¨'}, 4000: {'label': '4k‚Ç¨'}, 5000: {'label': '5k‚Ç¨'}},
                                tooltip={"placement": "bottom", "always_visible": True}),
            ], style={'marginBottom': '30px'}),

            html.Div([
                html.Button('üîÑ Reset Filters', id='reset-btn', n_clicks=0, style={
                    'width': '100%', 'padding': '12px', 'backgroundColor': COLORS['accent'],
                    'color': 'white', 'border': 'none', 'borderRadius': '8px', 'fontSize': '1rem',
                    'fontWeight': '600', 'cursor': 'pointer', 'transition': 'all 0.3s',
                    'boxShadow': '0 2px 4px rgba(0,0,0,0.2)'
                })
            ])
        ], style={
            'backgroundColor': COLORS['card'], 'padding': '30px', 'borderRadius': '12px',
            'boxShadow': '0 2px 8px rgba(0,0,0,0.1)', 'border': f'1px solid {COLORS["border"]}',
            'position': 'sticky', 'top': '20px'
        })
    ], style={'width': '28%', 'display': 'inline-block', 'verticalAlign': 'top', 'padding': '0 15px'})

    # Charts panel
    charts_panel = html.Div([
        # Row 1: sector and salary side by side (48% + 4% gap + 48%)
        html.Div([
            _graph_card("jobs-by-sector", width='48%', display='inline-block', marginRight='4%'),
            _graph_card("salary-dist", width='48%', display='inline-block'),
        ], style={'marginBottom': '30px'}),
        # Rows 2-4: full-width cards
        _graph_card("jobs-by-location", marginBottom='30px'),
        _graph_card("cluster-viz", marginBottom='30px'),
        _graph_card("top-companies"),
    ], style={'width': '68%', 'display': 'inline-block', 'padding': '0 15px'})

    # Footer
    footer = html.Div([
        html.P("¬© 2025 LEBI Project | Job Market Intelligence Platform",
               style={'textAlign': 'center', 'color': COLORS['card'], 'margin': '0', 'fontSize': '0.9rem'})
    ], style={'backgroundColor': COLORS['primary'], 'padding': '20px', 'marginTop': '50px'})

    app.layout = html.Div([
        header,
        kpi_row,
        html.Div([filters_sidebar, charts_panel],
                 style={'maxWidth': '1600px', 'margin': '0 auto', 'padding': '20px'}),
        footer,
    ], style={'backgroundColor': COLORS['background'], 'minHeight': '100vh',
              'fontFamily': '-apple-system, BlinkMacSystemFont, "Segoe UI", Roboto, sans-serif'})

    # Callbacks are registered on the returned app in a later cell.
    return app

# Build the app first so the confirmation is printed only once the layout exists
# (previously the message was emitted even when create_app raised).
app = create_app(df)
print("‚úÖ App layout created")

‚úÖ App layout created


## Étape 4 – Configurer les callbacks et lancer le serveur Dash

Les cellules suivantes enregistrent les callbacks, affichent un rapport de synthèse, puis appellent `app.run()` pour démarrer l’app sur http://127.0.0.1:8050/.

In [10]:
# --- Callbacks pour le dashboard ---
from dash import Output, Input

# Reset button: push the initial value back into every filter control
@app.callback(
    Output("sector-filter", "value"),
    Output("location-filter", "value"),
    Output("contract-filter", "value"),
    Output("cluster-filter", "value"),
    Output("salary-range", "value"),
    Input("reset-btn", "n_clicks"),
    prevent_initial_call=True,
)
def reset_filters(n_clicks):
    """Return the default value of each filter when the reset button is clicked.

    ``n_clicks`` only triggers the callback; its value is not read. The
    returned tuple follows the order of the declared outputs: sector, location,
    contract, cluster, salary range.
    """
    dropdown_defaults = ([], [], [])  # one empty selection per multi-select dropdown
    cluster_default = None
    salary_default = [0, 5000]
    return (*dropdown_defaults, cluster_default, salary_default)


# Bounds of the "salary-range" slider (must match the RangeSlider in the layout).
# A handle left on a bound is treated as "no limit on that side".
SALARY_SLIDER_MIN = 0
SALARY_SLIDER_MAX = 5000


def _empty_figure(title, message):
    """Return a figure without traces, carrying ``title`` and one text annotation."""
    fig = go.Figure()
    fig.update_layout(title=title, template=CHART_TEMPLATE,
                      annotations=[dict(text=message, showarrow=False, font=dict(size=14))])
    return fig


def _apply_chart_layout(fig, margin, showlegend=False, **layout):
    """Apply the layout shared by all charts (transparent background, fonts, title colour)."""
    fig.update_layout(showlegend=showlegend, plot_bgcolor='rgba(0,0,0,0)',
                      paper_bgcolor='rgba(0,0,0,0)', font=dict(size=12),
                      title_font=dict(size=16, color=COLORS['primary']),
                      margin=margin, **layout)
    return fig


# Main update callback for all charts
@app.callback(
    Output("jobs-by-sector", "figure"),
    Output("salary-dist", "figure"),
    Output("jobs-by-location", "figure"),
    Output("cluster-viz", "figure"),
    Output("top-companies", "figure"),
    Input("sector-filter", "value"),
    Input("location-filter", "value"),
    Input("contract-filter", "value"),
    Input("cluster-filter", "value"),
    Input("salary-range", "value"),
)
def update(sectors_sel, locations_sel, contracts_sel, cluster_sel, salary_range):
    """Rebuild the five figures from the module-level ``df`` and the current filters.

    Args:
        sectors_sel, locations_sel, contracts_sel: Selected dropdown values; an
            empty or missing selection means "no filter".
        cluster_sel: Cluster id typed by the user, or None.
        salary_range: ``[low, high]`` from the salary slider.

    Returns:
        Tuple of figures in output order: sector, salary, location, cluster,
        companies. A chart whose data is missing or empty is replaced by a
        placeholder figure with an explanatory annotation.
    """
    dff = df.copy()

    # Dropdown filters
    for column, selected in (("sector", sectors_sel),
                             ("location", locations_sel),
                             ("contract_type", contracts_sel)):
        if selected:
            dff = dff[dff[column].isin(selected)]

    # Cluster filter (the column may be named 'job_cluster' or 'cluster')
    cluster_col = next((c for c in ("job_cluster", "cluster") if c in dff.columns), None)
    if cluster_sel is not None and cluster_col is not None:
        try:
            dff = dff[dff[cluster_col] == int(cluster_sel)]
        except (TypeError, ValueError, OverflowError):
            # Input that cannot be read as an integer: ignore the cluster filter
            # rather than failing the whole callback.
            pass

    # Salary range filter. Only bounds the user actually moved are applied:
    # with the slider at its defaults nothing is filtered, so offers without a
    # salary (or above the slider maximum) still count in the non-salary charts.
    if salary_range and "salary_monthly" in dff.columns:
        low, high = salary_range
        lower_active = low > SALARY_SLIDER_MIN
        upper_active = high < SALARY_SLIDER_MAX
        if lower_active or upper_active:
            salary = dff["salary_monthly"]
            keep = salary.notna()
            if lower_active:
                keep &= salary >= low
            if upper_active:
                keep &= salary <= high
            dff = dff[keep]

    # === GRAPH 1: Jobs by Sector ===
    if "sector" in dff.columns and not dff.empty:
        sector_counts = dff["sector"].value_counts().reset_index()
        sector_counts.columns = ["sector", "count"]
        fig_sector = px.bar(
            sector_counts.head(15), x="sector", y="count",
            title="üìä Job Distribution by Sector (Top 15)",
            labels={"sector": "Sector", "count": "Number of Jobs"},
            color="count", color_continuous_scale="Blues", template=CHART_TEMPLATE
        )
        fig_sector.update_traces(texttemplate='%{y}', textposition='outside',
                                 hovertemplate='<b>%{x}</b><br>Jobs: %{y}<extra></extra>')
        fig_sector.update_xaxes(tickangle=45)
        _apply_chart_layout(fig_sector, margin=dict(t=60, l=50, r=30, b=100))
    else:
        fig_sector = _empty_figure("üìä Job Distribution by Sector (No Data)", "No data available")

    # === GRAPH 2: Salary Distribution ===
    if "salary_monthly" in dff.columns and dff["salary_monthly"].notna().any():
        salary_data = dff[dff["salary_monthly"].notna()]
        fig_salary = px.histogram(salary_data, x="salary_monthly", nbins=40,
                                  title="üí∞ Salary Distribution",
                                  labels={"salary_monthly": "Monthly Salary (‚Ç¨)", "count": "Frequency"},
                                  color_discrete_sequence=[COLORS['success']], template=CHART_TEMPLATE)

        mean_salary = salary_data["salary_monthly"].mean()
        median_salary = salary_data["salary_monthly"].median()
        fig_salary.add_vline(x=mean_salary, line_dash="dash", line_color=COLORS['accent'],
                             annotation_text=f"Mean: ‚Ç¨{mean_salary:,.0f}", annotation_position="top")
        fig_salary.add_vline(x=median_salary, line_dash="dot", line_color=COLORS['warning'],
                             annotation_text=f"Median: ‚Ç¨{median_salary:,.0f}", annotation_position="bottom")
        fig_salary.update_traces(hovertemplate='Salary: ‚Ç¨%{x:,.0f}<br>Count: %{y}<extra></extra>')
        _apply_chart_layout(fig_salary, margin=dict(t=60, l=50, r=30, b=50))
    else:
        fig_salary = _empty_figure("üí∞ Salary Distribution (No Data)", "No salary data available")

    # === GRAPH 3: Location Distribution ===
    if "location" in dff.columns and not dff.empty:
        location_counts = dff["location"].value_counts().reset_index()
        location_counts.columns = ["location", "count"]
        location_counts = location_counts.head(20)

        fig_location = px.bar(location_counts, x="count", y="location", orientation='h',
                              title="üìç Top 20 Job Locations",
                              labels={"location": "Location", "count": "Number of Jobs"},
                              color="count", color_continuous_scale="Teal", template=CHART_TEMPLATE)
        fig_location.update_traces(texttemplate='%{x}', textposition='outside',
                                   hovertemplate='<b>%{y}</b><br>Jobs: %{x}<extra></extra>')
        # Reversed y axis puts the largest bar at the top.
        _apply_chart_layout(fig_location, margin=dict(t=60, l=200, r=30, b=50),
                            yaxis=dict(autorange="reversed"), height=600)
    else:
        fig_location = _empty_figure("üìç Job Distribution by Location (No Data)",
                                     "No location data available")

    # === GRAPH 4: Cluster Visualization ===
    if cluster_col and "salary_monthly" in dff.columns:
        try:
            cluster_data = dff[dff[cluster_col].notna() & dff["salary_monthly"].notna()].copy()
            if not cluster_data.empty:
                # Cast to str so Plotly treats cluster ids as categories, not a numeric axis.
                cluster_data[cluster_col] = cluster_data[cluster_col].astype(str)
                fig_cluster = px.box(cluster_data, x=cluster_col, y="salary_monthly", color=cluster_col,
                                     title="üéØ Salary Distribution by Cluster",
                                     labels={"salary_monthly": "Monthly Salary (‚Ç¨)", cluster_col: "Cluster ID"},
                                     color_discrete_sequence=px.colors.qualitative.Set3, template=CHART_TEMPLATE)
                fig_cluster.update_traces(hovertemplate='Cluster %{x}<br>Salary: ‚Ç¨%{y:,.0f}<extra></extra>')
                _apply_chart_layout(fig_cluster, margin=dict(t=60, l=50, r=30, b=50), showlegend=True)
            else:
                fig_cluster = _empty_figure("üéØ Cluster Visualization (No Data)", "No cluster data available")
        except Exception as e:
            # Deliberate best-effort: surface the error in the chart instead of
            # failing the four other figures.
            fig_cluster = _empty_figure("üéØ Cluster Visualization (Error)", f"Error: {str(e)}")
    else:
        fig_cluster = _empty_figure("üéØ Cluster Visualization (Not Available)", "Cluster data not available")

    # === GRAPH 5: Top Companies ===
    if "company" in dff.columns and not dff.empty:
        top = dff["company"].value_counts().nlargest(10).reset_index()
        top.columns = ["company", "count"]
        fig_companies = px.bar(top, y="company", x="count", orientation='h',
                               title="üè¢ Top 10 Hiring Companies",
                               labels={"company": "Company", "count": "Number of Jobs"},
                               color="count", color_continuous_scale="Viridis", template=CHART_TEMPLATE)
        fig_companies.update_traces(texttemplate='%{x}', textposition='outside',
                                    hovertemplate='<b>%{y}</b><br>Jobs: %{x}<extra></extra>')
        _apply_chart_layout(fig_companies, margin=dict(t=60, l=150, r=30, b=50),
                            yaxis=dict(autorange="reversed"))
    else:
        fig_companies = _empty_figure("üè¢ Top Companies (No Data)", "No company data available")

    return fig_sector, fig_salary, fig_location, fig_cluster, fig_companies


print("‚úÖ Callbacks configured")

‚úÖ Callbacks configured


In [11]:
# --- Dashboard summary report ---
# Prints dataset statistics (salary coverage, top locations/sectors, contract
# mix, cluster sizes) computed from the module-level `df`.

print("\n" + "="*70)
print("üöÄ PHASE 4 DASHBOARD - RAPPORT DE SYNTH√àSE")
print("="*70)

print(f"\n‚úÖ App charg√©e avec {len(df)} offres d'emploi")

# Salary statistics
# Explicit dtype: an empty Series built without one has no defined dtype.
salary_valid = df["salary_monthly"].dropna() if "salary_monthly" in df.columns else pd.Series(dtype=float)
salary_pct = (len(salary_valid) / len(df) * 100) if len(df) > 0 else 0
print("\nüí∞ DONN√âES SALARIALES")
print(f"   Salaires valides: {len(salary_valid)}/{len(df)} ({salary_pct:.1f}%)")
if len(salary_valid) > 0:
    print(f"   Fourchette: ‚Ç¨{salary_valid.min():.0f} - ‚Ç¨{salary_valid.max():.0f}")
    print(f"   Moyenne: ‚Ç¨{salary_valid.mean():.0f} | M√©diane: ‚Ç¨{salary_valid.median():.0f}")

# Geographic coverage
if "location" in df.columns:
    unique_locs = df["location"].nunique()
    print("\nüìç COUVERTURE G√âOGRAPHIQUE")
    print(f"   Lieux uniques: {unique_locs}")
    top_locs = df["location"].value_counts().head(3)
    for loc, count in top_locs.items():
        print(f"      {loc}: {count} offres")

# Sector distribution
if "sector" in df.columns:
    unique_sectors = df["sector"].nunique()
    print("\nüè¢ COUVERTURE SECTORIELLE")
    print(f"   Secteurs uniques: {unique_sectors}")
    top_sectors = df["sector"].value_counts().head(3)
    for sector, count in top_sectors.items():
        print(f"      {sector}: {count} offres")

# Contract types
if "contract_type" in df.columns:
    contracts = df["contract_type"].value_counts()
    print("\nüìã TYPES DE CONTRATS")
    # The loop body only runs when df has rows, so len(df) is non-zero here.
    for contract, count in contracts.items():
        pct = count / len(df) * 100
        print(f"   {contract}: {count} ({pct:.1f}%)")

# Clustering info
cluster_col = "job_cluster" if "job_cluster" in df.columns else ("cluster" if "cluster" in df.columns else None)
if cluster_col:
    # value_counts() ignores NaN; sort_index() lists clusters by id. Counting the
    # distinct labels (instead of max + 1) stays correct when ids are not the
    # contiguous range 0..k-1.
    cluster_sizes = df[cluster_col].value_counts().sort_index()
    n_clusters = len(cluster_sizes)
    print("\nüéØ CLUSTERING ML")
    print(f"   Topics d√©couverts: {n_clusters}")
    for cluster, count in cluster_sizes.items():
        print(f"      Cluster {int(cluster)}: {count} offres")

print("\n" + "="*70)
print("‚úÖ Dashboard pr√™t √† √™tre lanc√©")
print("="*70)


üöÄ PHASE 4 DASHBOARD - RAPPORT DE SYNTH√àSE

‚úÖ App charg√©e avec 1239 offres d'emploi

üí∞ DONN√âES SALARIALES
   Salaires valides: 1073/1239 (86.6%)
   Fourchette: ‚Ç¨1 - ‚Ç¨8229
   Moyenne: ‚Ç¨2106 | M√©diane: ‚Ç¨2117

üìç COUVERTURE G√âOGRAPHIQUE
   Lieux uniques: 635
      Paris - 75: 94 offres
      √éle-de-France: 24 offres
      Toulouse - 31: 16 offres

üè¢ COUVERTURE SECTORIELLE
   Secteurs uniques: 23
      Services aux Personnes ‚Ä¢ Particuliers: 290 offres
      Enseignement ‚Ä¢ Formation: 251 offres
      Distribution ‚Ä¢ Commerce de gros: 213 offres

üìã TYPES DE CONTRATS
   CDD: 516 (41.6%)
   CDI: 452 (36.5%)
   Int√©rim: 95 (7.7%)
   Stage: 90 (7.3%)
   Alternance: 86 (6.9%)

üéØ CLUSTERING ML
   Topics d√©couverts: 7
      Cluster 0: 209 offres
      Cluster 1: 149 offres
      Cluster 2: 210 offres
      Cluster 3: 103 offres
      Cluster 4: 404 offres
      Cluster 5: 70 offres
      Cluster 6: 94 offres

‚úÖ Dashboard pr√™t √† √™tre lanc√©


In [12]:
# === START THE DASH SERVER ===
# WARNING: this cell starts the Dash server. It keeps running until it is
# stopped (Ctrl+C or the notebook's Stop button).

# Single source for the address, shared by the printed URL and app.run() so the
# two cannot drift apart.
DASH_HOST = "127.0.0.1"
DASH_PORT = 8050

print("üöÄ D√©marrage du serveur Dash...")
print(f"üìç Le dashboard sera accessible sur: http://{DASH_HOST}:{DASH_PORT}/")
print("‚ö†Ô∏è  Pour arr√™ter le serveur: Cliquez sur le bouton 'Stop' ou appuyez sur Ctrl+C")
print("\n" + "="*70)

# Start the server (debug mode on, bound to the loopback address above)
app.run(debug=True, host=DASH_HOST, port=DASH_PORT)

üöÄ D√©marrage du serveur Dash...
üìç Le dashboard sera accessible sur: http://127.0.0.1:8050/
‚ö†Ô∏è  Pour arr√™ter le serveur: Cliquez sur le bouton 'Stop' ou appuyez sur Ctrl+C



## Étape 5 – Rapport de synthèse du dashboard

Affichons les statistiques clés et la qualité des données pour le dashboard.