In [8]:
import pandas as pd
import numpy as np
import sqlite3
import plotly.graph_objects as go
from plotly.subplots import make_subplots
from pathlib import Path

# Connexion
DB_PATH = Path.home() / 'Desktop/BNP Paribas/Data/Processed/hobart_database.db'
conn = sqlite3.connect(str(DB_PATH))

df = pd.read_sql_query("""
SELECT 
    c.NAME as category,
    sr.CREATIONDATE,
    sr.CLOSINGDATE,
    sr.EXPIRATION_DATE
FROM sr
JOIN category c ON sr.CATEGORY_ID = c.ID
WHERE sr.CLOSINGDATE IS NOT NULL 
  AND sr.CREATIONDATE IS NOT NULL
""", conn)
conn.close()

# Conversion dates
df['CREATIONDATE'] = pd.to_datetime(df['CREATIONDATE'], format='mixed')
df['CLOSINGDATE'] = pd.to_datetime(df['CLOSINGDATE'], format='mixed')
df['EXPIRATION_DATE'] = pd.to_datetime(df['EXPIRATION_DATE'], format="mixed", errors='coerce')

# Temps de r√©solution en jours
df['resolution_days'] = (df['CLOSINGDATE'] - df['CREATIONDATE']).dt.total_seconds() / 86400
df['deadline_days'] = (df['EXPIRATION_DATE'] - df['CREATIONDATE']).dt.total_seconds() / 86400

# Filtrer valeurs aberrantes
df = df[(df['resolution_days'] >= 0) & (df['resolution_days'] <= 365)]

# Stats par cat√©gorie (min 1000 SRs pour TOP/FLOP, min 100 pour volume)
cat_stats = df.groupby('category').agg(
    count=('resolution_days', 'size'),
    avg_resolution=('resolution_days', 'mean'),
    avg_deadline=('deadline_days', 'mean')
)

cat_stats_1k = cat_stats.query('count >= 1000')

# TOP 10 par rapidit√©, FLOP 10 par lenteur (min 1000 SRs), TOP 10 par volume (min 100)
top10_speed = cat_stats_1k.sort_values('avg_resolution').head(10)
flop10 = cat_stats_1k.sort_values('avg_resolution').tail(10).sort_values('avg_resolution', ascending=False)
top10_volume = cat_stats.query('count >= 100').sort_values('count', ascending=False).head(10)

# ‚îÄ‚îÄ‚îÄ Bins pour les histogrammes ‚îÄ‚îÄ‚îÄ
all_bin_edges = np.array([0, 0.5, 1, 2, 3, 5, 7, 14, 30, 60, 90, 180, 365])
all_bin_labels = ['<12h', '12h-1j', '1-2j', '2-3j', '3-5j', '5-7j', '7-14j', '14-30j', '30-60j', '60-90j', '90-180j', '180-365j']


def make_category_grid(cat_list, title, color_bars, color_deadline, max_days=365, min_days=0):
    """Cr√©e une grille 2x5 de graphiques pour 10 cat√©gories."""
    # Tronquer les bins selon max_days et min_days
    start = np.searchsorted(all_bin_edges, min_days, side='right')
    if start > 0:
        start -= 1
    cut = np.searchsorted(all_bin_edges, max_days, side='right')
    edges = all_bin_edges[start:cut]
    if edges[-1] < max_days:
        edges = np.append(edges, max_days)
    labels = all_bin_labels[start:start + len(edges) - 1]

    rows, cols = 2, 5

    fig = make_subplots(
        rows=rows, cols=cols,
        subplot_titles=[f"<b>{cat}</b>" for cat in cat_list.index],
        horizontal_spacing=0.05,
        vertical_spacing=0.12
    )

    for idx, (cat, row) in enumerate(cat_list.iterrows()):
        r = idx // cols + 1
        c = idx % cols + 1

        cat_data = df[df['category'] == cat]['resolution_days']
        counts, _ = np.histogram(cat_data, bins=edges)

        # Barres : distribution du temps de r√©solution
        fig.add_trace(go.Bar(
            x=labels,
            y=counts,
            marker_color=color_bars,
            opacity=0.85,
            name='SRs',
            showlegend=(idx == 0),
            hovertemplate='<b>%{x}</b><br>SRs: %{y:,}<extra></extra>'
        ), row=r, col=c)

        # Ligne verticale deadline moyenne
        deadline = row['avg_deadline']
        if not np.isnan(deadline) and min_days < deadline <= max_days:
            deadline_bin_idx = np.searchsorted(edges, deadline, side='right') - 1
            deadline_bin_idx = min(deadline_bin_idx, len(labels) - 1)
            deadline_label = labels[deadline_bin_idx]

            y_max = counts.max() if counts.max() > 0 else 1

            fig.add_vline(
                x=deadline_label, row=r, col=c,
                line=dict(color=color_deadline, width=2, dash='dash'),
            )
            fig.add_annotation(
                x=deadline_label, y=y_max * 0.95,
                text=f"Deadline<br>{deadline:.1f}j",
                showarrow=False,
                font=dict(size=8, color=color_deadline),
                row=r, col=c
            )

        # Annotation stats
        axis_suffix = "" if idx == 0 else str(idx + 1)
        pct_filtered = (cat_data >= min_days).sum() / len(cat_data) * 100 if len(cat_data) > 0 else 0
        note_filter = f"<br>({100 - pct_filtered:.0f}% < 12h)" if min_days > 0 else ""
        fig.add_annotation(
            x=0.95, y=0.85,
            xref=f"x{axis_suffix} domain", yref=f"y{axis_suffix} domain",
            text=f"n={row['count']:,.0f}<br>Moy: {row['avg_resolution']:.1f}j{note_filter}",
            showarrow=False,
            font=dict(size=8, color='#555'),
            align='right',
            xanchor='right'
        )

        # Axes
        fig.update_xaxes(tickangle=45, tickfont=dict(size=7), row=r, col=c)
        fig.update_yaxes(tickfont=dict(size=7), row=r, col=c)

    fig.update_layout(
        title=dict(text=title, font=dict(size=18)),
        height=600,
        width=1400,
        showlegend=False,
        plot_bgcolor='white',
        margin=dict(t=100, b=50, l=50, r=30)
    )
    fig.update_xaxes(showgrid=False)
    fig.update_yaxes(showgrid=True, gridcolor='#ecf0f1')

    return fig


# ‚îÄ‚îÄ‚îÄ TOP 10 rapidit√© : filtre >= 12h, √©chelle 30j ‚îÄ‚îÄ‚îÄ
fig_top = make_category_grid(
    top10_speed,
    "üèÜ <b>TOP 10 ‚Äî Cat√©gories les plus rapides (‚â• 1 000 SRs)</b><br>"
    "<sup>Distribution hors &lt;12h (‚â§ 30j) | Ligne pointill√©e = deadline moyenne</sup>",
    color_bars='#2ecc71',
    color_deadline='#e74c3c',
    max_days=30,
    min_days=0.5
)
fig_top.show()

# ‚îÄ‚îÄ‚îÄ TOP 10 volume ‚îÄ‚îÄ‚îÄ
fig_vol = make_category_grid(
    top10_volume,
    "üìä <b>TOP 10 ‚Äî Cat√©gories avec le plus de SRs</b><br>"
    "<sup>Distribution du temps de r√©solution | Ligne pointill√©e = deadline moyenne</sup>",
    color_bars='#3498db',
    color_deadline='#e74c3c',
    max_days=365
)
fig_vol.show()

# ‚îÄ‚îÄ‚îÄ FLOP 10 ‚îÄ‚îÄ‚îÄ
fig_flop = make_category_grid(
    flop10,
    "‚ö†Ô∏è <b>FLOP 10 ‚Äî Cat√©gories les plus lentes (‚â• 1 000 SRs)</b><br>"
    "<sup>Distribution du temps de r√©solution | Ligne pointill√©e = deadline moyenne</sup>",
    color_bars='#e74c3c',
    color_deadline='#2ecc71',
    max_days=365
)
fig_flop.show()

In [1]:
import pandas as pd
import numpy as np
import sqlite3
import plotly.graph_objects as go
from plotly.subplots import make_subplots
from pathlib import Path

# ‚îÄ‚îÄ‚îÄ Connexion & Requ√™te ‚îÄ‚îÄ‚îÄ
DB_PATH = Path.home() / 'Desktop/BNP Paribas/Data/Processed/hobart_database.db'
conn = sqlite3.connect(str(DB_PATH))

df = pd.read_sql_query("""
SELECT 
    sr.ID as sr_id,
    sr.CREATIONDATE,
    sr.CLOSINGDATE,
    COUNT(a.ID) as nb_activities
FROM sr
LEFT JOIN activity a ON a.SR_ID = sr.ID
WHERE sr.CLOSINGDATE IS NOT NULL 
  AND sr.CREATIONDATE IS NOT NULL
GROUP BY sr.ID, sr.CREATIONDATE, sr.CLOSINGDATE
""", conn)
conn.close()

# Conversion dates & calcul temps de r√©solution en heures
df['CREATIONDATE'] = pd.to_datetime(df['CREATIONDATE'], format='mixed')
df['CLOSINGDATE'] = pd.to_datetime(df['CLOSINGDATE'], format='mixed')
df['resolution_hours'] = (df['CLOSINGDATE'] - df['CREATIONDATE']).dt.total_seconds() / 3600

# Filtrer valeurs aberrantes (r√©solution entre 0 et 365 jours)
df = df[(df['resolution_hours'] >= 0) & (df['resolution_hours'] <= 365 * 24)]

# ‚îÄ‚îÄ‚îÄ Cat√©gorisation par nombre d'activities ‚îÄ‚îÄ‚îÄ
def categorize_activities(n):
    if n == 0:
        return '0 activity'
    elif n == 1:
        return '1 activity'
    else:
        return '2+ activities'

df['activity_group'] = df['nb_activities'].apply(categorize_activities)

# ‚îÄ‚îÄ‚îÄ Stats par groupe ‚îÄ‚îÄ‚îÄ
group_order = ['0 activity', '1 activity', '2+ activities']
stats = df.groupby('activity_group').agg(
    nb_sr=('sr_id', 'count'),
    total_hours=('resolution_hours', 'sum'),
    avg_hours=('resolution_hours', 'mean'),
    median_hours=('resolution_hours', 'median')
).reindex(group_order)

stats['pct_sr'] = stats['nb_sr'] / stats['nb_sr'].sum() * 100
stats['pct_hours'] = stats['total_hours'] / stats['total_hours'].sum() * 100

print("=" * 70)
print("ANALYSE DES SR PAR NOMBRE D'ACTIVITIES")
print("=" * 70)
print(f"\nTotal SRs analys√©s : {stats['nb_sr'].sum():,.0f}")
print(f"Total heures de r√©solution : {stats['total_hours'].sum():,.0f} h")
print()

for group in group_order:
    row = stats.loc[group]
    print(f"  {group:15s} | {row['nb_sr']:>10,.0f} SRs ({row['pct_sr']:5.1f}%) "
          f"| {row['total_hours']:>12,.0f} h ({row['pct_hours']:5.1f}%) "
          f"| Moy: {row['avg_hours']:>8,.1f} h | M√©d: {row['median_hours']:>8,.1f} h")

# ‚îÄ‚îÄ‚îÄ Graphiques ‚îÄ‚îÄ‚îÄ
colors = ['#3498db', '#2ecc71', '#e74c3c']

fig = make_subplots(
    rows=1, cols=2,
    specs=[[{"type": "pie"}, {"type": "pie"}]],
    subplot_titles=[
        "<b>R√©partition des SRs par nombre d'activities</b>",
        "<b>R√©partition du temps de r√©solution</b>"
    ]
)

# Donut 1 : % de SRs
fig.add_trace(go.Pie(
    labels=group_order,
    values=stats['nb_sr'].values,
    hole=0.5,
    marker_colors=colors,
    textinfo='label+percent',
    textposition='outside',
    texttemplate='<b>%{label}</b><br>%{value:,.0f} SRs<br>(%{percent})',
    hovertemplate='<b>%{label}</b><br>SRs: %{value:,.0f}<br>%{percent}<extra></extra>',
    name='SRs'
), row=1, col=1)

# Donut 2 : % du temps de r√©solution
fig.add_trace(go.Pie(
    labels=group_order,
    values=stats['total_hours'].values,
    hole=0.5,
    marker_colors=colors,
    textinfo='label+percent',
    textposition='outside',
    texttemplate='<b>%{label}</b><br>%{value:,.0f} h<br>(%{percent})',
    hovertemplate='<b>%{label}</b><br>Heures: %{value:,.0f}<br>%{percent}<extra></extra>',
    name='Heures'
), row=1, col=2)

fig.update_layout(
    title=dict(
        text="üìä <b>Analyse des SRs par nombre d'activities</b><br>"
             "<sup>R√©partition en volume (SRs) et en temps de r√©solution (heures)</sup>",
        font=dict(size=18)
    ),
    height=500,
    width=1100,
    showlegend=False,
    plot_bgcolor='white',
    margin=dict(t=120, b=50)
)
fig.show()

# ‚îÄ‚îÄ‚îÄ Bar chart : temps moyen et m√©dian par groupe ‚îÄ‚îÄ‚îÄ
fig2 = go.Figure()

fig2.add_trace(go.Bar(
    x=group_order,
    y=stats['avg_hours'].values,
    name='Moyenne',
    marker_color='#3498db',
    text=[f"{v:,.1f} h" for v in stats['avg_hours'].values],
    textposition='outside',
    hovertemplate='<b>%{x}</b><br>Temps moyen: %{y:,.1f} h<extra></extra>'
))

fig2.add_trace(go.Bar(
    x=group_order,
    y=stats['median_hours'].values,
    name='M√©diane',
    marker_color='#2ecc71',
    text=[f"{v:,.1f} h" for v in stats['median_hours'].values],
    textposition='outside',
    hovertemplate='<b>%{x}</b><br>Temps m√©dian: %{y:,.1f} h<extra></extra>'
))

fig2.update_layout(
    title=dict(
        text="‚è±Ô∏è <b>Temps de r√©solution moyen et m√©dian par groupe d'activities</b><br>"
             "<sup>Comparaison entre SRs avec 0, 1 ou 2+ activities</sup>",
        font=dict(size=16)
    ),
    xaxis_title="Nombre d'activities",
    yaxis_title="Heures",
    barmode='group',
    height=450,
    width=800,
    plot_bgcolor='white',
    legend=dict(orientation='h', yanchor='bottom', y=1.02, xanchor='right', x=1),
    margin=dict(t=120)
)
fig2.update_yaxes(showgrid=True, gridcolor='#ecf0f1')
fig2.show()

ANALYSE DES SR PAR NOMBRE D'ACTIVITIES

Total SRs analys√©s : 2,311,676
Total heures de r√©solution : 356,982,235 h

  0 activity      |  2,189,170 SRs ( 94.7%) |  318,275,354 h ( 89.2%) | Moy:    145.4 h | M√©d:      2.1 h
  1 activity      |     99,742 SRs (  4.3%) |   21,533,406 h (  6.0%) | Moy:    215.9 h | M√©d:      4.5 h
  2+ activities   |     22,764 SRs (  1.0%) |   17,173,475 h (  4.8%) | Moy:    754.4 h | M√©d:    118.9 h


In [2]:
import pandas as pd
df = pd.read_csv('/Users/jo/Downloads/SR_CLIENT_ANNE_LAURE-1 (1).csv')
df.head()

Unnamed: 0,SR_ID,CUSTOMER_ID,CUSTOMER_CONTACT_ID
0,15753131,1945056,5280997
1,15600407,1945056,5280997
2,15731447,1945056,5280997
3,15827443,1945056,5280997
4,15542900,1945056,5280997


In [9]:
import pandas as pd
import numpy as np
import sqlite3
import plotly.graph_objects as go
from plotly.subplots import make_subplots
from pathlib import Path

# ‚îÄ‚îÄ‚îÄ Connexion ‚îÄ‚îÄ‚îÄ
DB_PATH = Path.home() / 'Desktop/BNP Paribas/Data/Processed/hobart_database.db'
conn = sqlite3.connect(str(DB_PATH))

# ‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê
# √âTAPE 1 : Charger les donn√©es SR + nombre d'activit√©s
# ‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê
sr_activities = pd.read_sql_query("""
SELECT 
    sr.ID as sr_id,
    COUNT(a.ID) as nb_activities
FROM sr
LEFT JOIN activity a ON a.SR_ID = sr.ID
GROUP BY sr.ID
""", conn)

sr_activities['activity_group'] = pd.cut(
    sr_activities['nb_activities'],
    bins=[-1, 0, 1, float('inf')],
    labels=['0 activit√©', '1 activit√©', '‚â•2 activit√©s']
)

# ‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê
# √âTAPE 2 : Charger les interactions (srcontact)
# ‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê
contacts = pd.read_sql_query("""
SELECT 
    sc.SR_ID,
    sc.OUTBOUND,
    sc.EMAIL_CATEGORY,
    sc.IS_FORWARDED,
    sc.FOUR_EYE_CHECK_ENABLED
FROM srcontact sc
""", conn)

conn.close()

# ‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê
# √âTAPE 3 : Classifier chaque interaction
# ‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê
# Note : client_concat est un extract partiel (46% de couverture, INTERNAL_FLAG=0 partout)
#        ‚Üí on ne peut PAS l'utiliser comme filtre fiable.
#        On se base uniquement sur les champs de srcontact.

contacts['is_inbound'] = contacts['OUTBOUND'] == 0

# ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ
# HYPOTH√àSE A (Conservative) :
# Outbound externe = ACKNOWLEDGED + FIRST_RESPONSE
# Outbound interne = NORMAL (on garde)
# Logique : seuls les messages explicitement tagu√©s "r√©ponse client" sont exclus
# ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ
contacts['outbound_externe_A'] = (
    (contacts['OUTBOUND'] == 1) &
    (contacts['EMAIL_CATEGORY'].isin(['ACKNOWLEDGED', 'FIRST_RESPONSE']))
)
contacts['outbound_interne_A'] = (
    (contacts['OUTBOUND'] == 1) & (~contacts['outbound_externe_A'])
)

# ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ
# HYPOTH√àSE B (Mod√©r√©e) :
# Outbound externe = ACKNOWLEDGED + FIRST_RESPONSE
#                   + NORMAL avec FOUR_EYE_CHECK = 1 (contr√¥le compliance ‚Üí client)
# Outbound interne = NORMAL sans 4-eyes + forwarded
# ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ
contacts['outbound_externe_B'] = (
    (contacts['OUTBOUND'] == 1) &
    (
        contacts['EMAIL_CATEGORY'].isin(['ACKNOWLEDGED', 'FIRST_RESPONSE']) |
        ((contacts['EMAIL_CATEGORY'] == 'NORMAL') & (contacts['FOUR_EYE_CHECK_ENABLED'] == 1))
    )
)
contacts['outbound_interne_B'] = (
    (contacts['OUTBOUND'] == 1) & (~contacts['outbound_externe_B'])
)

# ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ
# HYPOTH√àSE C (Agressive) :
# Outbound interne = IS_FORWARDED = 1 uniquement (forwarding interne)
# Outbound externe = tout le reste des outbound
# ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ
contacts['outbound_externe_C'] = (
    (contacts['OUTBOUND'] == 1) & (contacts['IS_FORWARDED'] != 1)
)
contacts['outbound_interne_C'] = (
    (contacts['OUTBOUND'] == 1) & (contacts['IS_FORWARDED'] == 1)
)

# ‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê
# √âTAPE 4 : Agr√©ger par SR
# ‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê
sr_contacts = contacts.groupby('SR_ID').agg(
    total_interactions=('OUTBOUND', 'size'),
    nb_inbound=('is_inbound', 'sum'),
    nb_outbound_interne_A=('outbound_interne_A', 'sum'),
    nb_outbound_externe_A=('outbound_externe_A', 'sum'),
    nb_outbound_interne_B=('outbound_interne_B', 'sum'),
    nb_outbound_externe_B=('outbound_externe_B', 'sum'),
    nb_outbound_interne_C=('outbound_interne_C', 'sum'),
    nb_outbound_externe_C=('outbound_externe_C', 'sum'),
).reset_index()

# ‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê
# √âTAPE 5 : Joindre avec groupes d'activit√©s
# ‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê
merged = sr_activities.merge(sr_contacts, left_on='sr_id', right_on='SR_ID', how='left')
numeric_cols = merged.select_dtypes(include='number').columns
merged[numeric_cols] = merged[numeric_cols].fillna(0)

# ‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê
# √âTAPE 6 : Calcul des moyennes par groupe d'activit√©s
# ‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê
group_order = ['0 activit√©', '1 activit√©', '‚â•2 activit√©s']

results = {}
for hyp in ['A', 'B', 'C']:
    merged[f'interactions_retenues_{hyp}'] = (
        merged['nb_inbound'] + merged[f'nb_outbound_interne_{hyp}']
    )
    
    stats = merged.groupby('activity_group', observed=False).agg(
        nb_sr=('sr_id', 'count'),
        avg_inbound=('nb_inbound', 'mean'),
        avg_outbound_interne=(f'nb_outbound_interne_{hyp}', 'mean'),
        avg_outbound_externe=(f'nb_outbound_externe_{hyp}', 'mean'),
        avg_interactions_retenues=(f'interactions_retenues_{hyp}', 'mean'),
        avg_total=('total_interactions', 'mean'),
    ).reindex(group_order)
    
    results[hyp] = stats

# ‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê
# AFFICHAGE DES R√âSULTATS
# ‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê

hyp_labels = {
    'A': 'Hyp. A (Conservative) : externe = ACKNOWLEDGED + FIRST_RESPONSE',
    'B': 'Hyp. B (Mod√©r√©e)      : externe = ACK + FIRST_RESP + NORMAL avec 4-eyes',
    'C': 'Hyp. C (Agressive)    : interne = IS_FORWARDED uniquement'
}

print("=" * 110)
print("NOMBRE MOYEN D'INTERACTIONS PAR SR ‚Äî SELON LE NOMBRE D'ACTIVIT√âS")
print("Interactions retenues = Inbound + Outbound internes (hors outbound client)")
print("‚îÄ" * 110)
print("Note : client_concat est un extract partiel (46% couverture, INTERNAL_FLAG=0 partout)")
print("       ‚Üí classification bas√©e uniquement sur les champs srcontact")
print("=" * 110)

for hyp, label in hyp_labels.items():
    stats = results[hyp]
    print(f"\n{'‚îÄ' * 110}")
    print(f"  {label}")
    print(f"{'‚îÄ' * 110}")
    print(f"  {'Groupe':<18} | {'N SRs':>10} | {'Moy Inbound':>12} | {'Moy Out Int':>12} | "
          f"{'Moy Out Ext':>12} | {'Moy Retenues':>13} | {'Moy Total':>10}")
    print(f"  {'‚îÄ' * 17} | {'‚îÄ' * 10} | {'‚îÄ' * 12} | {'‚îÄ' * 12} | {'‚îÄ' * 12} | {'‚îÄ' * 13} | {'‚îÄ' * 10}")
    
    for group in group_order:
        row = stats.loc[group]
        print(f"  {group:<18} | {row['nb_sr']:>10,.0f} | {row['avg_inbound']:>12.2f} | "
              f"{row['avg_outbound_interne']:>12.2f} | {row['avg_outbound_externe']:>12.2f} | "
              f"{row['avg_interactions_retenues']:>13.2f} | {row['avg_total']:>10.2f}")

# Volume global exclu par hypoth√®se
print(f"\n{'=' * 110}")
print("VOLUME D'OUTBOUND EXCLUS PAR HYPOTH√àSE")
print(f"{'=' * 110}")
total_outbound = (contacts['OUTBOUND'] == 1).sum()
for hyp in ['A', 'B', 'C']:
    exclu = contacts[f'outbound_externe_{hyp}'].sum()
    interne = contacts[f'outbound_interne_{hyp}'].sum()
    print(f"  Hyp. {hyp} : {exclu:>10,.0f} exclus ({exclu/total_outbound*100:5.1f}%) | "
          f"{interne:>10,.0f} internes retenus ({interne/total_outbound*100:5.1f}%)")

NOMBRE MOYEN D'INTERACTIONS PAR SR ‚Äî SELON LE NOMBRE D'ACTIVIT√âS
Interactions retenues = Inbound + Outbound internes (hors outbound client)
‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ
Note : client_concat est un extract partiel (46% couverture, INTERNAL_FLAG=0 partout)
       ‚Üí classification bas√©e uniquement sur les champs srcontact

‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ
  Hyp. A (Conservative) : externe = ACK

In [10]:
# ‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê
# VISUALISATION : Comparaison des 3 hypoth√®ses
# ‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê

fig = make_subplots(
    rows=1, cols=3,
    subplot_titles=[
        "<b>Hyp. A (Conservative)</b><br><sup>Externe = ACK + FIRST_RESP</sup>",
        "<b>Hyp. B (Mod√©r√©e)</b><br><sup>+ NORMAL avec 4-eyes</sup>",
        "<b>Hyp. C (Agressive)</b><br><sup>Interne = IS_FORWARDED seul</sup>",
    ],
    horizontal_spacing=0.08
)

colors = {'inbound': '#3498db', 'out_interne': '#2ecc71', 'out_externe': '#e74c3c'}

for col_idx, hyp in enumerate(['A', 'B', 'C'], 1):
    stats = results[hyp]
    
    # Barres empil√©es : inbound + outbound interne + outbound externe
    fig.add_trace(go.Bar(
        x=group_order,
        y=stats['avg_inbound'].values,
        name='Inbound (client ‚Üí desk)',
        marker_color=colors['inbound'],
        text=[f"{v:.2f}" for v in stats['avg_inbound'].values],
        textposition='inside',
        showlegend=(col_idx == 1),
        hovertemplate='<b>%{x}</b><br>Moy inbound: %{y:.2f}<extra></extra>'
    ), row=1, col=col_idx)
    
    fig.add_trace(go.Bar(
        x=group_order,
        y=stats['avg_outbound_interne'].values,
        name='Outbound interne (BNPP ‚Üí BNPP)',
        marker_color=colors['out_interne'],
        text=[f"{v:.2f}" for v in stats['avg_outbound_interne'].values],
        textposition='inside',
        showlegend=(col_idx == 1),
        hovertemplate='<b>%{x}</b><br>Moy out. interne: %{y:.2f}<extra></extra>'
    ), row=1, col=col_idx)
    
    fig.add_trace(go.Bar(
        x=group_order,
        y=stats['avg_outbound_externe'].values,
        name='Outbound externe (‚Üí client, exclu)',
        marker_color=colors['out_externe'],
        text=[f"{v:.2f}" for v in stats['avg_outbound_externe'].values],
        textposition='inside',
        opacity=0.4,
        showlegend=(col_idx == 1),
        hovertemplate='<b>%{x}</b><br>Moy out. externe: %{y:.2f}<extra></extra>'
    ), row=1, col=col_idx)

fig.update_layout(
    title=dict(
        text="üìä <b>Nombre moyen d'interactions par SR selon le nombre d'activit√©s</b><br>"
             "<sup>Comparaison de 3 hypoth√®ses de classification inbound/outbound interne/outbound externe</sup>",
        font=dict(size=16)
    ),
    barmode='stack',
    height=550,
    width=1400,
    plot_bgcolor='white',
    legend=dict(orientation='h', yanchor='bottom', y=-0.18, xanchor='center', x=0.5),
    margin=dict(t=120, b=120)
)
fig.update_yaxes(title_text="Moy. interactions / SR", showgrid=True, gridcolor='#ecf0f1')
fig.update_xaxes(tickangle=15)
fig.show()

# ‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê
# FOCUS : Interactions retenues uniquement (inbound + outbound interne)
# ‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê

fig2 = go.Figure()

bar_colors = ['#3498db', '#e67e22', '#9b59b6']
hyp_short = {'A': 'Hyp. A (Conservative)', 'B': 'Hyp. B (Mod√©r√©e)', 'C': 'Hyp. C (Agressive)'}

for i, hyp in enumerate(['A', 'B', 'C']):
    stats = results[hyp]
    fig2.add_trace(go.Bar(
        x=group_order,
        y=stats['avg_interactions_retenues'].values,
        name=hyp_short[hyp],
        marker_color=bar_colors[i],
        text=[f"{v:.2f}" for v in stats['avg_interactions_retenues'].values],
        textposition='outside',
        hovertemplate='<b>%{x}</b><br>Moy retenues: %{y:.2f}<extra></extra>'
    ))

fig2.update_layout(
    title=dict(
        text="üéØ <b>Interactions retenues par SR (Inbound + Outbound internes)</b><br>"
             "<sup>Excluant les outbound envoy√©s au client ‚Äî Comparaison des 3 hypoth√®ses</sup>",
        font=dict(size=16)
    ),
    barmode='group',
    height=500,
    width=900,
    plot_bgcolor='white',
    legend=dict(orientation='h', yanchor='bottom', y=-0.18, xanchor='center', x=0.5),
    margin=dict(t=120, b=100),
    xaxis_title="Nombre d'activit√©s associ√©es √† la SR",
    yaxis_title="Moy. interactions retenues / SR"
)
fig2.update_yaxes(showgrid=True, gridcolor='#ecf0f1')
fig2.show()

# ‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê
# TABLEAU R√âCAP : D√©tail inbound vs outbound interne (Hyp. B recommand√©e)
# ‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê

fig3 = go.Figure()

stats_b = results['B']

fig3.add_trace(go.Bar(
    x=group_order,
    y=stats_b['avg_inbound'].values,
    name='Inbound (client ‚Üí desk)',
    marker_color='#3498db',
    text=[f"{v:.2f}" for v in stats_b['avg_inbound'].values],
    textposition='outside',
))

fig3.add_trace(go.Bar(
    x=group_order,
    y=stats_b['avg_outbound_interne'].values,
    name='Outbound interne (BNPP ‚Üí BNPP)',
    marker_color='#2ecc71',
    text=[f"{v:.2f}" for v in stats_b['avg_outbound_interne'].values],
    textposition='outside',
))

fig3.update_layout(
    title=dict(
        text="üì¨ <b>D√©tail Inbound vs Outbound interne par groupe d'activit√©s</b><br>"
             "<sup>Hypoth√®se B (recommand√©e) ‚Äî Outbound client exclu</sup>",
        font=dict(size=16)
    ),
    barmode='group',
    height=500,
    width=800,
    plot_bgcolor='white',
    legend=dict(orientation='h', yanchor='bottom', y=-0.15, xanchor='center', x=0.5),
    margin=dict(t=120, b=80),
    xaxis_title="Nombre d'activit√©s",
    yaxis_title="Moy. interactions / SR"
)
fig3.update_yaxes(showgrid=True, gridcolor='#ecf0f1')
fig3.show()

In [None]:
import pandas as pd
import numpy as np
import sqlite3
import plotly.graph_objects as go
from plotly.subplots import make_subplots
from pathlib import Path

# ‚îÄ‚îÄ‚îÄ Connexion ‚îÄ‚îÄ‚îÄ
DB_PATH = Path.home() / 'Desktop/BNP Paribas/Data/Processed/hobart_database.db'
conn = sqlite3.connect(str(DB_PATH))

# ‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê
# ANALYSE FACTUELLE ‚Äî Inbound vs Outbound (sans hypoth√®se)
# Bas√©e uniquement sur le champ OUTBOUND (0 = re√ßu, 1 = envoy√©)
# ‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê

sr_activities = pd.read_sql_query("""
SELECT 
    sr.ID as sr_id,
    COUNT(a.ID) as nb_activities
FROM sr
LEFT JOIN activity a ON a.SR_ID = sr.ID
GROUP BY sr.ID
""", conn)

sr_activities['activity_group'] = pd.cut(
    sr_activities['nb_activities'],
    bins=[-1, 0, 1, float('inf')],
    labels=['0 activit√©', '1 activit√©', '‚â•2 activit√©s']
)

contacts = pd.read_sql_query("""
SELECT SR_ID, OUTBOUND FROM srcontact
""", conn)

conn.close()

# Agr√©ger par SR
sr_contacts = contacts.groupby('SR_ID').agg(
    total=('OUTBOUND', 'size'),
    nb_inbound=('OUTBOUND', lambda x: (x == 0).sum()),
    nb_outbound=('OUTBOUND', lambda x: (x == 1).sum()),
).reset_index()

# Joindre
merged = sr_activities.merge(sr_contacts, left_on='sr_id', right_on='SR_ID', how='left')
numeric_cols = merged.select_dtypes(include='number').columns
merged[numeric_cols] = merged[numeric_cols].fillna(0)

# Stats par groupe
group_order = ['0 activit√©', '1 activit√©', '‚â•2 activit√©s']

facts = merged.groupby('activity_group', observed=False).agg(
    nb_sr=('sr_id', 'count'),
    avg_total=('total', 'mean'),
    avg_inbound=('nb_inbound', 'mean'),
    avg_outbound=('nb_outbound', 'mean'),
    med_total=('total', 'median'),
    med_inbound=('nb_inbound', 'median'),
    med_outbound=('nb_outbound', 'median'),
).reindex(group_order)

# ‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê
# TABLEAU FACTUEL
# ‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê
print("=" * 100)
print("ANALYSE FACTUELLE ‚Äî Nombre moyen d'interactions par SR")
print("Bas√©e uniquement sur OUTBOUND (0 = inbound, 1 = outbound) ‚Äî Aucune hypoth√®se")
print("=" * 100)
print(f"\n  {'Groupe':<18} | {'N SRs':>10} | {'Moy Total':>10} | {'Moy Inbound':>12} | "
      f"{'Moy Outbound':>13} | {'M√©d Total':>10} | {'M√©d Inbound':>12} | {'M√©d Outbound':>13}")
print(f"  {'‚îÄ'*17} | {'‚îÄ'*10} | {'‚îÄ'*10} | {'‚îÄ'*12} | {'‚îÄ'*13} | {'‚îÄ'*10} | {'‚îÄ'*12} | {'‚îÄ'*13}")

for group in group_order:
    r = facts.loc[group]
    print(f"  {group:<18} | {r['nb_sr']:>10,.0f} | {r['avg_total']:>10.2f} | {r['avg_inbound']:>12.2f} | "
          f"{r['avg_outbound']:>13.2f} | {r['med_total']:>10.1f} | {r['med_inbound']:>12.1f} | {r['med_outbound']:>13.1f}")

# Ratio inbound/outbound
print(f"\n  Ratio moyen inbound / outbound :")
for group in group_order:
    r = facts.loc[group]
    ratio = r['avg_inbound'] / r['avg_outbound'] if r['avg_outbound'] > 0 else float('inf')
    pct_in = r['avg_inbound'] / r['avg_total'] * 100 if r['avg_total'] > 0 else 0
    pct_out = r['avg_outbound'] / r['avg_total'] * 100 if r['avg_total'] > 0 else 0
    print(f"    {group:<18} : {pct_in:.1f}% inbound / {pct_out:.1f}% outbound (ratio {ratio:.2f})")

# ‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê
# GRAPHIQUE 1 : Bar chart group√© ‚Äî Moy inbound vs outbound
# ‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê
fig1 = go.Figure()

fig1.add_trace(go.Bar(
    x=group_order,
    y=facts['avg_inbound'].values,
    name='Inbound (OUTBOUND=0)',
    marker_color='#3498db',
    text=[f"{v:.2f}" for v in facts['avg_inbound'].values],
    textposition='outside',
    hovertemplate='<b>%{x}</b><br>Moy inbound: %{y:.2f}<extra></extra>'
))

fig1.add_trace(go.Bar(
    x=group_order,
    y=facts['avg_outbound'].values,
    name='Outbound (OUTBOUND=1)',
    marker_color='#e67e22',
    text=[f"{v:.2f}" for v in facts['avg_outbound'].values],
    textposition='outside',
    hovertemplate='<b>%{x}</b><br>Moy outbound: %{y:.2f}<extra></extra>'
))

fig1.update_layout(
    title=dict(
        text="<b>Nombre moyen d'interactions par SR ‚Äî Inbound vs Outbound</b><br>"
             "<sup>Donn√©es factuelles (champ OUTBOUND) par groupe d'activit√©s ‚Äî Aucune hypoth√®se</sup>",
        font=dict(size=16)
    ),
    barmode='group',
    height=500,
    width=800,
    plot_bgcolor='white',
    legend=dict(orientation='h', yanchor='bottom', y=-0.15, xanchor='center', x=0.5),
    margin=dict(t=120, b=80),
    xaxis_title="Nombre d'activit√©s",
    yaxis_title="Moy. interactions / SR"
)
fig1.update_yaxes(showgrid=True, gridcolor='#ecf0f1')
fig1.show()

# ‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê
# GRAPHIQUE 2 : Stacked bar ‚Äî Proportion inbound / outbound
# ‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê
fig2 = go.Figure()

fig2.add_trace(go.Bar(
    x=group_order,
    y=facts['avg_inbound'].values,
    name='Inbound (OUTBOUND=0)',
    marker_color='#3498db',
    text=[f"{v:.2f}" for v in facts['avg_inbound'].values],
    textposition='inside',
    insidetextanchor='middle',
    hovertemplate='<b>%{x}</b><br>Moy inbound: %{y:.2f}<extra></extra>'
))

fig2.add_trace(go.Bar(
    x=group_order,
    y=facts['avg_outbound'].values,
    name='Outbound (OUTBOUND=1)',
    marker_color='#e67e22',
    text=[f"{v:.2f}" for v in facts['avg_outbound'].values],
    textposition='inside',
    insidetextanchor='middle',
    hovertemplate='<b>%{x}</b><br>Moy outbound: %{y:.2f}<extra></extra>'
))

# Annotation total au dessus
for i, group in enumerate(group_order):
    fig2.add_annotation(
        x=group, y=facts.loc[group, 'avg_total'],
        text=f"<b>Total: {facts.loc[group, 'avg_total']:.2f}</b>",
        showarrow=False, yshift=15,
        font=dict(size=11, color='#2c3e50')
    )

fig2.update_layout(
    title=dict(
        text="<b>Interactions par SR ‚Äî Composition Inbound / Outbound</b><br>"
             "<sup>Barres empil√©es par groupe d'activit√©s ‚Äî Donn√©es factuelles</sup>",
        font=dict(size=16)
    ),
    barmode='stack',
    height=500,
    width=800,
    plot_bgcolor='white',
    legend=dict(orientation='h', yanchor='bottom', y=-0.15, xanchor='center', x=0.5),
    margin=dict(t=120, b=80),
    xaxis_title="Nombre d'activit√©s",
    yaxis_title="Moy. interactions / SR"
)
fig2.update_yaxes(showgrid=True, gridcolor='#ecf0f1')
fig2.show()