# BNP Paribas Securities Services - Category Analysis

**Probl√©matique** : *How can we improve our institutional clients' day-to-day experience?*

**P√©riode d'analyse** : Janvier √† Septembre 2024-2025

**Approche** :
1. Analyse de la r√©partition par cat√©gorie de demandes
2. Temps de traitement par cat√©gorie (impact sur l'exp√©rience client)
3. Tendances temporelles et patterns
4. Identification des pain points clients
5. KPIs

---
## 1. Configuration & Connexion

In [71]:
import pandas as pd
import numpy as np
import sqlite3
import plotly.express as px
import plotly.graph_objects as go
from plotly.subplots import make_subplots
from pathlib import Path
from datetime import datetime
import warnings
warnings.filterwarnings('ignore')

# Configuration
BASE_DIR = Path('/Users/jo/Desktop/BNP Paribas')
DB_PATH = BASE_DIR / 'Data/Processed/hobart_database.db'
EXPORT_DIR = BASE_DIR / 'Data/Exports'
EXPORT_DIR.mkdir(parents=True, exist_ok=True)

# Connexion
conn = sqlite3.connect(str(DB_PATH))

print("‚úÖ Connexion √©tablie √† la base de donn√©es Hobart")
print(f"üìÇ Base: {DB_PATH}")
print(f"üìä P√©riode: Janvier - Septembre 2024-2025")

# Configuration Plotly
import plotly.io as pio
pio.templates.default = "plotly_white"

‚úÖ Connexion √©tablie √† la base de donn√©es Hobart
üìÇ Base: /Users/jo/Desktop/BNP Paribas/Data/Processed/hobart_database.db
üìä P√©riode: Janvier - Septembre 2024-2025


---
## 2. Vue d'Ensemble : Statistiques Cl√©s

Comprendre le volume et la nature des demandes clients.

In [72]:
# Statistiques globales (Jan-Sep uniquement, exclure Octobre incomplet)
stats_global = pd.read_sql_query("""
SELECT 
    COUNT(*) as total_srs,
    COUNT(CASE WHEN CLOSINGDATE IS NOT NULL THEN 1 END) as srs_closed,
    ROUND(COUNT(CASE WHEN CLOSINGDATE IS NOT NULL THEN 1 END) * 100.0 / COUNT(*), 2) as closure_rate,
    COUNT(DISTINCT CATEGORY_ID) as categories_count,
    COUNT(DISTINCT JUR_DESK_ID) as desks_count,
    ROUND(AVG(CAST((julianday(CLOSINGDATE) - julianday(CREATIONDATE)) * 24 AS REAL)), 2) as avg_hours_to_close
FROM sr

""", conn)

print("="*70)
print("üìä VUE D'ENSEMBLE (Janvier - Septembre)")
print("="*70)
print(f"\nüìã Service Requests totaux : {stats_global['total_srs'][0]:,}")
print(f"‚úÖ SRs ferm√©s : {stats_global['srs_closed'][0]:,} ({stats_global['closure_rate'][0]}%)")
print(f"üìÇ Cat√©gories de demandes : {stats_global['categories_count'][0]}")
print(f"üë• Desks actifs : {stats_global['desks_count'][0]}")
print(f"‚è±Ô∏è  Temps moyen de r√©solution : {stats_global['avg_hours_to_close'][0]:.1f} heures ({stats_global['avg_hours_to_close'][0]/24:.1f} jours)")

display(stats_global)

üìä VUE D'ENSEMBLE (Janvier - Septembre)

üìã Service Requests totaux : 2,370,662
‚úÖ SRs ferm√©s : 2,314,348 (97.62%)
üìÇ Cat√©gories de demandes : 1565
üë• Desks actifs : 877
‚è±Ô∏è  Temps moyen de r√©solution : 167.2 heures (7.0 jours)


Unnamed: 0,total_srs,srs_closed,closure_rate,categories_count,desks_count,avg_hours_to_close
0,2370662,2314348,97.62,1565,877,167.17


---
## 3. Analyse par Cat√©gorie : R√©partition des Demandes Clients

**Question Cl√©** : Quelles sont les cat√©gories de demandes les plus fr√©quentes ?

In [73]:
# R√©partition par cat√©gorie (Top 20)
category_distribution = pd.read_sql_query("""
SELECT 
    c.NAME as category_name,
    c.ID as category_id,
    COUNT(sr.ID) as total_requests,
    COUNT(CASE WHEN sr.CLOSINGDATE IS NOT NULL THEN 1 END) as closed_requests,
    ROUND(COUNT(CASE WHEN sr.CLOSINGDATE IS NOT NULL THEN 1 END) * 100.0 / COUNT(sr.ID), 2) as closure_rate,
    ROUND(AVG(CAST((julianday(sr.CLOSINGDATE) - julianday(sr.CREATIONDATE)) * 24 AS REAL)), 2) as avg_hours_to_close,
    ROUND(COUNT(sr.ID) * 100.0 / (SELECT COUNT(*) FROM sr ), 2) as pct_of_total
FROM sr
LEFT JOIN category c ON sr.CATEGORY_ID = c.ID
WHERE strftime('%Y-%m', sr.CREATIONDATE) BETWEEN '2024-01' AND '2025-09'
GROUP BY c.ID, c.NAME
ORDER BY total_requests DESC
LIMIT 20
""", conn)

print("="*70)
print("üìÇ TOP 20 CAT√âGORIES DE DEMANDES CLIENTS")
print("="*70)
print(f"\nTop 5 cat√©gories repr√©sentent {category_distribution.head(5)['pct_of_total'].sum():.1f}% du volume total\n")

display(category_distribution)

üìÇ TOP 20 CAT√âGORIES DE DEMANDES CLIENTS

Top 5 cat√©gories repr√©sentent 9.0% du volume total



Unnamed: 0,category_name,category_id,total_requests,closed_requests,closure_rate,avg_hours_to_close,pct_of_total
0,Cash instruction,353585,65780,65677,99.84,62.71,2.77
1,Tax,172255,48176,45329,94.09,583.16,2.03
2,BAU Asset Creation,218638,34432,34407,99.93,23.64,1.45
3,OTHERS,154192,33615,33447,99.5,84.72,1.42
4,CREST,371413,31853,31816,99.88,39.36,1.34
5,Settlement,173197,29441,29277,99.44,139.13,1.24
6,Investigation Level 1,219273,29391,29357,99.88,84.65,1.24
7,FRPP,327439,27565,27509,99.8,83.38,1.16
8,DEFF,327494,25333,25306,99.89,49.59,1.07
9,Transaction cycle and follow-up,478777,25179,25174,99.98,61.44,1.06


In [74]:
# Visualisation : R√©partition par cat√©gorie
fig = px.bar(
    category_distribution.head(15),
    x='category_name',
    y='total_requests',
    title='Top 15 Cat√©gories de Demandes Clients (Jan-Sep)',
    labels={'category_name': 'Cat√©gorie', 'total_requests': 'Nombre de Demandes'},
    text='total_requests',
    color='closure_rate',
    color_continuous_scale='RdYlGn',
    hover_data=['avg_hours_to_close', 'pct_of_total']
)

fig.update_traces(textposition='outside')
fig.update_layout(xaxis_tickangle=-45, height=600)
fig.show()

---
## 4. Temps de Traitement par Cat√©gorie : Impact sur l'Exp√©rience Client

**Question Cl√©** : Quelles cat√©gories ont les temps de traitement les plus longs (pain points clients) ?

In [75]:
# Temps de traitement par cat√©gorie (cat√©gories avec > 100 SRs)
treatment_time_by_category = pd.read_sql_query("""
SELECT 
    c.NAME as category_name,
    COUNT(sr.ID) as total_requests,
    ROUND(AVG(CAST((julianday(sr.CLOSINGDATE) - julianday(sr.CREATIONDATE)) * 24 AS REAL)), 2) as avg_hours,
    ROUND(MIN(CAST((julianday(sr.CLOSINGDATE) - julianday(sr.CREATIONDATE)) * 24 AS REAL)), 2) as min_hours,
    ROUND(MAX(CAST((julianday(sr.CLOSINGDATE) - julianday(sr.CREATIONDATE)) * 24 AS REAL)), 2) as max_hours,
    ROUND(AVG(CAST((julianday(sr.CLOSINGDATE) - julianday(sr.CREATIONDATE)) * 24 AS REAL)) / 24, 1) as avg_days
FROM sr
LEFT JOIN category c ON sr.CATEGORY_ID = c.ID
WHERE sr.CLOSINGDATE IS NOT NULL 
  AND sr.CREATIONDATE IS NOT NULL

GROUP BY c.NAME
HAVING COUNT(sr.ID) > 100
ORDER BY avg_hours DESC
LIMIT 20
""", conn)

print("="*70)
print("‚è±Ô∏è  TEMPS DE TRAITEMENT PAR CAT√âGORIE (Top 20 plus longs)")
print("="*70)
print("\n‚ö†Ô∏è  Cat√©gories avec les temps les plus longs = Pain points clients !\n")

display(treatment_time_by_category)

‚è±Ô∏è  TEMPS DE TRAITEMENT PAR CAT√âGORIE (Top 20 plus longs)

‚ö†Ô∏è  Cat√©gories avec les temps les plus longs = Pain points clients !



Unnamed: 0,category_name,total_requests,avg_hours,min_hours,max_hours,avg_days
0,SUPPORT,113,2888.68,0.03,16534.34,120.4
1,CLIENT PROJECT,126,2673.19,0.01,10219.1,111.4
2,Breakdown Tax Reclaims,138,2567.11,0.01,22222.44,107.0
3,EXTERNAL QUERIES,5693,2217.93,0.0,13965.28,92.4
4,Matrix tool,107,2058.45,2.14,12402.02,85.8
5,INTERNAL QUERIES,5503,2051.19,0.0,15426.08,85.5
6,Tax Voucher,106,1892.96,0.02,11419.93,78.9
7,Tax Reclaim,832,1888.5,0.01,15213.55,78.7
8,General client queries,129,1818.36,0.01,12984.61,75.8
9,TAX INCOME-TAX,1535,1781.43,0.0,18985.95,74.2


In [76]:
# Visualisation : Temps moyen par cat√©gorie
fig = go.Figure()

# Trier par temps de traitement
df_sorted = treatment_time_by_category.head(15).sort_values('avg_days')

fig.add_trace(go.Bar(
    y=df_sorted['category_name'],
    x=df_sorted['avg_days'],
    orientation='h',
    text=df_sorted['avg_days'].apply(lambda x: f"{x:.1f}j"),
    textposition='outside',
    marker_color=df_sorted['avg_days'],
    marker_colorscale='Reds',
    hovertemplate='<b>%{y}</b><br>Temps moyen: %{x:.1f} jours<extra></extra>'
))

fig.update_layout(
    title='Temps Moyen de Traitement par Cat√©gorie (en jours)',
    xaxis_title='Jours',
    yaxis_title='Cat√©gorie',
    height=600,
    showlegend=False
)

fig.show()

---
## 5. Tendances Temporelles : √âvolution des Demandes par Cat√©gorie

**Question Cl√©** : Comment √©voluent les demandes clients dans le temps ?

In [77]:
# √âvolution mensuelle des top 10 cat√©gories
top_10_categories = category_distribution.head(10)['category_id'].tolist()

monthly_category_trends = pd.read_sql_query(f"""
SELECT 
    strftime('%Y-%m', sr.CREATIONDATE) as month,
    c.NAME as category_name,
    COUNT(sr.ID) as total_requests,
    ROUND(AVG(CAST((julianday(sr.CLOSINGDATE) - julianday(sr.CREATIONDATE)) * 24 AS REAL)), 2) as avg_hours_to_close
FROM sr
LEFT JOIN category c ON sr.CATEGORY_ID = c.ID
WHERE sr.CATEGORY_ID IN ({','.join(map(str, top_10_categories))})

GROUP BY month, c.NAME
ORDER BY month, total_requests DESC
""", conn)

print("="*70)
print("üìà √âVOLUTION MENSUELLE DES TOP 10 CAT√âGORIES")
print("="*70)
print(f"Nombre de mois analys√©s : {monthly_category_trends['month'].nunique()}\n")

display(monthly_category_trends.head(30))

üìà √âVOLUTION MENSUELLE DES TOP 10 CAT√âGORIES
Nombre de mois analys√©s : 36



Unnamed: 0,month,category_name,total_requests,avg_hours_to_close
0,2023-01,Tax,4,25415.61
1,2023-01,FRPP,1,
2,2023-02,Tax,3,25193.82
3,2023-02,Settlement,1,
4,2023-03,Tax,5,24487.65
5,2023-04,Tax,3,
6,2023-04,CREST,1,23565.2
7,2023-05,Tax,6,22838.08
8,2023-06,Tax,3,22153.02
9,2023-07,Tax,6,19282.15


In [78]:
# Visualisation : √âvolution des top 5 cat√©gories
top_5_categories_names = category_distribution.head(5)['category_name'].tolist()
df_top5 = monthly_category_trends[monthly_category_trends['category_name'].isin(top_5_categories_names)]

fig = px.line(
    df_top5,
    x='month',
    y='total_requests',
    color='category_name',
    title='√âvolution Mensuelle des Top 5 Cat√©gories de Demandes',
    labels={'month': 'Mois', 'total_requests': 'Nombre de Demandes', 'category_name': 'Cat√©gorie'},
    markers=True
)

fig.update_layout(height=500, hovermode='x unified')
fig.show()

---
## 6. KPIs Orient√©s Exp√©rience Client

**Focus** : SLA, First Response Time, Taux de R√©solution

In [79]:
# KPIs exp√©rience client par cat√©gorie
client_experience_kpis = pd.read_sql_query("""
SELECT 
    c.NAME as category_name,
    COUNT(sr.ID) as total_requests,
    
    -- Taux de r√©solution
    ROUND(COUNT(CASE WHEN sr.CLOSINGDATE IS NOT NULL THEN 1 END) * 100.0 / COUNT(sr.ID), 2) as resolution_rate,
    
    -- Temps de premi√®re r√©ponse (Acknowledge)
    ROUND(AVG(CAST((julianday(sr.ACKNOWLEDGE_DATE) - julianday(sr.CREATIONDATE)) * 24 AS REAL)), 2) as avg_first_response_hours,
    
    -- Temps de traitement total
    ROUND(AVG(CAST((julianday(sr.CLOSINGDATE) - julianday(sr.CREATIONDATE)) * 24 AS REAL)), 2) as avg_total_hours,
    
    -- Respect SLA (expiration_date)
    ROUND(COUNT(CASE WHEN sr.CLOSINGDATE <= sr.EXPIRATION_DATE THEN 1 END) * 100.0 / 
          COUNT(CASE WHEN sr.CLOSINGDATE IS NOT NULL AND sr.EXPIRATION_DATE IS NOT NULL THEN 1 END), 2) as sla_compliance,
    
    -- Nombre de contacts moyen
    ROUND(AVG((SELECT COUNT(*) FROM srcontact WHERE srcontact.SR_ID = sr.ID)), 2) as avg_contacts
    
FROM sr
LEFT JOIN category c ON sr.CATEGORY_ID = c.ID
WHERE strftime('%Y-%m', sr.CREATIONDATE) BETWEEN '2024-01' AND '2025-09'
GROUP BY c.NAME
HAVING COUNT(sr.ID) > 100
ORDER BY total_requests DESC
LIMIT 20
""", conn)

print("="*70)
print("üéØ KPIs EXP√âRIENCE CLIENT PAR CAT√âGORIE")
print("="*70)
print("\nIndicateurs cl√©s :")
print("  ‚Ä¢ Taux de r√©solution : % de SRs ferm√©s")
print("  ‚Ä¢ Premier temps de r√©ponse : Temps avant acknowledge")
print("  ‚Ä¢ SLA Compliance : % de SRs trait√©s avant expiration")
print("  ‚Ä¢ Contacts moyens : Nombre d'interactions par SR\n")

display(client_experience_kpis)

üéØ KPIs EXP√âRIENCE CLIENT PAR CAT√âGORIE

Indicateurs cl√©s :
  ‚Ä¢ Taux de r√©solution : % de SRs ferm√©s
  ‚Ä¢ Premier temps de r√©ponse : Temps avant acknowledge
  ‚Ä¢ SLA Compliance : % de SRs trait√©s avant expiration
  ‚Ä¢ Contacts moyens : Nombre d'interactions par SR



Unnamed: 0,category_name,total_requests,resolution_rate,avg_first_response_hours,avg_total_hours,sla_compliance,avg_contacts
0,Tax,67431,95.02,9.63,584.63,63.22,7.62
1,Cash instruction,65780,99.84,1.66,62.71,81.07,4.02
2,BAU Asset Creation,34432,99.93,,23.64,39.36,2.74
3,OTHERS,33615,99.5,3.2,84.72,72.65,3.58
4,Settlement,32320,99.45,0.79,142.22,67.27,7.46
5,CREST,31853,99.88,0.45,39.36,85.12,3.37
6,Investigation Level 1,29391,99.88,1.95,84.65,73.16,5.24
7,FRPP,27565,99.8,0.46,83.38,66.74,6.07
8,DEFF,25333,99.89,0.39,49.59,74.93,4.77
9,Transaction cycle and follow-up,25179,99.98,0.29,61.44,93.66,5.7


In [80]:
# Visualisation : SLA Compliance vs Volume (Top 15)
df_viz = client_experience_kpis.head(15).copy()

fig = px.scatter(
    df_viz,
    x='total_requests',
    y='sla_compliance',
    size='avg_total_hours',
    color='resolution_rate',
    hover_name='category_name',
    title='SLA Compliance vs Volume (Top 15 Cat√©gories)',
    labels={
        'total_requests': 'Volume de Demandes',
        'sla_compliance': 'SLA Compliance (%)',
        'avg_total_hours': 'Temps Moyen (h)',
        'resolution_rate': 'Taux R√©solution (%)'
    },
    color_continuous_scale='RdYlGn',
    size_max=60
)

fig.add_hline(y=95, line_dash="dash", line_color="red", 
              annotation_text="Target SLA 95%", annotation_position="right")

fig.update_layout(height=600)
fig.show()

---
## 7. Analyse des Pain Points Clients

**Identification** : Cat√©gories avec faible performance sur l'exp√©rience client

In [81]:
# Identifier les pain points (volume √©lev√© + SLA faible OU temps long)
pain_points = client_experience_kpis[
    ((client_experience_kpis['total_requests'] > 1000) & 
     ((client_experience_kpis['sla_compliance'] < 90) | 
      (client_experience_kpis['avg_total_hours'] > 100)))
].sort_values('total_requests', ascending=False)

print("="*70)
print("‚ö†Ô∏è  PAIN POINTS CLIENTS IDENTIFI√âS")
print("="*70)
print("\nCrit√®res : Volume > 1000 ET (SLA < 90% OU Temps > 100h)")
print(f"\nNombre de cat√©gories probl√©matiques : {len(pain_points)}\n")

if len(pain_points) > 0:
    print("üî¥ Cat√©gories n√©cessitant une attention imm√©diate :\n")
    display(pain_points)
    
    # Calculer l'impact total
    total_impact = pain_points['total_requests'].sum()
    total_srs = category_distribution['total_requests'].sum()
    impact_pct = (total_impact / total_srs) * 100
    
    print(f"\nüí• Impact : {total_impact:,} SRs concern√©s ({impact_pct:.1f}% du volume total)")
else:
    print("‚úÖ Aucun pain point majeur identifi√© avec ces crit√®res")

‚ö†Ô∏è  PAIN POINTS CLIENTS IDENTIFI√âS

Crit√®res : Volume > 1000 ET (SLA < 90% OU Temps > 100h)

Nombre de cat√©gories probl√©matiques : 19

üî¥ Cat√©gories n√©cessitant une attention imm√©diate :



Unnamed: 0,category_name,total_requests,resolution_rate,avg_first_response_hours,avg_total_hours,sla_compliance,avg_contacts
0,Tax,67431,95.02,9.63,584.63,63.22,7.62
1,Cash instruction,65780,99.84,1.66,62.71,81.07,4.02
2,BAU Asset Creation,34432,99.93,,23.64,39.36,2.74
3,OTHERS,33615,99.5,3.2,84.72,72.65,3.58
4,Settlement,32320,99.45,0.79,142.22,67.27,7.46
5,CREST,31853,99.88,0.45,39.36,85.12,3.37
6,Investigation Level 1,29391,99.88,1.95,84.65,73.16,5.24
7,FRPP,27565,99.8,0.46,83.38,66.74,6.07
8,DEFF,25333,99.89,0.39,49.59,74.93,4.77
10,KPI PARIS,22410,99.98,,6.05,52.03,2.31



üí• Impact : 558,062 SRs concern√©s (99.5% du volume total)


---
## 8. Analyse des R√©ouvertures (Indicateur de Satisfaction)

**Question** : Quelles cat√©gories ont le plus de SRs r√©ouverts (insatisfaction client) ?

In [82]:
# Analyse des r√©ouvertures par cat√©gorie
reopened_analysis = pd.read_sql_query("""
SELECT 
    c.NAME as category_name,
    COUNT(sr.ID) as total_requests,
    COUNT(CASE WHEN sr.REOPEN_DATE IS NOT NULL THEN 1 END) as reopened_count,
    ROUND(COUNT(CASE WHEN sr.REOPEN_DATE IS NOT NULL THEN 1 END) * 100.0 / COUNT(sr.ID), 2) as reopen_rate
FROM sr
LEFT JOIN category c ON sr.CATEGORY_ID = c.ID
WHERE strftime('%Y-%m', sr.CREATIONDATE) BETWEEN '2024-01' AND '2025-09'
GROUP BY c.NAME
HAVING COUNT(sr.ID) > 100 AND reopened_count > 0
ORDER BY reopen_rate DESC
LIMIT 20
""", conn)

print("="*70)
print("üîÑ ANALYSE DES R√âOUVERTURES (Indicateur de Satisfaction)")
print("="*70)
print("\n‚ö†Ô∏è  Taux de r√©ouverture √©lev√© = R√©solution incompl√®te ou insatisfaction\n")

if len(reopened_analysis) > 0:
    display(reopened_analysis)
    
    avg_reopen_rate = reopened_analysis['reopen_rate'].mean()
    print(f"\nüìä Taux moyen de r√©ouverture : {avg_reopen_rate:.2f}%")
else:
    print("‚úÖ Tr√®s peu de r√©ouvertures dans les donn√©es")

üîÑ ANALYSE DES R√âOUVERTURES (Indicateur de Satisfaction)

‚ö†Ô∏è  Taux de r√©ouverture √©lev√© = R√©solution incompl√®te ou insatisfaction



Unnamed: 0,category_name,total_requests,reopened_count,reopen_rate
0,Cannis,195,194,99.49
1,Breakdown RAS,312,310,99.36
2,Processing,247,219,88.66
3,Trade Placement,1469,1263,85.98
4,Physicals,164,136,82.93
5,OST,753,603,80.08
6,Support to Production,352,278,78.98
7,Aberdeen Case,107,84,78.5
8,Requests to HUB,269,210,78.07
9,Settlement Trade Format,209,153,73.21



üìä Taux moyen de r√©ouverture : 75.65%


---
## 9. nombre de SR par cat√©gorie et combien d‚Äôentre elles ont eu besoin de faire une activit√©

In [83]:
# Analyse des SRs avec activit√©s par cat√©gorie
activity_analysis = pd.read_sql_query("""
SELECT 
    c.NAME as category_name,
    COUNT(DISTINCT sr.ID) as total_srs,
    COUNT(DISTINCT CASE WHEN activity.ID IS NOT NULL THEN sr.ID END) as srs_with_activity,
    ROUND(COUNT(DISTINCT CASE WHEN activity.ID IS NOT NULL THEN sr.ID END) * 100.0 / COUNT(DISTINCT sr.ID), 2) as activity_rate,
    COUNT(activity.ID) as total_activities,
    ROUND(AVG(CASE WHEN activity.ID IS NOT NULL THEN 1.0 * (SELECT COUNT(*) FROM activity a2 WHERE a2.SR_ID = sr.ID) END), 2) as avg_activities_per_sr,
    ROUND(AVG(CAST((julianday(activity.CLOSINGDATE) - julianday(activity.CREATIONDATE)) * 24 AS REAL)), 2) as avg_activity_hours,
    ROUND(AVG(CAST((julianday(activity.CLOSINGDATE) - julianday(activity.CREATIONDATE)) * 24 AS REAL)) / 24, 1) as avg_activity_days
FROM sr
LEFT JOIN category c ON sr.CATEGORY_ID = c.ID
LEFT JOIN activity ON sr.ID = activity.SR_ID
WHERE strftime('%Y-%m', sr.CREATIONDATE) BETWEEN '2024-01' AND '2025-09'
GROUP BY c.ID, c.NAME
HAVING COUNT(DISTINCT sr.ID) > 100
ORDER BY total_srs DESC
LIMIT 20
""", conn)

print("="*70)
print("üìã ANALYSE DES ACTIVIT√âS PAR CAT√âGORIE")
print("="*70)
print("\nIndicateurs :")
print("  ‚Ä¢ Total SRs : Nombre total de demandes")
print("  ‚Ä¢ SRs avec activit√© : Nombre de SRs n√©cessitant une intervention manuelle")
print("  ‚Ä¢ Taux d'activit√© : % de SRs n√©cessitant une activit√©")
print("  ‚Ä¢ Activit√©s moyennes : Nombre moyen d'activit√©s par SR")
print("  ‚Ä¢ Temps moyen activit√© : Dur√©e moyenne de traitement d'une activit√©\n")

display(activity_analysis)

# Statistiques globales
total_with_activity = activity_analysis['srs_with_activity'].sum()
total_all = activity_analysis['total_srs'].sum()
global_activity_rate = (total_with_activity / total_all) * 100
avg_activity_time = activity_analysis['avg_activity_hours'].mean()

print(f"\nüìä Statistiques globales (Top 20 cat√©gories) :")
print(f"   ‚Ä¢ SRs avec activit√© : {total_with_activity:,} / {total_all:,}")
print(f"   ‚Ä¢ Taux global d'activit√© : {global_activity_rate:.2f}%")
print(f"   ‚Ä¢ Temps moyen d'une activit√© : {avg_activity_time:.1f}h ({avg_activity_time/24:.1f}j)")

üìã ANALYSE DES ACTIVIT√âS PAR CAT√âGORIE

Indicateurs :
  ‚Ä¢ Total SRs : Nombre total de demandes
  ‚Ä¢ SRs avec activit√© : Nombre de SRs n√©cessitant une intervention manuelle
  ‚Ä¢ Taux d'activit√© : % de SRs n√©cessitant une activit√©
  ‚Ä¢ Activit√©s moyennes : Nombre moyen d'activit√©s par SR
  ‚Ä¢ Temps moyen activit√© : Dur√©e moyenne de traitement d'une activit√©



Unnamed: 0,category_name,total_srs,srs_with_activity,activity_rate,total_activities,avg_activities_per_sr,avg_activity_hours,avg_activity_days
0,Cash instruction,65780,5319,8.09,7227,2.45,1.23,0.1
1,Tax,48176,4020,8.34,8214,8.1,95.46,4.0
2,BAU Asset Creation,34432,6,0.02,10,2.4,4.09,0.2
3,OTHERS,33615,916,2.72,1122,1.96,156.04,6.5
4,CREST,31853,14481,45.46,17771,1.82,1.24,0.1
5,Settlement,29441,32,0.11,39,1.67,186.35,7.8
6,Investigation Level 1,29391,1680,5.72,1997,1.56,1.36,0.1
7,FRPP,27565,2970,10.77,4491,3.06,0.69,0.0
8,DEFF,25333,3419,13.5,4848,2.04,0.42,0.0
9,Transaction cycle and follow-up,25179,164,0.65,213,1.8,1.76,0.1



üìä Statistiques globales (Top 20 cat√©gories) :
   ‚Ä¢ SRs avec activit√© : 43,705 / 561,107
   ‚Ä¢ Taux global d'activit√© : 7.79%
   ‚Ä¢ Temps moyen d'une activit√© : 39.8h (1.7j)


In [84]:
# Visualisation : Taux d'activit√© par cat√©gorie
fig = make_subplots(
    rows=1, cols=2,
    subplot_titles=('Taux d\'Activit√© par Cat√©gorie (Top 15)', 'Volume SRs vs Taux d\'Activit√©'),
    specs=[[{"type": "bar"}, {"type": "scatter"}]]
)

df_viz = activity_analysis.head(15).copy()
# Remplacer les NaN par 1 pour √©viter les erreurs dans la taille des points
df_viz['avg_activities_per_sr'] = df_viz['avg_activities_per_sr'].fillna(1)

# Graphique 1 : Barres horizontales du taux d'activit√©
fig.add_trace(
    go.Bar(
        y=df_viz['category_name'],
        x=df_viz['activity_rate'],
        orientation='h',
        text=df_viz['activity_rate'].apply(lambda x: f"{x:.1f}%"),
        textposition='outside',
        marker_color=df_viz['activity_rate'],
        marker_colorscale='Blues',
        name='Taux activit√©',
        hovertemplate='<b>%{y}</b><br>Taux: %{x:.1f}%<extra></extra>'
    ),
    row=1, col=1
)

# Graphique 2 : Scatter volume vs taux d'activit√©
fig.add_trace(
    go.Scatter(
        x=df_viz['total_srs'],
        y=df_viz['activity_rate'],
        mode='markers+text',
        marker=dict(
            size=df_viz['avg_activities_per_sr']*10,
            color=df_viz['activity_rate'],
            colorscale='Viridis',
            showscale=True,
            colorbar=dict(title="Taux<br>Activit√©", x=1.15)
        ),
        text=df_viz['category_name'].str[:15],
        textposition='top center',
        hovertemplate='<b>%{text}</b><br>Volume: %{x:,}<br>Taux activit√©: %{y:.1f}%<extra></extra>',
        name='Cat√©gories'
    ),
    row=1, col=2
)

fig.update_xaxes(title_text="Taux d'Activit√© (%)", row=1, col=1)
fig.update_yaxes(title_text="Cat√©gorie", row=1, col=1)
fig.update_xaxes(title_text="Nombre de SRs", row=1, col=2)
fig.update_yaxes(title_text="Taux d'Activit√© (%)", row=1, col=2)

fig.update_layout(height=500, showlegend=False, title_text="Analyse des Activit√©s par Cat√©gorie")
fig.show()

---
## 10. Analyse D√©taill√©e des Activit√©s

**Focus** : Statistiques sur les activit√©s elles-m√™mes (types, dur√©es, volumes)

In [85]:
# Statistiques globales sur les activit√©s
activity_stats = pd.read_sql_query("""
SELECT 
    COUNT(*) as total_activities,
    COUNT(DISTINCT SR_ID) as unique_srs_with_activities,
    COUNT(CASE WHEN CLOSINGDATE IS NOT NULL THEN 1 END) as closed_activities,
    ROUND(COUNT(CASE WHEN CLOSINGDATE IS NOT NULL THEN 1 END) * 100.0 / COUNT(*), 2) as closure_rate,
    ROUND(AVG(CAST((julianday(CLOSINGDATE) - julianday(CREATIONDATE)) * 24 AS REAL)), 2) as avg_hours_to_close,
    ROUND(MIN(CAST((julianday(CLOSINGDATE) - julianday(CREATIONDATE)) * 24 AS REAL)), 2) as min_hours,
    ROUND(MAX(CAST((julianday(CLOSINGDATE) - julianday(CREATIONDATE)) * 24 AS REAL)), 2) as max_hours
FROM activity

""", conn)

print("="*70)
print("üìä STATISTIQUES GLOBALES DES ACTIVIT√âS")
print("="*70)
print(f"\nüìã Total d'activit√©s : {activity_stats['total_activities'][0]:,}")
print(f"üéØ SRs concern√©s : {activity_stats['unique_srs_with_activities'][0]:,}")
print(f"‚úÖ Activit√©s ferm√©es : {activity_stats['closed_activities'][0]:,} ({activity_stats['closure_rate'][0]}%)")
print(f"‚è±Ô∏è  Temps moyen de traitement : {activity_stats['avg_hours_to_close'][0]:.1f}h ({activity_stats['avg_hours_to_close'][0]/24:.1f}j)")
print(f"‚ö° Temps min : {activity_stats['min_hours'][0]:.2f}h | Temps max : {activity_stats['max_hours'][0]:.1f}h ({activity_stats['max_hours'][0]/24:.0f}j)")

display(activity_stats)

üìä STATISTIQUES GLOBALES DES ACTIVIT√âS

üìã Total d'activit√©s : 183,248
üéØ SRs concern√©s : 127,277
‚úÖ Activit√©s ferm√©es : 179,750 (98.09%)
‚è±Ô∏è  Temps moyen de traitement : 46.3h (1.9j)
‚ö° Temps min : 0.00h | Temps max : 15241.8h (635j)


Unnamed: 0,total_activities,unique_srs_with_activities,closed_activities,closure_rate,avg_hours_to_close,min_hours,max_hours
0,183248,127277,179750,98.09,46.28,0.0,15241.79


In [86]:
# Analyse par type d'activit√© (par TYPE_ID)
activity_by_type = pd.read_sql_query("""
SELECT 
    COALESCE(activity.TYPE_ID, 0) as type_id,
    COUNT(activity.ID) as total_activities,
    COUNT(DISTINCT activity.SR_ID) as unique_srs,
    ROUND(COUNT(activity.ID) * 100.0 / (SELECT COUNT(*) FROM activity ), 2) as pct_of_total,
    ROUND(AVG(CAST((julianday(activity.CLOSINGDATE) - julianday(activity.CREATIONDATE)) * 24 AS REAL)), 2) as avg_hours_to_close,
    ROUND(AVG(CAST((julianday(activity.CLOSINGDATE) - julianday(activity.CREATIONDATE)) * 24 AS REAL)) / 24, 1) as avg_days_to_close
FROM activity
WHERE strftime('%Y-%m', activity.CREATIONDATE) BETWEEN '2024-01' AND '2025-09'
GROUP BY activity.TYPE_ID
ORDER BY total_activities DESC
LIMIT 20
""", conn)

# Ajouter un nom lisible
activity_by_type['type_name'] = 'Type ' + activity_by_type['type_id'].astype(str)

print("="*70)
print("üìä ACTIVIT√âS PAR TYPE (Top 20)")
print("="*70)
print(f"\nTop 5 types repr√©sentent {activity_by_type.head(5)['pct_of_total'].sum():.1f}% des activit√©s\n")

display(activity_by_type)

üìä ACTIVIT√âS PAR TYPE (Top 20)

Top 5 types repr√©sentent 86.0% des activit√©s



Unnamed: 0,type_id,total_activities,unique_srs,pct_of_total,avg_hours_to_close,avg_days_to_close,type_name
0,1579,157337,108571,85.86,48.52,2.0,Type 1579
1,1580,232,232,0.13,157.15,6.5,Type 1580


In [87]:
# Analyse par statut d'activit√© (par STATUS_ID)
activity_by_status = pd.read_sql_query("""
SELECT 
    COALESCE(activity.STATUS_ID, 0) as status_id,
    COUNT(activity.ID) as total_activities,
    COUNT(DISTINCT activity.SR_ID) as unique_srs,
    ROUND(COUNT(activity.ID) * 100.0 / (SELECT COUNT(*) FROM activity ), 2) as pct_of_total,
    ROUND(AVG(CAST((julianday(activity.CLOSINGDATE) - julianday(activity.CREATIONDATE)) * 24 AS REAL)), 2) as avg_hours_to_close,
    ROUND(AVG(CAST((julianday(activity.CLOSINGDATE) - julianday(activity.CREATIONDATE)) * 24 AS REAL)) / 24, 1) as avg_days_to_close
FROM activity
WHERE strftime('%Y-%m', activity.CREATIONDATE) BETWEEN '2024-01' AND '2025-09'
GROUP BY activity.STATUS_ID
ORDER BY total_activities DESC
LIMIT 20
""", conn)

# Ajouter un nom lisible
activity_by_status['status_name'] = 'Status ' + activity_by_status['status_id'].astype(str)

print("="*70)
print("üìä ACTIVIT√âS PAR STATUT (Top 20)")
print("="*70)
print(f"\nTop 5 statuts repr√©sentent {activity_by_status.head(5)['pct_of_total'].sum():.1f}% des activit√©s\n")

display(activity_by_status)

üìä ACTIVIT√âS PAR STATUT (Top 20)

Top 5 statuts repr√©sentent 86.0% des activit√©s



Unnamed: 0,status_id,total_activities,unique_srs,pct_of_total,avg_hours_to_close,avg_days_to_close,status_name
0,31,149200,104258,81.42,42.62,1.8,Status 31
1,11,6111,5334,3.33,188.7,7.9,Status 11
2,100,1566,1381,0.85,5.97,0.2,Status 100
3,10,554,483,0.3,825.67,34.4,Status 10
4,12,122,89,0.07,700.24,29.2,Status 12
5,101,10,10,0.01,0.11,0.0,Status 101
6,13,3,3,0.0,0.06,0.0,Status 13
7,14,3,3,0.0,0.05,0.0,Status 14


In [88]:
# √âvolution mensuelle des activit√©s
monthly_activities = pd.read_sql_query("""
SELECT 
    strftime('%Y-%m', activity.CREATIONDATE) as month,
    COUNT(activity.ID) as total_activities,
    COUNT(DISTINCT activity.SR_ID) as unique_srs,
    COUNT(CASE WHEN activity.CLOSINGDATE IS NOT NULL THEN 1 END) as closed_activities,
    ROUND(AVG(CAST((julianday(activity.CLOSINGDATE) - julianday(activity.CREATIONDATE)) * 24 AS REAL)), 2) as avg_hours_to_close
FROM activity
WHERE strftime('%Y-%m', activity.CREATIONDATE) BETWEEN '2024-01' AND '2025-09'
GROUP BY month
ORDER BY month
""", conn)

print("="*70)
print("üìà √âVOLUTION MENSUELLE DES ACTIVIT√âS")
print("="*70)
print(f"\nP√©riode : Janvier 2024 - Septembre 2025")
print(f"Nombre de mois : {len(monthly_activities)}\n")

display(monthly_activities)

# Visualisation de la tendance
fig = make_subplots(
    rows=2, cols=1,
    subplot_titles=('Volume d\'Activit√©s par Mois', 'Temps Moyen de Traitement par Mois'),
    vertical_spacing=0.15
)

# Graphique 1 : Volume mensuel
fig.add_trace(
    go.Scatter(
        x=monthly_activities['month'],
        y=monthly_activities['total_activities'],
        mode='lines+markers',
        name='Activit√©s',
        line=dict(color='royalblue', width=3),
        marker=dict(size=8),
        hovertemplate='<b>%{x}</b><br>Activit√©s: %{y:,}<extra></extra>'
    ),
    row=1, col=1
)

# Graphique 2 : Temps moyen mensuel
fig.add_trace(
    go.Scatter(
        x=monthly_activities['month'],
        y=monthly_activities['avg_hours_to_close'] / 24,
        mode='lines+markers',
        name='Temps moyen',
        line=dict(color='coral', width=3),
        marker=dict(size=8),
        hovertemplate='<b>%{x}</b><br>Temps: %{y:.1f} jours<extra></extra>'
    ),
    row=2, col=1
)

fig.update_xaxes(title_text="Mois", row=2, col=1)
fig.update_yaxes(title_text="Nombre d'Activit√©s", row=1, col=1)
fig.update_yaxes(title_text="Jours", row=2, col=1)

fig.update_layout(height=700, showlegend=False, title_text="Tendances Temporelles des Activit√©s")
fig.show()

üìà √âVOLUTION MENSUELLE DES ACTIVIT√âS

P√©riode : Janvier 2024 - Septembre 2025
Nombre de mois : 21



Unnamed: 0,month,total_activities,unique_srs,closed_activities,avg_hours_to_close
0,2024-01,95,48,93,175.36
1,2024-02,157,85,150,609.49
2,2024-03,172,105,166,372.01
3,2024-04,310,191,294,211.17
4,2024-05,307,192,306,233.3
5,2024-06,352,240,349,328.04
6,2024-07,566,341,555,264.98
7,2024-08,648,394,639,328.78
8,2024-09,732,439,723,218.72
9,2024-10,1201,735,1184,220.82


In [89]:
# Visualisation : Distribution des activit√©s
fig = make_subplots(
    rows=1, cols=2,
    subplot_titles=('Volume d\'Activit√©s par Statut (Top 10)', 'Temps de Traitement par Type (Top 10)'),
    specs=[[{"type": "bar"}, {"type": "bar"}]]
)

# Graphique 1 : Volume par statut
df_status_top = activity_by_status.head(10).sort_values('total_activities')
fig.add_trace(
    go.Bar(
        y=df_status_top['status_name'],
        x=df_status_top['total_activities'],
        orientation='h',
        text=df_status_top['total_activities'].apply(lambda x: f"{x:,}"),
        textposition='outside',
        marker_color='lightblue',
        name='Volume',
        hovertemplate='<b>%{y}</b><br>Volume: %{x:,}<extra></extra>'
    ),
    row=1, col=1
)

# Graphique 2 : Temps par type
df_type_top = activity_by_type.head(10).sort_values('avg_days_to_close')
fig.add_trace(
    go.Bar(
        y=df_type_top['type_name'],
        x=df_type_top['avg_days_to_close'],
        orientation='h',
        text=df_type_top['avg_days_to_close'].apply(lambda x: f"{x:.1f}j"),
        textposition='outside',
        marker_color=df_type_top['avg_days_to_close'],
        marker_colorscale='Reds',
        name='Dur√©e',
        hovertemplate='<b>%{y}</b><br>Dur√©e: %{x:.1f} jours<extra></extra>'
    ),
    row=1, col=2
)

fig.update_xaxes(title_text="Nombre d'Activit√©s", row=1, col=1)
fig.update_yaxes(title_text="Statut", row=1, col=1)
fig.update_xaxes(title_text="Jours", row=1, col=2)
fig.update_yaxes(title_text="Type", row=1, col=2)

fig.update_layout(height=500, showlegend=False, title_text="Distribution des Activit√©s")
fig.show()

---
## 11. Synth√®se


In [90]:
print("="*70)
print("üìã SYNTH√àSE EX√âCUTIVE")
print("="*70)

# Top 3 cat√©gories par volume
print("\n1Ô∏è‚É£ TOP 3 CAT√âGORIES PAR VOLUME (Focus d'am√©lioration)")
for idx, row in category_distribution.head(3).iterrows():
    print(f"   ‚Ä¢ {row['category_name']}: {row['total_requests']:,} demandes ({row['pct_of_total']}%)")
    print(f"     ‚Üí Temps moyen: {row['avg_hours_to_close']/24:.1f} jours | Closure: {row['closure_rate']}%")

# Cat√©gories les plus lentes
print("\n2Ô∏è‚É£ CAT√âGORIES AVEC TEMPS DE TRAITEMENT LE PLUS LONG (Pain Points)")
for idx, row in treatment_time_by_category.head(3).iterrows():
    print(f"   ‚Ä¢ {row['category_name']}: {row['avg_days']:.1f} jours en moyenne")
    print(f"     ‚Üí Volume: {row['total_requests']:,} demandes")

# KPIs moyens
avg_resolution = client_experience_kpis['resolution_rate'].mean()
avg_sla = client_experience_kpis['sla_compliance'].mean()
avg_first_response = client_experience_kpis['avg_first_response_hours'].mean()

print("\n3Ô∏è‚É£ KPIs GLOBAUX EXP√âRIENCE CLIENT")
print(f"   ‚Ä¢ Taux de r√©solution moyen: {avg_resolution:.1f}%")
print(f"   ‚Ä¢ SLA Compliance moyen: {avg_sla:.1f}%")
print(f"   ‚Ä¢ Temps de premi√®re r√©ponse: {avg_first_response:.1f}h ({avg_first_response/24:.1f}j)")

üìã SYNTH√àSE EX√âCUTIVE

1Ô∏è‚É£ TOP 3 CAT√âGORIES PAR VOLUME (Focus d'am√©lioration)
   ‚Ä¢ Cash instruction: 65,780 demandes (2.77%)
     ‚Üí Temps moyen: 2.6 jours | Closure: 99.84%
   ‚Ä¢ Tax: 48,176 demandes (2.03%)
     ‚Üí Temps moyen: 24.3 jours | Closure: 94.09%
   ‚Ä¢ BAU Asset Creation: 34,432 demandes (1.45%)
     ‚Üí Temps moyen: 1.0 jours | Closure: 99.93%

2Ô∏è‚É£ CAT√âGORIES AVEC TEMPS DE TRAITEMENT LE PLUS LONG (Pain Points)
   ‚Ä¢ SUPPORT: 120.4 jours en moyenne
     ‚Üí Volume: 113 demandes
   ‚Ä¢ CLIENT PROJECT: 111.4 jours en moyenne
     ‚Üí Volume: 126 demandes
   ‚Ä¢ Breakdown Tax Reclaims: 107.0 jours en moyenne
     ‚Üí Volume: 138 demandes

3Ô∏è‚É£ KPIs GLOBAUX EXP√âRIENCE CLIENT
   ‚Ä¢ Taux de r√©solution moyen: 99.1%
   ‚Ä¢ SLA Compliance moyen: 73.1%
   ‚Ä¢ Temps de premi√®re r√©ponse: 3.1h (0.1j)


---
## 12. Export des R√©sultats

Sauvegarder les analyses pour pr√©sentation.

In [91]:
# Export des analyses cl√©s
timestamp = datetime.now().strftime('%Y%m%d')

# 1. R√©partition par cat√©gorie
export_1 = EXPORT_DIR / f'category_distribution_{timestamp}.csv'
category_distribution.to_csv(export_1, index=False)
print(f"‚úÖ Export√©: {export_1.name}")

# 2. Temps de traitement
export_2 = EXPORT_DIR / f'treatment_time_by_category_{timestamp}.csv'
treatment_time_by_category.to_csv(export_2, index=False)
print(f"‚úÖ Export√©: {export_2.name}")

# 3. KPIs exp√©rience client
export_3 = EXPORT_DIR / f'client_experience_kpis_{timestamp}.csv'
client_experience_kpis.to_csv(export_3, index=False)
print(f"‚úÖ Export√©: {export_3.name}")

# 4. Pain points
if len(pain_points) > 0:
    export_4 = EXPORT_DIR / f'pain_points_{timestamp}.csv'
    pain_points.to_csv(export_4, index=False)
    print(f"‚úÖ Export√©: {export_4.name}")

# 5. Tendances mensuelles
export_5 = EXPORT_DIR / f'monthly_category_trends_{timestamp}.csv'
monthly_category_trends.to_csv(export_5, index=False)
print(f"‚úÖ Export√©: {export_5.name}")

# 6. Analyse des activit√©s
export_6 = EXPORT_DIR / f'activity_analysis_{timestamp}.csv'
activity_analysis.to_csv(export_6, index=False)
print(f"‚úÖ Export√©: {export_6.name}")

# 7. Activit√©s par statut
export_7 = EXPORT_DIR / f'activity_by_status_{timestamp}.csv'
activity_by_status.to_csv(export_7, index=False)
print(f"‚úÖ Export√©: {export_7.name}")

# 8. Activit√©s par type
export_8 = EXPORT_DIR / f'activity_by_type_{timestamp}.csv'
activity_by_type.to_csv(export_8, index=False)
print(f"‚úÖ Export√©: {export_8.name}")

# 9. √âvolution mensuelle des activit√©s
export_9 = EXPORT_DIR / f'monthly_activities_{timestamp}.csv'
monthly_activities.to_csv(export_9, index=False)
print(f"‚úÖ Export√©: {export_9.name}")

print(f"\nüìÇ Tous les exports dans: {EXPORT_DIR}")

‚úÖ Export√©: category_distribution_20260215.csv
‚úÖ Export√©: treatment_time_by_category_20260215.csv
‚úÖ Export√©: client_experience_kpis_20260215.csv
‚úÖ Export√©: pain_points_20260215.csv
‚úÖ Export√©: monthly_category_trends_20260215.csv
‚úÖ Export√©: activity_analysis_20260215.csv
‚úÖ Export√©: activity_by_status_20260215.csv
‚úÖ Export√©: activity_by_type_20260215.csv
‚úÖ Export√©: monthly_activities_20260215.csv

üìÇ Tous les exports dans: /Users/jo/Desktop/BNP Paribas/Data/Exports


In [92]:
# Fermer la connexion
conn.close()
print("‚úÖ Connexion ferm√©e")

‚úÖ Connexion ferm√©e


---
## 13. SR avec 0 Interactions (Contacts)

**Analyse** : Identifier les SR qui n'ont eu aucune interaction client (CONTACT_)

In [93]:
# Reconnecter √† la base de donn√©es si n√©cessaire
conn = sqlite3.connect(str(DB_PATH))

# SR avec 0 interactions par cat√©gorie
zero_contact_analysis = pd.read_sql_query("""
SELECT 
    c.NAME as category_name,
    COUNT(sr.ID) as total_srs,
    COUNT(CASE WHEN (SELECT COUNT(*) FROM srcontact WHERE srcontact.SR_ID = sr.ID) = 0 THEN 1 END) as srs_zero_contact,
    ROUND(COUNT(CASE WHEN (SELECT COUNT(*) FROM srcontact WHERE srcontact.SR_ID = sr.ID) = 0 THEN 1 END) * 100.0 / COUNT(sr.ID), 2) as pct_zero_contact,
    ROUND(AVG(CAST((julianday(sr.CLOSINGDATE) - julianday(sr.CREATIONDATE)) * 24 AS REAL)), 2) as avg_hours_all,
    ROUND(AVG(CASE WHEN (SELECT COUNT(*) FROM srcontact WHERE srcontact.SR_ID = sr.ID) = 0 
                   THEN CAST((julianday(sr.CLOSINGDATE) - julianday(sr.CREATIONDATE)) * 24 AS REAL) END), 2) as avg_hours_zero_contact
FROM sr
LEFT JOIN category c ON sr.CATEGORY_ID = c.ID
WHERE strftime('%Y-%m', sr.CREATIONDATE) BETWEEN '2025-01' AND '2025-09'
  AND sr.CLOSINGDATE IS NOT NULL
GROUP BY c.ID, c.NAME
HAVING COUNT(sr.ID) > 100
ORDER BY srs_zero_contact DESC
LIMIT 30
""", conn)

print("="*70)
print("üìä SR AVEC 0 INTERACTIONS (CONTACT) PAR CAT√âGORIE")
print("="*70)
print("\nCat√©gories avec le plus de SR sans interaction client\n")

display(zero_contact_analysis)

# Statistiques globales
total_zero_contact = zero_contact_analysis['srs_zero_contact'].sum()
total_srs_analyzed = zero_contact_analysis['total_srs'].sum()
global_pct = (total_zero_contact / total_srs_analyzed) * 100

print(f"\nüìä Statistiques (cat√©gories >100 SRs) :")
print(f"   ‚Ä¢ Total SRs avec 0 contact : {total_zero_contact:,} / {total_srs_analyzed:,}")
print(f"   ‚Ä¢ Taux global : {global_pct:.2f}%")
print(f"   ‚Ä¢ Temps moyen (SRs avec 0 contact) : {zero_contact_analysis['avg_hours_zero_contact'].mean():.1f}h ({zero_contact_analysis['avg_hours_zero_contact'].mean()/24:.1f}j)")

üìä SR AVEC 0 INTERACTIONS (CONTACT) PAR CAT√âGORIE

Cat√©gories avec le plus de SR sans interaction client



Unnamed: 0,category_name,total_srs,srs_zero_contact,pct_zero_contact,avg_hours_all,avg_hours_zero_contact
0,Check Lists,18431,14130,76.66,4.32,1.22
1,Check list,5909,3935,66.59,6.38,3.66
2,Income/Redemptions,14637,3191,21.8,168.75,84.35
3,Tax,42073,2285,5.43,325.95,80.92
4,Checklist,3046,1970,64.67,26.11,21.02
5,Settlement Trade Processing,11726,1807,15.41,43.22,5.29
6,CA - Payment,9991,1731,17.33,169.08,74.99
7,BAU,20128,1655,8.22,65.45,157.03
8,Cash instruction,65135,1625,2.49,51.76,8.87
9,CREST,31657,1550,4.9,28.3,9.11



üìä Statistiques (cat√©gories >100 SRs) :
   ‚Ä¢ Total SRs avec 0 contact : 44,347 / 460,069
   ‚Ä¢ Taux global : 9.64%
   ‚Ä¢ Temps moyen (SRs avec 0 contact) : 53.6h (2.2j)


In [94]:
# Visualisation : SR avec 0 contact
fig = make_subplots(
    rows=1, cols=2,
    subplot_titles=('Top 15: SR avec 0 Contact (Volume)', 'Taux de SR sans Contact (%)'),
    specs=[[{"type": "bar"}, {"type": "bar"}]]
)

df_viz = zero_contact_analysis.head(15).copy()

# Graphique 1 : Volume de SRs avec 0 contact
fig.add_trace(
    go.Bar(
        y=df_viz['category_name'],
        x=df_viz['srs_zero_contact'],
        orientation='h',
        text=df_viz['srs_zero_contact'].apply(lambda x: f"{x:,}"),
        textposition='outside',
        marker_color='lightcoral',
        name='SRs sans contact',
        hovertemplate='<b>%{y}</b><br>SRs sans contact: %{x:,}<extra></extra>'
    ),
    row=1, col=1
)

# Graphique 2 : Pourcentage de SRs avec 0 contact
df_sorted = df_viz.sort_values('pct_zero_contact')
fig.add_trace(
    go.Bar(
        y=df_sorted['category_name'],
        x=df_sorted['pct_zero_contact'],
        orientation='h',
        text=df_sorted['pct_zero_contact'].apply(lambda x: f"{x:.1f}%"),
        textposition='outside',
        marker_color=df_sorted['pct_zero_contact'],
        marker_colorscale='RdYlGn_r',
        name='% sans contact',
        hovertemplate='<b>%{y}</b><br>Taux: %{x:.1f}%<extra></extra>'
    ),
    row=1, col=2
)

fig.update_xaxes(title_text="Nombre de SRs", row=1, col=1)
fig.update_yaxes(title_text="Cat√©gorie", row=1, col=1)
fig.update_xaxes(title_text="% de SRs sans contact", row=1, col=2)
fig.update_yaxes(title_text="Cat√©gorie", row=1, col=2)

fig.update_layout(height=600, showlegend=False, title_text="Analyse des SR avec 0 Interaction Client")
fig.show()

In [95]:
# Fermer la connexion
conn.close()
print("‚úÖ Connexion ferm√©e")

‚úÖ Connexion ferm√©e
