# 🔍 Analyse des Problèmes Historiques - Pitié-Salpêtrière

**Objectif** : Identifier les causes principales des tensions hospitalières sur 2020-2025

- Pics d'admissions
- Suroccupation des lits
- Manque de personnel
- Impact des événements (COVID, grippe, canicule)

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from datetime import datetime, timedelta

# Plot defaults shared by every figure in this notebook.
plt.style.use('seaborn-v0_8-whitegrid')
plt.rcParams.update({
    'figure.figsize': (14, 6),
    'font.size': 11,
})

# Daily establishment-level indicators; 'date' is parsed to datetime.
df_etab = pd.read_csv('../data/etablissement.csv')
df_etab['date'] = pd.to_datetime(df_etab['date'])

# Admission records; 'date_admission' is parsed to datetime.
df_adm = pd.read_csv('../data/admissions_complet.csv')
df_adm['date_admission'] = pd.to_datetime(df_adm['date_admission'])

# Quick sanity summary: number of rows and covered date range.
premier_jour = df_etab['date'].min().date()
dernier_jour = df_etab['date'].max().date()
print(f"üìä √âtablissement: {len(df_etab)} jours ({premier_jour} ‚Üí {dernier_jour})")
print(f"üë• Admissions: {len(df_adm):,} patients")

## 1️⃣ Définition des Seuils de Tension

In [None]:
# Tension thresholds used by all later cells.
# Admission thresholds are data-driven (percentiles of daily admissions);
# the remaining ones are fixed constants.
admissions_par_jour = df_etab['nb_admissions']
SEUILS = dict(
    admissions_critique=admissions_par_jour.quantile(0.95),  # top 5% of days
    admissions_eleve=admissions_par_jour.quantile(0.85),     # top 15% of days
    occupation_critique=90,   # occupancy above 90% is critical
    occupation_eleve=85,      # occupancy above 85% is high
    personnel_bas=0.80,       # presence ratio below 0.80 is a problem
    stock_sang_critique=400,  # compared against 'stock_sang_poches' later on
)

# Echo the thresholds so the reader can check them against the data.
lignes_seuils = (
    "üìè Seuils d√©finis:",
    f"   - Admissions critique: >{SEUILS['admissions_critique']:.0f}/jour (top 5%)",
    f"   - Admissions √©lev√©: >{SEUILS['admissions_eleve']:.0f}/jour (top 15%)",
    f"   - Occupation critique: >{SEUILS['occupation_critique']}%",
    f"   - Personnel bas: <{SEUILS['personnel_bas']*100:.0f}% pr√©sence",
)
for ligne in lignes_seuils:
    print(ligne)

## 2️⃣ Identification des Jours de Tension

In [None]:
# Flag, for every day, which tension thresholds (SEUILS) are crossed, then
# print how many days fall into each category.
nb_jours = len(df_etab)

# Admissions: high / critical days.
df_etab['tension_admissions'] = df_etab['nb_admissions'] > SEUILS['admissions_eleve']
df_etab['tension_admissions_critique'] = df_etab['nb_admissions'] > SEUILS['admissions_critique']

# Global occupancy rate: weighted sum of the per-ward rates. The weights add
# up to 1.0.
# NOTE(review): presumably the weights reflect each ward's share of beds -
# confirm against the capacity data.
POIDS_OCCUPATION = {
    'taux_occ_medecine': 0.4,
    'taux_occ_chirurgie': 0.25,
    'taux_occ_reanimation': 0.15,
    'taux_occ_soins_intensifs': 0.1,
    'taux_occ_usc': 0.05,
    'taux_occ_obstetrique': 0.05,
}
df_etab['taux_occ_global'] = sum(
    df_etab[colonne] * poids for colonne, poids in POIDS_OCCUPATION.items()
)

df_etab['tension_occupation'] = df_etab['taux_occ_global'] > SEUILS['occupation_eleve']
df_etab['tension_occupation_critique'] = df_etab['taux_occ_global'] > SEUILS['occupation_critique']

# Care-staff presence ratio (present / headcount) and the matching flag.
df_etab['taux_presence_soins'] = df_etab['personnel_soins_presents'] / df_etab['personnel_soins_effectif']
df_etab['tension_personnel'] = df_etab['taux_presence_soins'] < SEUILS['personnel_bas']

# Blood stock below the critical number of units.
df_etab['tension_sang'] = df_etab['stock_sang_poches'] < SEUILS['stock_sang_critique']

# Number of flagged days per category.
tensions = {
    'Admissions √©lev√©es': df_etab['tension_admissions'].sum(),
    'Admissions critiques': df_etab['tension_admissions_critique'].sum(),
    'Occupation √©lev√©e': df_etab['tension_occupation'].sum(),
    'Occupation critique': df_etab['tension_occupation_critique'].sum(),
    'Personnel insuffisant': df_etab['tension_personnel'].sum(),
    'Stock sang critique': df_etab['tension_sang'].sum(),
}

# The header reports the real number of rows instead of a hard-coded day
# count, so it always agrees with the percentages printed below.
print(f"\nüìä JOURS DE TENSION (sur {nb_jours} jours):")
print("=" * 50)
for k, v in tensions.items():
    # Guard against an empty dataset so the summary does not crash.
    pct = v / nb_jours * 100 if nb_jours else 0.0
    bar = '‚ñà' * int(pct / 2)  # one block per 2 percentage points
    print(f"{k:25s}: {v:4d} jours ({pct:5.1f}%) {bar}")

## 3️⃣ Analyse par Type de Problème

In [None]:
# Global tension score: each boolean flag contributes its weight when set.
# With every flag raised the score reaches 1+2+1+2+2+1 = 9.
poids_tension = {
    'tension_admissions': 1,
    'tension_admissions_critique': 2,
    'tension_occupation': 1,
    'tension_occupation_critique': 2,
    'tension_personnel': 2,
    'tension_sang': 1,
}
df_etab['score_tension'] = sum(
    df_etab[indicateur].astype(int) * poids
    for indicateur, poids in poids_tension.items()
)

# The 20 highest-scoring days, formatted for display.
print("\nüö® TOP 20 JOURS LES PLUS CRITIQUES:")
print("=" * 80)
colonnes_top = [
    'date', 'evenement_special', 'nb_admissions', 'taux_occ_global',
    'taux_presence_soins', 'score_tension',
]
top_jours = df_etab.nlargest(20, 'score_tension')[colonnes_top].assign(
    date=lambda t: t['date'].dt.strftime('%Y-%m-%d'),
    taux_occ_global=lambda t: t['taux_occ_global'].round(1),
    # Presence ratio is shown as a percentage.
    taux_presence_soins=lambda t: (t['taux_presence_soins'] * 100).round(1),
)
print(top_jours.to_string(index=False))

In [None]:
# Four stacked time-series panels sharing the date axis: admissions,
# occupancy, staff presence and the global tension score. Every threshold
# line is read from SEUILS so the chart always matches the flags computed
# from the same dictionary.
fig, axes = plt.subplots(4, 1, figsize=(16, 12), sharex=True)
dates = df_etab['date']

# Panel 1 - daily admissions with the percentile-based thresholds.
ax = axes[0]
ax.fill_between(dates, df_etab['nb_admissions'], alpha=0.7)
ax.axhline(SEUILS['admissions_eleve'], color='orange', linestyle='--', label='Seuil √©lev√©')
ax.axhline(SEUILS['admissions_critique'], color='red', linestyle='--', label='Seuil critique')
ax.set_ylabel('Admissions/jour')
ax.set_title('üìä Admissions Journali√®res')
ax.legend(loc='upper right')

# Panel 2 - global occupancy rate with the high / critical thresholds.
seuil_occ_eleve = SEUILS['occupation_eleve']
seuil_occ_critique = SEUILS['occupation_critique']
ax = axes[1]
ax.fill_between(dates, df_etab['taux_occ_global'], alpha=0.7, color='green')
ax.axhline(seuil_occ_eleve, color='orange', linestyle='--',
           label=f'Seuil √©lev√© ({seuil_occ_eleve}%)')
ax.axhline(seuil_occ_critique, color='red', linestyle='--',
           label=f'Seuil critique ({seuil_occ_critique}%)')
ax.set_ylabel('Taux occupation (%)')
ax.set_title('üõèÔ∏è Taux d\'Occupation Global')
ax.legend(loc='upper right')

# Panel 3 - care-staff presence; the ratio is plotted as a percentage, so
# the threshold (stored as a ratio) is scaled the same way.
seuil_presence_pct = SEUILS['personnel_bas'] * 100
ax = axes[2]
ax.fill_between(dates, df_etab['taux_presence_soins'] * 100, alpha=0.7, color='purple')
ax.axhline(seuil_presence_pct, color='orange', linestyle='--',
           label=f'Seuil bas ({seuil_presence_pct:.0f}%)')
ax.set_ylabel('Pr√©sence (%)')
ax.set_title('üë• Taux de Pr√©sence Personnel Soins')
ax.legend(loc='lower right')

# Panel 4 - tension score, coloured by severity band
# (<=1 green, 2-3 orange, >3 red).
ax = axes[3]
colors = ['green' if s <= 1 else 'orange' if s <= 3 else 'red' for s in df_etab['score_tension']]
ax.bar(dates, df_etab['score_tension'], color=colors, alpha=0.7, width=1)
ax.set_ylabel('Score')
ax.set_title('‚ö†Ô∏è Score de Tension Global (0-9)')
ax.set_xlabel('Date')

plt.tight_layout()
plt.savefig('../data/analyse_tensions_historique.png', dpi=150, bbox_inches='tight')
plt.show()

## 4️⃣ Analyse par Événement

In [None]:
# Per-event summary table: one row per value of 'evenement_special', sorted
# from the highest to the lowest mean tension score.
event_stats = (
    df_etab.groupby('evenement_special')
    .agg(**{
        'Adm. moy': ('nb_admissions', 'mean'),
        'Adm. max': ('nb_admissions', 'max'),
        'Adm. std': ('nb_admissions', 'std'),
        'Occ. moy': ('taux_occ_global', 'mean'),
        'Pr√©sence moy': ('taux_presence_soins', 'mean'),
        'D√©c√®s total': ('nb_deces', 'sum'),
        'Tension moy': ('score_tension', 'mean'),
        'Nb jours': ('date', 'count'),
    })
    .round(2)
    .sort_values('Tension moy', ascending=False)
)

print("\nüìà IMPACT PAR TYPE D'√âV√âNEMENT:")
print("=" * 100)
print(event_stats.to_string())

In [None]:
# Side-by-side horizontal bar charts comparing events on mean admissions,
# mean occupancy and mean tension score.
fig, axes = plt.subplots(1, 3, figsize=(16, 5))

# Rows of event_stats are already sorted by mean tension score.
event_order = event_stats.index.tolist()

# Colour code: red for any COVID-labelled event, orange for the listed
# seasonal events, gray otherwise.
# NOTE(review): assumes event labels are strings - confirm against the CSV.
colors = ['red' if 'covid' in e else 'orange' if e in ['grippe', 'canicule', 'bronchiolite'] else 'gray' for e in event_order]

# Mean admissions, with the overall daily mean as reference.
ax = axes[0]
ax.barh(event_order, event_stats['Adm. moy'], color=colors)
ax.axvline(df_etab['nb_admissions'].mean(), color='black', linestyle='--', label='Moyenne globale')
ax.set_xlabel('Admissions/jour')
ax.set_title('üìä Admissions Moyennes')
ax.legend()

# Mean occupancy. The reference line is read from SEUILS (instead of a
# literal 85) so it stays in sync with the 'tension_occupation' flag.
ax = axes[1]
ax.barh(event_order, event_stats['Occ. moy'], color=colors)
ax.axvline(SEUILS['occupation_eleve'], color='red', linestyle='--', label='Seuil tension')
ax.set_xlabel('Taux occupation (%)')
ax.set_title('üõèÔ∏è Occupation Moyenne')
ax.legend()

# Mean tension score.
ax = axes[2]
ax.barh(event_order, event_stats['Tension moy'], color=colors)
ax.set_xlabel('Score tension (0-9)')
ax.set_title('‚ö†Ô∏è Score Tension Moyen')

plt.tight_layout()
plt.savefig('../data/analyse_par_evenement.png', dpi=150, bbox_inches='tight')
plt.show()

## 5️⃣ Corrélations entre Problèmes

In [None]:
# Pairwise correlation between the main daily indicators, shown as an
# annotated heatmap and summarised by three key pairs.
cols_corr = [
    'nb_admissions',
    'taux_occ_global',
    'taux_presence_soins',
    'nb_deces',
    'stock_sang_poches',
    'nb_examens_total',
]
corr_matrix = df_etab[cols_corr].corr()

plt.figure(figsize=(10, 8))
ax_corr = sns.heatmap(
    corr_matrix,
    annot=True,
    cmap='RdYlGn',
    center=0,
    fmt='.2f',
    square=True,
    linewidths=0.5,
)
ax_corr.set_title('üîó Matrice de Corr√©lation des Indicateurs')
plt.tight_layout()
plt.savefig('../data/correlation_indicateurs.png', dpi=150, bbox_inches='tight')
plt.show()

# Print the three correlations discussed in the synthesis.
print("\nüìä Corr√©lations cl√©s:")
paires_cles = (
    ('Admissions ‚Üî Occupation', 'nb_admissions', 'taux_occ_global'),
    ('Admissions ‚Üî D√©c√®s', 'nb_admissions', 'nb_deces'),
    ('Occupation ‚Üî Pr√©sence', 'taux_occ_global', 'taux_presence_soins'),
)
for libelle, ligne, colonne in paires_cles:
    print(f"   - {libelle}: {corr_matrix.loc[ligne, colonne]:.3f}")

## 6️⃣ Analyse par Année

In [None]:
# Yearly summary: admissions, deaths, number of flagged days and mean score.
# NOTE(review): relies on an 'annee' column already present in df_etab
# (not created in this notebook) - confirm it comes from the CSV.
yearly = (
    df_etab.groupby('annee')
    .agg(**{
        'Adm. total': ('nb_admissions', 'sum'),
        'Adm. moy/j': ('nb_admissions', 'mean'),
        'Adm. max': ('nb_admissions', 'max'),
        'D√©c√®s': ('nb_deces', 'sum'),
        'Jours tension adm': ('tension_admissions', 'sum'),
        'Jours occ critique': ('tension_occupation_critique', 'sum'),
        'Jours pers. bas': ('tension_personnel', 'sum'),
        'Tension moy': ('score_tension', 'mean'),
    })
    .round(2)
)

print("\nüìÖ BILAN PAR ANN√âE:")
print("=" * 120)
print(yearly.to_string())

In [None]:
# 2x2 dashboard of the yearly summary: total admissions, deaths, flagged
# days by type, and mean tension score.
fig, axes = plt.subplots(2, 2, figsize=(14, 10))
years = yearly.index.tolist()


def _barres_annuelles(ax, colonne, couleur, ylabel, titre):
    """Draw one simple per-year bar chart from a column of `yearly`."""
    ax.bar(years, yearly[colonne], color=couleur)
    ax.set_ylabel(ylabel)
    ax.set_title(titre)
    ax.set_xticks(years)


# Top row: total admissions and deaths per year.
_barres_annuelles(axes[0, 0], 'Adm. total', 'steelblue',
                  'Admissions totales', 'üìä Admissions Annuelles')
_barres_annuelles(axes[0, 1], 'D√©c√®s', 'darkred',
                  'D√©c√®s', '‚ò†Ô∏è D√©c√®s Annuels')

# Bottom-left: grouped bars, one group per year and one bar per tension type.
ax = axes[1, 0]
width = 0.25
x = np.arange(len(years))
series_tension = (
    (-width, 'Jours tension adm', 'Admissions', 'orange'),
    (0, 'Jours occ critique', 'Occupation', 'red'),
    (width, 'Jours pers. bas', 'Personnel', 'purple'),
)
for decalage, colonne, etiquette, couleur in series_tension:
    ax.bar(x + decalage, yearly[colonne], width, label=etiquette, color=couleur)
ax.set_ylabel('Nombre de jours')
ax.set_title('‚ö†Ô∏è Jours de Tension par Type')
ax.set_xticks(x)
ax.set_xticklabels(years)
ax.legend()

# Bottom-right: mean score, coloured by band (>2 red, >1 orange, else green).
colors = ['red' if s > 2 else 'orange' if s > 1 else 'green' for s in yearly['Tension moy']]
_barres_annuelles(axes[1, 1], 'Tension moy', colors,
                  'Score moyen', 'üìà Score Tension Moyen par Ann√©e')

plt.tight_layout()
plt.savefig('../data/bilan_annuel.png', dpi=150, bbox_inches='tight')
plt.show()

## 7️⃣ Diagnostic des Causes Principales

In [None]:
# For every tension day (score >= 2), determine the main cause.
jours_tension = df_etab[df_etab['score_tension'] >= 2].copy()

print(f"\nüîç ANALYSE DES {len(jours_tension)} JOURS DE TENSION (score >= 2):")
print("=" * 60)

# Priority-ordered rules: the first flag that is set names the cause.
# Vectorised with np.select (first matching condition wins) instead of a
# row-by-row iterrows() loop; the priority order is unchanged.
regles_cause = [
    ('tension_admissions_critique', 'Admissions critiques'),
    ('tension_occupation_critique', 'Suroccupation critique'),
    ('tension_personnel', 'Manque personnel'),
    ('tension_admissions', 'Admissions √©lev√©es'),
    ('tension_occupation', 'Suroccupation'),
]
# NOTE(review): with the score weights used above, a score >= 2 should
# always trip at least one of these flags, so 'Autre' is presumably never
# produced - kept as a safety net.
jours_tension['cause_principale'] = np.select(
    [jours_tension[indicateur].to_numpy() for indicateur, _ in regles_cause],
    [libelle for _, libelle in regles_cause],
    default='Autre',
)

cause_counts = jours_tension['cause_principale'].value_counts()

print("\nüìä R√âPARTITION DES CAUSES PRINCIPALES:")
for cause, count in cause_counts.items():
    pct = count / len(jours_tension) * 100
    bar = '‚ñà' * int(pct / 2)  # one block per 2 percentage points
    print(f"   {cause:25s}: {count:4d} ({pct:5.1f}%) {bar}")

In [None]:
# Two pie charts: tension days split by main cause and by special event.
fig, axes = plt.subplots(1, 2, figsize=(14, 6))
ax_causes, ax_evenements = axes

# Left: share of each main cause.
colors_pie = ['#ff6b6b', '#feca57', '#48dbfb', '#ff9ff3', '#54a0ff']
ax_causes.pie(
    cause_counts,
    labels=cause_counts.index,
    autopct='%1.1f%%',
    colors=colors_pie,
    startangle=90,
)
ax_causes.set_title('üéØ Causes Principales des Tensions')

# Right: share of tension days per event label.
event_tension = jours_tension['evenement_special'].value_counts()
ax_evenements.pie(
    event_tension,
    labels=event_tension.index,
    autopct='%1.1f%%',
    startangle=90,
)
ax_evenements.set_title('ü¶† R√©partition par √âv√©nement')

plt.tight_layout()
plt.savefig('../data/causes_tensions.png', dpi=150, bbox_inches='tight')
plt.show()

## 8️⃣ Synthèse et Recommandations

In [None]:
# Print the analytic synthesis followed by the static recommendations.
# The summary variables defined here are reused by the export cell.
print("\n" + "="*70)
print("üìã SYNTH√àSE ANALYTIQUE")
print("="*70)

# Share of days in tension.
total_jours = len(df_etab)
jours_tension_total = len(jours_tension)
pct_tension = jours_tension_total / total_jours * 100

# Period actually covered by the data (instead of a hard-coded "2020-2025").
annee_debut = df_etab['date'].min().year
annee_fin = df_etab['date'].max().year

# Year with the highest mean tension score.
annee_pire = yearly['Tension moy'].idxmax()
tension_max = yearly.loc[annee_pire, 'Tension moy']

# Event with the highest mean tension score, and how far its mean daily
# admissions sit from the overall mean. This is computed from the data so the
# printed figure always describes the event actually reported.
event_pire = event_stats['Tension moy'].idxmax()
ecart_activite = (
    event_stats.loc[event_pire, 'Adm. moy'] / df_etab['nb_admissions'].mean() - 1
) * 100

# Most frequent main cause among tension days.
cause_principale = cause_counts.idxmax()
pct_cause = cause_counts[cause_principale] / jours_tension_total * 100

# NOTE(review): the "CORRELATION" and "INSIGHT" paragraphs below are static
# text; they are not derived from the values computed above and should be
# re-read whenever the data changes.
print(f"""
üìä P√âRIODE ANALYS√âE: {annee_debut}-{annee_fin} ({total_jours} jours)

üî¥ JOURS DE TENSION: {jours_tension_total} ({pct_tension:.1f}% du temps)

üìÖ ANN√âE LA PLUS DIFFICILE: {annee_pire}
   Score de tension moyen: {tension_max:.2f}/9

ü¶† √âV√âNEMENT LE PLUS IMPACTANT: {event_pire}
   (admissions moyennes: {ecart_activite:+.0f}% vs moyenne globale)

üéØ CAUSE PRINCIPALE DES TENSIONS: {cause_principale}
   Repr√©sente {pct_cause:.1f}% des jours de tension

üìà CORR√âLATION CL√â:
   Les pics d'admissions sont directement li√©s aux crises sanitaires.
   La suroccupation suit m√©caniquement l'afflux de patients.

üí° INSIGHT:
   La plupart des probl√®mes viennent de PICS D'ADMISSIONS lors d'√©v√©nements
   sanitaires (COVID, grippe, canicule), pas d'un manque de personnel en soi.
   Le personnel s'adapte (+pr√©sence lors des crises) mais la capacit√© lit
   est le facteur limitant principal.
""")

print("\n" + "="*70)
print("üìã RECOMMANDATIONS PRIORITAIRES")
print("="*70)
print("""
1. CAPACIT√â LITS ‚≠ê‚≠ê‚≠ê
   ‚Üí Augmenter la capacit√© d'accueil de 10-15% pour absorber les pics
   ‚Üí Pr√©voir des lits "tampon" activables rapidement

2. ANTICIPATION CRISES ‚≠ê‚≠ê‚≠ê
   ‚Üí Renforcer la veille √©pid√©miologique (grippe, COVID)
   ‚Üí Activer les protocoles "plan blanc" plus t√¥t

3. FLEXIBILIT√â PERSONNEL ‚≠ê‚≠ê
   ‚Üí Cr√©er un pool de personnel mobilisable en urgence
   ‚Üí Accords avec √©tablissements voisins pour renforts

4. STOCK SANG ‚≠ê
   ‚Üí Campagnes de don pr√©ventives avant l'hiver
   ‚Üí Seuil d'alerte relev√© √† 450 poches
""")

In [None]:
# Persist the headline results and summary tables as JSON.
import json


def _json_default(valeur):
    """Fallback serializer for objects the json module cannot encode.

    numpy scalars are converted to their native Python equivalent so numbers
    remain numbers in the output file; anything else is written as its
    string form.
    """
    if isinstance(valeur, np.generic):
        return valeur.item()
    return str(valeur)


results = {
    'periode': f"{df_etab['date'].min().date()} - {df_etab['date'].max().date()}",
    'total_jours': total_jours,
    'jours_tension': jours_tension_total,
    'pct_tension': round(pct_tension, 1),
    'annee_pire': int(annee_pire),
    'event_pire': event_pire,
    'cause_principale': cause_principale,
    'pct_cause_principale': round(pct_cause, 1),
    'repartition_causes': cause_counts.to_dict(),
    'stats_evenements': event_stats.to_dict(),
    'stats_annuelles': yearly.to_dict(),
}

# Explicit encoding keeps the output independent of the platform default.
with open('../data/analyse_problemes_historiques.json', 'w', encoding='utf-8') as f:
    json.dump(results, f, indent=2, default=_json_default)

print("\n‚úÖ R√©sultats sauvegard√©s dans data/analyse_problemes_historiques.json")