In [1]:
# Importation des packages

import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt


# Read the mobilisation CSV into the working DataFrame and preview its first rows
df = pd.read_csv(
    "../data/LFB_Mobilisation_2015_2020.csv",
    low_memory=False,
)
df.head(n=10)

Unnamed: 0,IncidentNumber,CalYear,HourOfCall,ResourceMobilisationId,Resource_Code,PerformanceReporting,DateAndTimeMobilised,DateAndTimeMobile,DateAndTimeArrived,TurnoutTimeSeconds,...,DateAndTimeLeft,DateAndTimeReturned,DeployedFromStation_Code,DeployedFromStation_Name,DeployedFromLocation,PumpOrder,PlusCode_Code,PlusCode_Description,DelayCodeId,DelayCode_Description
0,1151,2015,0,4436381,H262,1,2015-01-01 00:02:36,2015-01-01 00:03:55,2015-01-01 00:07:18,79.0,...,2015-01-01 00:35:20,2015-01-01 00:43:10,H26,Addington,Home Station,1,Initial,Initial Mobilisation,,
1,1151,2015,0,4436380,H261,2,2015-01-01 00:02:36,2015-01-01 00:03:50,2015-01-01 00:07:21,74.0,...,2015-01-01 00:33:33,2015-01-01 00:38:47,H26,Addington,Home Station,2,Initial,Initial Mobilisation,,
2,4151,2015,0,4436383,A352,1,2015-01-01 00:03:14,2015-01-01 00:03:58,2015-01-01 00:09:58,44.0,...,2015-01-01 00:15:10,2015-01-01 00:27:26,A35,Enfield,Home Station,1,Initial,Initial Mobilisation,12.0,Not held up
3,8151,2015,0,4436385,G251,1,2015-01-01 00:04:54,2015-01-01 00:06:10,2015-01-01 00:09:53,76.0,...,2015-01-01 00:44:58,2015-01-01 00:50:28,G25,Ealing,Home Station,1,Initial,Initial Mobilisation,,
4,10151,2015,0,4436390,H291,1,2015-01-01 00:06:52,2015-01-01 00:11:01,2015-01-01 00:13:33,249.0,...,2015-01-01 00:20:53,2015-01-01 00:25:16,H29,Purley,Home Station,1,Initial,Initial Mobilisation,11.0,Mob/Radio problems when mobilised
5,13151,2015,0,4436392,A361,1,2015-01-01 00:07:49,2015-01-01 00:08:21,2015-01-01 00:13:51,32.0,...,2015-01-01 01:10:41,2015-01-01 01:24:59,A36,Southgate,Home Station,1,Initial,Initial Mobilisation,12.0,Not held up
6,13151,2015,0,4436391,A351,2,2015-01-01 00:07:49,2015-01-01 00:08:54,2015-01-01 00:16:13,65.0,...,2015-01-01 00:35:04,2015-01-01 00:45:10,A35,Enfield,Home Station,2,Initial,Initial Mobilisation,12.0,Not held up
7,14151,2015,0,4436393,A342,1,2015-01-01 00:10:44,2015-01-01 00:12:12,2015-01-01 00:19:49,88.0,...,2015-01-01 00:44:04,2015-01-01 00:49:39,A34,Edmonton,Home Station,1,Initial,Initial Mobilisation,12.0,Not held up
8,17151,2015,0,4436394,E392,1,2015-01-01 00:12:14,2015-01-01 00:13:29,2015-01-01 00:19:33,75.0,...,2015-01-01 01:04:30,2015-01-01 01:12:46,E39,Bromley,Home Station,1,Initial,Initial Mobilisation,12.0,Not held up
9,19151,2015,0,4436396,H351,1,2015-01-01 00:16:13,2015-01-01 00:17:03,2015-01-01 00:20:19,50.0,...,2015-01-01 00:30:09,,H35,Tooting,Home Station,1,Initial,Initial Mobilisation,,


In [2]:
# First basic look at the dataset: per-column dtypes, then the info() summary
print("Types des différentes colonnes :")
print(df.dtypes)

print("Infos du Dataset chargé :")
# DataFrame.info() writes its report to stdout itself and returns None, so
# wrapping it in print() appended a stray "None" line after the summary.
df.info()

Types des différentes colonnes :
IncidentNumber               object
CalYear                       int64
HourOfCall                    int64
ResourceMobilisationId        int64
Resource_Code                object
PerformanceReporting         object
DateAndTimeMobilised         object
DateAndTimeMobile            object
DateAndTimeArrived           object
TurnoutTimeSeconds          float64
TravelTimeSeconds           float64
AttendanceTimeSeconds         int64
DateAndTimeLeft              object
DateAndTimeReturned          object
DeployedFromStation_Code     object
DeployedFromStation_Name     object
DeployedFromLocation         object
PumpOrder                     int64
PlusCode_Code                object
PlusCode_Description         object
DelayCodeId                 float64
DelayCode_Description        object
dtype: object
Infos du Dataset chargé :
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 883641 entries, 0 to 883640
Data columns (total 22 columns):
 #   Column             

In [3]:
# Percentage of missing values per column (rounded to 2 decimals),
# displayed from the most to the least incomplete column
na_share = df.isna().mean()
na_percent = na_share.mul(100).round(2)
na_percent.sort_values(ascending=False)

DateAndTimeReturned         89.17
DelayCode_Description       75.58
DelayCodeId                 75.58
TravelTimeSeconds            0.84
TurnoutTimeSeconds           0.83
DateAndTimeMobile            0.82
DateAndTimeLeft              0.12
DeployedFromLocation         0.08
PlusCode_Description         0.00
PlusCode_Code                0.00
PumpOrder                    0.00
DeployedFromStation_Name     0.00
DeployedFromStation_Code     0.00
IncidentNumber               0.00
CalYear                      0.00
DateAndTimeArrived           0.00
DateAndTimeMobilised         0.00
PerformanceReporting         0.00
Resource_Code                0.00
ResourceMobilisationId       0.00
HourOfCall                   0.00
AttendanceTimeSeconds        0.00
dtype: float64

In [4]:
# Harmonise types: identifier-like columns are stored as strings
for id_col in ('IncidentNumber', 'ResourceMobilisationId', 'Resource_Code'):
    df[id_col] = df[id_col].astype(str)

In [15]:
# Convert the timestamp columns to datetime
date_cols = [
    'DateAndTimeMobilised', 'DateAndTimeMobile', 'DateAndTimeArrived',
    'DateAndTimeLeft', 'DateAndTimeReturned',
]
for col in date_cols:
    # errors='coerce': values that cannot be parsed become NaT instead of raising
    parsed = pd.to_datetime(df[col], errors='coerce')
    df[col] = parsed

In [6]:
# Store AttendanceTimeSeconds as float
attendance_seconds = df['AttendanceTimeSeconds']
df['AttendanceTimeSeconds'] = attendance_seconds.astype("float64")

In [16]:
# Missing-value handling: rows without a delay code get explicit placeholders
delay_placeholders = {
    'DelayCode_Description': "No Delay",
    'DelayCodeId': -1,
}
for delay_col, placeholder in delay_placeholders.items():
    df[delay_col] = df[delay_col].fillna(placeholder)

In [8]:
# Création d'une colonne "Cancelled" pour les interventions annulées

def check_cancelled(row):
    """Classify one mobilisation row by the stage at which its timestamps stop.

    Args:
        row: Mapping-like object (e.g. a DataFrame row) exposing the keys
            'DateAndTimeMobilised', 'DateAndTimeMobile' and
            'DateAndTimeArrived'.

    Returns:
        "Before Departure" when the mobilised time is present but the mobile
        time is missing, "En Route" when the mobile time is present but the
        arrival time is missing, and "No" in every other case.
    """
    if pd.isnull(row['DateAndTimeMobile']):
        # No departure timestamp: flagged only if the mobilisation itself is recorded
        return "Before Departure" if pd.notnull(row['DateAndTimeMobilised']) else "No"
    # Departure is recorded; flagged when the arrival timestamp is missing
    return "En Route" if pd.isnull(row['DateAndTimeArrived']) else "No"

# Vectorised equivalent of applying check_cancelled to every row: a row-wise
# df.apply(axis=1) calls a Python function once per row, which is very slow on
# a dataset of this size, whereas boolean masks + np.select work column-wise.
mobilised_known = df['DateAndTimeMobilised'].notna()
mobile_known = df['DateAndTimeMobile'].notna()
arrived_known = df['DateAndTimeArrived'].notna()

df['Cancelled'] = np.select(
    # Conditions are evaluated in order, first match wins (same as if/elif)
    [mobilised_known & ~mobile_known, mobile_known & ~arrived_known],
    ["Before Departure", "En Route"],
    default="No",
)

In [9]:
# Build the inputs for the seasonal analysis of mobilisations:
# first extract the month number from the mobilisation timestamp
mobilised_at = df['DateAndTimeMobilised']
df['Month'] = mobilised_at.dt.month

# Fonction saison
def get_season(month):
    """Map a month number to its (French) season label.

    Args:
        month: Month number from 1 to 12 (int, or an equal float such as 1.0).

    Returns:
        "Hiver" for 12, 1, 2; "Printemps" for 3-5; "Été" for 6-8;
        "Automne" for 9-11. Any other value (NaN coming from an unparsable
        date, or an out-of-range number) returns None so that it stays a
        missing value instead of being silently counted as autumn.
    """
    if month in (12, 1, 2):
        return "Hiver"
    if month in (3, 4, 5):
        return "Printemps"
    if month in (6, 7, 8):
        return "Été"
    # Explicit membership test instead of a catch-all else: NaN and invalid
    # months must not fall through to "Automne".
    if month in (9, 10, 11):
        return "Automne"
    return None

# Derive the season label of every row from its month number
month_numbers = df['Month']
df['Season'] = month_numbers.map(get_season)

In [10]:
# Création de la colonne DayPeriod pour l'analyse des interventions en fonction de la période de la journée
# Matin - Midi - Après midi - Soir - Nuit

def get_day_period(hour):
    """Return the (French) day-period label for an hour of the day.

    Args:
        hour: Hour of the call as a number.

    Returns:
        "Matin" for 6 <= hour < 12, "Midi" for 12 <= hour < 15,
        "Après-midi" for 15 <= hour < 18, "Soirée" for 18 <= hour < 22,
        and "Nuit" for every other value.
    """
    # Half-open [start, end) windows, checked in chronological order
    day_windows = (
        (6, 12, "Matin"),
        (12, 15, "Midi"),
        (15, 18, "Après-midi"),
        (18, 22, "Soirée"),
    )
    for start, end, label in day_windows:
        if start <= hour < end:
            return label
    # Everything outside the windows above is treated as night
    return "Nuit"

# Label every row with the period of the day in which the call was made
call_hours = df['HourOfCall']
df['DayPeriod'] = call_hours.map(get_day_period)

In [11]:
# Quick check of the derived columns next to the columns they come from
preview_cols = ['HourOfCall', 'DayPeriod', 'Month', 'Season']
print(df[preview_cols].head(15))

    HourOfCall DayPeriod  Month Season
0            0      Nuit      1  Hiver
1            0      Nuit      1  Hiver
2            0      Nuit      1  Hiver
3            0      Nuit      1  Hiver
4            0      Nuit      1  Hiver
5            0      Nuit      1  Hiver
6            0      Nuit      1  Hiver
7            0      Nuit      1  Hiver
8            0      Nuit      1  Hiver
9            0      Nuit      1  Hiver
10           0      Nuit      1  Hiver
11           0      Nuit      1  Hiver
12           0      Nuit      1  Hiver
13           0      Nuit      1  Hiver
14           0      Nuit      1  Hiver


In [19]:
# Give the new label columns an explicit category order for better readability
season_order = pd.CategoricalDtype(
    categories=["Hiver", "Printemps", "Été", "Automne"],
    ordered=True,
)
period_order = pd.CategoricalDtype(
    categories=["Nuit", "Matin", "Midi", "Après-midi", "Soirée"],
    ordered=True,
)

for label_col, label_dtype in (('Season', season_order), ('DayPeriod', period_order)):
    df[label_col] = df[label_col].astype(label_dtype)

# Columns kept in the DataFrame used for the analyses, in output order;
# any column of df that is not listed here is left out of df_clean
cols_to_keep = [
    # identifiers
    "IncidentNumber", "ResourceMobilisationId", "PumpOrder",
    # call year and hour
    "CalYear", "HourOfCall",
    # timestamps
    "DateAndTimeMobilised", "DateAndTimeMobile", "DateAndTimeArrived",
    "DateAndTimeLeft", "DateAndTimeReturned",
    # durations in seconds
    "TurnoutTimeSeconds", "TravelTimeSeconds", "AttendanceTimeSeconds",
    # reporting and delay information
    "PerformanceReporting", "DelayCodeId", "DelayCode_Description",
    # resource and station
    "Resource_Code", "DeployedFromStation_Code", "DeployedFromStation_Name",
    "DeployedFromLocation",
    # derived columns
    "Cancelled", "Season", "DayPeriod",
]

# New, independent DataFrame holding the cleaned dataset
df_clean = df.loc[:, cols_to_keep].copy()

In [18]:
# Display the dtype of every column of the working DataFrame
df.dtypes

IncidentNumber                      object
CalYear                              int64
HourOfCall                           int64
ResourceMobilisationId              object
Resource_Code                       object
PerformanceReporting                object
DateAndTimeMobilised        datetime64[ns]
DateAndTimeMobile           datetime64[ns]
DateAndTimeArrived          datetime64[ns]
TurnoutTimeSeconds                 float64
TravelTimeSeconds                  float64
AttendanceTimeSeconds              float64
DateAndTimeLeft             datetime64[ns]
DateAndTimeReturned         datetime64[ns]
DeployedFromStation_Code            object
DeployedFromStation_Name            object
DeployedFromLocation                object
PumpOrder                            int64
PlusCode_Code                       object
PlusCode_Description                object
DelayCodeId                        float64
DelayCode_Description               object
Cancelled                           object
Month      

In [20]:
# Save the cleaned dataset as CSV, without writing the index as a column
df_clean.to_csv(
    "../data/LFB_Mobilisation_2015_2020_clean.csv",
    index=False,
)

In [21]:
# Final checks on the cleaned frame: column dtypes, then the 8 columns
# with the highest share of missing values
print(df_clean.dtypes)
missing_share = df_clean.isna().mean()
print(missing_share.sort_values(ascending=False).head(8))

IncidentNumber                      object
ResourceMobilisationId              object
PumpOrder                            int64
CalYear                              int64
HourOfCall                           int64
DateAndTimeMobilised        datetime64[ns]
DateAndTimeMobile           datetime64[ns]
DateAndTimeArrived          datetime64[ns]
DateAndTimeLeft             datetime64[ns]
DateAndTimeReturned         datetime64[ns]
TurnoutTimeSeconds                 float64
TravelTimeSeconds                  float64
AttendanceTimeSeconds              float64
PerformanceReporting                object
DelayCodeId                        float64
DelayCode_Description               object
Resource_Code                       object
DeployedFromStation_Code            object
DeployedFromStation_Name            object
DeployedFromLocation                object
Cancelled                           object
Season                            category
DayPeriod                         category
dtype: obje