In [22]:
import pandas as pd

In [23]:
# Read the raw CSV export into `df`.
file_path = "aqu_borneincendie_p.csv"
df = pd.read_csv(filepath_or_buffer=file_path)

In [24]:
# Preview the first five rows of the freshly loaded frame.
df.head(n=5)

Unnamed: 0,ADRESSE,ADRESSE_R_M,COORDONNEE_SPATIALE_X,COORDONNEE_SPATIALE_Y,DATE_ABANDON,DATE_ABANDON_R_M,DATE_INSTALLATION,DATE_INSTALLATION_R_M,ELEVATION_TERRAIN,ID_BI,...,PRECISION_DATE_INSTALL_R_M,PROPRIETAIRE,PROPRIETAIRE_R_M,PROVENANCE_DONNEE,STATUT_ACTIF,STATUT_ACTIF_R_M,LATITUDE,LONGITUDE,ABANDONNE_R,ABANDONNE_R_M
0,430 rue de Bonsecours,Cartographie géomatique ville,300735.516,5041116.762,,Non applicable,19670101,Plan Atlas et inventaire,25.684,5020413,...,Plan Atlas et inventaire,VILLE DE MONTREAL,Cartographie géomatique ville,RELEVE TERRAIN INTERNE,EXISTANT,Plan profil,45.509968,-73.552019,NON,Non applicable
1,88 boulevard René-Lévesque Ouest,Cartographie géomatique ville,299926.983,5040845.13,,Non applicable,19830101,Plan Atlas et inventaire,23.224,5020476,...,Plan Atlas et inventaire,VILLE DE MONTREAL,Cartographie géomatique ville,RELEVE TERRAIN INTERNE,EXISTANT,Plan profil,45.507518,-73.562364,NON,Non applicable
2,1110-1116 boulevard Saint-Laurent,Cartographie géomatique ville,299979.9,5040977.088,,Non applicable,19540101,Plan Atlas et inventaire,24.602,5020477,...,Plan Atlas et inventaire,VILLE DE MONTREAL,Cartographie géomatique ville,RELEVE TERRAIN INTERNE,EXISTANT,Plan profil,45.508706,-73.561688,NON,Non applicable
3,160 chemin du Tour-de-l'Isle,Cartographie géomatique ville,302614.124,5041650.945,,Non applicable,19980101,Plan Atlas et inventaire,14.87,5020692,...,Plan Atlas et inventaire,VILLE DE MONTREAL,Cartographie géomatique ville,RELEVE PHOTOGRAMMETRIQUE,EXISTANT,Plan profil,45.514783,-73.527979,NON,Non applicable
4,1316 boulevard Mont-Royal,Cartographie géomatique ville,296744.791,5041332.756,,Non applicable,19100101,Plan Atlas et inventaire,126.041,5020735,...,Plan Atlas et inventaire,VILLE DE MONTREAL,Cartographie géomatique ville,RELEVE PHOTOGRAMMETRIQUE,EXISTANT,Plan profil,45.511876,-73.603096,NON,Non applicable


In [25]:
# Keep every row whose STATUT_ACTIF is anything other than "ABANDONNE".
is_abandoned = df["STATUT_ACTIF"].eq("ABANDONNE")
df = df.loc[~is_abandoned]

In [26]:
# Remove every column whose name ends in "_R_M", plus a fixed set of columns
# that are not carried into the cleaned frame.
extra_columns = ["DATE_INSTALLATION", "DATE_ABANDON", "STATUT_ACTIF", "ABANDONNE_R"]
columns_to_drop = list(filter(lambda name: name.endswith("_R_M"), df.columns))
columns_to_drop.extend(extra_columns)
df_cleaned = df.drop(columns_to_drop, axis="columns")

In [27]:
# Preview the first five rows after the column drop.
df_cleaned.head(n=5)

Unnamed: 0,ADRESSE,COORDONNEE_SPATIALE_X,COORDONNEE_SPATIALE_Y,ELEVATION_TERRAIN,ID_BI,ID_POINT,JURIDICTION,PRECISION_DATE_INSTALL,PROPRIETAIRE,PROVENANCE_DONNEE,LATITUDE,LONGITUDE
0,430 rue de Bonsecours,300735.516,5041116.762,25.684,5020413,5262933,LOCALE,DATE INSTALLATION DANS L'ANNEE COURANTE,VILLE DE MONTREAL,RELEVE TERRAIN INTERNE,45.509968,-73.552019
1,88 boulevard René-Lévesque Ouest,299926.983,5040845.13,23.224,5020476,5262996,LOCALE,DATE INSTALLATION DANS L'ANNEE COURANTE,VILLE DE MONTREAL,RELEVE TERRAIN INTERNE,45.507518,-73.562364
2,1110-1116 boulevard Saint-Laurent,299979.9,5040977.088,24.602,5020477,5262997,LOCALE,DATE INSTALLATION DANS L'ANNEE COURANTE,VILLE DE MONTREAL,RELEVE TERRAIN INTERNE,45.508706,-73.561688
3,160 chemin du Tour-de-l'Isle,302614.124,5041650.945,14.87,5020692,5267508,LOCALE,DATE INSTALLATION DANS L'ANNEE COURANTE,VILLE DE MONTREAL,RELEVE PHOTOGRAMMETRIQUE,45.514783,-73.527979
4,1316 boulevard Mont-Royal,296744.791,5041332.756,126.041,5020735,5268022,LOCALE,DATE INSTALLATION DANS L'ANNEE COURANTE,VILLE DE MONTREAL,RELEVE PHOTOGRAMMETRIQUE,45.511876,-73.603096


In [28]:
# Standardize categorical text columns: trim surrounding whitespace and
# upper-case every value.
categorical_columns = ["PROPRIETAIRE", "JURIDICTION", "PROVENANCE_DONNEE"]
for col in categorical_columns:
    raw_values = df_cleaned[col]
    # astype(str) turns a missing value into the literal text "nan" (which then
    # becomes "NAN" after upper-casing), so mask the normalized result back to
    # missing wherever the source value was missing.
    df_cleaned[col] = raw_values.astype(str).str.strip().str.upper().where(raw_values.notna())

In [29]:
# Keep only rows whose coordinates fall inside the valid geographic ranges:
# latitude in [-90, 90] and longitude in [-180, 180], bounds included.
latitude_ok = df_cleaned["LATITUDE"].between(-90, 90)
longitude_ok = df_cleaned["LONGITUDE"].between(-180, 180)
df_cleaned = df_cleaned[latitude_ok & longitude_ok]

In [30]:
# Fill missing ADRESSE values with 'UNKNOWN'.
# fillna(inplace=True) on df_cleaned["ADRESSE"] acts on a temporary Series:
# it raises chained-assignment warnings on pandas 2.x and leaves df_cleaned
# unchanged under copy-on-write. Build the filled column explicitly and
# rebind the frame so the fill is always applied.
df_cleaned = df_cleaned.assign(ADRESSE=df_cleaned["ADRESSE"].fillna("UNKNOWN"))
df_cleaned.size

374592

In [31]:
# Persist the cleaned frame to CSV, leaving the row index out of the file.
df_cleaned.to_csv(path_or_buf="cleaned_borne_incendie.csv", index=False)