In [1]:
import joblib
import pandas as pd
import unicodedata
import re
import pickle
import os
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.pipeline import Pipeline
from sklearn.compose import ColumnTransformer
from sklearn.preprocessing import StandardScaler, OneHotEncoder
from sklearn.impute import SimpleImputer
from sklearn.ensemble import RandomForestClassifier, RandomForestRegressor
from sklearn.multioutput import MultiOutputClassifier
from sklearn.metrics import accuracy_score, mean_squared_error

In [2]:
# Load the fitted prediction pipeline from disk.
# NOTE(review): absolute, machine-specific path — consider a configurable base directory.
# NOTE(review): joblib.load unpickles the file, which can execute arbitrary code;
# only load artifacts from a trusted source.
pipeline_path = "/Users/fabiancordenod/code/fqbq69/BIMpredict-/pipeline/randomforestmurspipeline.pkl"
pipeline = joblib.load(pipeline_path)

In [3]:
# Load the test CSV
csv_path = "/Users/fabiancordenod/code/fqbq69/BIMpredict-/datatest/test21020/murs21020.csv"
if not os.path.isfile(csv_path):
    # Fail here instead of leaving df_test as None: later cells use df_test
    # unguarded and would otherwise stop with an opaque AttributeError.
    raise FileNotFoundError(f"Le fichier n'existe pas : {csv_path}")

# Semicolon-separated file; header=1 takes the column names from the second
# line of the file and skips the line before it.
# NOTE(review): no `decimal=` is passed; if the file uses comma decimals the
# numeric columns load as strings — confirm this matches how the training
# data was read before changing it.
df_test = pd.read_csv(csv_path, delimiter=';', header=1)

# Normalise column names the same way as at training time
def clean_col(col):
    """Return a normalised, snake_case-like version of a column name.

    Steps, in order: strip accents, lowercase, turn spaces, hyphens and
    parentheses into underscores, drop every character that is not
    ``a-z``, ``0-9`` or ``_``, collapse runs of underscores, and trim
    leading/trailing underscores.

    Args:
        col: Column name as a string.

    Returns:
        The cleaned column name (may be empty if nothing survives).
    """
    # NFD splits accented letters into base letter + combining mark;
    # the combining marks (Unicode category "Mn") are then discarded.
    name = ''.join(
        ch for ch in unicodedata.normalize('NFD', col)
        if unicodedata.category(ch) != 'Mn'
    ).lower()

    # Applied sequentially: the order matters (separators first, then
    # removal of leftovers, then underscore collapsing).
    substitutions = (
        (r"[ \-\(\)]", "_"),
        (r"[^a-z0-9_]", ""),
        (r"_+", "_"),
    )
    for pattern, replacement in substitutions:
        name = re.sub(pattern, replacement, name)

    return name.strip("_")

# Rename the headers in place; skipped when the CSV could not be loaded
if df_test is not None:
    df_test.columns = list(map(clean_col, df_test.columns))

In [4]:
# Display the loaded test frame to check the cleaned column names
df_test

Unnamed: 0,id,011ec_lot,012ec_ouvrage,013ec_localisation,014ec_mode_constructif,nom,hauteur,epaisseur,ai,as,...,code_dassemblage,retournement_aux_insertions,retournement_aux_extremites,couleur_vue_detail_faible,motif_vue_detail_faible,marque_de_type,protection_contre_lincendie,cout,fonction,largeur
0,972712,,,,,EC-BA ép. 20 EXT.,360000000000001,02,0,360000000000001,...,,0,0,0,,,,0,1,02
1,973155,,,,,EC-BA ép. 20 EXT.,360000000000001,02,0,360000000000001,...,,0,0,0,,,,0,1,02
2,973308,,,,,EC-BA ép. 20 EXT.,360000000000001,02,0,360000000000001,...,,0,0,0,,,,0,1,02
3,973495,,,,,EC-BA ép. 20 EXT.,360000000000001,02,0,360000000000001,...,,0,0,0,,,,0,1,02
4,973660,,,,,EC-BA ép. 20 EXT.,340000000000002,02,0,340000000000002,...,,0,0,0,,,,0,1,02
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
254,1372239,,,,,EC-BA ép. 20 EXT.,360000000000001,02,0,360000000000001,...,,0,0,0,,,,0,1,02
255,1372521,,,,,EC-BA ép. 20 EXT.,329999999999998,02,-329999999999998,0,...,,0,0,0,,,,0,1,02
256,1372863,,,,,EC-BA ép. 20 EXT.,329999999999998,02,-329999999999998,0,...,,0,0,0,,,,0,1,02
257,1373629,,,,,EC-BA ép. 20 EXT.,360000000000001,02,0,360000000000001,...,,0,0,0,,,,0,1,02


In [5]:
# Target columns to predict (names after header cleaning)
targets = [
    "011ec_lot",
    "012ec_ouvrage",
    "013ec_localisation",
    "014ec_mode_constructif"
]

# Input columns expected by the pipeline's fitted preprocessor
features = pipeline.named_steps['preprocessor'].feature_names_in_

# Stop with an explicit message if the CSV was never loaded
if df_test is None:
    raise RuntimeError("df_test n'a pas été chargé : vérifier csv_path")

# Report every expected feature that the CSV does not provide
missing_features = [col for col in features if col not in df_test.columns]
for col in missing_features:
    print(f"Colonne manquante ajoutée : {col}")

# Build the model input without mutating df_test: missing features are added
# as all-NaN columns on a copy, then the columns are selected in the order the
# preprocessor expects. Keeping df_test untouched makes this cell safe to
# re-run (missing columns are reported every time).
X_test = df_test.assign(**{col: np.nan for col in missing_features})[features].copy()


In [6]:
# Run the loaded pipeline on the prepared feature frame
y_pred = pipeline.predict(X_test)

In [7]:
# Show the raw prediction array
y_pred

array([['GO', 'MUR', 'EXTERIEUR', 'BANCHE'],
       ['GO', 'MUR', 'INTERIEUR', 'BANCHE'],
       ['GO', 'MUR', 'EXTERIEUR', 'BANCHE'],
       ...,
       ['GO', 'MUR', 'EXTERIEUR', 'BANCHE'],
       ['GO', 'MUR', 'INTERIEUR', 'BANCHE'],
       ['GO', 'MUR', 'EXTERIEUR', 'BANCHE']], dtype=object)

In [8]:
# Wrap the predictions in a DataFrame, one column per target name
y_pred_df = pd.DataFrame(y_pred, columns=targets)

In [9]:
# Count how often each value is predicted for the 013ec_localisation target
y_pred_df['013ec_localisation'].value_counts()

INTERIEUR    228
EXTERIEUR     31
Name: 013ec_localisation, dtype: int64

In [10]:
# Collect the predictions in a DataFrame, one column per target
df_pred = pd.DataFrame(data=y_pred, columns=targets)

# Preview the first five rows
print(df_pred.head(n=5))

# Write the predictions to a CSV relative to the current working directory
# (row index omitted).
# NOTE(review): the file holds only the target columns; rows can be matched
# back to the input only by position — confirm that is sufficient.
df_pred.to_csv(path_or_buf="resultats_predictions_murs21020.csv", index=False)

  011ec_lot 012ec_ouvrage 013ec_localisation 014ec_mode_constructif
0        GO           MUR          EXTERIEUR                 BANCHE
1        GO           MUR          INTERIEUR                 BANCHE
2        GO           MUR          EXTERIEUR                 BANCHE
3        GO           MUR          INTERIEUR                 BANCHE
4        GO           MUR          INTERIEUR                 BANCHE
