### Imports

In [6]:
import pandas as pd
import joblib
from sklearn.base import BaseEstimator, TransformerMixin
from sklearn.pipeline import Pipeline
from sklearn.compose import ColumnTransformer

Pipeline saved to models/final_pipeline.pkl


### Build, Fit, and Save a Unified Pipeline from Saved Components

In [14]:
# === 1. Custom Transformer to Drop Columns ===


class DataTransformer(BaseEstimator, TransformerMixin):
    """Pipeline step that removes a fixed set of columns from a DataFrame.

    Parameters
    ----------
    columns_to_drop : list of str
        Column labels to remove. Labels that are not present in the input
        are skipped silently.
    """

    def __init__(self, columns_to_drop):
        # Stored as-is under the same name as the constructor argument.
        self.columns_to_drop = columns_to_drop

    def fit(self, X, y=None):
        """Nothing is learned from the data; return ``self`` unchanged."""
        return self

    def transform(self, X):
        """Return a copy of ``X`` without the configured columns.

        The input frame is left untouched: dropping in place would silently
        alter the caller's DataFrame every time the pipeline is fitted or
        used for prediction.
        """
        # errors='ignore' keeps inputs that lack some of the columns valid.
        return X.drop(columns=self.columns_to_drop, errors='ignore')

# --- Restore the individually persisted artifacts ---
# joblib.load unpickles arbitrary objects: only load files from a trusted source.
ordinal_encoder = joblib.load('models/ordinal_encoder.pkl')
onehot_encoder = joblib.load('models/one_hot_encoder.pkl')
scaler = joblib.load('models/scaler.pkl')
feature_selector = joblib.load('models/feature_selector.pkl')
model = joblib.load('models/random_forest_model.pkl')

# --- Column groups handled by each step ---
# Removed up front by the DataTransformer step.
columns_to_drop = ['titre', 'localisation', 'annee-modele']

# Encoded by the ordinal encoder.
ordinal_columns = ['etat']

# Encoded by the one-hot encoder.
onehot_columns = [
    'boite-de-vitesses',
    'type-de-carburant',
    'marque',
    'modele',
    'origine',
]

# Passed through the scaler.
numerical_columns = ['kilometrage', 'age', 'puissance-fiscale']

# --- Route every column group to its transformer ---
preprocessor = ColumnTransformer(
    [
        ('onehot', onehot_encoder, onehot_columns),
        ('ordinal', ordinal_encoder, ordinal_columns),
        ('scaler', scaler, numerical_columns),
    ]
)

# --- Chain column dropping, preprocessing, feature selection and the model ---
pipeline = Pipeline(
    steps=[
        ('drop_cols', DataTransformer(columns_to_drop)),
        ('preprocess', preprocessor),
        ('feature_selection', feature_selector),
        ('model', model),
    ]
)

# --- Fit on the cleaned dataset; 'prix' is the target ---
df = pd.read_csv('data/data_cleaned.csv')
X = df.drop(columns='prix')
y = df['prix']
# NOTE(review): fit() re-estimates the pipeline steps on this data, so the
# loaded artifacts are not used as frozen, already-trained objects here —
# confirm that refitting on the full cleaned dataset is intended.
pipeline.fit(X, y)

# --- Persist the fitted pipeline as a single artifact ---
joblib.dump(pipeline, 'models/final_pipeline.pkl')
print("Pipeline saved to models/final_pipeline.pkl")

Pipeline saved to models/final_pipeline.pkl


### Load pipeline and predict

In [15]:
# --- Restore the end-to-end pipeline written by the previous cell ---
# joblib.load unpickles arbitrary objects: only load files from a trusted source.
pipeline = joblib.load('models/final_pipeline.pkl')

# --- Build the input to score ---
# A single hand-written example row; replace it with real test data.
# 'annee-modele' is one of the columns the pipeline drops before preprocessing.
data = pd.DataFrame(
    {
        'etat': ['bon'],
        'boite-de-vitesses': ['manuelle'],
        'type-de-carburant': ['essence'],
        'marque': ['renault'],
        'modele': ['clio'],
        'origine': ['maroc'],
        'kilometrage': [120000],
        'age': [5],
        'puissance-fiscale': [6],
        'annee-modele': ['2018'],
    }
)

# --- Run the whole pipeline on the sample ---
predictions = pipeline.predict(data)

# --- Report the prediction for the single input row ---
print("Prediction:", predictions[0])

Prediction: 170021.0
