In [37]:
import pickle
import pandas as pd

In [38]:
# One raw car record, keyed by the column names the later cells refer to.
# Key order is kept as-is because it becomes the column order of the
# DataFrame built from this dict.
car_dict = {
    'Levy': 1399,
    'Manufacturer': 'LEXUS',
    'Model': 'RX 450',
    'Prod. year': 2010,
    'Category': 'Jeep',
    'Leather interior': 'Yes',
    'Fuel type': 'Hybrid',
    'Engine volume': 3.5,
    'Mileage': 186005,
    'Cylinders': 6.0,
    'Gear box type': 'Automatic',
    'Drive wheels': '4x4',
    'Wheel': 'Left wheel',
    'Color': 'Silver',
    'Airbags': 12,
}

# Preprocessing Plan for Model Input

## Input Handling
- Ensure proper handling of input features in `replace_categorical_by_numerical`:
  - **Engine volume**: Accept as a numerical input.
  - **Levy**: Input as a number; if not provided, use `0` as the default value.
  - **Mileage**: Input as a numerical value.

## Feature Engineering
- Generate a new feature: **`Age`**, computed as the current year minus **`Prod. year`** (the car's manufacturing year).
  
## Feature Removal
- Remove the features: **`ID`, `Doors`, `Prod. year`** from the dataset.

## Categorical Encoding
1. **One-Hot Encoded Columns**:
   - **`Leather interior`**: Categories → `Yes`, `No`.
   - **`Gear box type`**: Categories → `Automatic`, `Tiptronic`, `Variator`, `Manual`.
   - **`Drive wheels`**: Categories → `4x4`, `Front`, `Rear`.
   - **`Wheel`**: Categories → `Left wheel`, `Right-hand drive`.

2. **Label Encoded Columns**:
   - **`Manufacturer`**
   - **`Model`**
   - **`Category`**
   - **`Fuel type`**
   - **`Color`**

## Numerical Scaling
- Scale the following numerical columns:
  - **`Levy`**
  - **`Engine volume`**
  - **`Mileage`**
  - **`Age`**

In [39]:
# Wrap the single record in a list so it becomes a one-row DataFrame;
# the columns are pinned explicitly to the dict's key order.
data = pd.DataFrame([car_dict], columns=list(car_dict))
data

Unnamed: 0,Levy,Manufacturer,Model,Prod. year,Category,Leather interior,Fuel type,Engine volume,Mileage,Cylinders,Gear box type,Drive wheels,Wheel,Color,Airbags
0,1399,LEXUS,RX 450,2010,Jeep,Yes,Hybrid,3.5,186005,6.0,Automatic,4x4,Left wheel,Silver,12


In [40]:
from datetime import datetime

# Derive the car's age in years from its production year.
# NOTE(review): the result depends on the wall-clock year at run time.
# Presumably the scaler applied later was fitted on Age values computed with
# the training-time year -- confirm both use the same reference year,
# otherwise Age drifts by one for every year that passes.
current_year = datetime.now().year
data['Age'] = current_year - data['Prod. year']

# Remove the columns listed for removal in the preprocessing plan
# ('ID', 'Doors', 'Prod. year'). 'ID' and 'Doors' are not present in
# car_dict, so errors='ignore' turns their absence into a no-op; listing
# 'ID' keeps it from reaching the model if a record carrying it is used.
data = data.drop(columns=['ID', 'Doors', 'Prod. year'], errors='ignore')

In [41]:
# Inspect the frame after adding 'Age' and dropping 'Prod. year'.
data

Unnamed: 0,Levy,Manufacturer,Model,Category,Leather interior,Fuel type,Engine volume,Mileage,Cylinders,Gear box type,Drive wheels,Wheel,Color,Airbags,Age
0,1399,LEXUS,RX 450,Jeep,Yes,Hybrid,3.5,186005,6.0,Automatic,4x4,Left wheel,Silver,12,14


In [42]:
# Categorical columns that are expanded into one indicator column per category.
one_hot_columns = [
    'Leather interior',
    'Gear box type',
    'Drive wheels',
    'Wheel',
]

# Restore the pickled one-hot encoder from disk.
# NOTE: pickle.load can execute arbitrary code embedded in the file; only
# load artifacts that come from a trusted source.
with open('../models/one_hot_encoder.pkl', 'rb') as encoder_file:
    one_hot_encoder = pickle.load(encoder_file)

In [43]:
# 3. One-Hot Encode Categorical Columns
# `one_hot_columns` and `one_hot_encoder` are defined by the previous cell;
# they are reused here instead of being redefined and re-read from disk.

# Encode the raw categorical columns of the new record and label the result
# with the encoder's own feature names, aligned to the frame's index.
encoded_data = one_hot_encoder.transform(data[one_hot_columns])
encoded_data_df = pd.DataFrame(
    encoded_data,
    columns=one_hot_encoder.get_feature_names_out(one_hot_columns),
    index=data.index,
)

# Append the encoded columns and remove the raw ones in a single expression,
# so no frame is mutated in place.
data = pd.concat([data, encoded_data_df], axis=1).drop(columns=one_hot_columns)

In [44]:
# 4. Label Encode Categorical Columns
label_encode_columns = [
    'Manufacturer',
    'Model',
    'Category',
    'Fuel type',
    'Color',
]

# Restore the pickled encoders; the loaded object is indexed by column name below.
with open('../models/label_encoders.pkl', 'rb') as encoders_file:
    label_encoders = pickle.load(encoders_file)

# Replace each categorical column with the output of its own encoder.
for column in label_encode_columns:
    data[column] = label_encoders[column].transform(data[column])

In [45]:
# Inspect the frame after one-hot and label encoding.
data

Unnamed: 0,Levy,Manufacturer,Model,Category,Fuel type,Engine volume,Mileage,Cylinders,Color,Airbags,...,Leather interior_Yes,Gear box type_Automatic,Gear box type_Manual,Gear box type_Tiptronic,Gear box type_Variator,Drive wheels_4x4,Drive wheels_Front,Drive wheels_Rear,Wheel_Left wheel,Wheel_Right-hand drive
0,1399,28,1037,4,2,3.5,186005,6.0,12,12,...,1.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0


In [46]:
# 5. Scale numerical columns
numerical_columns = ['Levy', 'Engine volume', 'Mileage', 'Age']

# Restore the pickled scaler from disk.
with open('../models/scaler.pkl', 'rb') as scaler_file:
    scaler = pickle.load(scaler_file)

# Transform the numerical columns together, then write the result back
# over the same columns.
scaled_values = scaler.transform(data[numerical_columns])
data[numerical_columns] = scaled_values

In [48]:
# Column names and order of the frame that is passed to model.predict below.
data.columns

Index(['Levy', 'Manufacturer', 'Model', 'Category', 'Fuel type',
       'Engine volume', 'Mileage', 'Cylinders', 'Color', 'Airbags', 'Age',
       'Leather interior_No', 'Leather interior_Yes',
       'Gear box type_Automatic', 'Gear box type_Manual',
       'Gear box type_Tiptronic', 'Gear box type_Variator', 'Drive wheels_4x4',
       'Drive wheels_Front', 'Drive wheels_Rear', 'Wheel_Left wheel',
       'Wheel_Right-hand drive'],
      dtype='object')

In [49]:
# Restore the pickled model from disk.
with open('../models/model.pkl', 'rb') as model_file:
    model = pickle.load(model_file)

In [50]:
# Run the loaded model on the preprocessed single-row frame and show the result.
prediction = model.predict(data)
prediction

array([13377.36903993])