In [237]:
import pickle
import pandas as pd

In [238]:
# One raw car listing, keyed by the column names used throughout the notebook.
# Key order is significant: it becomes the DataFrame column order downstream.
car_dict = {
    "Levy": 1399,                   # numeric, scaled later
    "Manufacturer": "LEXUS",        # label-encoded later
    "Model": "RX 450",              # label-encoded later
    "Prod. year": 2010,             # used to derive `Age`, then dropped
    "Category": "Jeep",             # label-encoded later
    "Leather interior": "Yes",      # mapped to 1/0 later
    "Fuel type": "Hybrid",          # one-hot encoded later
    "Engine volume": 3.5,           # numeric, scaled later
    "Mileage": 186005,              # numeric, scaled later
    "Cylinders": 6.0,               # passed to the model unchanged
    "Gear box type": "Automatic",   # one-hot encoded later
    "Drive wheels": "4x4",          # one-hot encoded later
    "Wheel": "Left wheel",          # one-hot encoded later
    "Color": "Silver",              # label-encoded later
    "Airbags": 12,                  # passed to the model unchanged
}


# Preprocessing Plan for Model Input
## Input Handling

-    Ensure proper handling of input features in `replace_categorical_by_numerical`:
     -    **Engine Volume**: Accept as a numerical input.
     -    **Levy**: Input as a number; if not provided, use `0` as the default value.
     -    **Mileage**: Input as a numerical value.

## Feature Engineering

  -  Generate a new feature: **`Age`**, computed as the current year minus the car's `Prod. year`.

## Feature Removal

  -  Remove the features: **`ID`, `Doors`, `Prod. year`** from the dataset.

## Categorical Encoding

1.   **One-Hot Encoded Columns**:
-    **`Leather interior`**: Categories → `Yes`, `No` (encoded directly as `1`/`0` with a mapping rather than through the one-hot encoder).
-    **`Gear box type`**: Categories → `Automatic`, `Tiptronic`, `Variator`, `Manual`.
-    **`Drive wheels`**: Categories → `4x4`, `Front`, `Rear`.
-    **`Wheel`**: Categories → `Left wheel`, `Right-hand drive`.
-    **`Fuel type`**: Categories → `Hybrid`, `Petrol`, `Diesel`, `CNG`, `Plug-in Hybrid`, `LPG`, `Hydrogen`.
       
2.   **Label Encoded Columns**:
-    `Manufacturer`
-    `Model`
-    `Category`
-    `Color`

3.    **Numerical Scaling**

   - **Scale the following numerical columns:**
     - `Levy`
     - `Engine volume`
     - `Mileage`
     - `Age`



In [239]:
# Wrap the single record in a list so pandas builds a one-row DataFrame whose
# columns are the dict keys, in insertion order.
data = pd.DataFrame([car_dict])
data

Unnamed: 0,Levy,Manufacturer,Model,Prod. year,Category,Leather interior,Fuel type,Engine volume,Mileage,Cylinders,Gear box type,Drive wheels,Wheel,Color,Airbags
0,1399,LEXUS,RX 450,2010,Jeep,Yes,Hybrid,3.5,186005,6.0,Automatic,4x4,Left wheel,Silver,12


In [240]:
from datetime import datetime

# Derive `Age` from the production year, then remove the columns the
# preprocessing plan drops (`ID`, `Doors`, `Prod. year`).
#
# NOTE(review): the age is measured against the year in which this cell runs,
# so the same car yields a different `Age` (and prediction) in a later year.
# Presumably the scaler was fitted against a specific reference year —
# TODO confirm and pin that year instead of using the wall clock.
if 'Prod. year' in data.columns:
    data['Age'] = datetime.now().year - data['Prod. year']
elif 'Age' not in data.columns:
    # Neither the source column nor the derived one exists: fail loudly, with
    # the same exception type a missing column lookup would raise.
    raise KeyError('Prod. year')
# else: `Age` was already derived by an earlier run of this cell, so re-running
# is a no-op instead of a KeyError on the already-dropped `Prod. year`.

# errors='ignore' lets the drop succeed for inputs that lack `ID` / `Doors`.
data = data.drop(columns=['ID', 'Doors', 'Prod. year'], errors='ignore')

In [241]:
# Inspect the row after feature engineering: `Age` added, `Prod. year` removed.
data

Unnamed: 0,Levy,Manufacturer,Model,Category,Leather interior,Fuel type,Engine volume,Mileage,Cylinders,Gear box type,Drive wheels,Wheel,Color,Airbags,Age
0,1399,LEXUS,RX 450,Jeep,Yes,Hybrid,3.5,186005,6.0,Automatic,4x4,Left wheel,Silver,12,16


In [242]:
# Load the one-hot encoder that is applied to the categorical columns below.
# NOTE: pickle.load can execute arbitrary code from the file; only load
# artifacts that come from a trusted source.
with open(r'../models/one_hot_encoder.pkl', 'rb') as encoder_file:
    one_hot_encoder = pickle.load(encoder_file)

In [243]:
# Categorical columns that the pickled encoder expands into indicator columns.
columns_one_hot_encoding = ['Gear box type', 'Drive wheels', 'Wheel', 'Fuel type']

# Encode the selected columns and name the resulting columns after the
# encoder's own feature names (e.g. `Fuel type_Hybrid`).
encoded_data = one_hot_encoder.transform(data[columns_one_hot_encoding])
encoded_feature_names = one_hot_encoder.get_feature_names_out(columns_one_hot_encoding)
encoded_data_df = pd.DataFrame(
    encoded_data,
    columns=encoded_feature_names,
    index=data.index,  # keep row alignment with `data` for the concat below
)

# Swap the raw categorical columns for their indicator columns: the remaining
# original columns keep their order and the indicators are appended after them.
data = pd.concat(
    [data.drop(columns=columns_one_hot_encoding), encoded_data_df],
    axis=1,
)
data

Unnamed: 0,Levy,Manufacturer,Model,Category,Leather interior,Engine volume,Mileage,Cylinders,Color,Airbags,...,Drive wheels_Rear,Wheel_Left wheel,Wheel_Right-hand drive,Fuel type_CNG,Fuel type_Diesel,Fuel type_Hybrid,Fuel type_Hydrogen,Fuel type_LPG,Fuel type_Petrol,Fuel type_Plug-in Hybrid
0,1399,LEXUS,RX 450,Jeep,Yes,3.5,186005,6.0,Silver,12,...,0.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0


In [244]:
# List the columns after one-hot encoding to check the names and their order.
data.columns

Index(['Levy', 'Manufacturer', 'Model', 'Category', 'Leather interior',
       'Engine volume', 'Mileage', 'Cylinders', 'Color', 'Airbags', 'Age',
       'Gear box type_Automatic', 'Gear box type_Manual',
       'Gear box type_Tiptronic', 'Gear box type_Variator', 'Drive wheels_4x4',
       'Drive wheels_Front', 'Drive wheels_Rear', 'Wheel_Left wheel',
       'Wheel_Right-hand drive', 'Fuel type_CNG', 'Fuel type_Diesel',
       'Fuel type_Hybrid', 'Fuel type_Hydrogen', 'Fuel type_LPG',
       'Fuel type_Petrol', 'Fuel type_Plug-in Hybrid'],
      dtype='object')

In [245]:
# Display the encoder's configuration (its parameters are rendered as a table).
one_hot_encoder

0,1,2
,categories,'auto'
,drop,
,sparse_output,False
,dtype,<class 'numpy.float64'>
,handle_unknown,'ignore'
,min_frequency,
,max_categories,
,feature_name_combiner,'concat'


In [246]:
# Columns that are converted to integer codes by per-column label encoders.
columns_label_encoding = ['Manufacturer', 'Model', 'Category', 'Color']

# Load the pickled encoders; the next cells index this object by column name.
# NOTE: pickle.load can execute arbitrary code from the file; only load
# artifacts that come from a trusted source.
with open(r'../models/label_encoders.pkl', 'rb') as encoders_file:
    label_encoding = pickle.load(encoders_file)

In [247]:
# Show the loaded object: a dict mapping each column name to its encoder.
label_encoding

{'Manufacturer': LabelEncoder(),
 'Model': LabelEncoder(),
 'Category': LabelEncoder(),
 'Color': LabelEncoder()}

In [248]:
# Replace each categorical column with the integer codes produced by the
# encoder stored under the same column name.
# NOTE: this overwrites the string values, so the cell is not re-runnable
# without rebuilding `data` first.
for column_name in columns_label_encoding:
    data[column_name] = label_encoding[column_name].transform(data[column_name])

In [249]:
# Inspect the row after label encoding: the four encoded columns now hold integers.
data

Unnamed: 0,Levy,Manufacturer,Model,Category,Leather interior,Engine volume,Mileage,Cylinders,Color,Airbags,...,Drive wheels_Rear,Wheel_Left wheel,Wheel_Right-hand drive,Fuel type_CNG,Fuel type_Diesel,Fuel type_Hybrid,Fuel type_Hydrogen,Fuel type_LPG,Fuel type_Petrol,Fuel type_Plug-in Hybrid
0,1399,28,1037,4,Yes,3.5,186005,6.0,12,12,...,0.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0


In [250]:
# Check dtypes before scaling; `Leather interior` is still an object column here.
data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1 entries, 0 to 0
Data columns (total 27 columns):
 #   Column                    Non-Null Count  Dtype  
---  ------                    --------------  -----  
 0   Levy                      1 non-null      int64  
 1   Manufacturer              1 non-null      int64  
 2   Model                     1 non-null      int64  
 3   Category                  1 non-null      int64  
 4   Leather interior          1 non-null      object 
 5   Engine volume             1 non-null      float64
 6   Mileage                   1 non-null      int64  
 7   Cylinders                 1 non-null      float64
 8   Color                     1 non-null      int64  
 9   Airbags                   1 non-null      int64  
 10  Age                       1 non-null      int64  
 11  Gear box type_Automatic   1 non-null      float64
 12  Gear box type_Manual      1 non-null      float64
 13  Gear box type_Tiptronic   1 non-null      float64
 14  Gear box type_

In [251]:
# Numeric columns that are passed through the pickled scaler.
numerical_columns = ['Levy', 'Engine volume', 'Mileage', 'Age']

# NOTE: pickle.load can execute arbitrary code from the file; only load
# artifacts that come from a trusted source.
with open(r'../models/scaler.pkl', 'rb') as scaler_file:
    scalar = pickle.load(scaler_file)

In [252]:
# Replace the raw numeric columns with the scaler's output.
# NOTE: the result is written back into `data`, so running this cell twice
# scales already-scaled values without raising any error.
scaled_values = scalar.transform(data[numerical_columns])
data[numerical_columns] = scaled_values

In [253]:
# Encode the yes/no flag as 1/0.
# The identity entries (1 -> 1, 0 -> 0) keep the cell safe to re-run: without
# them a second run would map the already-encoded values to NaN.
leather_codes = data['Leather interior'].map({'Yes': 1, 'No': 0, 1: 1, 0: 0})

# Series.map yields NaN for any value missing from the mapping (e.g. 'yes' or
# a blank); fail here instead of passing NaN on to the model.
unmapped = leather_codes.isna()
if unmapped.any():
    unexpected = data.loc[unmapped, 'Leather interior'].unique().tolist()
    raise ValueError(
        f"Unexpected 'Leather interior' value(s) {unexpected}; expected 'Yes' or 'No'"
    )

data['Leather interior'] = leather_codes

In [254]:
# Inspect the fully preprocessed row that is passed to the model below.
data

Unnamed: 0,Levy,Manufacturer,Model,Category,Leather interior,Engine volume,Mileage,Cylinders,Color,Airbags,...,Drive wheels_Rear,Wheel_Left wheel,Wheel_Right-hand drive,Fuel type_CNG,Fuel type_Diesel,Fuel type_Hybrid,Fuel type_Hydrogen,Fuel type_LPG,Fuel type_Petrol,Fuel type_Plug-in Hybrid
0,1.809144,28,1037,4,1,2.295052,0.693962,6.0,12,12,...,0.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0


In [255]:
# Load the pickled model used for the predictions below.
# NOTE: pickle.load can execute arbitrary code from the file; only load
# artifacts that come from a trusted source.
with open(r'../models/model.pkl', 'rb') as model_file:
    Random_Forest = pickle.load(model_file)

In [256]:
# Display the loaded model's hyperparameters.
Random_Forest

0,1,2
,n_estimators,100
,criterion,'squared_error'
,max_depth,
,min_samples_split,2
,min_samples_leaf,1
,min_weight_fraction_leaf,0.0
,max_features,1.0
,max_leaf_nodes,
,min_impurity_decrease,0.0
,bootstrap,True


In [257]:
# Predict for the preprocessed row; the result is a one-element array
# (presumably the car's price — confirm against the training target).
# NOTE(review): the columns of `data` must match what the model was fitted on.
Random_Forest.predict(data)

array([13394.77424352])

In [258]:
# A hand-written feature row using the same 27 columns, in the same order, as
# the preprocessed `data` frame. The values look already encoded/scaled
# (presumably copied from the processed dataset — confirm the source).
simple = {
    # Numeric and integer-coded features
    'Levy': -1.2534288417261614,
    'Manufacturer': 35.0,
    'Model': 391.0,
    'Category': 3.0,
    'Leather interior': 1.0,
    'Engine volume': -0.8913595917671067,
    'Mileage': 0.24631097952193284,
    'Cylinders': 4.0,
    'Color': 12.0,
    'Airbags': 4.0,
    'Age': 2.315280471338464,
    # Gear box type indicators
    'Gear box type_Automatic': 0.0,
    'Gear box type_Manual': 1.0,
    'Gear box type_Tiptronic': 0.0,
    'Gear box type_Variator': 0.0,
    # Drive wheels indicators
    'Drive wheels_4x4': 0.0,
    'Drive wheels_Front': 1.0,
    'Drive wheels_Rear': 0.0,
    # Wheel indicators
    'Wheel_Left wheel': 1.0,
    'Wheel_Right-hand drive': 0.0,
    # Fuel type indicators
    'Fuel type_CNG': 0.0,
    'Fuel type_Diesel': 0.0,
    'Fuel type_Hybrid': 0.0,
    'Fuel type_Hydrogen': 0.0,
    'Fuel type_LPG': 0.0,
    'Fuel type_Petrol': 1.0,
    'Fuel type_Plug-in Hybrid': 0.0,
}

In [259]:
# Build a one-row frame from the hand-written feature dict and display it.
data_2 = pd.DataFrame([simple])
data_2

Unnamed: 0,Levy,Manufacturer,Model,Category,Leather interior,Engine volume,Mileage,Cylinders,Color,Airbags,...,Drive wheels_Rear,Wheel_Left wheel,Wheel_Right-hand drive,Fuel type_CNG,Fuel type_Diesel,Fuel type_Hybrid,Fuel type_Hydrogen,Fuel type_LPG,Fuel type_Petrol,Fuel type_Plug-in Hybrid
0,-1.253429,35.0,391.0,3.0,1.0,-0.89136,0.246311,4.0,12.0,4.0,...,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0


In [260]:
# Predict for the first hand-written row.
Random_Forest.predict(data_2)

array([3529.55])

In [261]:
# A second hand-written feature row with the same 27 columns, in the same
# order, as the preprocessed `data` frame. The values look already
# encoded/scaled (presumably copied from the processed dataset — confirm).
simple_2 = {
    # Numeric and integer-coded features
    'Levy': 0.7912053317746903,
    'Manufacturer': 20.0,
    'Model': 698.0,
    'Category': 10.0,
    'Leather interior': 1.0,
    'Engine volume': 0.6179932406860239,
    'Mileage': -0.4914649376261593,
    'Cylinders': 4.0,
    'Color': 14.0,
    'Airbags': 4.0,
    'Age': -0.773310714915757,
    # Gear box type indicators
    'Gear box type_Automatic': 1.0,
    'Gear box type_Manual': 0.0,
    'Gear box type_Tiptronic': 0.0,
    'Gear box type_Variator': 0.0,
    # Drive wheels indicators
    'Drive wheels_4x4': 0.0,
    'Drive wheels_Front': 1.0,
    'Drive wheels_Rear': 0.0,
    # Wheel indicators
    'Wheel_Left wheel': 1.0,
    'Wheel_Right-hand drive': 0.0,
    # Fuel type indicators
    'Fuel type_CNG': 0.0,
    'Fuel type_Diesel': 1.0,
    'Fuel type_Hybrid': 0.0,
    'Fuel type_Hydrogen': 0.0,
    'Fuel type_LPG': 0.0,
    'Fuel type_Petrol': 0.0,
    'Fuel type_Plug-in Hybrid': 0.0,
}

In [262]:
# Build a one-row frame from the second hand-written feature dict.
data_3 = pd.DataFrame([simple_2])

In [263]:
# Predict for the second hand-written row.
Random_Forest.predict(data_3)

array([35790.18])