# **1. Import required libraries**

In [8]:
import pandas as pd
from sklearn.pipeline import Pipeline, make_pipeline
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error,r2_score
from sklearn.preprocessing import OneHotEncoder, LabelEncoder
from sklearn.preprocessing import PolynomialFeatures
from sklearn.compose import ColumnTransformer
from sklearn.preprocessing import LabelEncoder, FunctionTransformer
import numpy as np
import pickle
import joblib

# **2. Data Processing**

In [10]:
# Custom function to handle label encoding
def label_encode(df, columns):
    """Integer-encode the given columns, one freshly fitted LabelEncoder per column.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame containing every column named in ``columns``.
    columns : iterable of str
        Names of the columns to encode.

    Returns
    -------
    tuple
        ``(encoded_df, label_encoders)``: ``encoded_df`` is a copy of ``df`` with
        the listed columns replaced by the encoder output, and ``label_encoders``
        maps each column name to the LabelEncoder that was fitted on it.

    Raises
    ------
    KeyError
        If a name in ``columns`` is not a column of ``df``.
    """
    # Encode a copy: assigning into the caller's frame would silently change it,
    # so re-running a notebook cell would operate on already-encoded data.
    df = df.copy()
    label_encoders = {}
    for column in columns:
        le = LabelEncoder()
        df[column] = le.fit_transform(df[column])
        # Keep the fitted encoder so the integer codes can be mapped back later.
        label_encoders[column] = le
    return df, label_encoders

# Custom function for full preprocessing
def full_preprocessing(df):
    """Drop incomplete rows, then label-encode the categorical columns.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame containing at least the seven categorical columns listed below.

    Returns
    -------
    pandas.DataFrame
        A new frame without rows that had missing values and with the
        categorical columns replaced by integer codes. The input frame is left
        unchanged. Rows keep their original index labels, so the result may
        have fewer rows than the input.

    Notes
    -----
    The encoders are fitted anew on every call and then discarded, so the
    integer assigned to a category depends on the data passed in that call.
    """
    columns_for_encoding = ['origin', 'car_model', 'exterior_color', 'interior_color', 'engine', 'transmission', 'drive_type']
    # Drop incomplete rows BEFORE encoding: once a column is encoded, a missing
    # categorical value is no longer NaN (or the encoder fails on it, depending
    # on the scikit-learn version), so a later dropna could never remove it.
    # .copy() detaches the result so the column assignments below cannot touch
    # the caller's frame.
    df = df.dropna().copy()
    df, _ = label_encode(df, columns_for_encoding)
    return df

# FunctionTransformer to wrap the custom functions
def _label_encode_frame(df, columns):
    """Label-encode ``columns`` and return only the encoded frame.

    label_encode returns a ``(frame, encoders)`` tuple; a transformer has to
    return a single table, so the encoders are dropped here.
    """
    encoded_df, _ = label_encode(df, columns)
    return encoded_df

label_encode_transformer = FunctionTransformer(_label_encode_frame, kw_args={'columns': ['origin', 'car_model', 'exterior_color', 'interior_color', 'engine', 'transmission', 'drive_type']})
full_preprocessing_transformer = FunctionTransformer(full_preprocessing)

# Define the pipeline
# NOTE(review): FunctionTransformer keeps no fitted state, so the label encoders
# are re-fitted on whatever frame is passed to transform(); codes produced for
# new data are not guaranteed to match the codes seen at training time.
# NOTE(review): the wrapped functions are pickled by reference, so they must be
# importable/defined under the same names wherever this pipeline is loaded.
pipeline = Pipeline([
    ('full_preprocessing', full_preprocessing_transformer)
])



In [None]:
# Feature columns that full_preprocessing leaves as-is (not label-encoded).
# Stored as strings in a named list so the cell runs on a fresh kernel instead
# of evaluating undefined bare names.
NON_ENCODED_COLUMNS = [
    'num_of_doors',
    'seating_capacity',
    'engine_capacity',
    'fuel_consumption',
    'Brand_Ranking',
    'Yeo_Johnson_mileage',
    'age',
]


In [11]:
# Load data
# Raw string: the backslashes in the Windows path are literal characters, not
# escape sequences (a plain string only works by accident and triggers an
# invalid-escape warning on newer Python versions).
df = pd.read_csv(r'D:\Project\Car-Evaluation\Dataset\Final\DataVersion3.csv')

# Keep the target aside and remove it, plus the identifier/free-text columns,
# from the feature frame.
price_column = df['price_in_billion']
df = df.drop(columns=['price_in_billion','ad_id','car_name'])

# Apply pipeline
df_processed = pipeline.fit_transform(df)

# Divide into features and target
features = df_processed
# Preprocessing may drop rows with missing values; select the target by the
# surviving index labels so features and target stay row-aligned.
target = price_column.loc[features.index]

# Show the processed data
print(features.head())
print(target.head())

   origin  car_model  exterior_color  interior_color  num_of_doors  \
0       0          7              14               8             2   
1       1          5               0               0             5   
2       0          2              12               1             5   
3       1          5              14               0             5   
4       0          2              12               8             5   

   seating_capacity  engine  engine_capacity  transmission  drive_type  \
0                 2       3              1.0             1           4   
1                 7       3              3.4             0           2   
2                 8       3              2.0             0           4   
3                 5       3              1.8             0           3   
4                 8       3              2.0             0           4   

   fuel_consumption  Brand_Ranking  Yeo_Johnson_mileage  age  
0              10.1             19             2.312535    2  
1       

In [12]:
# Persist the preprocessing pipeline to disk so it can be reloaded later.
joblib.dump(value=pipeline, filename='poly_regress_processor.pkl')

['poly_regress_processor.pkl']

# **3. Build Polynomial Regression Model**

In [13]:
# Hold out 20% of the rows for evaluation; the fixed seed keeps the split reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    features,
    target,
    test_size=0.2,
    random_state=42,
)

# Degree-2 polynomial expansion, fitted on the training rows only and then
# applied to both splits.
poly_features = PolynomialFeatures(degree=2, include_bias=True)
poly_features.fit(X_train)
X_train_poly = poly_features.transform(X_train)
X_test_poly = poly_features.transform(X_test)

# Train Regression model on Polynomial features
model = LinearRegression()
model.fit(X_train_poly, y_train)
# Disabled alternative (Ridge is not imported in this notebook):
#ridge_model = Ridge(alpha=1.0)
#ridge_model.fit(X_train_poly, y_train)

In [14]:
# Compare the feature count before and after the polynomial expansion.
for matrix in (X_train, X_train_poly):
    print(matrix.shape)

(20635, 14)
(20635, 120)


# **4. Evaluate Model**

In [15]:
# Score the fitted model on the held-out split.
y_pred = model.predict(X_test_poly)

# RMSE is the square root of the mean squared error.
mse = mean_squared_error(y_true=y_test, y_pred=y_pred)
rmse = np.sqrt(mse)
r2 = r2_score(y_true=y_test, y_pred=y_pred)

print("Root Mean Squared Error (RMSE):", rmse)
print("R2 Score:", r2)

Root Mean Squared Error (RMSE): 1.089449608731293
R2 Score: 0.691909922473662


In [16]:
# Persist the fitted regression model to disk.
joblib.dump(value=model, filename="pr_model.joblib")

['pr_model.joblib']