# In [3]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.preprocessing import OneHotEncoder
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline

import pickle

# Read the cleaned car listings into a DataFrame.
car = pd.read_csv('Cleaned_Car_data.csv')

# Column roles: the categorical columns get one-hot encoded, the numeric
# columns are handed to the regressor unchanged.
numeric_features = ['year', 'kms_driven']
categorical_features = ['name', 'company', 'fuel_type']

# Target first, then the predictor columns (column order kept as-is).
y = car['Price']
X = car[['name', 'company', 'year', 'kms_driven', 'fuel_type']]

# Hold out 20% of the rows; the fixed seed keeps the split reproducible.
# NOTE(review): X_test / y_test are not used in the visible code --
# presumably a later cell evaluates the model on them; confirm.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

# handle_unknown='ignore' makes the encoder emit an all-zero row for a
# category it did not see during fit instead of raising at predict time.
preprocessor = ColumnTransformer([
    ('cat', OneHotEncoder(handle_unknown='ignore'), categorical_features),
    ('num', 'passthrough', numeric_features),
])

# Bundling preprocessing with the regressor means the encoder is fitted on
# the training rows only, and the pickled object accepts raw feature frames.
model_pipeline = Pipeline([
    ('preprocessor', preprocessor),
    ('model', LinearRegression()),
])

# Fit encoder and regressor together on the training split.
model_pipeline.fit(X_train, y_train)

# Persist the fitted pipeline (preprocessing + model) to disk.
with open('LinearRegressionModel.pkl', 'wb') as f:
    pickle.dump(model_pipeline, f)
