In [ ]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
import warnings
warnings.simplefilter(action='ignore', category=FutureWarning)


In [ ]:
# Read 'Cardetails.csv' (path relative to the current working directory)
# into the DataFrame that every later cell works on.
car_df = pd.read_csv(filepath_or_buffer='Cardetails.csv')


In [ ]:
# Clean the loaded table in three steps: discard the 'torque' column, remove
# every row that has a missing value, then remove exact duplicate rows.
car_df = (
    car_df
    .drop(columns='torque')
    .dropna()
    .drop_duplicates()
)


In [ ]:
# Utility functions for cleaning and transforming data
def get_brand_name(car_name):
    """Return the first whitespace-delimited word of ``car_name``.

    Raises:
        IndexError: If ``car_name`` is empty or contains only whitespace.
    """
    # A no-argument split already discards surrounding whitespace, so the
    # first token needs no further trimming.
    return car_name.split(maxsplit=1)[0]

def clean_numeric(value):
    """Extract the leading number from a value such as ``'23.4 kmpl'``.

    Args:
        value: Either a string whose first whitespace-separated token is a
            number (optionally followed by a unit), or an already numeric
            value.

    Returns:
        The number as a ``float``. ``0.0`` is returned when the string holds
        no leading number (empty string, whitespace only, or a bare unit such
        as ``' bhp'``).
    """
    # Already-numeric input passes straight through, so applying this function
    # a second time to a converted column does not fail.
    if not isinstance(value, str):
        return float(value)

    # A no-argument split ignores leading/trailing whitespace; splitting on a
    # single ' ' would yield '' as the first token for ' 12 kmpl'.
    tokens = value.split()
    if not tokens:
        return 0.0

    try:
        return float(tokens[0])
    except ValueError:
        # First token is not a number (e.g. only the unit is present); use the
        # same 0.0 fallback as for an empty value.
        return 0.0


In [ ]:
# Transform data using utility functions: keep only the first word of the
# car name, and turn the '<number> <unit>' text columns into floats.
car_df['name'] = car_df['name'].apply(get_brand_name)
for numeric_col in ('mileage', 'max_power', 'engine'):
    car_df[numeric_col] = car_df[numeric_col].apply(clean_numeric)

# Encode categorical variables as integer codes.
# NOTE: the 'name' codes are assigned in order of first appearance in the
# data, so they change if the CSV rows change or are reordered.
encode_dicts = {
    'name': {name: idx + 1 for idx, name in enumerate(car_df['name'].unique())},
    'transmission': {'Manual': 1, 'Automatic': 2},
    'seller_type': {'Individual': 1, 'Dealer': 2, 'Trustmark Dealer': 3},
    'fuel': {'Diesel': 1, 'Petrol': 2, 'LPG': 3, 'CNG': 4},
    'owner': {
        'First Owner': 1,
        'Second Owner': 2,
        'Third Owner': 3,
        'Fourth & Above Owner': 4,
        'Test Drive Car': 5,
    },
}
for col, mapping in encode_dicts.items():
    # Assign the mapped column back instead of calling replace(inplace=True)
    # on car_df[col]: that chained in-place call operates on a temporary
    # Series and does not update car_df under pandas Copy-on-Write.
    encoded = car_df[col].map(mapping)

    # map() yields NaN for any category missing from the mapping; fail here
    # with a clear message rather than later inside model fitting.
    unmapped = car_df.loc[encoded.isna(), col].unique()
    if len(unmapped) > 0:
        raise ValueError(
            f"Column {col!r} contains values with no encoding: {list(unmapped)}"
        )
    car_df[col] = encoded.astype(int)


In [ ]:
# Separate the target column (selling_price) from the feature columns
output_data = car_df['selling_price']
input_data = car_df.drop(columns='selling_price')

# Hold out 20% of the rows for testing; the fixed random_state makes the
# split reproducible between runs
x_train, x_test, y_train, y_test = train_test_split(
    input_data,
    output_data,
    test_size=0.2,
    random_state=42,
)

# Fit a linear regression on the training portion (fit() returns the
# fitted estimator itself)
model = LinearRegression().fit(x_train, y_train)

# Predict selling prices for the held-out rows and show them
predictions = model.predict(x_test)
print(predictions)


In [ ]:
# Predict the selling price of one hand-written car record.
# Categorical fields use the integer codes from encode_dicts; the columns are
# listed in the same order as the training features.
new_data = pd.DataFrame({
    'name': [9],          # brand code; depends on brand order in the data
    'year': [2023],
    'km_driven': [9000],
    'fuel': [2],          # 2 = Petrol
    'seller_type': [1],   # 1 = Individual
    'transmission': [1],  # 1 = Manual
    'owner': [1],         # 1 = First Owner
    'mileage': [20.3],
    'engine': [1199.0],
    'max_power': [84.0],
    'seats': [5.0],
})
new_prediction = model.predict(new_data)
print(new_prediction)
