In [1]:
# Step 1: Import libraries
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score

# Step 2: Load dataset
df = pd.read_csv('Sport car price.csv')

# Step 3: Convert Price to numeric (remove thousands separators)
# regex=True makes Series.replace substitute the comma *inside* each string;
# without it only cells that are exactly ',' would be replaced.
# astype(float) is deliberately strict: an unparseable price raises here
# instead of being silently replaced by an invented value.
target_col = 'Price (in USD)'
df[target_col] = df[target_col].replace(',', '', regex=True).astype(float)

# Rows without a price cannot be used for training or evaluation. Drop them
# rather than letting the mean-imputation in Step 7 fabricate a target value.
df = df.dropna(subset=[target_col])

# Step 4: Drop non-numeric / problematic columns
# errors='ignore' skips any listed column that is not present in the file.
columns_to_drop = ['Car Model', 'Engine Size (L)']  # Engine Size contains 'Electric'
df = df.drop(columns=columns_to_drop, errors='ignore')

# Step 5: Encode categorical columns (Car Make)
# drop_first=True omits one indicator column per category to avoid a
# perfectly collinear set of dummies in the linear model.
df = pd.get_dummies(df, columns=['Car Make'], drop_first=True)

# Step 6: Convert remaining numeric columns from string to float
# errors='coerce' turns values that still fail to parse into NaN so that
# Step 7 can impute them.
numeric_cols = ['Horsepower', 'Torque (lb-ft)', '0-60 MPH Time (seconds)']
for col in numeric_cols:
    if col in df.columns:
        df[col] = pd.to_numeric(df[col].replace(',', '', regex=True), errors='coerce')

# Step 7: Fill any missing feature values with the column mean
# numeric_only=True keeps this from raising on pandas >= 2 if a non-numeric
# column is still present; such columns are simply left untouched.
# NOTE: the means are computed over every row, including rows that end up in
# the test split in Step 9, so the evaluation is slightly optimistic.
df = df.fillna(df.mean(numeric_only=True))

# Step 8: Separate the target (y) from the feature matrix (X)
target_name = 'Price (in USD)'
y = df[target_name]
X = df.drop(columns=[target_name])

# Step 9: Hold out 20% of the rows for testing; the fixed seed keeps the
# split reproducible between runs.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Report the shape of each split as a quick sanity check
for shape_label, split_part in (
    ("X_train shape:", X_train),
    ("X_test shape:", X_test),
    ("y_train shape:", y_train),
    ("y_test shape:", y_test),
):
    print(shape_label, split_part.shape)

# Step 10: Fit a linear regression model on the training split
# (fit() returns the estimator itself, so construction and fitting chain)
model = LinearRegression().fit(X_train, y_train)

# Step 11: Predict prices for the held-out rows
y_pred = model.predict(X_test)

# Step 12: Show the first 5 predictions next to the true prices
head = slice(None, 5)
print("Predicted Prices:", y_pred[head])
print("Actual Prices:", y_test.to_numpy()[head])

# Step 13: Score the predictions against the true test prices
mae, mse, r2 = (
    metric(y_test, y_pred)
    for metric in (mean_absolute_error, mean_squared_error, r2_score)
)

for metric_label, metric_value in (
    ("Mean Absolute Error (MAE):", mae),
    ("Mean Squared Error (MSE):", mse),
    ("R² Score:", r2),
):
    print(metric_label, metric_value)


X_train shape: (805, 41)
X_test shape: (202, 41)
y_train shape: (805,)
y_test shape: (202,)
Predicted Prices: [ 101972.82888936  204356.69762118  258040.84927354  -17670.32140706
 -100755.96568233]
Actual Prices: [ 75250. 201000. 310000. 220000.  71500.]
Mean Absolute Error (MAE): 116221.79836443882
Mean Squared Error (MSE): 106583300634.4189
R² Score: 0.8243126430040657
