In [None]:
import pandas as pd
import numpy as np
from statsmodels.stats.outliers_influence import variance_inflation_factor
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error, r2_score

# Car-price regression walkthrough: load the data, integer-encode the
# categorical columns, report multicollinearity (VIF), then fit and score a
# plain linear regression on an 80/20 train/test split.

# Load data (path is relative to the working directory).
df = pd.read_csv("car_Data.csv")

# Report per-column missing-value counts before any rows are removed.
print("🔹 Missing Values:\n", df.isnull().sum())

# Drop rows containing any missing value.
df.dropna(inplace=True)

# Encode categorical variables as integer codes.
# NOTE(review): LabelEncoder assigns arbitrary integer codes, which a linear
# model treats as an ordered numeric scale. One-hot encoding is usually the
# better fit for nominal columns; left unchanged here so the fitted model
# keeps the same feature set.
categorical_cols = ["Make", "Model", "FuelType"]  # Adjust column names if needed
label_encoders = {}

for col in categorical_cols:
    le = LabelEncoder()
    df[col] = le.fit_transform(df[col])
    label_encoders[col] = le  # Keep the fitted encoder to map codes back later

# Define features & target.
X = df.drop(columns=["Price"])  # Features
y = df["Price"]  # Target variable

# Variance Inflation Factor (VIF).
# variance_inflation_factor regresses column i on the remaining columns of the
# matrix it is given and does not add an intercept itself. Without an
# intercept column those auxiliary regressions are forced through the origin,
# which inflates the VIF of any feature whose mean is far from zero. The
# intercept is therefore appended to the design matrix (as the last column)
# and only the real features, indices 0..n_features-1, are reported.
vif_design = X.assign(_intercept=1.0).to_numpy(dtype=float)
vif_data = pd.DataFrame(
    {
        "Feature": X.columns,
        "VIF": [
            variance_inflation_factor(vif_design, i)
            for i in range(X.shape[1])
        ],
    }
)

print("\n🔹 Variance Inflation Factor (VIF) Scores:")
print(vif_data.sort_values(by="VIF", ascending=False))

# Split data: 20% held out for testing, fixed seed for reproducibility.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

# Train the linear regression model on the training split only.
model = LinearRegression()
model.fit(X_train, y_train)

# Predict on the held-out split.
y_pred = model.predict(X_test)

# Model evaluation on the held-out split.
mse = mean_squared_error(y_test, y_pred)
r2 = r2_score(y_test, y_pred)

print("\n🔹 Model Performance Metrics:")
print(f"Mean Squared Error: {mse:.2f}")
print(f"R² Score: {r2:.2f}")


🔹 Missing Values:
 Make          0
Model         0
Year          0
Mileage       0
EngineSize    0
FuelType      0
Price         0
dtype: int64

🔹 Variance Inflation Factor (VIF) Scores:
      Feature        VIF
2        Year  17.257069
4  EngineSize   8.488756
3     Mileage   4.635105
1       Model   3.827806
0        Make   3.542478
5    FuelType   2.815736

🔹 Model Performance Metrics:
Mean Squared Error: 0.08
R² Score: 1.00
