In [None]:
import pandas as pd

# Load the dataset
df = pd.read_csv('CarPrice_Assignment.csv')

# Display the first few rows of the dataset
print(df.head())

# Check for missing values (count of nulls per column)
print(df.isnull().sum())

# Get basic statistics of the numeric columns
print(df.describe())

# Check data types.
# DataFrame.info() writes its report to stdout itself and returns None, so it
# must not be wrapped in print() -- doing so appends a stray "None" line.
df.info()
from sklearn.preprocessing import LabelEncoder, OneHotEncoder
from sklearn.model_selection import train_test_split

# Handle missing values (if any).
# The median is restricted to numeric columns: at this point the frame still
# contains string columns (e.g. 'CarName', 'fueltype'), and pandas >= 2.0
# raises a TypeError when asked for the median of non-numeric data instead of
# silently skipping it. Non-numeric columns are left untouched by this fill.
df.fillna(df.median(numeric_only=True), inplace=True)

# Convert categorical variables into numerical values.
# Each listed column is replaced in place by integer codes. The single encoder
# is re-fitted for every column, so once the loop finishes `label_encoder`
# only remembers the classes of the last column ('fuelsystem').
categorical_columns = [
    'fueltype',
    'aspiration',
    'doornumber',
    'carbody',
    'drivewheel',
    'enginelocation',
    'enginetype',
    'cylindernumber',
    'fuelsystem',
]
label_encoder = LabelEncoder()
for column in categorical_columns:
    df[column] = label_encoder.fit_transform(df[column])

# Separate the target ('price') from the predictors, dropping the row id and
# the free-text car name, then hold out 20% of the rows for testing.
y = df['price']
X = df.drop(columns=['price', 'car_ID', 'CarName'])
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)
from sklearn.ensemble import RandomForestRegressor

# Fit a seeded 100-tree random forest on the training split and use it to
# rank the input features.
rf = RandomForestRegressor(random_state=42, n_estimators=100)
rf.fit(X_train, y_train)

# Pair every feature name with its importance score and list them from the
# most to the least important.
importances = rf.feature_importances_
feature_importances = pd.DataFrame(
    {'feature': X.columns, 'importance': importances}
).sort_values(by='importance', ascending=False)
print(feature_importances)
from sklearn.linear_model import LinearRegression, Ridge, Lasso
from sklearn.tree import DecisionTreeRegressor
from sklearn.ensemble import RandomForestRegressor, GradientBoostingRegressor
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score

# Initialize models.
# Maps a display name to an unfitted estimator. The tree-based estimators are
# seeded with the same random_state used for the train/test split so that the
# reported metrics are reproducible from run to run.
models = {
    'Linear Regression': LinearRegression(),
    'Ridge Regression': Ridge(),
    'Lasso Regression': Lasso(),
    'Decision Tree': DecisionTreeRegressor(random_state=42),
    'Random Forest': RandomForestRegressor(random_state=42),
    'Gradient Boosting': GradientBoostingRegressor(random_state=42),
}

# Train and evaluate models.
# Every estimator in `models` is fitted on the training split and scored on
# the held-out test split; `results` maps model name -> dict of metrics.
results = {}
for name, model in models.items():
    model.fit(X_train, y_train)
    y_pred = model.predict(X_test)
    mae = mean_absolute_error(y_test, y_pred)
    mse = mean_squared_error(y_test, y_pred)
    # RMSE is derived from the MSE rather than requested via `squared=False`:
    # that keyword was deprecated in scikit-learn 1.4 and removed in 1.6, so
    # taking the square root here works on every version.
    rmse = mse ** 0.5
    r2 = r2_score(y_test, y_pred)
    results[name] = {'MAE': mae, 'MSE': mse, 'RMSE': rmse, 'R2': r2}

# Display results: one line per model showing the raw metrics dictionary.
for name in results:
    metrics = results[name]
    print(f"{name}: {metrics}")

# Compare model performance: the same numbers again, one metric per line.
for name, metrics in results.items():
    print(f"{name}:")
    for metric_key in ('MAE', 'MSE', 'RMSE', 'R2'):
        print(f"  {metric_key}: {metrics[metric_key]}")


   car_ID  symboling                   CarName fueltype aspiration doornumber  \
0       1          3        alfa-romero giulia      gas        std        two   
1       2          3       alfa-romero stelvio      gas        std        two   
2       3          1  alfa-romero Quadrifoglio      gas        std        two   
3       4          2               audi 100 ls      gas        std       four   
4       5          2                audi 100ls      gas        std       four   

       carbody drivewheel enginelocation  wheelbase  ...  enginesize  \
0  convertible        rwd          front       88.6  ...         130   
1  convertible        rwd          front       88.6  ...         130   
2    hatchback        rwd          front       94.5  ...         152   
3        sedan        fwd          front       99.8  ...         109   
4        sedan        4wd          front       99.4  ...         136   

   fuelsystem  boreratio  stroke compressionratio horsepower  peakrpm citympg  \

  df.fillna(df.median(), inplace=True)


             feature  importance
14        enginesize    0.557068
11        curbweight    0.296978
22        highwaympg    0.045955
19        horsepower    0.026930
9           carwidth    0.014485
8          carlength    0.009149
7          wheelbase    0.007700
20           peakrpm    0.006920
21           citympg    0.006640
17            stroke    0.005352
16         boreratio    0.004605
18  compressionratio    0.003700
10         carheight    0.003509
15        fuelsystem    0.002330
4            carbody    0.002135
5         drivewheel    0.001456
13    cylindernumber    0.001367
2         aspiration    0.001131
0          symboling    0.001130
12        enginetype    0.000839
3         doornumber    0.000525
1           fueltype    0.000081
6     enginelocation    0.000016
