Corolla



In [3]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, OneHotEncoder
from sklearn.linear_model import LinearRegression, Ridge, Lasso
from sklearn.tree import DecisionTreeRegressor
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import r2_score

# Load the dataset (ensure you have the correct path)
data_path = '/content/ToyotaCorolla (1).csv'
toyota_corolla_data = pd.read_csv(data_path, encoding='ISO-8859-1')

# Selecting features and target variable
features = ['Age_08_04', 'KM', 'Fuel_Type', 'HP', 'Met_Color', 'Automatic', 'cc', 'Doors', 'Quarterly_Tax', 'Weight']
target = 'Price'

# Encoding categorical variables and feature scaling
toyota_corolla_encoded = pd.get_dummies(toyota_corolla_data[features + [target]], drop_first=True)
scaler = StandardScaler()
scaled_features = scaler.fit_transform(toyota_corolla_encoded.drop('Price', axis=1))
scaled_features_df = pd.DataFrame(scaled_features, columns=toyota_corolla_encoded.drop('Price', axis=1).columns)

# Splitting the data
X = scaled_features_df
y = toyota_corolla_encoded['Price']
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Initialize and train models
models = {
    'Linear Regression': LinearRegression(),
    'Ridge Regression': Ridge(),
    'Lasso Regression': Lasso(),
    'Decision Tree Regressor': DecisionTreeRegressor(),
    'Random Forest Regressor': RandomForestRegressor(n_estimators=100)
}

# Dictionary to store R^2 values
r2_values = {}

# Train each model, make predictions, and calculate R^2 values
for name, model in models.items():
    model.fit(X_train, y_train)
    y_pred = model.predict(X_test)
    r2_values[name] = r2_score(y_test, y_pred)

# Display R^2 values
r2_values_df = pd.DataFrame(list(r2_values.items()), columns=['Model', 'R^2 Value']).sort_values(by='R^2 Value', ascending=False)
print(r2_values_df)


                     Model  R^2 Value
4  Random Forest Regressor   0.916851
3  Decision Tree Regressor   0.870289
2         Lasso Regression   0.835601
1         Ridge Regression   0.835519
0        Linear Regression   0.835293


The Random Forest Regressor performs the best with an
2
R
2
  value of approximately 0.917, indicating a strong predictive power on this dataset
