In [4]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import OneHotEncoder, StandardScaler

housing_data=pd.read_csv("Housing.csv")

# Step 1: Preprocess the dataset

# One-hot encode categorical variables
categorical_columns = ['mainroad', 'guestroom', 'basement', 'hotwaterheating', 
                       'airconditioning', 'prefarea', 'furnishingstatus']
housing_data_encoded = pd.get_dummies(housing_data, columns=categorical_columns, drop_first=True)

# Separate features (X) and target variable (y)
X = housing_data_encoded.drop(columns=['price'])
y = housing_data_encoded['price']

# Standardize numerical features for models like Ridge, Lasso, and SVR
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)

# Split the dataset into training and testing sets (80%-20%)
X_train, X_test, y_train, y_test = train_test_split(X_scaled, y, test_size=0.2, random_state=42)

# Inspect shapes of train/test sets
X_train.shape, X_test.shape, y_train.shape, y_test.shape


((436, 13), (109, 13), (436,), (109,))

In [12]:
from sklearn.linear_model import LinearRegression, Ridge, Lasso
from sklearn.preprocessing import PolynomialFeatures
from sklearn.tree import DecisionTreeRegressor
from sklearn.ensemble import RandomForestRegressor
from sklearn.svm import SVR
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score

# Dictionary to store model performance
performance = {}

# Function to evaluate and store model performance
def evaluate_model(model_name, y_true, y_pred):
    mae = mean_absolute_error(y_true, y_pred)
    mse = mean_squared_error(y_true, y_pred)
    r2 = r2_score(y_true, y_pred)
    performance[model_name] = {'MAE': mae, 'MSE': mse, 'R2 Score': r2}

# 1. Linear Regression
linear_model = LinearRegression()
linear_model.fit(X_train, y_train)
y_pred_linear = linear_model.predict(X_test)
evaluate_model("Linear Regression", y_test, y_pred_linear)

# 2. Ridge Regression
ridge_model = Ridge(alpha=1.0)
ridge_model.fit(X_train, y_train)
y_pred_ridge = ridge_model.predict(X_test)
evaluate_model("Ridge Regression", y_test, y_pred_ridge)

# 3. Lasso Regression
lasso_model = Lasso(alpha=0.1)
lasso_model.fit(X_train, y_train)
y_pred_lasso = lasso_model.predict(X_test)
evaluate_model("Lasso Regression", y_test, y_pred_lasso)

# 4. Polynomial Regression (degree 2)
poly_features = PolynomialFeatures(degree=2, include_bias=False)
X_train_poly = poly_features.fit_transform(X_train)
X_test_poly = poly_features.transform(X_test)
poly_model = LinearRegression()
poly_model.fit(X_train_poly, y_train)
y_pred_poly = poly_model.predict(X_test_poly)
evaluate_model("Polynomial Regression", y_test, y_pred_poly)

# 5. Decision Tree Regression
tree_model = DecisionTreeRegressor(random_state=42)
tree_model.fit(X_train, y_train)
y_pred_tree = tree_model.predict(X_test)
evaluate_model("Decision Tree Regression", y_test, y_pred_tree)

# 6. Random Forest Regression
forest_model = RandomForestRegressor(n_estimators=100, random_state=42)
forest_model.fit(X_train, y_train)
y_pred_forest = forest_model.predict(X_test)
evaluate_model("Random Forest Regression", y_test, y_pred_forest)

# 7. Support Vector Regression (SVR)
svr_model = SVR(kernel='rbf', C=100, epsilon=0.1)
svr_model.fit(X_train, y_train)
y_pred_svr = svr_model.predict(X_test)
evaluate_model("Support Vector Regression", y_test, y_pred_svr)

# Display the performance of all models
performance_df = pd.DataFrame(performance).T
#performance_df.sort_values(by="R2 Score", ascending=False)
print(performance_df)

                                    MAE           MSE      R2 Score
Linear Regression          9.700434e+05  1.754319e+12  6.529243e-01
Ridge Regression           9.698183e+05  1.754769e+12  6.528352e-01
Lasso Regression           9.700434e+05  1.754319e+12  6.529242e-01
Polynomial Regression      7.295525e+16  2.900745e+35 -5.738857e+22
Decision Tree Regression   1.195266e+06  2.642803e+12  4.771459e-01
Random Forest Regression   1.017471e+06  1.959406e+12  6.123496e-01
Support Vector Regression  1.762567e+06  5.563030e+12 -1.005941e-01
