In [14]:
import pandas as pd

# Load the dataset
file_path = 'Housing.csv'
data = pd.read_csv(file_path)

# Display the first few rows of the dataset
print(data.head())

# Check for any missing values
print(data.isnull().sum())

# Display dataset information
print(data.info())


      price  area  bedrooms  bathrooms  stories mainroad guestroom basement  \
0  13300000  7420         4          2        3      yes        no       no   
1  12250000  8960         4          4        4      yes        no       no   
2  12250000  9960         3          2        2      yes        no      yes   
3  12215000  7500         4          2        2      yes        no      yes   
4  11410000  7420         4          1        2      yes       yes      yes   

  hotwaterheating airconditioning  parking prefarea furnishingstatus  
0              no             yes        2      yes        furnished  
1              no             yes        3       no        furnished  
2              no              no        2      yes   semi-furnished  
3              no             yes        3      yes        furnished  
4              no             yes        2       no        furnished  
price               0
area                0
bedrooms            0
bathrooms           0
stories    

In [15]:
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

# Assuming the target variable is 'price' (modify if different)
target_column = 'price'

# Separate features and target
X = data.drop(columns=[target_column])
y = data[target_column]

# One-hot encode categorical features (if any)
X = pd.get_dummies(X, drop_first=True)

# Split into training and test sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Standardize the data (optional, but helpful for Ridge Regression)
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)


In [16]:
print(X)

     area  bedrooms  bathrooms  stories  parking  mainroad_yes  guestroom_yes  \
0    7420         4          2        3        2          True          False   
1    8960         4          4        4        3          True          False   
2    9960         3          2        2        2          True          False   
3    7500         4          2        2        3          True          False   
4    7420         4          1        2        2          True           True   
..    ...       ...        ...      ...      ...           ...            ...   
540  3000         2          1        1        2          True          False   
541  2400         3          1        1        0         False          False   
542  3620         2          1        1        0          True          False   
543  2910         3          1        1        0         False          False   
544  3850         3          1        2        0          True          False   

     basement_yes  hotwater

In [17]:
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error, r2_score

# Initialize and train the model
linear_model = LinearRegression()
linear_model.fit(X_train, y_train)

# Predictions
y_pred_linear = linear_model.predict(X_test)

# Evaluation
mse_linear = mean_squared_error(y_test, y_pred_linear)
r2_linear = r2_score(y_test, y_pred_linear)

print("Linear Regression Results:")
print(f"Mean Squared Error: {mse_linear}")
print(f"R^2 Score: {r2_linear}")


Linear Regression Results:
Mean Squared Error: 1754318687330.668
R^2 Score: 0.6529242642153176


In [18]:
from sklearn.linear_model import Ridge

# Initialize and train the model with a regularization parameter alpha
ridge_model = Ridge(alpha=1.0)
ridge_model.fit(X_train, y_train)

# Predictions
y_pred_ridge = ridge_model.predict(X_test)

# Evaluation
mse_ridge = mean_squared_error(y_test, y_pred_ridge)
r2_ridge = r2_score(y_test, y_pred_ridge)

print("Ridge Regression Results:")
print(f"Mean Squared Error: {mse_ridge}")
print(f"R^2 Score: {r2_ridge}")


Ridge Regression Results:
Mean Squared Error: 1754839327446.8064
R^2 Score: 0.6528212603810126


In [19]:
from sklearn.tree import DecisionTreeRegressor

# Initialize and train the model
tree_model = DecisionTreeRegressor(max_depth=5, random_state=42)
tree_model.fit(X_train, y_train)

# Predictions
y_pred_tree = tree_model.predict(X_test)

# Evaluation
mse_tree = mean_squared_error(y_test, y_pred_tree)
r2_tree = r2_score(y_test, y_pred_tree)

print("Decision Tree Regression Results:")
print(f"Mean Squared Error: {mse_tree}")
print(f"R^2 Score: {r2_tree}")


Decision Tree Regression Results:
Mean Squared Error: 2701167171509.852
R^2 Score: 0.46559904406211106


In [20]:
print("Model Performance Comparison:")
print(f"Linear Regression - MSE: {mse_linear}, R^2: {r2_linear}")
print(f"Ridge Regression - MSE: {mse_ridge}, R^2: {r2_ridge}")
print(f"Decision Tree Regression - MSE: {mse_tree}, R^2: {r2_tree}")


Model Performance Comparison:
Linear Regression - MSE: 1754318687330.668, R^2: 0.6529242642153176
Ridge Regression - MSE: 1754839327446.8064, R^2: 0.6528212603810126
Decision Tree Regression - MSE: 2701167171509.852, R^2: 0.46559904406211106


In [22]:
import numpy as np
class LinearRegressionScratch:
    def fit(self, X, y):
        # Use the Normal Equation: w = (X^T * X)^-1 * X^T * y
        X_transpose = X.T
        self.weights = np.linalg.inv(X_transpose.dot(X)).dot(X_transpose).dot(y)
        
    def predict(self, X):
        return X.dot(self.weights)

# Train and evaluate the model
linear_model = LinearRegressionScratch()
linear_model.fit(X_train, y_train)

# Predictions
y_pred_linear = linear_model.predict(X_test)

# Calculate MSE
mse_linear = np.mean((y_pred_linear - y_test) ** 2)
print(f"Linear Regression MSE: {mse_linear}")


Linear Regression MSE: 25280545834691.613


In [23]:
class RidgeRegressionScratch:
    def __init__(self, alpha=1.0):
        self.alpha = alpha

    def fit(self, X, y):
        # Use the modified Normal Equation: w = (X^T * X + alpha * I)^-1 * X^T * y
        X_transpose = X.T
        identity_matrix = np.eye(X.shape[1])
        self.weights = np.linalg.inv(X_transpose.dot(X) + self.alpha * identity_matrix).dot(X_transpose).dot(y)

    def predict(self, X):
        return X.dot(self.weights)

# Train and evaluate the Ridge Regression model
ridge_model = RidgeRegressionScratch(alpha=1.0)
ridge_model.fit(X_train, y_train)

# Predictions
y_pred_ridge = ridge_model.predict(X_test)

# Calculate MSE
mse_ridge = np.mean((y_pred_ridge - y_test) ** 2)
print(f"Ridge Regression MSE: {mse_ridge}")


Ridge Regression MSE: 25283175553767.258
