In [79]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
import matplotlib.pyplot as plt
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import mean_squared_error
from sklearn.metrics import r2_score

In [125]:
class Linear_Regression():

  #initiating the parameters (learning rate & no of iterations)
  def __init__(self, learning_rate, no_of_iterations):
     self.learning_rate = learning_rate
     self.no_of_iterations = no_of_iterations

  def fit(self, x, y):

    # no of training examples & no of features

    self.m, self.n = x.shape # no of rows & columns

    # initiating the weight and bias

    self.w = np.zeros(self.n)
    self.b = 0
    self.x = x
    self.y = y

    #implementing gradient descent

    for i in range(self.no_of_iterations):
      self.update_weights()

  def update_weights(self):
    y_prediction = self.predict(self.x)

    #calculate gradients

    dw = -(2 * (self.x.T).dot(self.y - y_prediction))/ self.m

    db = -2*np.sum(self.y - y_prediction)/self.m

    #updating the weights
    self.w = self.w - self.learning_rate*dw
    self.b = self.b - self.learning_rate*db


  def predict(self, x):
    return x.dot(self.w) + self.b


Data Pre-Processing

In [126]:
housing_df = pd.read_csv('/content/Housing.csv')

In [127]:
housing_df.head()

Unnamed: 0,price,area,bedrooms,bathrooms,stories,mainroad,guestroom,basement,hotwaterheating,airconditioning,parking,prefarea,furnishingstatus
0,13300000,7420,4,2,3,yes,no,no,no,yes,2,yes,furnished
1,12250000,8960,4,4,4,yes,no,no,no,yes,3,no,furnished
2,12250000,9960,3,2,2,yes,no,yes,no,no,2,yes,semi-furnished
3,12215000,7500,4,2,2,yes,no,yes,no,yes,3,yes,furnished
4,11410000,7420,4,1,2,yes,yes,yes,no,yes,2,no,furnished


In [128]:
housing_df.tail()

Unnamed: 0,price,area,bedrooms,bathrooms,stories,mainroad,guestroom,basement,hotwaterheating,airconditioning,parking,prefarea,furnishingstatus
540,1820000,3000,2,1,1,yes,no,yes,no,no,2,no,unfurnished
541,1767150,2400,3,1,1,no,no,no,no,no,0,no,semi-furnished
542,1750000,3620,2,1,1,yes,no,no,no,no,0,no,unfurnished
543,1750000,2910,3,1,1,no,no,no,no,no,0,no,furnished
544,1750000,3850,3,1,2,yes,no,no,no,no,0,no,unfurnished


In [129]:
housing_df.shape

(545, 13)

In [130]:
housing_df.isnull().sum()

Unnamed: 0,0
price,0
area,0
bedrooms,0
bathrooms,0
stories,0
mainroad,0
guestroom,0
basement,0
hotwaterheating,0
airconditioning,0


In [131]:
mapping_yes_no = {"yes": 1, "no": 0}
mapping_furnishing = {"unfurnished": 0, "semi-furnished": 1, "furnished": 2}

# Apply mappings
for col in ["mainroad", "guestroom", "basement", "hotwaterheating", "airconditioning", "prefarea"]:
    housing_df[col] = housing_df[col].map(mapping_yes_no)

housing_df["furnishingstatus"] = housing_df["furnishingstatus"].map(mapping_furnishing)
print(housing_df)

        price  area  bedrooms  bathrooms  stories  mainroad  guestroom  \
0    13300000  7420         4          2        3         1          0   
1    12250000  8960         4          4        4         1          0   
2    12250000  9960         3          2        2         1          0   
3    12215000  7500         4          2        2         1          0   
4    11410000  7420         4          1        2         1          1   
..        ...   ...       ...        ...      ...       ...        ...   
540   1820000  3000         2          1        1         1          0   
541   1767150  2400         3          1        1         0          0   
542   1750000  3620         2          1        1         1          0   
543   1750000  2910         3          1        1         0          0   
544   1750000  3850         3          1        2         1          0   

     basement  hotwaterheating  airconditioning  parking  prefarea  \
0           0                0           

In [132]:
x = housing_df.iloc[:, 1:].values
y = housing_df.iloc[:, 0].values

In [133]:
print(x)
print(y)

[[7420    4    2 ...    2    1    2]
 [8960    4    4 ...    3    0    2]
 [9960    3    2 ...    2    1    1]
 ...
 [3620    2    1 ...    0    0    0]
 [2910    3    1 ...    0    0    2]
 [3850    3    1 ...    0    0    0]]
[13300000 12250000 12250000 12215000 11410000 10850000 10150000 10150000
  9870000  9800000  9800000  9681000  9310000  9240000  9240000  9100000
  9100000  8960000  8890000  8855000  8750000  8680000  8645000  8645000
  8575000  8540000  8463000  8400000  8400000  8400000  8400000  8400000
  8295000  8190000  8120000  8080940  8043000  7980000  7962500  7910000
  7875000  7840000  7700000  7700000  7560000  7560000  7525000  7490000
  7455000  7420000  7420000  7420000  7350000  7350000  7350000  7350000
  7343000  7245000  7210000  7210000  7140000  7070000  7070000  7035000
  7000000  6930000  6930000  6895000  6860000  6790000  6790000  6755000
  6720000  6685000  6650000  6650000  6650000  6650000  6650000  6650000
  6629000  6615000  6615000  6580000  6510

Standardizing and Splitting the dataset into training and test data

In [134]:
scaler = StandardScaler()
scaler.fit(x)
standardized_data = scaler.transform(x)
scaler_y = StandardScaler()
y = scaler_y.fit_transform(y.reshape(-1, 1)).flatten()
x = standardized_data
x_train, x_test, y_train, y_test = train_test_split(x, y , test_size=0.3, random_state= 2)

In [139]:
print(x)
print(y)

[[ 1.04672629  1.40341936  1.42181174 ...  1.51769249  1.80494113
   1.40628573]
 [ 1.75700953  1.40341936  5.40580863 ...  2.67940935 -0.55403469
   1.40628573]
 [ 2.21823241  0.04727831  1.42181174 ...  1.51769249  1.80494113
   0.09166185]
 ...
 [-0.70592066 -1.30886273 -0.57018671 ... -0.80574124 -0.55403469
  -1.22296203]
 [-1.03338891  0.04727831 -0.57018671 ... -0.80574124 -0.55403469
   1.40628573]
 [-0.5998394   0.04727831 -0.57018671 ... -0.80574124 -0.55403469
  -1.22296203]]
[ 4.56636513e+00  4.00448405e+00  4.00448405e+00  3.98575468e+00
  3.55497918e+00  3.25530927e+00  2.88072189e+00  2.88072189e+00
  2.73088693e+00  2.69342819e+00  2.69342819e+00  2.62974834e+00
  2.43121702e+00  2.39375829e+00  2.39375829e+00  2.31884081e+00
  2.31884081e+00  2.24392333e+00  2.20646459e+00  2.18773522e+00
  2.13154711e+00  2.09408838e+00  2.07535901e+00  2.07535901e+00
  2.03790027e+00  2.01917090e+00  1.97796629e+00  1.94425342e+00
  1.94425342e+00  1.94425342e+00  1.94425342e+00  1.9

Training the Linear Regression Model

In [195]:
model = Linear_Regression(learning_rate= 0.0002, no_of_iterations= 10000)
model.fit(x_train, y_train)

In [196]:
print("weights = ", model.w)
print("bias = ", model.b)

weights =  [0.26006805 0.04512236 0.25846569 0.18974275 0.06535929 0.07776211
 0.09487937 0.08855128 0.22956149 0.13350931 0.1498135  0.10661043]
bias =  -0.013144403871194265


In [197]:
# Make predictions on the training and test data
y_train_pred = model.predict(x_train)
y_test_pred = model.predict(x_test)

# Calculate MSE and R² for training data
train_mse = mean_squared_error(y_train, y_train_pred)
train_r2 = r2_score(y_train, y_train_pred)

# Calculate MSE and R² for test data
test_mse = mean_squared_error(y_test, y_test_pred)
test_r2 = r2_score(y_test, y_test_pred)

# Print the results
print("📊 Training Performance:")
print("  ➤ MSE:", train_mse)
print("  ➤ R² Score:", train_r2)

print("\n📊 Test Performance:")
print("  ➤ MSE:", test_mse)
print("  ➤ R² Score:", test_r2)

📊 Training Performance:
  ➤ MSE: 0.30275026817997913
  ➤ R² Score: 0.6900694461178505

📊 Test Performance:
  ➤ MSE: 0.3670899098648018
  ➤ R² Score: 0.6512173923705897


## Model Performance Interpretation

### 🔹 **Training Performance**
- **MSE (Mean Squared Error):** `0.3027`
  - A lower MSE indicates that the model's predictions are relatively close to the actual values.
- **R² Score:** `0.6900`
  - The model explains **69.00%** of the variance in the training data.
  - This suggests a **moderately strong fit**—the model captures most patterns but has room for improvement.

### 🔹 **Test Performance**
- **MSE (Mean Squared Error):** `0.3670`
  - The slightly higher MSE compared to training indicates **some generalization error**.
- **R² Score:** `0.6512`
  - The model explains **65.12%** of the variance in the test data.
  - Since the test R² is close to the train R², the model **is not overfitting**, but it may still benefit from fine-tuning.

### **Overall Insights**
- The model performs **reasonably well**, generalizing to unseen data with minimal overfitting.