In [218]:
import pandas as pd 
import numpy as np
from sklearn.preprocessing import StandardScaler

In [219]:
df = pd.read_csv("Housing.csv")
df

Unnamed: 0,price,area,bedrooms,bathrooms,stories,mainroad,guestroom,basement,hotwaterheating,airconditioning,parking,prefarea,furnishingstatus
0,13300000,7420,4,2,3,yes,no,no,no,yes,2,yes,furnished
1,12250000,8960,4,4,4,yes,no,no,no,yes,3,no,furnished
2,12250000,9960,3,2,2,yes,no,yes,no,no,2,yes,semi-furnished
3,12215000,7500,4,2,2,yes,no,yes,no,yes,3,yes,furnished
4,11410000,7420,4,1,2,yes,yes,yes,no,yes,2,no,furnished
...,...,...,...,...,...,...,...,...,...,...,...,...,...
540,1820000,3000,2,1,1,yes,no,yes,no,no,2,no,unfurnished
541,1767150,2400,3,1,1,no,no,no,no,no,0,no,semi-furnished
542,1750000,3620,2,1,1,yes,no,no,no,no,0,no,unfurnished
543,1750000,2910,3,1,1,no,no,no,no,no,0,no,furnished


In [220]:
# Linear regression class

import numpy as np

class LinearRegression:
    def __init__(self, lr=0.01, epochs=1000):
        self.lr = lr
        self.epochs = epochs
        self.weights = None
        self.bias = 0
        self.X = None
        self.y = None

    def fit(self, X, y):
        """Initialize model parameters and store training data"""
        n_samples, n_features = X.shape
        self.weights = np.zeros(n_features)
        self.bias = 0
        self.X = X
        self.y = y

    

    def mean_square_error(self, y_true, y_pred):
        """Calculate MSE loss"""
        return np.mean((y_true - y_pred) ** 2)

    def train(self, verbose=False):
        """Run gradient descent optimization"""
        if self.X is None or self.y is None:
            raise ValueError("Data not provided. Call fit() with training data first.")

        n_samples = self.X.shape[0]

        for epoch in range(self.epochs):
            # Forward pass
            y_pred = self.predict(self.X)
            error = y_pred - self.y

            # Compute gradients
            dw = (2 / n_samples) * np.dot(self.X.T, error)
            db = (2 / n_samples) * np.sum(error)

            # Update parameters
            self.weights -= self.lr * dw
            self.bias -= self.lr * db

            # Optional logging
            if verbose:
                loss = self.mean_square_error(self.y, y_pred)
                print(f"Epoch {epoch + 1}/{self.epochs}, Loss: {loss:.4f}")

        return self.weights, self.bias
    
    def predict(self, X):
        """Make predictions using current parameters"""
        if self.weights is None:
            raise ValueError("Model not initialized. Call fit() first.")
        return np.dot(X, self.weights) + self.bias

In [221]:
model = LinearRegression(lr = 0.001, epochs= 6000)

In [222]:
# Handle categorical columns with one-hot encoding
df_encoded = pd.get_dummies(df, drop_first=True)

# Split features and target
X = df_encoded.drop("price", axis=1).values  # Features
y = df_encoded["price"].values

In [223]:
X_scaled = StandardScaler().fit_transform(X)

In [224]:
model.fit(X_scaled, y)
model.train(verbose=True)

Epoch 1/6000, Loss: 26213832729318.5312
Epoch 2/6000, Loss: 26100629807937.7344
Epoch 3/6000, Loss: 25988006891891.7344
Epoch 4/6000, Loss: 25875960406126.1523
Epoch 5/6000, Loss: 25764486802250.0898
Epoch 6/6000, Loss: 25653582558307.7734
Epoch 7/6000, Loss: 25543244178552.1914
Epoch 8/6000, Loss: 25433468193220.9062
Epoch 9/6000, Loss: 25324251158313.9258
Epoch 10/6000, Loss: 25215589655373.5977
Epoch 11/6000, Loss: 25107480291266.5625
Epoch 12/6000, Loss: 24999919697967.7148
Epoch 13/6000, Loss: 24892904532346.1523
Epoch 14/6000, Loss: 24786431475953.1133
Epoch 15/6000, Loss: 24680497234811.8555
Epoch 16/6000, Loss: 24575098539209.4922
Epoch 17/6000, Loss: 24470232143490.7070
Epoch 18/6000, Loss: 24365894825853.4258
Epoch 19/6000, Loss: 24262083388146.3203
Epoch 20/6000, Loss: 24158794655668.2266
Epoch 21/6000, Loss: 24056025476969.3438
Epoch 22/6000, Loss: 23953772723654.3281
Epoch 23/6000, Loss: 23852033290187.1641
Epoch 24/6000, Loss: 23750804093697.8125
Epoch 25/6000, Loss: 2365

(array([ 528939.23745889,   85279.24777199,  495897.61449711,
         389934.06774728,  238547.96928898,  147054.0992509 ,
         115277.15366109,  166435.79905458,  179010.25132464,
         402290.6131005 ,  276336.06549895,  -22591.11692737,
        -192590.22963327]),
 4766700.309726651)

In [225]:
new_data_dict = {
    'area': [8960],
    'bedrooms': [3],
    'bathrooms': [2],
    'stories': [2],
    'mainroad': ['yes'],
    'guestroom': ['no'],
    'basement': ['yes'],
    'hotwaterheating': ['no'],
    'airconditioning': ['yes'],
    'parking': [1],
    'prefarea': ['yes'],
    'furnishingstatus': ['semi-furnished']
}
new_df = pd.DataFrame(new_data_dict)

# Apply one-hot encoding to match the training data structure
new_df_encoded = pd.get_dummies(new_df, drop_first=True)

# Ensure new data has the same columns as the training data, filling missing columns with 0
training_columns = df_encoded.drop("price", axis=1).columns
new_df_encoded = new_df_encoded.reindex(columns=training_columns, fill_value=0)

# Scale the new data using the previously fitted scaler
new_data_scaled = scaler.transform(new_df_encoded)

# Make prediction
prediction = model.predict(new_data_scaled)
print(f"Prediction: {prediction}")


Prediction: [5726827.70080388]


