## Regression

In [160]:
import numpy as np
import pandas as pd

In [161]:
np.random.seed(42)
n_samples = 1000

bedrooms = np.random.randint(1, 6, n_samples)
bathrooms = np.random.randint(1, 4, n_samples)
square_footage = np.random.randint(500, 4500, n_samples)
house_age = np.random.randint(0, 100, n_samples)

noise = np.random.normal(0, 10000, n_samples)
house_price = 50000 + (bedrooms * 5000) + (bathrooms * 10000) + \
    (square_footage * 50) - (house_age * 200) + noise

data = pd.DataFrame({
    'Bedrooms': bedrooms,
    'Bathrooms': bathrooms,
    'Square_Footage': square_footage,
    'House_Age': house_age,
    'House_Price': house_price
})

data

Unnamed: 0,Bedrooms,Bathrooms,Square_Footage,House_Age,House_Price
0,4,1,3716,77,244138.099678
1,5,1,4109,74,275803.789814
2,3,3,4219,8,313900.963794
3,5,2,802,89,106971.307714
4,5,1,3291,15,237965.500596
...,...,...,...,...,...
995,2,3,1570,81,134430.542687
996,1,2,2510,97,192673.463117
997,1,2,898,74,117965.973694
998,4,2,2233,75,184994.718486


In [162]:
from sklearn.linear_model import LinearRegression,Ridge
from sklearn.tree import DecisionTreeRegressor
from sklearn.model_selection import train_test_split

In [163]:
data

Unnamed: 0,Bedrooms,Bathrooms,Square_Footage,House_Age,House_Price
0,4,1,3716,77,244138.099678
1,5,1,4109,74,275803.789814
2,3,3,4219,8,313900.963794
3,5,2,802,89,106971.307714
4,5,1,3291,15,237965.500596
...,...,...,...,...,...
995,2,3,1570,81,134430.542687
996,1,2,2510,97,192673.463117
997,1,2,898,74,117965.973694
998,4,2,2233,75,184994.718486


In [164]:
X,y = data.drop('House_Price',axis=1),data['House_Price']


In [165]:
X = (X - X.mean(axis=0)) / X.std(axis=0)

In [166]:
X_train,X_test,y_train,y_test = train_test_split(X,y,test_size=0.2,random_state=42)

In [167]:
lr_model = LinearRegression()
ridge = Ridge(alpha=0.3)
decision_tree = DecisionTreeRegressor()

In [168]:
lr_model.fit(X_train,y_train)
ridge.fit(X_train,y_train)
decision_tree.fit(X_train,y_train)

In [169]:
from sklearn.metrics import mean_absolute_error,mean_squared_error,r2_score

In [170]:
models = [lr_model,ridge,decision_tree]

In [171]:
def evaluate(model):
    y_pred = model.predict(X_test)
    print(type(model).__name__)
    print(mean_absolute_error(y_pred,y_test))
    print(mean_squared_error(y_pred,y_test))
    print(r2_score(y_pred,y_test))

In [172]:
for model in models:
    evaluate(model)

LinearRegression
7494.233207091648
84437912.4269087
0.977328560441844
Ridge
7492.804182859481
84420120.70139675
0.9773154168845644
DecisionTreeRegressor
12625.223956873422
240444184.42687598
0.9348837441191594


In [173]:
def compute_cost(X, y, W, b):
    y_pred = np.dot(X, W) + b
    n = X.shape[0]
    cost = np.sum((y - y_pred) ** 2) / n
    return cost


def gradient_descent(X, y, W, b):
    learning_rate = 0.001
    prev_cost = 0
    for _ in range(100):
        cost = compute_cost(X, y, W, b)
        print(cost)
        tolerance = 1e-6
        if abs(cost - prev_cost) < tolerance:
            break
        cost = prev_cost
        y_pred = np.dot(X, W) + b
        error = y - y_pred
        n = X.shape[0]
        dw = -np.dot(X.T, error) / n
        db = -np.sum(error) / n
        W -= learning_rate * dw
        b -= learning_rate * db

    W -= learning_rate * dw
    b -= learning_rate * db
    return W, b


W = np.zeros(shape=(X.shape[1]))
b = 3
w, b = gradient_descent(X_train, y_train, W, b)

44322587196.30834
44233037554.89996
44143669315.537834
44054482110.603615
43965475573.22422
43876649337.27033
43788003037.354935
43699536308.83179
43611248787.793884
43523140111.07201
43435209916.233215
43347457841.579315
43259883526.14542
43172486609.698456
43085266732.73564
42998223536.48304
42911356662.89407
42824665754.64804
42738150455.148636
42651810408.52251
42565645259.61779
42479654654.00261
42393838237.963646
42308195658.50467
42222726563.34508
42137430600.91849
42052307420.37122
41967356671.56092
41882578005.055046
41797971072.1295
41713535524.767136
41629271015.656364
41545177198.18969
41461253726.4623
41377500255.270645
41293916440.111015
41210501937.17809
41127256403.36359
41044179496.25475
40961270874.13304
40878530195.97269
40795957121.439255
40713551310.88828
40631312425.36386
40549240126.597244
40467334077.00546
40385593939.68991
40304019378.43498
40222610057.70669
40141365642.65124
40060285799.09369
39979370193.53658
39898618493.15855
39818030365.81291
39737605480.02