## Regression

In [357]:
import numpy as np
import pandas as pd

In [358]:
np.random.seed(42)
n_samples = 1000

bedrooms = np.random.randint(1, 6, n_samples)
bathrooms = np.random.randint(1, 4, n_samples)
square_footage = np.random.randint(500, 4500, n_samples)
house_age = np.random.randint(0, 100, n_samples)

noise = np.random.normal(0, 10000, n_samples)
house_price = 50000 + (bedrooms * 5000) + (bathrooms * 10000) + \
    (square_footage * 50) - (house_age * 200) + noise

data = pd.DataFrame({
    'Bedrooms': bedrooms,
    'Bathrooms': bathrooms,
    'Square_Footage': square_footage,
    'House_Age': house_age,
    'House_Price': house_price
})

data

Unnamed: 0,Bedrooms,Bathrooms,Square_Footage,House_Age,House_Price
0,4,1,3716,77,244138.099678
1,5,1,4109,74,275803.789814
2,3,3,4219,8,313900.963794
3,5,2,802,89,106971.307714
4,5,1,3291,15,237965.500596
...,...,...,...,...,...
995,2,3,1570,81,134430.542687
996,1,2,2510,97,192673.463117
997,1,2,898,74,117965.973694
998,4,2,2233,75,184994.718486


In [359]:
from sklearn.linear_model import LinearRegression,Ridge
from sklearn.tree import DecisionTreeRegressor
from sklearn.model_selection import train_test_split

In [360]:
data

Unnamed: 0,Bedrooms,Bathrooms,Square_Footage,House_Age,House_Price
0,4,1,3716,77,244138.099678
1,5,1,4109,74,275803.789814
2,3,3,4219,8,313900.963794
3,5,2,802,89,106971.307714
4,5,1,3291,15,237965.500596
...,...,...,...,...,...
995,2,3,1570,81,134430.542687
996,1,2,2510,97,192673.463117
997,1,2,898,74,117965.973694
998,4,2,2233,75,184994.718486


In [361]:
X,y = data.drop('House_Price',axis=1),data['House_Price']


In [362]:
X = (X - X.mean(axis=0)) / X.std(axis=0)

In [363]:
X_train,X_test,y_train,y_test = train_test_split(X,y,test_size=0.2,random_state=42)

In [364]:
lr_model = LinearRegression()
ridge = Ridge(alpha=0.3)
decision_tree = DecisionTreeRegressor()

In [365]:
lr_model.fit(X_train,y_train)
ridge.fit(X_train,y_train)
decision_tree.fit(X_train,y_train)

In [366]:
from sklearn.metrics import mean_absolute_error,mean_squared_error,r2_score

In [367]:
models = [lr_model,ridge,decision_tree]

In [368]:
def evaluate(model):
    y_pred = model.predict(X_test)
    print(type(model).__name__)
    print(mean_absolute_error(y_pred,y_test))
    print(mean_squared_error(y_pred,y_test))
    print(r2_score(y_pred,y_test))

In [369]:
for model in models:
    evaluate(model)

LinearRegression
7494.233207091648
84437912.4269087
0.977328560441844
Ridge
7492.804182859481
84420120.70139675
0.9773154168845644
DecisionTreeRegressor
12625.223956873422
240444184.42687598
0.9348837441191594


In [373]:
def compute_cost(X, y, W, b):
    y_pred = np.dot(X, W) + b
    n = X.shape[0]
    cost = np.sum((y - y_pred) ** 2) / n
    return np.sqrt(cost)


def gradient_descent(X, y, W, b):
    learning_rate = 0.001
    prev_cost = 0
    for i in range(1000):
        cost = compute_cost(X, y, W, b)
        print(i,cost)
        tolerance = 10
        if abs(cost - prev_cost) < tolerance:
            break
        prev_cost = cost
        y_pred = np.dot(X, W) + b
        error = y - y_pred
        n = X.shape[0]
        dw = -np.dot(X.T, error) / n + (W / n)
        db = -np.sum(error)
        W -= learning_rate * dw
        b -= learning_rate * db

    W -= learning_rate * dw
    b -= learning_rate * db
    return W, b


W = np.zeros(shape=(X.shape[1]))
b = 3
w, b = gradient_descent(X_train, y_train, W, b)

0 210529.3024647836
1 71174.63141541819
2 59071.31339159454
3 58482.61878519438
4 58406.44304021484
5 58350.94091479799
6 58296.31929603303
7 58241.78551458509
8 58187.307783473996
9 58132.88477940331
10 58078.51639880074
11 58024.202586979656
12 57969.94329126217
13 57915.73845910219
14 57861.58803801023
15 57807.49197555037
16 57753.45021934005
17 57699.46271705001
18 57645.52941640425
19 57591.65026517999
20 57537.825211207564
21 57484.054202370426
22 57430.33718660506
23 57376.674111900946
24 57323.06492630047
25 57269.50957789892
26 57216.00801484441
27 57162.56018533781
28 57109.166037632705
29 57055.825520035374
30 57002.53858090465
31 56949.30516865199
32 56896.125231741295
33 56842.998718688956
34 56789.92557806374
35 56736.905758486755
36 56683.93920863142
37 56631.02587722338
38 56578.165713040435
39 56525.35866491257
40 56472.60468172181
41 56419.903712402214
42 56367.25570593981
43 56314.66061137254
44 56262.11837779025
45 56209.62895433455
46 56157.192290198815
47 56104.8