In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

In [2]:
# Read the Boston housing table from the working directory and preview
# the first rows to confirm the columns parsed as expected.
df = pd.read_csv("Boston.csv")
df.head()

Unnamed: 0.1,Unnamed: 0,crim,zn,indus,chas,nox,rm,age,dis,rad,tax,ptratio,black,lstat,medv
0,1,0.00632,18.0,2.31,0,0.538,6.575,65.2,4.09,1,296,15.3,396.9,4.98,24.0
1,2,0.02731,0.0,7.07,0,0.469,6.421,78.9,4.9671,2,242,17.8,396.9,9.14,21.6
2,3,0.02729,0.0,7.07,0,0.469,7.185,61.1,4.9671,2,242,17.8,392.83,4.03,34.7
3,4,0.03237,0.0,2.18,0,0.458,6.998,45.8,6.0622,3,222,18.7,394.63,2.94,33.4
4,5,0.06905,0.0,2.18,0,0.458,7.147,54.2,6.0622,3,222,18.7,396.9,5.33,36.2


In [3]:
# Discard the leading column (presumably a row index saved into the CSV --
# confirm against the file). NOTE: this drops by position, so re-running the
# cell removes one more column each time.
df = df.drop(columns=df.columns[0])
df.head()

Unnamed: 0,crim,zn,indus,chas,nox,rm,age,dis,rad,tax,ptratio,black,lstat,medv
0,0.00632,18.0,2.31,0,0.538,6.575,65.2,4.09,1,296,15.3,396.9,4.98,24.0
1,0.02731,0.0,7.07,0,0.469,6.421,78.9,4.9671,2,242,17.8,396.9,9.14,21.6
2,0.02729,0.0,7.07,0,0.469,7.185,61.1,4.9671,2,242,17.8,392.83,4.03,34.7
3,0.03237,0.0,2.18,0,0.458,6.998,45.8,6.0622,3,222,18.7,394.63,2.94,33.4
4,0.06905,0.0,2.18,0,0.458,7.147,54.2,6.0622,3,222,18.7,396.9,5.33,36.2


In [4]:
# Shuffle all rows with a fixed seed so the positional train/test split is
# reproducible, then renumber the index from zero.
df = (
    df.sample(frac=1, random_state=42)
    .reset_index(drop=True)
)

In [5]:
# 'medv' is the regression target; every remaining column is a predictor.
y = np.array(df["medv"])
X = np.array(df.drop(columns=["medv"]))

In [6]:
# Standardise each feature column: subtract its mean and divide by its
# standard deviation.
# NOTE(review): the statistics are computed over every row of X, including
# the rows that are later held out as the test split, so the test data
# influences the scaling. Consider fitting avg/dev on the training rows only.
avg = X.mean(axis=0)
dev = X.std(axis=0)
X_rescaled = (X - avg) / dev

# Show one sample before and after rescaling.
print(X[0])
X_rescaled[0]

[9.1780e-02 0.0000e+00 4.0500e+00 0.0000e+00 5.1000e-01 6.4160e+00
 8.4100e+01 2.6463e+00 5.0000e+00 2.9600e+02 1.6600e+01 3.9550e+02
 9.0400e+00]


array([-0.40983668, -0.48772236, -1.03402724, -0.27259857, -0.38609067,
        0.18715116,  0.55208139, -0.54607682, -0.52300145, -0.66660821,
       -0.85792914,  0.42570183, -0.50645674])

In [7]:
# Sanity check: (number of rows, number of feature columns).
X.shape

(506, 13)

In [8]:
# Positional hold-out split: the first 450 (already shuffled) rows train the
# model, the remainder are kept for testing.
X_train, X_test = X_rescaled[:450], X_rescaled[450:]
y_train, y_test = y[:450], y[450:]

In [9]:
def cost(x,y,w,b, l1):
    """Lasso objective: halved mean squared error plus an L1 weight penalty.

    Computes ``sum((x @ w + b - y) ** 2) / (2 * m) + l1 * sum(|w|)`` where
    ``m`` is the number of rows of ``x``. The bias ``b`` is not penalised.

    Parameters
    ----------
    x : ndarray, shape (m, n)
        Feature matrix, one sample per row.
    y : ndarray, shape (m,)
        Targets; expected to be 1-D, as in ``gradient``.
    w : ndarray, shape (n,)
        Weight vector.
    b : float
        Bias term.
    l1 : float
        L1 regularisation strength.

    Returns
    -------
    float
        The regularised cost.
    """
    m = x.shape[0]
    # Vectorised residuals replace the former per-row Python loop; the value
    # is the same but this is far cheaper when called once per iteration.
    residuals = np.dot(x, w) + b - y
    squared_error = np.sum(residuals ** 2) / (2 * m)
    l1_penalty = l1 * np.sum(np.abs(w))
    return squared_error + l1_penalty

In [10]:
def soft_threshold(w, threshold):
    """Shrink each entry of ``w`` towards zero by ``threshold``.

    Entries whose magnitude does not exceed ``threshold`` become zero; the
    others keep their sign and lose ``threshold`` in magnitude.
    """
    shrunk_magnitude = np.maximum(np.abs(w) - threshold, 0)
    return np.sign(w) * shrunk_magnitude

In [11]:
def gradient(x,y,w,b, l1):
    """Gradient of the halved mean squared error with respect to ``w`` and ``b``.

    ``l1`` is accepted but not used here: only the squared-error term is
    differentiated.

    Parameters
    ----------
    x : ndarray, shape (m, n)
        Feature matrix, one sample per row.
    y : ndarray, shape (m,)
        Targets.
    w : ndarray, shape (n,)
        Current weights.
    b : float
        Current bias.
    l1 : float
        Unused.

    Returns
    -------
    (ndarray, float)
        Gradient with respect to the weights and with respect to the bias.
    """
    n_samples, _ = x.shape
    residuals = np.dot(x, w) + b - y
    grad_w = np.dot(x.T, residuals) / n_samples
    grad_b = np.sum(residuals) / n_samples
    return grad_w, grad_b

In [12]:
def descent(x,y,w,b, alpha, iterations, l1):
    """Fit lasso weights by proximal gradient descent (ISTA-style updates).

    Each iteration takes a gradient step on the squared-error term for both
    the weights and the bias, then applies soft-thresholding with threshold
    ``alpha * l1`` to the weights only (the bias is not shrunk).

    Parameters
    ----------
    x : ndarray, shape (m, n)
        Feature matrix, one sample per row.
    y : ndarray, shape (m,)
        Targets.
    w : ndarray, shape (n,)
        Initial weights (not modified in place).
    b : float
        Initial bias.
    alpha : float
        Learning rate.
    iterations : int
        Number of update steps to run.
    l1 : float
        L1 regularisation strength.

    Returns
    -------
    (ndarray, float, list)
        Final weights, final bias, and the cost after each of (at most) the
        first 1000 iterations.

    Notes
    -----
    Progress is printed every 100th iteration. The printed value is always
    the cost of the current iterate; previously, once the history stopped
    growing at iteration 1000, the report kept repeating the iteration-999
    cost.
    """
    cost_hist = []
    w_tmp = w
    b_tmp = b

    for i in range(iterations):
        dw, db = gradient(x, y, w_tmp, b_tmp, l1)
        w_tmp = w_tmp - alpha * dw
        b_tmp = b_tmp - alpha * db
        # Proximal step for the L1 penalty.
        w_tmp = soft_threshold(w_tmp, alpha * l1)

        record = i < 1000        # history is capped to bound memory/time
        report = i % 100 == 0
        if record or report:
            # Evaluate the cost once and share it between history and report.
            current_cost = cost(x, y, w_tmp, b_tmp, l1)
            if record:
                cost_hist.append(current_cost)
            if report:
                print(f"Iteration {i:4d}: Cost {current_cost:8.2f}   ")
    return w_tmp, b_tmp, cost_hist

In [13]:
def r2_score(y_true, y_pred):
    """Coefficient of determination: 1 - (residual SS / total SS).

    The total sum of squares is taken about the mean of ``y_true``.
    """
    residuals = y_true - y_pred
    centered = y_true - np.mean(y_true)
    return 1 - (np.sum(residuals ** 2) / np.sum(centered ** 2))

In [14]:
def predict(x, w, b):
    """Return the linear model output ``np.dot(w, x) + b``.

    Because the weights are the left operand, ``x`` must have the feature
    axis first (shape ``(n_features, n_samples)`` for a batch), i.e. the
    transpose of the row-per-sample layout used by ``cost`` and ``gradient``.
    """
    return np.dot(w, x) + b

In [15]:
# Hyperparameters for the fit.
alpha = 0.01
iterations = 1000
l1 = 0.01

# Start from all-zero weights (one per feature column) and a zero bias.
# Note that `m` is a shape tuple here, not a sample count.
m = X_train.shape[1:]
w_init = np.zeros(m)
b_init = 0

w_final, b_final, cost_track = descent(
    X_train, y_train, w_init, b_init, alpha, iterations, l1
)
print(f"b,w found by gradient descent: {b_final:0.2f},{w_final} ")

Iteration    0: Cost   290.12   
Iteration  100: Cost    46.96   
Iteration  200: Cost    16.57   
Iteration  300: Cost    12.33   
Iteration  400: Cost    11.62   
Iteration  500: Cost    11.44   
Iteration  600: Cost    11.34   
Iteration  700: Cost    11.28   
Iteration  800: Cost    11.24   
Iteration  900: Cost    11.21   
b,w found by gradient descent: 22.54,[-0.74096019  0.82509165 -0.13170728  0.58333036 -1.56264289  2.85020871
  0.05958049 -2.69407989  1.36369411 -0.94740803 -1.98356817  0.87980378
 -3.89494217] 


In [16]:
# Score the held-out rows. predict() takes features on the first axis,
# hence the transpose.
y_test_pred = predict(x=X_test.T, w=w_final, b=b_final)
r2_test = r2_score(y_true=y_test, y_pred=y_test_pred)
r2_test

np.float64(0.6153917709774964)

In [17]:
# Score the training rows for comparison with the test R^2. predict() takes
# features on the first axis, hence the transpose.
y_train_pred = predict(x=X_train.T, w=w_final, b=b_final)
r2_train = r2_score(y_true=y_train, y_pred=y_train_pred)
r2_train

np.float64(0.7475804755407753)