In [None]:
import numpy as np

In [None]:
from sklearn.datasets import fetch_california_housing
# `data_home` must be passed by keyword: current scikit-learn declares every
# argument of fetch_california_housing as keyword-only, so a positional path
# raises TypeError.
cal_data = fetch_california_housing(data_home="~/data/sklearn_datasets/")

Downloading Cal. housing from https://ndownloader.figshare.com/files/5976036 to /root/data/sklearn_datasets/


**1**

In [None]:
# Echo the fetched bundle as the cell output; its repr includes the 'DESCR' text.
cal_data

{'DESCR': '.. _california_housing_dataset:\n\nCalifornia Housing dataset\n--------------------------\n\n**Data Set Characteristics:**\n\n    :Number of Instances: 20640\n\n    :Number of Attributes: 8 numeric, predictive attributes and the target\n\n    :Attribute Information:\n        - MedInc        median income in block\n        - HouseAge      median house age in block\n        - AveRooms      average number of rooms\n        - AveBedrms     average number of bedrooms\n        - Population    block population\n        - AveOccup      average house occupancy\n        - Latitude      house block latitude\n        - Longitude     house block longitude\n\n    :Missing Attribute Values: None\n\nThis dataset was obtained from the StatLib repository.\nhttp://lib.stat.cmu.edu/datasets/\n\nThe target variable is the median house value for California districts.\n\nThis dataset was derived from the 1990 U.S. census, using one row per census\nblock group. A block group is the smallest geograp

In [None]:
# Pull the feature matrix ('data') and the regression target ('target')
# out of the fetched bundle in one step.
data, y = cal_data['data'], cal_data['target']

In [None]:
from sklearn.preprocessing import MinMaxScaler
# Min-max scale the features: learn the per-column range from `data`,
# then apply the fitted transform to the same rows.
# NOTE(review): the scaler is fitted on all rows before the train/test split
# that follows, so test rows influence the scaling — confirm this is intended.
scaler = MinMaxScaler()
scaler.fit(data)
X = scaler.transform(data)

**2**

In [None]:
from sklearn.model_selection import train_test_split
# Hold out half of the rows for testing; the fixed seed keeps the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.5,
    random_state=42,
)

In [None]:
from sklearn.linear_model import LinearRegression
# Fit ordinary least squares on the training half and report its parameters.
reg = LinearRegression()
reg.fit(X_train, y_train)
print(f'intercept={reg.intercept_:.5f}')
# Coefficients are labelled w1..wN, i.e. numbered from 1.
for idx, coef in enumerate(reg.coef_, start=1):
    print(f'w{idx}={coef:.5f}')

intercept=3.67668
w1=6.40956
w2=0.49202
w3=-16.84340
w4=26.04256
w5=-0.01272
w6=-3.75773
w7=-3.98365
w8=-4.39237


In [None]:
# Prepend a column of ones so the first weight acts as the intercept term.
X_train_aug = np.concatenate(
    (np.ones((len(X_train), 1)), X_train),
    axis=1,
)

**3**

In [None]:
def gradfn(weights, X, y):
    """Return X^T (X @ weights - y) / n, where n is the number of rows of X.

    Args:
        weights: weight vector multiplied against the columns of X.
        X: 2-D design matrix (its shape is unpacked into two values).
        y: targets subtracted from the predictions X @ weights.

    Returns:
        The residual-weighted column sums of X divided by the row count.
    """
    num_rows, _ = np.shape(X)
    residual = np.matmul(X, weights) - y
    column_sums = np.matmul(np.transpose(X), residual)
    return column_sums / float(num_rows)

In [None]:
def solve_via_gradient_descent(X, y, print_every=100000,
                               niter=500000, eta=0.005):
    """Run fixed-step gradient descent using `gradfn` and return the weights.

    Args:
        X: 2-D design matrix; one weight is created per column.
        y: targets forwarded to `gradfn`.
        print_every: progress is printed whenever the step index is a
            multiple of this value (including step 0).
        niter: number of update steps to perform.
        eta: step size multiplied with the gradient on every update.

    Returns:
        The weight vector after `niter` updates.
    """
    _, num_features = np.shape(X)
    # Start from random weights drawn by np.random.rand (unseeded here).
    weights = np.random.rand(num_features)
    for step in range(niter):
        grad = gradfn(weights, X, y)
        weights = weights - eta * grad
        if step % print_every == 0:
            print (f'Weight after {step} iteration: {weights!s};  gradient: {grad!s}')
    return weights


In [None]:
# Run gradient descent on the bias-augmented training matrix, then list the
# learned weights (w0 multiplies the leading column of ones).
w = solve_via_gradient_descent(X=X_train_aug, y=y_train)
print('\n')
for idx, weight in enumerate(w):
    print(f'w{idx}={weight:.5f}')


Weight after 0 iteration: [0.63693395 0.68796717 0.17189942 0.62116875 0.15661067 0.51575117
 0.95431616 0.75332951 0.6675728 ];  gradient: [-0.56821907 -0.22736522 -0.33433638 -0.02074878 -0.01198127 -0.02175952
 -0.000676   -0.14237237 -0.26304206]
Weight after 100000 iteration: [ 2.62643499  5.7839958   0.68670524  0.67095016  0.43536803  0.55386986
  0.63956384 -2.80931448 -2.96181623];  gradient: [-2.82819954e-03  6.85067276e-04  3.59568818e-04 -6.28074980e-05
 -8.57639046e-04  1.71934089e-04  6.09145497e-04  2.83433919e-03
  3.25559179e-03]
Weight after 200000 iteration: [ 3.5196432   5.56101439  0.57089571  0.76532921  0.8819064   0.39916778
  0.353356   -3.70527785 -3.98833335];  gradient: [-0.00104509  0.00026551  0.00013896 -0.00025557 -0.00089894  0.00037
  0.00053998  0.00105375  0.00120238]
Weight after 300000 iteration: [ 3.85538923  5.47480376  0.52537865  0.89269003  1.31836609  0.22393965
  0.09650861 -4.04663532 -4.37630072];  gradient: [-4.04096704e-04  1.05244558e-0

**4**

In [None]:
def gradfn(weights, X, y):
    """Return X^T (X @ weights - y) / n for the rows that are passed in.

    The divisor is always the number of rows of `X`, so calling this with a
    subset of rows yields the average over that subset. (The previous
    `mini = 50` assignment was overwritten by the shape unpacking on the very
    next line and never had any effect; it has been removed.)

    Args:
        weights: weight vector multiplied against the columns of X.
        X: 2-D design matrix (full data set or a batch of its rows).
        y: targets aligned with the rows of X.

    Returns:
        The residual-weighted column sums of X divided by the row count.
    """
    # Unpacking into two values keeps the original requirement that X is 2-D.
    n_rows, _ = np.shape(X)
    yhat = np.matmul(X, weights)
    error = yhat - y
    return np.matmul(np.transpose(X), error)/float(n_rows)

In [None]:
def solve_via_gradient_descent(X, y, print_every=100000,
                               niter=500000, eta=0.005, batch_size=None):
    """Fit weights by gradient descent, optionally on random mini-batches.

    The earlier version assigned `mini = 50` and immediately overwrote it with
    the row count, so it always ran full-batch descent. That dead assignment is
    gone; mini-batching is now an explicit, opt-in parameter, and the default
    reproduces the previous full-batch behaviour exactly.

    Args:
        X: 2-D design matrix; one weight is created per column.
        y: targets aligned with the rows of X.
        print_every: progress is printed whenever the step index is a
            multiple of this value (including step 0).
        niter: number of update steps to perform.
        eta: step size multiplied with the gradient on every update.
        batch_size: if None (default), or not smaller than the number of rows,
            every step uses all rows. Otherwise each step uses `batch_size`
            row indices drawn uniformly at random with replacement.

    Returns:
        The weight vector after `niter` updates.

    Raises:
        ValueError: if `batch_size` is given and is smaller than 1.
    """
    if batch_size is not None and batch_size < 1:
        raise ValueError(f'batch_size must be >= 1, got {batch_size}')
    # Array form is needed so a batch can be selected with an index array.
    X = np.asarray(X)
    y = np.asarray(y)
    n, m = np.shape(X)
    use_minibatch = batch_size is not None and batch_size < n
    # initialize all the weights to random values
    w = np.random.rand(m)
    for k in range(niter):
        if use_minibatch:
            # Sampling with replacement keeps the per-step cost O(batch_size).
            rows = np.random.randint(n, size=batch_size)
            dw = gradfn(w, X[rows], y[rows])
        else:
            dw = gradfn(w, X, y)
        w = w - eta*dw
        if k % print_every == 0:
            print (f'Weight after {k} iteration: {str(w)};  gradient: {str(dw)}')
    return w


In [None]:
# Re-run the solver defined just above on the bias-augmented training matrix
# and list the learned weights (w0 multiplies the leading column of ones).
w = solve_via_gradient_descent(X=X_train_aug, y=y_train)
print('\n')
for idx, weight in enumerate(w):
    print(f'w{idx}={weight:.5f}')

Weight after 0 iteration: [0.13722881 0.27971355 0.1256776  0.72893078 0.7551274  0.465159
 0.5125831  0.90019417 0.6324199 ];  gradient: [-1.14652533 -0.36907076 -0.64797445 -0.03962819 -0.02488289 -0.04500204
 -0.00190773 -0.32375765 -0.54436636]
Weight after 100000 iteration: [ 2.57267727  5.79805722  0.69589694  0.71475349  0.96895535  0.5635025
  0.22735626 -2.7732422  -2.91890876];  gradient: [-2.94540138e-03  7.07655920e-04  3.74266622e-04  8.01959984e-05
 -7.41732044e-04  1.23653772e-04  5.54436656e-04  2.94548716e-03
  3.38568800e-03]
Weight after 200000 iteration: [ 3.50120083  5.56869846  0.5754638   0.74898722  1.36529389  0.42294814
 -0.03231289 -3.70156967 -3.98350276];  gradient: [-0.00108305  0.00027092  0.00014433 -0.00015396 -0.00081164  0.00035602
  0.00048858  0.00108623  0.00124103]
Weight after 300000 iteration: [ 3.84803859  5.4815016   0.5282177   0.83220068  1.76300191  0.25106174
 -0.2643013  -4.05150411 -4.38185688];  gradient: [-4.15310383e-04  1.04924044e-0

**5**

In [None]:
def linreg(w, b, X):
    """Evaluate the linear model X @ w + b.

    Args:
        w: weight vector, one entry per column of X.
        b: intercept added to every prediction.
        X: matrix whose rows are the samples to predict.

    Returns:
        The matrix product of X and w, shifted by b.
    """
    projection = np.matmul(X, w)
    return projection + b

In [None]:
# Read the parameters straight from the fitted model instead of re-typing
# rounded numbers from an earlier printout. The hand-copied intercept (3.676)
# had dropped digits from the fitted value (printed as 3.67668), which shifted
# every manual prediction relative to reg.predict.
intercept = reg.intercept_
w = np.asarray(reg.coef_)
# Keep the individual names available; the model was fitted on 8 features.
w1, w2, w3, w4, w5, w6, w7, w8 = w
b = intercept

y_pred = linreg(w, b, X_test)
print(y_pred)

[0.72558873 1.77052077 2.70482166 ... 2.18244031 1.57046107 1.42783734]


In [None]:
from sklearn.metrics import mean_absolute_error, mean_squared_error

# scikit-learn metrics take (y_true, y_pred). MAE and MSE are symmetric, so
# the numbers do not change, but the conventional order keeps this cell
# correct if an asymmetric metric is added later.
print(f'MAE = {mean_absolute_error(y_test, y_pred):.3f}')
MSE = mean_squared_error(y_test, y_pred)
print(f'MSE = {MSE:.3f}')
print(f'RMSE= {np.sqrt(MSE):.3f}')

MAE = 0.532
MSE = 0.531
RMSE= 0.729


In [None]:
# Predict with the fitted scikit-learn model; this rebinds `y_pred`.
y_pred = reg.predict(X_test)
print(y_pred)

[0.72626999 1.77120249 2.70550382 ... 2.18312266 1.57114216 1.42851936]


In [None]:
# scikit-learn metrics take (y_true, y_pred); MAE and MSE are symmetric, so
# the values are unchanged. The RMSE label previously read 'RMS E= ' (stray
# space) and is now spelled the same way as in the earlier metrics cell.
print(f'MAE = {mean_absolute_error(y_test, y_pred):.3f}')
MSE = mean_squared_error(y_test, y_pred)
print(f'MSE = {MSE:.3f}')
print(f'RMSE= {np.sqrt(MSE):.3f}')

MAE = 0.532
MSE = 0.531
RMS E= 0.729
