In [3]:
import numpy as np
from sklearn.datasets import fetch_california_housing

In [4]:
fetch_california_housing()

{'data': array([[   8.3252    ,   41.        ,    6.98412698, ...,    2.55555556,
           37.88      , -122.23      ],
        [   8.3014    ,   21.        ,    6.23813708, ...,    2.10984183,
           37.86      , -122.22      ],
        [   7.2574    ,   52.        ,    8.28813559, ...,    2.80225989,
           37.85      , -122.24      ],
        ...,
        [   1.7       ,   17.        ,    5.20554273, ...,    2.3256351 ,
           39.43      , -121.22      ],
        [   1.8672    ,   18.        ,    5.32951289, ...,    2.12320917,
           39.43      , -121.32      ],
        [   2.3886    ,   16.        ,    5.25471698, ...,    2.61698113,
           39.37      , -121.24      ]]),
 'target': array([4.526, 3.585, 3.521, ..., 0.923, 0.847, 0.894]),
 'frame': None,
 'target_names': ['MedHouseVal'],
 'feature_names': ['MedInc',
  'HouseAge',
  'AveRooms',
  'AveBedrms',
  'Population',
  'AveOccup',
  'Latitude',
  'Longitude'],
 'DESCR': '.. _california_housing_dataset:\n

In [5]:
housing = fetch_california_housing()
X = housing['data']
y = housing['target']

In [6]:
y.shape

(20640,)

In [7]:
def heaviside(x):
    return np.where(x > 0, 1, 0)

In [8]:
def perceptron(input):
    w = np.random.randn(input.shape[1])
    b = np.random.randn()
    y = input @ w + b
    return heaviside(y)

In [9]:
X_n = np.random.randn(100, 5)
arr = perceptron(X_n)
ones = arr.sum()
print(ones)
print(len(arr) - ones)

52
48


In [10]:
def perceptrons(input, output_size):
    w = np.random.randn(input.shape[1], output_size) #(8, 3)
    b = np.random.randn(output_size) #(3,)
    y = input @ w + b # (2000, 8) @ (8, 3) + (3, ) = (2000, 3)
    return heaviside(y)

In [11]:
X_in = np.random.randn(2000, 8)
perceptrons(X_in, 3).shape

(2000, 3)

In [12]:
class Perceptrons:
    def __init__(self, perceptrons_num):
        self.perceptrons_num = perceptrons_num
        self.w = None
        self.b = None
    
    def fit(self, X):
        n_samples, n_features = X.shape
        self.w = np.random.randn(n_features, self.perceptrons_num)
        self.b = np.random.rand(self.perceptrons_num)

    def _heaviside(self, x):
        return (x > 0).astype(int)
    
    def predict(self):
        y = X @ self.w + self.b
        return self._heaviside(y)
    
    @property
    def parameters_size(self):
        return (self.perceptrons_num + 1) * self.w.shape[0]

In [13]:
perceptrons5d = Perceptrons(5)
perceptrons5d.fit(X)
print(perceptrons5d.predict().shape)
print(f'Parameters = {perceptrons5d.parameters_size}')

(20640, 5)
Parameters = 48


In [31]:
class MLP:
    def __init__(self, input_layer: int, hidden_layers: list[int] = [], output_layer: int = 1, lr = 0.01, epoches = 500):
        self.input_layer = input_layer
        self.hidden_layers = hidden_layers
        self.output_layer = output_layer
        self.layer_sizes = [self.input_layer] + self.hidden_layers + [self.output_layer]
        self.Ws = []
        self.bs = []
        self.epoches = epoches
        self.lr = lr

    def fit(self, X, Y):
        for local_in, local_out in zip(self.layer_sizes[:-1], self.layer_sizes[1:]):
            w = np.random.randn(local_in, local_out) * np.sqrt(2 / local_in)
            b = np.random.randn(local_out)
            self.Ws.append(w)
            self.bs.append(b)
        n = X.shape[0]
        for _ in range(self.epoches):
            As = [X] # until activation function
            Zs = [] # after activation function
            for i in range(len(self.bs)):
                Z = As[-1] @ self.Ws[i] + self.bs[i]
                Zs.append(Z)
                if i < len(self.bs) - 1:
                    A = self._ReLu(Z)
                else:
                    A = Z
                As.append(A)
            dZ = (1 / n) * (Zs[-1] - Y)
            for i in reversed(range(len(self.bs))):
                dw = As[i].T @ dZ
                db = np.sum(dZ, axis = 0)
                self.Ws[i] -= self.lr * dw
                self.bs[i] -= self.lr * db
                if i > 0:
                    dZ = (dZ @ self.Ws[i].T) * self._ReLu_deriv(Zs[i-1])

    def _ReLu(self, x):
        return np.maximum(0, x)
    
    def _ReLu_deriv(self, x):
        return (x > 0).astype(float)
    
    def predict(self, X):
        h_in = X
        i = 0
        for w, b in zip(self.Ws, self.bs):
            h_out = h_in @ w + b
            i += 1
            if i < len(self.bs):
                h_out = self._ReLu(h_out)
            h_in = h_out
            print(h_out.shape)
        return h_out 

In [15]:
from sklearn.preprocessing import StandardScaler
scaling = StandardScaler()
X_scaled = scaling.fit_transform(X)
print(X_scaled[:3])

[[ 2.34476576  0.98214266  0.62855945 -0.15375759 -0.9744286  -0.04959654
   1.05254828 -1.32783522]
 [ 2.33223796 -0.60701891  0.32704136 -0.26333577  0.86143887 -0.09251223
   1.04318455 -1.32284391]
 [ 1.7826994   1.85618152  1.15562047 -0.04901636 -0.82077735 -0.02584253
   1.03850269 -1.33282653]]


In [28]:
y.shape
y = y.reshape(-1, 1)
y.shape

(20640, 1)

In [32]:
model_mlp = MLP(8, [6, 3], 1)
model_mlp.fit(X_scaled[:2000], y[:2000])

In [33]:
from sklearn.metrics import r2_score, mean_squared_error
y_pred = model_mlp.predict(X_scaled[:2000])
print(f'r2_score: {r2_score(y[:2000], y_pred)}')
print(f'MSE: {mean_squared_error(y[:2000], y_pred)}')

(2000, 6)
(2000, 3)
(2000, 1)
r2_score: 0.6373558187683056
MSE: 0.3367980551450935


In [16]:
y.shape

(20640,)

In [17]:
X_scaled.shape

(20640, 8)

In [18]:
mlp = MLP(input_layer = 8, output_layer = 1)
mlp.fit(X, y)
y_pred = mlp.predict(X)

Ws = [array([[-1.52310299],
       [-0.20953575],
       [ 1.22734701],
       [ 0.70598892],
       [ 0.65412998],
       [ 0.12346728],
       [-0.64285539],
       [ 0.61657689]])]
bs = [array([1.29504565])]
(20640, 1)


In [19]:
mlp = MLP(input_layer = 8, hidden_layers = [4, 10], output_layer = 6)
mlp.fit(X_in, y)
mlp.predict(X_in).shape

Ws = [array([[ 1.40326571,  0.50554501, -1.87701178,  0.93666589],
       [ 0.41736527,  0.05219873, -1.06870999, -1.63990642],
       [-1.05639149,  0.59305028,  0.50430289,  1.71984919],
       [ 0.65310165,  1.63619018,  1.17064483,  0.70700291],
       [ 0.30723428,  0.71269161,  0.83045162, -0.45735044],
       [-0.0464326 ,  0.87430256,  0.30678673, -0.92831784],
       [-1.37795019, -0.06446397,  1.75555416,  0.64223653],
       [ 0.85541481, -0.52199255, -0.82014052,  0.08370822]]), array([[ 0.50015358, -1.63758277, -0.23128666, -1.72670454,  0.41970393,
        -2.21064535,  0.54921128,  1.72270729, -1.45909151, -0.70895422],
       [-0.96525813, -0.26855417, -0.15619839, -0.50029562,  0.0278765 ,
        -2.14144041,  1.03518018, -0.6906321 ,  0.48415674,  1.53435614],
       [-2.97294984,  1.47238013,  0.00769668,  0.51574753,  0.69402399,
        -0.81228523,  1.23609861,  1.16360618,  0.4979128 ,  0.87776851],
       [-1.15267521, -0.13201561,  0.02549828, -1.07386323, -0.

(2000, 6)

In [20]:
for i in mlp.Ws:
    print(i.shape)

(8, 4)
(4, 10)
(10, 6)


In [21]:
for i in mlp.bs:
    print(i.shape)

(4,)
(10,)
(6,)


In [22]:
from sklearn.neural_network import MLPRegressor
from sklearn.metrics import r2_score, mean_squared_error
from sklearn.preprocessing import StandardScaler
from sklearn.pipeline import Pipeline


pipe = Pipeline([
    ('std', StandardScaler()),
    ('model', MLPRegressor(max_iter = 500)) 
])
pipe.fit(X, y)
y_pred = pipe.predict(X)

print(f'r2_score: {r2_score(y, y_pred)}')
print(f'MSE: {mean_squared_error(y, y_pred)}')

r2_score: 0.8005470023052399
MSE: 0.26558169893267397
