In [1]:
import numpy as np
from matplotlib import pyplot as plt
np.random.seed(42)

In [2]:
# different broadcast rules in numpy
shapes = [[(3,1),(3,4)],[(3,1),(1,4)],[(3,1),(3,1)],[(3,4),(3,1)],[(3,4),(1,4)],[(3,4),(3,4)]]
for aa,bb in shapes:
    a = np.random.randn(*aa)
    b = np.random.randn(*bb)
    print(a.shape,'[+-*/]',b.shape,'=',(a-b).shape)

(3, 1) [+-*/] (3, 4) = (3, 4)
(3, 1) [+-*/] (1, 4) = (3, 4)
(3, 1) [+-*/] (3, 1) = (3, 1)
(3, 4) [+-*/] (3, 1) = (3, 4)
(3, 4) [+-*/] (1, 4) = (3, 4)
(3, 4) [+-*/] (3, 4) = (3, 4)


In [98]:
w1 = np.random.rand(100).reshape(1,-1)
w2 = np.random.rand(100).reshape(-1,1)
np.dot(w1,w2)
w1.shape,w2.shape

((1, 100), (100, 1))

In [33]:
# build a simple logistic regression from scratch and use gradient descent

class LR:
    def __init__(self,inp_size=2) -> None:
        # z = x1 * w1 + x2 * w2 + b
        self.w = np.random.rand(inp_size,1) #  left features X right features[<>]
        self.b = np.random.rand(1,1)
    
    def fpass(self,x):
        z = x @ self.w + self.b
        a = self.sigmoid(z)
        return a
    
    def sigmoid(self,z):
        return 1/(1+np.exp(-z))

    def BCE(self,target,predicted):
        return -(target*np.log(predicted) + (1-target)*np.log(1-predicted))
    
    def train(self,x,y,epoch=100,lr=0.01):
        batch = x.shape[0]
        for _ in range(1,epoch+1):
            if _%10==0:
                print(self.w,self.b)
            z = x @ self.w + self.b # (z = xw + b)
            a = self.sigmoid(z)
            loss = self.BCE(y,a)
         
            da = -(y/a - (1-y)/(1-a))
            dz = da * (a - a**2) #dz = a - y
            dw = (x.T @ dz)/batch
            db = dz.sum(axis=0,keepdims=True)/batch

            self.w -= lr*dw
            self.b -= lr*db





In [34]:
np.random.seed(42)
model = LR(2)
x = np.random.rand(64,2)
y = np.random.rand(64,1)
model.train(x,y)

[[0.36002527]
 [0.93590538]] [[0.70417022]]
[[0.34425671]
 [0.91976212]] [[0.67396657]]
[[0.32886769]
 [0.90394858]] [[0.64451325]]
[[0.31385917]
 [0.88846648]] [[0.6158102]]
[[0.29923137]
 [0.87331686]] [[0.58785601]]
[[0.28498377]
 [0.85850015]] [[0.56064798]]
[[0.27111518]
 [0.84401614]] [[0.53418209]]
[[0.25762372]
 [0.82986402]] [[0.50845315]]
[[0.24450688]
 [0.81604238]] [[0.4834548]]
[[0.23176154]
 [0.80254927]] [[0.4591796]]


In [70]:
# build a simple logistic regression from scratch and use gradient descent

class MLP:
    def __init__(self,layer_sizes=[]) -> None:
        # z = x1 * w1 + x2 * w2 + b
        self.layer_sizes = layer_sizes
        self.w = [np.random.rand(layer_sizes[i],layer_sizes[i+1]) for i in range(len(layer_sizes)-1)]
        self.b = [np.random.rand(1,layer_sizes[i+1]) for i in range(len(layer_sizes)-1)]

    
    def fpass(self,x):
        z = x @ self.w + self.b
        a = self.sigmoid(z)
        return a
    
    def sigmoid(self,z):
        return 1/(1+np.exp(-z))

    def sigmoid_prime(self,z):
        return z*(1-z)
    
    def relu(self,z):
        return np.maximum(0,z)
    
    def relu_prime(self,z):
        return np.where(z>0,1,0)
    
    def tanh(self,z):
        return np.tanh(z)

    def BCE(self,target,predicted):
        return -(target*np.log(predicted) + (1-target)*np.log(1-predicted))
    
    def BCE_prime(self,target,predicted):
        return -(target/predicted - (1-target)/(1-predicted))
    
    def train(self,x,y,epoch=100,lr=0.01):
        batch = x.shape[0]
        for _ in range(1,epoch+1):
            if _%10==0:
                print(self.w[0],self.b[0])
            a = []
            z = []
            a.append(x)
            layers = len(self.layer_sizes)-1
            for i in range(layers):
                z.append(a[i] @ self.w[i] + self.b[i])
                if i == layers-1:
                    if self.layer_sizes[i+1] == 1:
                        a.append(self.sigmoid(z[i]))
                    else:
                        a.append(self.relu(z[i]))
                else:
                    a.append(self.relu(z[i]))
            loss = self.BCE(y,a[-1])



            # z1 = x @ self.w[0] + self.b[0] # (z = xw + b)--> (batch,inp) @ (inp,hidden) = (batch,hidden)
            # a1 = self.relu(z1) # (batch,hidden)
            # z2 = a1 @ self.w[1] + self.b[1] # (batch,hidden) @ (hidden,out) = (batch,out)
            # a2 = self.sigmoid(z2)
            # loss = self.BCE(y,a2)
         
            
            for i in range(layers,-1,-1):
                print(i)
                if i == layers:
                    da = self.BCE_prime(y,a[i])/batch
                    dz = da * self.sigmoid_prime(a[i])
                else:
                    da = dz @ self.w[i].T
                    print(da.shape,z[i].shape,i)
                    dz = da * self.relu_prime(z[i])
                dw = (a[i-1].T @ dz)
                db = dz.sum(axis=0,keepdims=True)
                
                self.w[i-1] -= lr*dw
                self.b[i-1] -= lr*db

            # da2 = self.BCE_prime(y,a2)/batch
            # dz2 = da2 * self.sigmoid_prime(a2) #dz = a - y
            # dw2 = (a1.T @ dz2)
            # db2 = dz2.sum(axis=0,keepdims=True)

            # da1 = dz2 @ self.w[1].T # (batch,out(right)) @ (out(right),hidden(left)) = (batch,hidden)
            # dz1 = da1 * self.relu_prime(z1) #dz = 0 if z<=0 else 1
            # dw1 = (x.T @ dz1)
            # db1 = dz1.sum(axis=0,keepdims=True)
            

            # self.w[0] -= lr*dw1
            # self.b[0] -= lr*db1
            # self.w[1] -= lr*dw2
            # self.b[1] -= lr*db2





In [71]:
np.random.seed(42)
model = MLP([2,3,1])
x = np.random.rand(64,2)
y = np.random.rand(64,1)
model.train(x,y)

(64, 3) (64, 1) 1
(64, 2) (64, 3) 0


ValueError: operands could not be broadcast together with shapes (64,2) (64,3) 

In [57]:
np.random.seed(42)
model = MLP([2,3,1])
x = np.random.rand(64,2)
y = np.random.rand(64,1)
model.train(x,y)

[[0.37384904 0.93556763 0.72177065]
 [0.59797668 0.14108111 0.14591216]] [[ 0.70665659 -0.01043557  0.94897219]]
[[0.37393544 0.91997898 0.71195717]
 [0.59806113 0.12578111 0.13628008]] [[ 0.70683177 -0.04218498  0.92898436]]
[[0.37477155 0.90567136 0.70362157]
 [0.59887699 0.11181432 0.12814274]] [[ 0.70852453 -0.07116322  0.91210107]]
[[0.37620339 0.89260767 0.69660276]
 [0.60026635 0.09913935 0.12133036]] [[ 0.71140889 -0.09747025  0.89795894]]
[[0.37808528 0.88089085 0.69073661]
 [0.60208194 0.08831482 0.11566986]] [[ 0.71518381 -0.11923351  0.88619056]]
[[0.38028622 0.87030132 0.6858639 ]
 [0.60419343 0.0786862  0.11099452]] [[ 0.71958521 -0.13857553  0.87644556]]
[[0.38269093 0.86083939 0.68183967]
 [0.60648723 0.0703608  0.10715542]] [[ 0.72438331 -0.1552477   0.86841572]]
[[0.38520277 0.85233609 0.67853114]
 [0.6088697  0.06301402 0.10401698]] [[ 0.72938906 -0.17012118  0.86182221]]
[[0.38774252 0.84475951 0.67582162]
 [0.61126487 0.05652231 0.10146144]] [[ 0.73444945 -0.183194