In [1]:
%run "000_shared.ipynb"

In [29]:
np.random.seed(0)

class FUtilz:
    ## ===== aggregators 
    @staticmethod
    def identity(x):
        return x
    @staticmethod 
    def linear(x,w,b):
        #print("linear inputs:", np.round(x,3), np.round(w,4), np.round(b,4) )
        xo = np.dot(w.T, x)+b
        #print("Z: ", xo)
        return xo
    
    ## ===== activators
    @staticmethod 
    def relu(x):
        return max(0,x)
    @staticmethod
    def sfunc(x):
        #return  (1 + np.exp(x))/(np.exp(x).sum())
        return (x - (-1))/(1 - (-1))*x
    
    ## ===== output dists
    @staticmethod
    def softmax(inz_arr):
        pz = np.exp(inz_arr) 
        #print(pz.shape)
        pxs = (pz)/np.sum(pz)
        return pxs
        
    ## ===== losses
    @staticmethod
    def hinge_loss(y, yhat, thresh=1e-06, margin=0.15):
        d = np.abs(y-yhat) + margin  > thresh
        #print(d, np.abs(y-yhat), thresh)
        return int( d )
    @staticmethod
    def mse_loss(y, yhat):
        return np.linalg.norm(np.array(y)  - np.array(yhat) )**2
    
class Node:
    def __init__(self, in_weights, agg_func, act_func, bias=0):
        self.in_weights = in_weights
        self.in_bias = bias
        self.agg_func = agg_func
        self.act_func = act_func
        self.value = 0
        
    ## TODO: clean up the calls :/!! AND adjacency/one-hot on input layer??
    def fwd_pass(self, X):
        if self.agg_func is None:
            self.value = X
        elif self.act_func is None:
            self.value = self.agg_func(X, self.in_weights, self.in_bias )
        else:
            self.value = self.act_func( self.agg_func(X, self.in_weights, self.in_bias ) )
        return self.value 
        
    def __str__(self):
        return f"{round(self.value,4)}" #{len(self.in_weights)}-->
    
    
class Layer:
    def __init__(self, n_nodes, n_incoming, 
                 agg_func=None, act_func=None, 
                 weights_0=None, bias_0=None,
                 isInput=False, isSharedWeights=True): 
        self.isInput = isInput
        
        #print(n_nodes, n_incoming,  0.1 * np.random.randn(n_incoming))
        ## setup weights 
        if self.isInput:
            weights = np.eye(n_nodes) #np.array([1])
        else: 
            if weights_0 is not None:
                weights = weights_0 
            else:
                if isSharedWeights:
                    w = 0.1 * np.random.randn(n_incoming)
                    weights = [w for i in range(n_nodes)] 
                else:
                    weights = [ 0.1 * np.random.randn(n_incoming) for i in range(n_nodes) ]  
            weights = np.array(weights) 
        bias=np.zeros(n_nodes) if bias_0 is None else bias_0
        
        ## setup nodes
        #print(weights.shape, bias.shape)
        self.nodes =np.array([ Node(weights[i], agg_func, act_func, bias=bias[i]) for i in range(n_nodes) ] )
        
    def fwd_pass(self, X):
        return np.array([n.fwd_pass(X) for n in self.nodes])
    
    ## TODO: fix who holds this + backprop exec 
    def update_weights(self, weights, biases): 
        for i, n in enumerate(self.nodes):
            n.in_weights = weights[i]
            n.in_bias = biases[i]
    
    @property
    def n_outputs(self):
        return len(self.nodes)
    @property
    def weights(self): 
        return [ (n.in_weights, n.in_bias) for n in self.nodes]
    
    def __str__(self):
        s = [f"n={len(self.nodes)}: "]
        for n in self.nodes:
            s.append(f"[{str(n)}]")            
        return ", ".join(s)
        
class Network:
    def __init__(self, n_inputs, n_hidden, n_outputs, 
                 w_hidden=3, isSharedWeights=False,
                 agg_func=FUtilz.linear, act_func=FUtilz.relu, 
                 out_func=FUtilz.sfunc): 
        
        self.inlayer = Layer( n_inputs, 1, isInput=True, 
                             agg_func=agg_func, act_func=FUtilz.identity)
        
        #self.hiddenlayers = [Layer(w_hidden, n_inputs,  agg_func=agg_func, act_func=act_func,isSharedWeights=isSharedWeights) for i in range(n_hidden)]
        self.hiddenlayers = []
        for i in range(n_hidden):
            n = self.hiddenlayers[i-1].n_outputs if i > 0 else n_inputs 
            self.hiddenlayers.append( Layer(w_hidden, n, 
                                   agg_func=agg_func, act_func=act_func,
                                   isSharedWeights=isSharedWeights) )
            
        self.outlayer = Layer(n_outputs,  self.hiddenlayers[-1].n_outputs, 
                              agg_func=agg_func, act_func=out_func)
        
        self.predz = None
        
    ## TODO: output layer maps per node @ fwd and backprop
    def fwd_pass(self, X):
        ### --- FNN ---
        ## 1. agg and activate 
        x = self.inlayer.fwd_pass(X)
        for h in self.hiddenlayers:
            x = h.fwd_pass(x)
        x = self.outlayer.fwd_pass(x)
        ## 2. pdf
        self.predz = FUtilz.softmax(x)
        return self.predz
    
    def propagate(self, predz, yz):
        ## 3. loss
        loss = FUtilz.mse_loss(yz, predz)
        #print("\n-----\nLOSS: ", loss, "<< ", np.round(predz, 3), yz, "\n-----\n")
        ## 4. gd backprop             
        ## 5. update weights per layer  
        ## FAKING IT FOR NOW
        def update_weights(h, ploss):
            #print(h.weights)
            #w, b = h.weights ### arrrgggg
            W, B = [], []
            for w, b in h.weights:
                r = 0.01 * np.random.rand() 
                b +=  -(ploss.sum() / w.sum()**2)*r 
                if ploss.sum() <= w.mean():
                    w = (w - ploss.sum()/w.max())*r
                w += ((w/w.sum()) + (w/w.max()))*r/w.sum()
                W.append(w)
                B.append(b)
            h.update_weights(W, B)
            return np.array(W)
            
        fkl = update_weights(self.outlayer, loss)
        for i in reversed( range(len(self.hiddenlayers)) ):
            fkl = update_weights( self.hiddenlayers[i], fkl)  
        
        w = [f"\t{i}" for i in self.outlayer.weights]
        w = "\n".join(w)
        return loss, w
        
    def __str__(self): 
        l = [ f"I: {str(self.inlayer)}"]
        for i, h in enumerate(self.hiddenlayers):
            l.append( f"H{(i+1)}: {str(h)}")
        l.append(f"O:  {str(self.outlayer)}")
        
        pz = np.round(self.predz, 4) if self.predz is not None else self.predz
        l.append(f"Px: {str(pz) }" )
        
        w = [f"\t{i}" for i in self.outlayer.weights]
        w = "\n".join(w)
        l.append(f"Wo: {(w)}")
        
        return "\n".join(l)
        

In [54]:
np.random.seed(10)
OK_THRESH = 0.37
N_OUTS = 3
N_INS = 7
EPOCHS = 20

X = np.random.randn(N_INS)*np.random.randint(10)
y = np.random.randint(0,2, size=N_OUTS)

def train_something(tX, ty, n_hidden, n_outs=N_OUTS, w_hidden=3, isSharedWeights=False): 
    net = Network(len(tX), n_hidden, n_outs, w_hidden=w_hidden, isSharedWeights=isSharedWeights)
    #print(net)
    #net.propagate(net.fwd_pass(X), y)
    for i in range(EPOCHS):
        outz = net.fwd_pass(X)
        predz = np.array(np.array(outz) > OK_THRESH, dtype=int)
        loss, w = net.propagate(predz, y) 
        print(f"Epoch {(i+1)}/{EPOCHS}: {loss}") #\n{w}
        if loss <= 1e-05:
            break

    print("\n\n",net)
    print("FIN: p --> y", predz, " --> ", y)

train_something(X,y, n_hidden=2)

Epoch 1/20: 1.0
Epoch 2/20: 1.0
Epoch 3/20: 1.0
Epoch 4/20: 1.0
Epoch 5/20: 1.0
Epoch 6/20: 1.0
Epoch 7/20: 1.0
Epoch 8/20: 1.0
Epoch 9/20: 1.0
Epoch 10/20: 1.0
Epoch 11/20: 1.0
Epoch 12/20: 1.0
Epoch 13/20: 1.0
Epoch 14/20: 1.0
Epoch 15/20: 1.0
Epoch 16/20: 1.0
Epoch 17/20: 1.0
Epoch 18/20: 1.0
Epoch 19/20: 1.0
Epoch 20/20: 1.0


 I: n=7: , [7.9895], [4.2917], [-9.2724], [-0.0503], [3.728], [-4.3205], [1.5931]
H1: n=3: , [0.2799], [0.3681], [0.3199]
H2: n=3: , [0], [0], [0]
O:  n=3: , [-0.09], [-0.1011], [-0.0937]
Px: [0.335  0.3313 0.3337]
Wo: 	(array([ 0.51841654, -0.17360098,  0.38198392]), -0.7694898890544014)
	(array([ 0.51561871, -0.17266408,  0.3799224 ]), -0.7377521972650453)
	(array([ 0.51643337, -0.17293688,  0.38052267]), -0.7591455926252884)
FIN: p --> y [0 0 0]  -->  [0 1 0]


In [55]:
train_something(X, y, n_hidden=6)

Epoch 1/20: 1.0
Epoch 2/20: 1.0
Epoch 3/20: 1.0
Epoch 4/20: 1.0
Epoch 5/20: 1.0
Epoch 6/20: 1.0
Epoch 7/20: 1.0
Epoch 8/20: 1.0
Epoch 9/20: 1.0
Epoch 10/20: 1.0
Epoch 11/20: 2.0000000000000004
Epoch 12/20: 2.0000000000000004
Epoch 13/20: 2.0000000000000004
Epoch 14/20: 1.0
Epoch 15/20: 1.0
Epoch 16/20: 1.0
Epoch 17/20: 1.0
Epoch 18/20: 1.0
Epoch 19/20: 1.0
Epoch 20/20: 1.0


 I: n=7: , [7.9895], [4.2917], [-9.2724], [-0.0503], [3.728], [-4.3205], [1.5931]
H1: n=3: , [0], [1.2237], [0]
H2: n=3: , [14.6866], [126.3177], [0]
H3: n=3: , [0], [30204.186], [0]
H4: n=3: , [68803.287], [10487.1033], [0]
H5: n=3: , [0], [9369.9039], [0]
H6: n=3: , [0], [163.6912], [2616.6929]
O:  n=3: , [3778.458], [3268.4227], [4212.848]
Px: [nan nan nan]
Wo: 	(array([0.43006021, 0.25130627, 0.01797112]), -0.6019875106320232)
	(array([0.41322876, 0.24147079, 0.01726778]), -0.6074435260165008)
	(array([0.4632241 , 0.27068563, 0.01935695]), -0.6656433235088529)
FIN: p --> y [0 0 0]  -->  [0 1 0]
