In [1]:
import numpy as np
import matplotlib.pyplot as plt
import tensorflow as tf
from tensorflow.keras.layers import Dense, Input
from tensorflow.keras import Sequential
from tensorflow.keras.losses import MeanSquaredError, BinaryCrossentropy
from tensorflow.keras.activations import sigmoid
import logging
logging.getLogger("tensorflow").setLevel(logging.ERROR)
tf.autograph.set_verbosity(0)
from collections import namedtuple

2023-05-23 09:07:10.827876: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  SSE4.1 SSE4.2 AVX
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.


In [2]:
def sigmoid(z):
    z = np.clip( z, -500, 500 ) #protect against overflow
    return 1 / (1 + np.exp(-z))

def init_weights(weights_amount, neurons_amount, randomize = False):
    '''
    Params:
        weights_amount  : how many weights do we have for one neuron, equals to amount of inputs
        neurons_amount  : how many neurons do we have in a layer
    Returns:
        [M x N] matrix of ones or randoms, where
            M (row count): the amount of neurons
            N (col count): the amount of weighs
    '''
    size = (neurons_amount, weights_amount)
    return np.random.default_rng().standard_normal(size) if randomize else np.ones(size)

def init_biases(neurons_amount, randomize = False):
    size = (neurons_amount, 1)
    return np.random.default_rng().standard_normal(size) if randomize else np.ones(size)

In [3]:
def create_coffee_dataset():
    """
    Copied from lab_coffee_utils
    """
    rng = np.random.default_rng(2)
    X = rng.random(400).reshape(-1,2)
    X[:,1] = X[:,1] * 4 + 11.5          # 12-15 min is best
    X[:,0] = X[:,0] * (285-150) + 150  # 350-500 F (175-260 C) is best
    Y = np.zeros(len(X))
    
    i=0
    for t,d in X:
        y = -3/(260-175)*t + 21
        if (t > 175 and t < 260 and d > 12 and d < 15 and d<=y ):
            Y[i] = 1
        else:
            Y[i] = 0
        i += 1

    return (X, Y.reshape(-1,1))

In [4]:
X,Y = create_coffee_dataset()

# Create normalization layer
# Can be used to normalized other values, not from existing dataset
norm_l = tf.keras.layers.Normalization(axis=-1)
norm_l.adapt(X)

2023-05-23 09:07:24.112121: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  SSE4.1 SSE4.2 AVX
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.


In [5]:
Layer = namedtuple("Layer", ["W", "b", "a_in", "a_out", "z"])
LayerBack = namedtuple("LayerBack", ["dw", "da", "db"])

In [6]:
def execute_layer(W, b, a_in) -> Layer:
    function_input = np.matmul(W, a_in) + b
    function_out = sigmoid(function_input)
    return Layer(W, b, a_in, function_out, function_input)

def sigmoid_derivative(a):
    return sigmoid(a) * (1 - sigmoid(a))

def layer_derivatives(l: Layer) -> LayerBack:    
    dz = sigmoid_derivative(l.z)    
    print(f'A_In = {l.a_in.shape}, W = {l.W.shape}, dz = {dz.shape}')

    dw = l.a_in * dz
    da = (l.W.T) * dz
    db = dz
    return LayerBack(dw, da, db)

In [11]:
# First layer, 3 neurons, 2 inputs X1 and X2
W0 = init_weights(2, 3, True)
B0 = init_biases(3, True)

# Second layer, 1 neuron, 3 inputs from previous layer
W1 = init_weights(3, 1, True)
B1 = init_biases(1, True)

# normalize inputs and convert to numpy
sample_data = norm_l(X).numpy().T

# print(W0)
# print(B0)
# print(sample_data)

epochs = 1

for i in range(epochs):
    layer1 = execute_layer(W0, B0, sample_data)
    layer2 = execute_layer(W1, B1, layer1.a_out)
    diff = Y.T - layer2.a_out
    print(diff.shape)
    layer2_back = layer_derivatives(layer2)
    layer1_back = layer_derivatives(layer1)
    W1 = W1 - diff * layer2_back.dw
    B1 = B1 - diff * layer2_back.db



(1, 200)
A_In = (3, 200), W = (1, 3), dz = (1, 200)
A_In = (2, 200), W = (3, 2), dz = (3, 200)


ValueError: operands could not be broadcast together with shapes (2,200) (3,200) 

In [None]:
a = np.array([
    [1, 2],
    [3, 4],
    [ 5, 6]
])
b = np.array([[1], [2], [3]])
c = np.array([[1, 2, 3, 4, 5, 6, 7, 8, 9, 10]])
print(b.shape, c.shape)
c * b


In [10]:
a = np.array([[1], [2], [3]])
b = np.array([10, 100])
b * a

array([[ 10, 100],
       [ 20, 200],
       [ 30, 300]])