In [1]:
from neural_network import *
from activation_functions import *
from loss_functions import *
from layers import *
import random

# Optional reproducibility switch: uncomment the three lines below to seed both
# random sources used in this notebook (NumPy's global generator and the stdlib
# `random` module). `np` is not imported explicitly in this cell; it is
# presumably re-exported by one of the star imports above.
# seed = 239435901
# np.random.seed(seed)
# random.seed(seed)

In [2]:
# Smallest model in the notebook: one input feeding a single neuron with an
# identity activation, scored with MC_MSE and compiled with full validation on.
single_neuron_model = (
    Neural_Network(
        num_input_neurons=1,
        loss_function=MC_MSE(),
        batch_size=50,
        learning_rate=10**-1,
        layers=[Neural_Layer(1, Identity_Function())],
    )
    .set_full_validation(True)
    .compile()
)

In [3]:
# Ground-truth line, stored as (slope, intercept), and the interval the inputs
# are sampled from.
coefficients = (-4, 3)
x_range = (-5, 5)


def generate_data(size):
    """Sample `size` noiseless points from the ground-truth line.

    Inputs are drawn uniformly from `x_range`; targets are
    `coefficients[0] * X + coefficients[1]`.

    Returns:
        (X, Y): two arrays, each of shape (1, size).
    """
    lower, upper = x_range
    slope, intercept = coefficients
    X = np.random.uniform(lower, upper, size=(1, size))
    Y = slope * X + intercept
    return X, Y

In [4]:
# Baseline: loss of the model before any training call, on 10,000 new samples.
single_neuron_model.evaluate_model_on_test_data(*generate_data(10_000))

124.25797924866286

In [5]:
# for _ in range(10_000):
#     single_neuron_model.train_model_on_minibatch(
#         *generate_data(50)
#     )

In [6]:
# Second evaluation on a fresh sample. The training loop in the previous cell is
# commented out, so no training call has run between the two evaluations; the
# two numbers differ only because the test data is re-drawn.
single_neuron_model.evaluate_model_on_test_data(*generate_data(10_000))

126.53735296845707

In [7]:
# Show the weights and bias of the model's only layer.
single_neuron_model.get_layers()[0].print_parameters()

Weights (1, 1):   [[-0.24260702]]
Bias (1,):   [0.1]


In [8]:
def make_model_linear(
    num_inputs: int = 3,
    num_outputs: int = 2,
    batch_size: int = 5,
    learning_rate: float = 10**-7,
) -> Neural_Network:
    """Build and compile a one-layer network with identity activation.

    Called with no arguments this produces exactly the model the notebook has
    always used (3 inputs, 2 outputs, batch size 5, learning rate 10**-7); the
    parameters exist so other experiments can reuse the factory.

    Args:
        num_inputs: number of input neurons.
        num_outputs: number of neurons in the single layer.
        batch_size: minibatch size the network is compiled for.
        learning_rate: step size handed to the network.
            NOTE(review): with the default 10**-7 the 10**5-step training loop
            later in this notebook barely moves the test loss (about 58.6
            before, about 57.2 after) - consider passing a larger value.

    Returns:
        The compiled Neural_Network, with full validation enabled.

    Raises:
        AssertionError: if the compiled network does not report the requested
            input size on the network or on its first layer.
    """
    model = Neural_Network(
        num_input_neurons = num_inputs,
        loss_function = MC_MSE(),
        batch_size = batch_size,
        learning_rate = learning_rate,
        layers = [
            Neural_Layer(num_outputs, Identity_Function()),
        ]
    )\
        .set_full_validation(True)\
        .compile()

    # Sanity checks: compile() must have propagated the input size to the
    # network and to its first layer.
    assert model._num_input_neurons == num_inputs
    assert model._layers[0]._input_size == num_inputs

    return model


In [9]:
# Build the "teacher" network whose outputs the learner will later try to match,
# then overwrite its first layer's parameters by hand.
target_linear_network = make_model_linear()

# Disabled earlier variant. Note its shape (3,) does not match the (2,) bias
# assigned below.
# target_linear_network._layers[0]._bias_v = np.random.uniform(-3, 3, (3,))

# Bias: one value drawn from the stdlib RNG and copied into both entries.
target_linear_network._layers[0]._bias_v = np.full((2,), random.uniform(-0.5, 0.5))
# Weights: a (2, 3) matrix drawn uniformly from [-3, 3) with NumPy's RNG.
target_linear_network._layers[0]._weights_m = np.random.uniform(-3, 3, (2,3))

# The hand-written arrays must have the shapes the layer itself declares.
# (`_wieghts_m_dimensions` is spelled this way here; presumably that is the
# attribute's actual name in the layer class.)
assert target_linear_network._layers[0]._bias_v.shape == target_linear_network._layers[0]._bias_v_dimensions
assert target_linear_network._layers[0]._weights_m.shape == target_linear_network._layers[0]._wieghts_m_dimensions


# Display the parameters that were just installed.
target_linear_network.get_layers()[0].print_parameters()

Weights (2, 3):   [[ 1.6071369  -1.0380957  -1.15683498]
 [ 1.25064229 -2.09613302 -0.95908897]]
Bias (2,):   [-0.32639901 -0.32639901]


In [10]:
# The learner: a second network built by the same factory as the target, left
# with whatever parameters compile() gave it.
learner_linear_network = make_model_linear()

In [11]:
x_range = (-5, 5)


def generate_data(size):
    """Draw `size` random 3-feature inputs and label them with the target network.

    This definition replaces the one-dimensional `generate_data` used for the
    single-neuron experiment earlier in the notebook.

    Returns:
        (X, Y): X has shape (3, size), sampled uniformly from `x_range`;
        Y is `target_linear_network.make_predicitons(X)`.
    """
    lower, upper = x_range
    X = np.random.uniform(lower, upper, size=(3, size))
    return X, target_linear_network.make_predicitons(X)

In [12]:
# Loss of the learner before any training, on 10,000 samples labelled by the
# target network.
learner_linear_network.evaluate_model_on_test_data(*generate_data(10_000))

58.600536151791644

In [13]:
# Train the learner on freshly generated 5-sample minibatches. The value
# returned by train_model_on_minibatch is printed every 10**4 steps (starting
# with step 0) and once more after the final step.
total_steps = 10**5
report_interval = 10**4

for step in range(total_steps):
    minibatch = generate_data(5)
    loss = learner_linear_network.train_model_on_minibatch(*minibatch)
    if not step % report_interval:
        print(loss)

print(loss)


69.42727441782147
50.92150064903568
55.68099327360042
23.49771822959499
41.87793057614715
26.672075961546422
40.702999026713066
112.91011277983455
87.44533678270092
66.7594474983633
68.86401135477256


In [14]:
# Loss of the learner after the training loop above, on a fresh 10,000-sample set.
learner_linear_network.evaluate_model_on_test_data(*generate_data(10_000))

57.174545300973115

In [15]:
# Same dimensions as the networks produced by make_model_linear():
# batches of 5 samples, 3 inputs, 2 outputs.
batch_size, input_size, output_size = 5, 3, 2

# Two free-standing identity layers (target first, then training) that will be
# driven by hand next to the full networks.
target_layer, training_layer = (
    Neural_Layer(
        number_neurons=output_size,
        activation_function=Identity_Function(),
    )
    for _ in range(2)
)

# Presumably discards whatever the earlier training loop changed in the learner.
learner_linear_network.reset_parameters()

# Give both stand-alone layers their batch and input sizes, then compile them.
for layer in (target_layer, training_layer):
    (
        layer
        .set_batch_size(batch_size)
        .set_input_size(input_size)
        .compile()
    )



def add_message(some_function, message):
    """Wrap `some_function` so that `message` is printed before every call.

    Args:
        some_function: any callable (plain function or bound method).
        message: value passed to `print` each time the wrapper is invoked.

    Returns:
        A wrapper that forwards all positional and keyword arguments to
        `some_function` and returns its result unchanged. The wrapper carries
        the wrapped callable's metadata (name, qualified name, docstring,
        annotations) and exposes the original callable as `__wrapped__`.
    """
    # Imported locally so the cell does not depend on the notebook's import cell.
    import functools

    # functools.wraps copies the standard metadata and silently skips any
    # attribute the callable does not have, instead of raising AttributeError.
    @functools.wraps(some_function)
    def wrapper(*args, **kwargs):
        print(message)
        return some_function(*args, **kwargs)

    return wrapper




# Instrument the four layers of interest: from now on, any call to a layer's
# initialise_random_parameters prints a line saying which layer it was.
for traced_layer, notice in (
    (training_layer, "reseting training layer parameters"),
    (target_layer, "reseting target layer parameters"),
    (learner_linear_network._layers[0], "reseting learner network first layer parameters"),
    (target_linear_network._layers[0], "reseting target network first layer parameters"),
):
    traced_layer.initialise_random_parameters = add_message(
        traced_layer.initialise_random_parameters, notice
    )


# Stand-alone loss object for the hand-driven layers, configured with the same
# batch and output sizes.
loss_function = MC_MSE()

# Kept as the cell's final expression so the notebook still displays its value.
(
    loss_function
    .set_batch_size(batch_size)
    .set_vector_size(output_size)
    .compile()
)

<loss_functions.MC_MSE at 0x1abcbe44510>

In [16]:
# Settings for the hand-driven comparison in the following cells.
x_range = (-5, 5)
# Step size passed to training_layer.update_parameters() in later cells.
learning_rate = 2**-7
# target_bias_range = (-0.5, 0.5)
# NOTE(review): not referenced anywhere else in the visible notebook, and both
# bounds are equal.
target_bias_range = (0.5, 0.5)


# Point the stand-alone layers at the networks' parameter arrays.
# NOTE(review): these are plain attribute assignments, so (unless the attributes
# are properties that copy) each pair now refers to the SAME array object. The
# equality asserts below and in later cells are then true by construction, and
# an in-place update through either owner is seen by both - confirm this is the
# intended setup.
target_layer._bias_v = target_linear_network._layers[0]._bias_v
target_layer._weights_m = target_linear_network._layers[0]._weights_m
training_layer._bias_v = learner_linear_network._layers[0]._bias_v
training_layer._weights_m = learner_linear_network._layers[0]._weights_m


# Element-wise equality of parameters between each layer and its network twin.
assert (target_layer._bias_v == target_linear_network._layers[0]._bias_v).all()
assert (target_layer._weights_m == target_linear_network._layers[0]._weights_m).all()
assert (training_layer._bias_v == learner_linear_network._layers[0]._bias_v).all()
assert (training_layer._weights_m == learner_linear_network._layers[0]._weights_m).all()

# Mark both stand-alone layers as first layers.
target_layer.set_is_first_layer(True)
training_layer.set_is_first_layer(True)

assert target_layer._input_size == target_linear_network._layers[0]._input_size
assert training_layer._input_size == learner_linear_network._layers[0]._input_size



# Whole-object comparison. These are distinct objects, so this presumably
# relies on Neural_Layer defining its own equality - verify in the layer class.
assert target_layer == target_linear_network._layers[0]
assert training_layer == learner_linear_network._layers[0]



In [17]:
# Draw one random input batch of shape (input_size, batch_size) and display it.
# The next cell overwrites X with a fixed array.
X = np.random.uniform(low=x_range[0], high=x_range[1], size=(input_size, batch_size))
X

array([[ 3.4346986 , -2.07392311, -2.14960737, -0.84069387, -3.39811677],
       [-1.82255328, -0.15211496, -3.3967375 ,  1.26602168, -1.06374915],
       [-2.15907661,  3.61869114, -3.69000536, -1.08420678,  1.30589283]])

In [18]:
# One manual training step, run in lock-step through the stand-alone layers
# (suffix _L) and through the full networks (suffix _N). After every stage the
# two code paths are asserted to agree.

# Fixed input batch (3 features x 5 samples) so the check is repeatable.
X = np.array([[ 4.10769364,  4.67115005, -4.58840154,  4.02854707, -2.41245063],
       [-3.97686015,  4.80541106,  4.37762451, -1.90813116,  1.07510244],
       [-0.54072051,  4.69063895, -0.85679802,  2.87590576, -1.50484245]])

# --- Targets ---------------------------------------------------------------
Y_L = target_layer.foreward_propagate(X)

# NOTE(review): an earlier attempt used
#     Y_N = learner_linear_network.make_predicitons(X)
# and was recorded as failing for an unknown reason. Presumably the cause is
# that it queried the *learner* network, whose output is not expected to equal
# the target layer's output checked below - confirm before re-enabling.
Y_N, _ = target_linear_network._foreward_propagate(X)

assert (training_layer._bias_v == learner_linear_network._layers[0]._bias_v).all()
assert (training_layer._weights_m == learner_linear_network._layers[0]._weights_m).all()

assert Y_L.shape == Y_N.shape, f"{Y_L.shape} != {Y_N.shape}"
assert np.equal(Y_L, Y_N).all()

# --- Predictions -----------------------------------------------------------
P_L = training_layer.foreward_propagate(X)
P_N, _ = learner_linear_network._foreward_propagate(X)

assert np.equal(P_L, P_N).all()

# --- Loss and loss gradient ------------------------------------------------
loss_L = loss_function.compute_loss(P_L, Y_L)
dldP_L = loss_function.compute_loss_gradient()

# The network-side loss is computed from the network's own predictions and
# targets, so loss_N really reflects the network path rather than repeating the
# layer path.
loss_N = learner_linear_network._loss_function.compute_loss(P_N, Y_N)
dldP_N = learner_linear_network._loss_function.compute_loss_gradient()

# Both paths must agree on the loss value and on its gradient.
assert np.allclose(loss_L, loss_N), f"{loss_L} != {loss_N}"
assert np.allclose(dldP_L, dldP_N)

# --- Backward pass ---------------------------------------------------------
training_layer.back_propagate(dldP_L)

learner_linear_network._foreward_propagate(X, Y_N)
learner_linear_network._back_propogate()

# --- Parameter update ------------------------------------------------------
training_layer.update_parameters(learning_rate)
learner_linear_network._update_parameters()


In [19]:
# Lock-step training: every iteration performs the same manual step through the
# stand-alone layers (suffix _L) and through the full networks (suffix _N),
# asserting that both paths agree. Training stops after max_iterations, or as
# soon as the loss on a fixed validation set is higher than at the previous
# check.

# Disabled alternative stopping rule (rolling mean of recent losses).
# rolling_window = 100
# rolling_losses = [None for _ in range(rolling_window)]

loss_check_rate = 50   # iterations between validation checks
report_rate = 10**2    # iterations between progress lines
last_loss = None
validation_data = generate_data(10000)
max_iterations = 10**5
loss_increased = False

i = 0
while i < max_iterations and not loss_increased:
    i += 1

    X = np.random.uniform(low=x_range[0], high=x_range[1], size=(input_size, batch_size))

    # --- Targets -----------------------------------------------------------
    Y_L = target_layer.foreward_propagate(X)

    # NOTE(review): an earlier attempt used
    #     Y_N = learner_linear_network.make_predicitons(X)
    # and was recorded as failing for an unknown reason. Presumably the cause
    # is that it queried the *learner* network, whose output is not expected to
    # equal the target layer's output checked below - confirm before re-enabling.
    Y_N, _ = target_linear_network._foreward_propagate(X)

    assert (training_layer._bias_v == learner_linear_network._layers[0]._bias_v).all()
    assert (training_layer._weights_m == learner_linear_network._layers[0]._weights_m).all()

    assert Y_L.shape == Y_N.shape, f"{Y_L.shape} != {Y_N.shape}"
    assert np.equal(Y_L, Y_N).all()

    # --- Predictions -------------------------------------------------------
    P_L = training_layer.foreward_propagate(X)
    P_N, _ = learner_linear_network._foreward_propagate(X)

    assert np.equal(P_L, P_N).all()

    # --- Loss and loss gradient --------------------------------------------
    loss_L = loss_function.compute_loss(P_L, Y_L)
    dldP_L = loss_function.compute_loss_gradient()

    # The network-side loss is computed from the network's own predictions and
    # targets, so the "network loss" printed below really reflects that path.
    loss_N = learner_linear_network._loss_function.compute_loss(P_N, Y_N)
    dldP_N = learner_linear_network._loss_function.compute_loss_gradient()

    # Both paths must agree on the loss value and on its gradient.
    assert np.allclose(loss_L, loss_N), f"{loss_L} != {loss_N}"
    assert np.allclose(dldP_L, dldP_N)

    # --- Backward pass and parameter update --------------------------------
    training_layer.back_propagate(dldP_L)

    learner_linear_network._foreward_propagate(X, Y_N)
    learner_linear_network._back_propogate()

    training_layer.update_parameters(learning_rate)
    learner_linear_network._update_parameters()

    # Disabled alternative stopping rule (see top of cell).
    # if all(element is not None for element in rolling_losses):
    #     previous_mean_loss = sum(rolling_losses) / rolling_window
    #     rolling_losses[i%rolling_window] = learner_linear_network.evaluate_model_on_test_data(*generate_data(10_000))
    #     new_mean_loss = sum(rolling_losses) / rolling_window
    #     if new_mean_loss > previous_mean_loss:
    #         print("ending training as loss increasing")
    #         break
    # else:
    #     rolling_losses[i%rolling_window] = learner_linear_network.evaluate_model_on_test_data(*generate_data(10_000))

    # --- Early stopping on the fixed validation set ------------------------
    if i % loss_check_rate == 0:
        new_loss = learner_linear_network.evaluate_model_on_test_data(*validation_data)

        if last_loss is not None:
            if new_loss > last_loss:
                print("ending training as loss increasing")
                loss_increased = True

        last_loss = new_loss

    # --- Progress report ---------------------------------------------------
    if i % report_rate == 0:
        print(f"iteration {i}: layer loss was {loss_L:.8f} and network loss was {loss_N:.8f}")

# Final summary. Skipped when the last iteration already printed this exact
# line (i is a multiple of report_rate), which previously produced a duplicate.
if i % report_rate != 0:
    print(f"iteration {i}: layer loss was {loss_L:.8f} and network loss was {loss_N:.8f}")


iteration 100: layer loss was 10.05995208 and network loss was 10.05995208
iteration 200: layer loss was 0.55128104 and network loss was 0.55128104
iteration 300: layer loss was 0.07743950 and network loss was 0.07743950
iteration 400: layer loss was 0.05621966 and network loss was 0.05621966
iteration 500: layer loss was 0.06168134 and network loss was 0.06168134
iteration 600: layer loss was 0.02800586 and network loss was 0.02800586
ending training as loss increasing
iteration 700: layer loss was 0.03380906 and network loss was 0.03380906
iteration 700: layer loss was 0.03380906 and network loss was 0.03380906


In [20]:
# Final check: loss of the learner on a fresh 10,000-sample set labelled by the
# target network, after the lock-step training loop above.
learner_linear_network.evaluate_model_on_test_data(*generate_data(10_000))

0.03352638316190613