In [1]:
from layers import *
from activation_functions import *
from ml_exceptions import *
from loss_functions import *

In [2]:
# Build the dropout layer under test: 40% dropout rate, batch size 5 and
# input size 5, with compile() called last in the builder chain.
dropout = (
    Dropout_Layer(dropout_rate=0.4)
    .set_batch_size(5)
    .set_input_size(5)
    .compile()
)

In [3]:
# 5x5 sample drawn uniformly between -1 and 1, used to exercise the dropout layer.
X = np.random.uniform(low=-1, high=1, size=(5, 5))

In [4]:
# Forward pass of the 5x5 sample through the dropout layer; as the last
# expression in the cell, the returned array is echoed as the cell output.
# NOTE(review): in the recorded run entire rows come back as zeros, so the mask
# appears to act on whole rows rather than on single elements - confirm against
# the Dropout_Layer implementation.
dropout.foreward_propagate(X)

array([[ 0.26323242,  0.83579902, -0.789571  , -0.17239793, -0.68753686],
       [ 0.        ,  0.        ,  0.        ,  0.        ,  0.        ],
       [ 0.49257682,  0.9658885 , -0.57021181,  0.97732169, -0.38910908],
       [ 0.06838749,  0.7968246 , -0.70529972,  0.94717784,  0.92977961],
       [ 0.        ,  0.        ,  0.        ,  0.        ,  0.        ]])

In [5]:
# Backward pass with an all-ones upstream gradient of the same 5x5 shape.
# NOTE(review): the recorded result is all ones, including the rows that the
# recorded forward pass zeroed - confirm whether back_propagate is meant to
# re-apply the dropout mask to the incoming gradient.
dropout.back_propagate(np.ones((5, 5)))

array([[1., 1., 1., 1., 1.],
       [1., 1., 1., 1., 1.],
       [1., 1., 1., 1., 1.],
       [1., 1., 1., 1., 1.],
       [1., 1., 1., 1., 1.]])

In [6]:
# Dimensions shared by the layer and the loss in the linear-regression demo.
batch_size = 25
input_size = 1
output_size = 1

# One neuron with an identity activation.
linear_regression_model = Neural_Layer(
    number_neurons=output_size,
    activation_function=Identity_Function(),
)
# The chain's return value is deliberately discarded: the variable keeps
# pointing at the layer object created above.
(
    linear_regression_model
    .set_batch_size(batch_size)
    .set_input_size(input_size)
    .compile()
)

# The loss is configured with the same vector and batch sizes; here the
# variable is bound to whatever compile() returns.
loss_function = (
    MC_MSE()
    .set_vector_size(output_size)
    .set_batch_size(batch_size)
    .compile()
)

In [7]:
# NumPy broadcasting lets the same expression serve scalars and arrays alike.
def target_function(X):
    """Return the ground-truth line 3*X - 5 evaluated at X.

    X may be a scalar or a NumPy array; for arrays the arithmetic is applied
    element-wise and the result has the same shape as X.
    """
    slope = 3
    offset = 5
    return slope * X - offset

In [8]:
# Train the single-neuron layer to recover target_function by mini-batch
# gradient descent on freshly sampled inputs.
x_range = (-10, 10)
learning_rate = 2**-8
num_iterations = 10_000   # total number of mini-batches
report_every = 1_000      # progress is printed on iteration 0 and every this many steps

linear_regression_model.initialise_random_parameters()

for i in range(num_iterations):
    # Inputs are laid out as (input_size, batch_size). The leading dimension
    # comes from input_size rather than a literal 1 so the batch stays in step
    # with the size the layer was configured with.
    X = np.random.uniform(low=x_range[0], high=x_range[1], size=(input_size, batch_size))
    Y = target_function(X)
    P = linear_regression_model.foreward_propagate(X)

    # compute_loss is called before compute_loss_gradient, which takes no
    # arguments of its own.
    loss = loss_function.compute_loss(P, Y)
    dldP = loss_function.compute_loss_gradient()

    linear_regression_model.back_propagate(dldP)
    linear_regression_model.update_parameters(learning_rate)

    # The reported loss was computed before this iteration's update, while the
    # printed parameters are the post-update values.
    if i == 0 or (i + 1) % report_every == 0:
        print(f"iteration {i}: loss was {loss:.8f}")
        linear_regression_model.print_parameters()

iteration 0: loss was 211.40863644
Weights (1, 1):   [[1.60271108]]
Bias (1,):   [0.09760447]
iteration 999: loss was 14.31758836
Weights (1, 1):   [[3.05069413]]
Bias (1,):   [-1.27011551]
iteration 1999: loss was 7.46441743
Weights (1, 1):   [[3.01012405]]
Bias (1,):   [-2.2718899]
iteration 2999: loss was 3.96328895
Weights (1, 1):   [[3.02876215]]
Bias (1,):   [-3.00442509]
iteration 3999: loss was 2.11288890
Weights (1, 1):   [[2.98297203]]
Bias (1,):   [-3.54014353]
iteration 4999: loss was 1.19515988
Weights (1, 1):   [[3.0026349]]
Bias (1,):   [-3.93198253]
iteration 5999: loss was 0.61165700
Weights (1, 1):   [[2.99435242]]
Bias (1,):   [-4.21867254]
iteration 6999: loss was 0.32930600
Weights (1, 1):   [[2.99716888]]
Bias (1,):   [-4.42855929]
iteration 7999: loss was 0.17564925
Weights (1, 1):   [[3.00197678]]
Bias (1,):   [-4.58199846]
iteration 8999: loss was 0.09438499
Weights (1, 1):   [[2.99638574]]
Bias (1,):   [-4.69419392]
iteration 9999: loss was 0.04961663
Weights 

In [9]:
# Dimensions for the second experiment: one layer learns to imitate another.
batch_size, input_size, output_size = 50, 3, 2

# Two identically shaped ReLU layers: target_layer supplies the labels,
# training_layer is the one being fitted.
target_layer = Neural_Layer(number_neurons=output_size, activation_function=RELU())
training_layer = Neural_Layer(number_neurons=output_size, activation_function=RELU())

for layer in (target_layer, training_layer):
    layer.set_batch_size(batch_size).set_input_size(input_size).compile()

loss_function = MC_MSE()

# Left as a bare trailing expression so the cell still echoes the object
# returned by compile().
(
    loss_function
    .set_batch_size(batch_size)
    .set_vector_size(output_size)
    .compile()
)

<loss_functions.MC_MSE at 0x22546928e10>

In [10]:
# Fit training_layer to reproduce target_layer's outputs on random inputs and
# periodically report how far its parameters are from the target's.
x_range = (-5, 5)
learning_rate = 2**-6
# target_bias_range = (-0.5, 0.5)
target_bias_range = (0.5, 0.5)
target_weight_range = (-3, 3)

# Initialisation order is kept as-is: training layer first, then the target.
training_layer.initialise_random_parameters()
target_layer.initialise_random_parameters(
    bias_range=target_bias_range,
    weight_range=target_weight_range,
)

# Captured once, before training starts, as the reference to compare against.
target_parameters = target_layer.get_parameters()

x_low, x_high = x_range
for i in range(10**5):
    # Each mini-batch is a fresh (input_size, batch_size) uniform sample.
    X = np.random.uniform(low=x_low, high=x_high, size=(input_size, batch_size))
    Y = target_layer.foreward_propagate(X)
    P = training_layer.foreward_propagate(X)

    loss = loss_function.compute_loss(P, Y)
    dldP = loss_function.compute_loss_gradient()

    training_layer.back_propagate(dldP)
    training_layer.update_parameters(learning_rate)

    # Report on the very first step and then on every 10**4-th step.
    is_report_step = i == 0 or (i + 1) % 10**4 == 0
    if not is_report_step:
        continue

    print(f"iteration {i}: loss was {loss:.8f}")

    # Disabled diagnostics, kept for reference:
    # if isinstance(loss_function, MVC_MSE):
    #     mean_cost, variance_cost = loss_function.get_mean_cost_variance_cost()
    #     print(f"Mean was   {mean_cost:.8f}\nvaraince cost was   {variance_cost:.8f}\nprocessed varaince cost was   {(1+variance_cost)**variance_weighting:.8f}")

    # print(f"dldP =   {dldP}")

    training_parameters = training_layer.get_parameters()
    print("Weights error:")
    weight_error = training_parameters["W"] - target_parameters["W"]
    print(weight_error)
    print("Bias error:")
    bias_error = training_parameters["B"] - target_parameters["B"]
    print(bias_error)

iteration 0: loss was 42.95497457
Weights error:
[[ 1.10230619  0.66341568 -1.27300292]
 [ 1.73720566  2.37953754  2.73149331]]
Bias error:
[-0.39963806 -0.3994734 ]
iteration 9999: loss was 0.00002576
Weights error:
[[-0.00066254  0.00018009  0.00581501]
 [-0.00025353 -0.000918   -0.00037847]]
Bias error:
[-0.01980682 -0.00221177]
iteration 19999: loss was 0.00002989
Weights error:
[[-0.00044001  0.00063218  0.00507129]
 [ 0.00012181 -0.00055555 -0.00018791]]
Bias error:
[-0.01767181 -0.00026947]
iteration 29999: loss was 0.00001767
Weights error:
[[-0.00139192  0.00075914  0.00441723]
 [ 0.00028055  0.00018318  0.00031821]]
Bias error:
[-0.0152989   0.00179921]
iteration 39999: loss was 0.00002027
Weights error:
[[-0.00036307  0.00052979  0.00419521]
 [ 0.00048674  0.00069321  0.0011146 ]]
Bias error:
[-0.01392706  0.00295809]
iteration 49999: loss was 0.00001202
Weights error:
[[-0.00061932  0.00038937  0.00335133]
 [ 0.00043585  0.00068568  0.00093053]]
Bias error:
[-0.01252942  0.

In [11]:
# import matplotlib.pyplot as plt

# def plot_training_statistics(mean_costs, variance_costs, num_minibatches=10_000):
#     window_size=50
#     # Calculate rolling averages
#     rolling_mean_costs = np.convolve(mean_costs, np.ones(window_size)/window_size, mode='valid')
#     rolling_variance_costs = np.convolve(variance_costs, np.ones(window_size)/window_size, mode='valid')

#     # Plotting
#     fig, ax1 = plt.subplots()

#     # Set the limits of x and y axis
#     ax1.set_xlim(0, num_minibatches)
#     ax1.set_ylim(0, 1)
    
#     color = 'tab:red'
#     ax1.set_xlabel('Mini-batch Number')
#     ax1.set_ylabel('Mean Cost', color=color)
#     ax1.plot(rolling_mean_costs, color=color)
#     ax1.tick_params(axis='y', labelcolor=color)

#     ax2 = ax1.twinx()  # Instantiate a second axes that shares the same x-axis

#     # Set the limits for the second y axis
#     ax2.set_ylim(0, 1)
    
#     color = 'tab:blue'
#     ax2.set_ylabel('Variance of Cost', color=color)
#     ax2.plot(rolling_variance_costs, color=color)
#     ax2.tick_params(axis='y', labelcolor=color)

#     # Set spines to be at x=0, y=0
#     ax1.spines['bottom'].set_position('zero')
#     ax1.spines['left'].set_position('zero')

#     # Remove top and right spines
#     ax1.spines['top'].set_visible(False)
#     ax1.spines['right'].set_visible(False)
#     ax2.spines['top'].set_visible(False)
#     ax2.spines['right'].set_visible(False)

#     # Remove padding and margins
#     plt.tight_layout(pad=0)

#     # Set title
#     plt.title(f"Rolling Average of Mean Cost and Variance of Cost Over Training")
#     plt.show()

In [12]:
# def create_experiement_data(variance_weighting, mean_weighting, learning_rate, num_minibatches=10_000):
#     batch_size = 50
#     input_size = 3
#     output_size = 2

#     target_layer = Neural_Layer(
#         number_neurons=output_size,
#         activation_function=RELU()
#     )
#     training_layer = Neural_Layer(
#         number_neurons=output_size,
#         activation_function=RELU()
#     )

#     for layer in (target_layer, training_layer):
#         layer\
#             .set_batch_size(batch_size)\
#             .set_input_size(input_size)\
#             .compile()

#     loss_function = MVC_MSE(
#         variance_weighting = variance_weighting,
#         mean_weighting=mean_weighting
#     )


#     loss_function\
#         .set_batch_size(batch_size)\
#         .set_vector_size(output_size)\
#         .compile()
    
#     x_range = (-5, 5)
#     # target_bias_range = (-0.5, 0.5)
#     target_bias_range = (0.5, 0.5)


#     target_weight_range = (-3, 3)

#     training_layer.initialise_random_parameters()
#     target_layer.initialise_random_parameters(
#         bias_range=target_bias_range,
#         weight_range=target_weight_range
#     )

#     target_parameters = target_layer.get_parameters()

#     varaince_cost_data = []
#     mean_cost_data = []

#     for i in range(num_minibatches):
#         X = np.random.uniform(low=x_range[0], high=x_range[1], size=(input_size, batch_size))
#         Y = target_layer.foreward_propagate(X)
#         P = training_layer.foreward_propagate(X)

#         loss = loss_function.compute_loss(P, Y)
#         dldP = loss_function.compute_loss_gradient()

#         training_layer.back_propagate(dldP)
#         training_layer.update_parameters(learning_rate)

#         mean_cost, variance_cost = loss_function.get_mean_cost_variance_cost()
#         varaince_cost_data.append(variance_cost) 
#         mean_cost_data.append(mean_cost)

#         # if i==0 or (i+1) % 5_000 == 0:
#         #     print(f"iteration {i}: loss was {loss:.8f}")
            
#         #     print(f"Mean was   {mean_cost:.8f}\nvaraince cost was   {variance_cost:.8f}\nprocessed varaince cost was   {(1+variance_cost)**variance_weighting:.8f}")

#         #     # print(f"dldP =   {dldP}")

#         #     training_parameters = training_layer.get_parameters()
#         #     print("Weights error:")
#         #     print(
#         #         training_parameters["W"] - target_parameters["W"]
#         #     )
#         #     print("Bias error:")
#         #     print(
#         #         training_parameters["B"] - target_parameters["B"]
#         #     )

#     return mean_cost_data, varaince_cost_data

In [13]:
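# NOTE(review): this disabled call omits `mean_weighting`, which has no default
# in the commented-out `create_experiement_data` signature; supply it before
# re-enabling this cell.
# plot_training_statistics(
#     *create_experiement_data(
#         variance_weighting=-0.30,
#         learning_rate = 2**-5
#    )
# )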

In [14]:
# plot_training_statistics(
#     *create_experiement_data(
#         variance_weighting=0,
#         mean_weighting=1,
#         learning_rate = 2**-5
#    )
# )

In [15]:
# plot_training_statistics(
#     *create_experiement_data(
#         variance_weighting=1/4,
#         mean_weighting=1,
#         learning_rate = 2**-10
#     )
# )

In [16]:
# plot_training_statistics(
#     *create_experiement_data(
#         variance_weighting=1/2,
#         mean_weighting=1,
#         learning_rate = 2**-8
#     )
# )

In [17]:
# plot_training_statistics(
#     *create_experiement_data(
#         variance_weighting=3/4,
#         mean_weighting=1,
#         learning_rate = 2**-9
#     )
# )

In [18]:
# plot_training_statistics(
#     *create_experiement_data(
#         variance_weighting=1,
#         mean_weighting=1,
#         learning_rate = 2**-10
#     )
# )

In [19]:
# plot_training_statistics(
#     *create_experiement_data(
#         variance_weighting=1,
#         mean_weighting=0,
#         learning_rate=10**-4,
#         num_minibatches=1_000_000
#     ),
#     num_minibatches=1_000_000
# )