[https://colab.research.google.com/drive/1VpeE6UvEPRz9HmsHh1KS0XxXjYu533EC]

# Imports

In [1]:
import numpy as np
import matplotlib.pyplot as plt
from keras.datasets import mnist
import pandas as pd
from scipy import signal
import keras
import numpy as np
pd.set_option('display.max_rows', None)
pd.set_option('display.max_colwidth', None)
pd.set_option('expand_frame_repr', False)

from optimizers.sgd import SGD
from optimizers.adam import Adam
# from network import Network
# from layers.conv2d import Conv2D
# from layers.dense import FCLayer
# from layers.activation import ActivationLayer, SoftmaxLayer, tanh, tanh_prime, softmax, softmax_prime, relu, relu_prime, sigmoid, sigmoid_prime
# from layers.flatten import FlattenLayer
from losses import mse, categorical_crossentropy, binary_crossentropy, mae
from layers import Conv2D, FCLayer, ActivationLayer, FlattenLayer, DropoutLayer, PoolingLayer, GlobalAveragePoolingLayer, RNN, MaxPool2D
from autograd import tanh, relu, sigmoid, softmax, Module, Tensor
from utils import draw_computation_graph

2024-05-22 22:05:48.051316: I tensorflow/core/util/port.cc:113] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.
2024-05-22 22:05:48.225933: E external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:9261] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
2024-05-22 22:05:48.225974: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:607] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
2024-05-22 22:05:48.255042: E external/local_xla/xla/stream_executor/cuda/cuda_blas.cc:1515] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
2024-05-22 22:05:48.314827: I tensorflow/core/platform/cpu_feature_guar

In [2]:
def visualize_test_samples(x_test,y_test,samples,network):
  for test, true in zip(x_test[:samples], y_test[:samples]):
    pred = network.predict([test])[0][0]
    idx = np.argmax(pred)
    idx_true = np.argmax(true)
    plt.title('pred: %s, prob: %.2f, true: %d' % (idx, pred[idx], idx_true))
    plt.imshow(test, cmap='binary')
    plt.show()

In [3]:
def adjust_data(image,is_conv=False):
  image = np.cast['float32'](image)
  if is_conv:
    image = np.expand_dims(image,axis=-1)
  image/=255
  return image

In [4]:
(x_train, y_train), (x_test, y_test) = mnist.load_data()

In [5]:
x_train = adjust_data(x_train,is_conv=True)
y_train = keras.utils.to_categorical(y_train)
x_test = adjust_data(x_test,is_conv=True)
y_test = keras.utils.to_categorical(y_test)

In [5]:
x_train.shape, y_train.shape, x_test.shape, y_test.shape

((60000, 28, 28), (60000,), (10000, 28, 28), (10000,))

In [7]:
# def create_batches(data, labels, batch_size):
#     num_batches = int(len(data) / batch_size)
#     data_batches = []
#     label_batches = []
    
#     for i in range(num_batches):
#         start = i * batch_size
#         end = min((i + 1) * batch_size, len(data))
#         data_batches.append(data[start:end])
#         label_batches.append(labels[start:end])
    
#     return np.array(data_batches), np.array(label_batches)

In [8]:
# batch_size = 32
# x_train_batches, y_train_batches = create_batches(x_train, y_train, batch_size)
# x_test_batches, y_test_batches = create_batches(x_test, y_test, batch_size)

In [9]:
# x_train_batches.shape, y_train_batches.shape, x_test_batches.shape, y_test_batches.shape

In [10]:
# x_train_batches = adjust_data(x_train_batches,True)
# y_train_batches = keras.utils.to_categorical(y_train_batches)
# x_test_batches = adjust_data(x_test_batches,True)
# y_test_batches = keras.utils.to_categorical(y_test_batches)

In [11]:
# x_train_batches.shape, y_train_batches.shape, x_test_batches.shape, y_test_batches.shape

# SGD

In [12]:
class Model(Module):
    def __init__(self) -> None:
        self.conv = Conv2D(filters=3, kernel_size=5, padding='valid')
        self.maxpool = MaxPool2D(pool_size=2, strides=1)
        self.flatten = FlattenLayer()
        self.linear1 = FCLayer(output_dim=20)
        self.linear2 = FCLayer(output_dim=10)

    def forward(self, inputs: Tensor, training=True) -> Tensor:
        x = self.conv(inputs, training=training)
        x1 = self.maxpool(x)
        x2 = self.flatten(x1, training=training)
        x3 = self.linear1(x2, training=training)
        x4 = tanh(x3)
        x5 = self.linear2(x4, training=training)
        x6 = tanh(x5)
        x7 = softmax(x6)
        return x7

optimizer = SGD(learning_rate=0.001)
batch_size = 32
model = Model()
x_train = Tensor(x_train)
y_train = Tensor(y_train)
i = 0
# print(x_train.shape)
# train_data = x_train.shape[0]
train_data = 1000

for epoch in range(50):
    epoch_loss = 0.0
    # print(f'before: {x_train.shape}')
    for start in range(0, train_data, batch_size):
        end = start + batch_size

        model.zero_grad()

        inputs = x_train[start:end]

        predicted = model(inputs)
        actual = y_train[start:end]
        # print(predicted.shape, actual.shape)
        loss = categorical_crossentropy(y_true=actual, y_pred=predicted)
        if i==0:
            draw_computation_graph(loss)
        i+=1
        # for param in model.parameters():
        #     print(param.name, param.id)
        # print(start, loss)
        loss.backward()
        epoch_loss += loss.data

        optimizer.step(model)
    # print(f'after: {x_train.shape}')

    print(epoch, epoch_loss)

0 2048.0813414951094
1 1707.7629585846835
2 1553.6914227549987
3 1448.9663620719796
4 1381.9981000480261
5 1309.722369372564
6 1254.9350448028144
7 1213.7559963481597
8 1173.7931039650628
9 1137.6871342093411
10 1110.7935089069817
11 1092.8674974686126
12 1073.8671768510292
13 1058.3378667344193
14 1045.5894116737684
15 1027.979069781539
16 1016.6574633903596
17 1006.551125824417
18 997.4419170206762


KeyboardInterrupt: 

# RNN

In [2]:
np.random.seed(42)

def create_dataset(num_sequences, sequence_length):
    return np.array([np.arange(start, start + sequence_length) for start in range(num_sequences)])

# Helper function to create inputs and targets from the sequences
def create_inputs_targets(data):
    X = data[:, :-1]  # all but the last item in each sequence as input
    Y = data[:, -1]  # all but the first item in each sequence as target (for sequence prediction)
    return X, Y

# Helper function to create batches
def create_batches(X, Y, batch_size):
    num_batches = len(X) // batch_size
    X_batches = np.array(np.array_split(X, num_batches))
    Y_batches = np.array(np.array_split(Y, num_batches))
    return X_batches, Y_batches

# Generate dataset
num_sequences = 1000  # The number of sequences you want
sequence_length = 4  # The length of each sequence
batch_size = 32  # The size of each batch

# Create dataset
dataset = create_dataset(num_sequences, sequence_length)

# Split dataset into training and testing sets (80-20 split)
train_size = int(num_sequences * 0.8)
train_set, test_set = dataset[:train_size], dataset[train_size:]

# Create inputs (X) and targets (Y) for training and testing
x_train, y_train = create_inputs_targets(train_set)
x_test, y_test = create_inputs_targets(test_set)
x_train = np.expand_dims(x_train, axis=-1)
x_test = np.expand_dims(x_test, axis=-1)

print(x_train.shape, y_train.shape)
# Shuffle training data
indices = np.arange(train_size)
np.random.shuffle(indices)

# Apply shuffled indices to create shuffled training data
x_train_shuffled = x_train[indices]
y_train_shuffled = y_train[indices]
# Normalize inputs
# x_train_max = np.max(x_train_shuffled)
# x_train_shuffled = x_train_shuffled / x_train_max
# x_test = x_test / x_train_max  # use the same scale as train set

# print(x_train_shuffled.shape, y_train_shuffled.shape)
# Create batches from the training and testing data
# x_train_batches, y_train_batches = create_batches(x_train_shuffled, y_train_shuffled, batch_size)
# x_test_batches, y_test_batches = create_batches(x_test, y_test, batch_size)


# Example: Print the first training batch
# print("First training batch (x_train, y_train):")
# print(x_train_batches.shape, y_train_batches.shape)
# x_train_batches[0], y_train_batches[0]

(800, 3, 1) (800,)


In [4]:
# net_conv_adam.add(RNN(hidden_size=20,output_size=30, return_sequences=False))

# net_conv_adam.add(FCLayer(output_dim=50))
# net_conv_adam.add(FCLayer(output_dim=1))
# net_conv_adam.add(SoftmaxLayer())

# train on 1000 samples
# we didn't implemented mini-batch GD
# net_conv_adam.use(mse, mse_prime)
# net_conv_adam.fit(x_train=x_train, y_train=y_train, epochs=100)
class Model(Module):
    def __init__(self) -> None:
        self.rnn = RNN(hidden_size=20,output_size=30, return_sequences=False, bidirectional=False, init='glorot_uniform')
        # self.rnn2 = RNN(hidden_size=20,output_size=30, return_sequences=False, init='glorot_uniform')
        self.flatten = FlattenLayer()
        # self.linear1 = FCLayer(output_dim=20)
        self.linear2 = FCLayer(output_dim=1)

    def forward(self, inputs: Tensor) -> Tensor:
        x = self.rnn(inputs)
        # x = self.rnn2(x)
        x2 = self.flatten(x)
        # x3 = self.linear1(x2)
        x4 = self.linear2(x2)
        return x4

optimizer = SGD(learning_rate=0.001)
batch_size = 32
model = Model()
x_train = Tensor(x_train_shuffled)
y_train = Tensor(np.expand_dims(y_train_shuffled,axis=-1))
i = 0
for epoch in range(1000):
    epoch_loss = 0.0

    for start in range(0, len(x_train_shuffled), batch_size):
        end = start + batch_size

        model.zero_grad()

        inputs = x_train[start:end]

        predicted = model(inputs)
        actual = y_train[start:end]
        # print(predicted.shape, actual.shape)
        loss = mse(y_true=actual, y_pred=predicted)
        if i==0:
            draw_computation_graph(loss)
        i+=1
        # for param in model.parameters():
        #     print(param.name, param.id)
        # print(start, loss)
        loss.backward()
        epoch_loss += loss.data

        optimizer.step(model)

    print(epoch, epoch_loss)

0 3071105.4242313574
1 1519365.3534559796
2 1335884.3266659623
3 1314244.880436437
4 1311661.7313077284
5 1311292.4955469666
6 1311170.1587656126
7 1311071.7958194683
8 1310974.5203389414
9 1310876.839759497
10 1310778.997427537
11 1310681.166784061
12 1310583.4183357004
13 1310485.7774524984
14 1310388.2527725673
15 1310290.8474389527
16 1310193.5620489758
17 1310096.3972843909
18 1309999.352734552
19 1309902.4284628998
20 1309805.6242607355
21 1309708.9399760473
22 1309612.3755748917
23 1309515.930724284
24 1309419.6054442017
25 1309323.399480707
26 1309227.3126449508
27 1309131.3448757564
28 1309035.4960095019
29 1308939.7659313332
30 1308844.1543197772
31 1308748.6611739653
32 1308653.286266676
33 1308558.0295468494
34 1308462.8907765357
35 1308367.8698288063
36 1308272.966540985
37 1308178.1806952825
38 1308083.5122791436
39 1307988.9610949385
40 1307894.5269820578
41 1307800.209751961
42 1307706.0092684915
43 1307611.9254625526
44 1307517.9580850687
45 1307424.1070095159
46 13073

In [5]:
test = np.array([[10,11,12], [11,12,13]])
test = np.expand_dims(test, axis=-1)
test = Tensor(test)
print(test.shape)
out = model(test)
np.round(out.data)

(2, 3, 1)


array([[239.],
       [255.]])

In [7]:
def learning_rate_decay(epoch, optimizer):
    if epoch != 0 and epoch % 50 == 0:
        optimizer.learning_rate *= 0.99



epoch 1: loss=0.05099273701819455
epoch 2: loss=13249.740727372679
epoch 3: loss=14749.407963355394
epoch 4: loss=15250.752942708701
epoch 5: loss=15492.481381005673
epoch 6: loss=15622.466162419676
epoch 7: loss=15695.41284485934
epoch 8: loss=15737.109844625535
epoch 9: loss=15761.141082024957
epoch 10: loss=15775.042803035232
epoch 11: loss=15783.09837473718
epoch 12: loss=15787.76982950266
epoch 13: loss=15790.479695590704
epoch 14: loss=15792.051856900975
epoch 15: loss=15792.963996259414
epoch 16: loss=15793.493200779878
epoch 17: loss=15793.800228715167
epoch 18: loss=15793.978352795524
epoch 19: loss=15794.081690347573
epoch 20: loss=15794.141639857253
epoch 21: loss=15794.176417997249
epoch 22: loss=15794.19659337004
epoch 23: loss=15794.208297317833
epoch 24: loss=15794.215086847486
epoch 25: loss=15794.219025468892
epoch 26: loss=15794.221310260786
epoch 27: loss=15794.222635661932
epoch 28: loss=15794.223404521004
epoch 29: loss=15794.22385053153
epoch 30: loss=15794.224109

In [8]:
test = np.array([[10,11,12]])
test = np.expand_dims(test, axis=-1)
np.round(net_conv_adam.predict(test))

array([[[777.]]])

In [9]:
net_conv_adam.summary()

Unnamed: 0,type,input_shape,output_shape,fc_layer_shape,kernels_shape,number_of_params
0,RNN,"(3, 1)","(3, 40)",,,880
1,RNN,"(3, 40)","(40,)",,,2440
2,FlattenLayer,"(40,)","(1, 40)",,,0
3,FCLayer,"(1, 40)","(1, 1)","(40, 1)",,41
4,Total number of params,,,,,3361


In [13]:
net_conv_sgd = Network()
net_conv_sgd.set_optimizer(SGD())
net_conv_sgd.add(RNN(hidden_size=20,output_size=30, return_sequences=False))
net_conv_sgd.add(FlattenLayer())
net_conv_sgd.add(FCLayer(output_dim=1))
# net_conv_adam.add(SoftmaxLayer())

# train on 1000 samples
# we didn't implemented mini-batch GD
net_conv_sgd.use(mse, mse_prime)
net_conv_sgd.fit(x_train=x_train_shuffled, y_train=y_train_shuffled, epochs=100)

  dX[i, :] = dht_dxt


epoch 1: loss=67342.6897958377
epoch 2: loss=66715.57087041353
epoch 3: loss=66715.57087041353
epoch 4: loss=66715.57087041353
epoch 5: loss=66715.57087041352
epoch 6: loss=66715.57087041352
epoch 7: loss=66715.57087041353
epoch 8: loss=66715.57087041353
epoch 9: loss=66715.57087041353
epoch 10: loss=66715.57087041353
epoch 11: loss=66715.57087041352
epoch 12: loss=66715.57087041353
epoch 13: loss=66715.57087041353
epoch 14: loss=66715.57087041352
epoch 15: loss=66715.57087041353
epoch 16: loss=66715.57087041353
epoch 17: loss=66715.57087041353
epoch 18: loss=66715.57087041353
epoch 19: loss=66715.57087041353
epoch 20: loss=66715.57087041353
epoch 21: loss=66715.57087041353
epoch 22: loss=66715.57087041353
epoch 23: loss=66715.57087041353
epoch 24: loss=66715.57087041353
epoch 25: loss=66715.57087041353
epoch 26: loss=66715.57087041353
epoch 27: loss=66715.57087041353
epoch 28: loss=66715.57087041353
epoch 29: loss=66715.57087041353
epoch 30: loss=66715.57087041353
epoch 31: loss=66715