[https://colab.research.google.com/drive/1VpeE6UvEPRz9HmsHh1KS0XxXjYu533EC]

# Imports

In [25]:
import numpy as np
import matplotlib.pyplot as plt
from keras.datasets import mnist
import pandas as pd
from scipy import signal
import keras
import numpy as np
pd.set_option('display.max_rows', None)
pd.set_option('display.max_colwidth', None)
pd.set_option('expand_frame_repr', False)

from optimizers.sgd import SGD
from optimizers.adam import Adam
# from network import Network
# from layers.conv2d import Conv2D
# from layers.dense import FCLayer
# from layers.activation import ActivationLayer, SoftmaxLayer, tanh, tanh_prime, softmax, softmax_prime, relu, relu_prime, sigmoid, sigmoid_prime
# from layers.flatten import FlattenLayer
from losses import mse, categorical_crossentropy, binary_crossentropy, mae
from layers import Conv2D, FCLayer, ActivationLayer, FlattenLayer, DropoutLayer, GlobalAveragePoolingLayer, RNN, MaxPool2D, LSTM, GRU
from autograd import tanh, relu, sigmoid, softmax, Module, Tensor
from utils import draw_computation_graph

In [26]:
def visualize_test_samples(x_test,y_test,samples,network):
  for test, true in zip(x_test[:samples], y_test[:samples]):
    pred = network.predict([test])[0][0]
    idx = np.argmax(pred)
    idx_true = np.argmax(true)
    plt.title('pred: %s, prob: %.2f, true: %d' % (idx, pred[idx], idx_true))
    plt.imshow(test, cmap='binary')
    plt.show()

In [27]:
def adjust_data(image,is_conv=False):
  image = np.cast['float32'](image)
  if is_conv:
    image = np.expand_dims(image,axis=-1)
  image/=255
  return image

In [28]:
(x_train, y_train), (x_test, y_test) = mnist.load_data()

In [29]:
x_train = adjust_data(x_train,is_conv=True)
y_train = keras.utils.to_categorical(y_train)
x_test = adjust_data(x_test,is_conv=True)
y_test = keras.utils.to_categorical(y_test)

In [30]:
x_train.shape, y_train.shape, x_test.shape, y_test.shape

((60000, 28, 28, 1), (60000, 10), (10000, 28, 28, 1), (10000, 10))

In [31]:
# def create_batches(data, labels, batch_size):
#     num_batches = int(len(data) / batch_size)
#     data_batches = []
#     label_batches = []
    
#     for i in range(num_batches):
#         start = i * batch_size
#         end = min((i + 1) * batch_size, len(data))
#         data_batches.append(data[start:end])
#         label_batches.append(labels[start:end])
    
#     return np.array(data_batches), np.array(label_batches)

In [32]:
# batch_size = 32
# x_train_batches, y_train_batches = create_batches(x_train, y_train, batch_size)
# x_test_batches, y_test_batches = create_batches(x_test, y_test, batch_size)

In [33]:
# x_train_batches.shape, y_train_batches.shape, x_test_batches.shape, y_test_batches.shape

In [34]:
# x_train_batches = adjust_data(x_train_batches,True)
# y_train_batches = keras.utils.to_categorical(y_train_batches)
# x_test_batches = adjust_data(x_test_batches,True)
# y_test_batches = keras.utils.to_categorical(y_test_batches)

In [35]:
# x_train_batches.shape, y_train_batches.shape, x_test_batches.shape, y_test_batches.shape

# SGD

In [36]:
class Model(Module):
    def __init__(self) -> None:
        self.conv = Conv2D(filters=3, kernel_size=5, padding='same', strides=2)
        self.maxpool = MaxPool2D(pool_size=2, strides=1, padding='same')
        self.flatten = FlattenLayer()
        self.linear1 = FCLayer(output_dim=20)
        self.linear2 = FCLayer(output_dim=10)

    def forward(self, inputs: Tensor, training=True) -> Tensor:
        x = self.conv(inputs, training=training)
        x1 = self.maxpool(x)
        x2 = self.flatten(x1, training=training)
        x3 = self.linear1(x2, training=training)
        x4 = tanh(x3)
        x5 = self.linear2(x4, training=training)
        x6 = tanh(x5)
        x7 = softmax(x6)
        return x7

optimizer = SGD(learning_rate=0.001)
batch_size = 32
model = Model()
x_train = Tensor(x_train)
y_train = Tensor(y_train)
i = 0
# print(x_train.shape)
# train_data = x_train.shape[0]
train_data = 1000

for epoch in range(50):
    epoch_loss = 0.0
    # print(f'before: {x_train.shape}')
    for start in range(0, train_data, batch_size):
        end = start + batch_size

        model.zero_grad()

        inputs = x_train[start:end]

        predicted = model(inputs)
        actual = y_train[start:end]
        # print(predicted.shape, actual.shape)
        loss = categorical_crossentropy(y_true=actual, y_pred=predicted)
        if i==0:
            draw_computation_graph(loss)
        i+=1
        # for param in model.parameters():
        #     print(param.name, param.id)
        # print(start, loss)
        loss.backward()
        epoch_loss += loss.data

        optimizer.step(model)
    # print(f'after: {x_train.shape}')

    print(epoch, epoch_loss)

inp_padded: (15, 15), out_height: 14, out_width: 14
inp_padded: (15, 15), out_height: 14, out_width: 14
inp_padded: (15, 15), out_height: 14, out_width: 14
inp_padded: (15, 15), out_height: 14, out_width: 14
inp_padded: (15, 15), out_height: 14, out_width: 14
inp_padded: (15, 15), out_height: 14, out_width: 14
inp_padded: (15, 15), out_height: 14, out_width: 14
inp_padded: (15, 15), out_height: 14, out_width: 14
inp_padded: (15, 15), out_height: 14, out_width: 14
inp_padded: (15, 15), out_height: 14, out_width: 14
inp_padded: (15, 15), out_height: 14, out_width: 14
inp_padded: (15, 15), out_height: 14, out_width: 14
inp_padded: (15, 15), out_height: 14, out_width: 14
inp_padded: (15, 15), out_height: 14, out_width: 14
inp_padded: (15, 15), out_height: 14, out_width: 14
inp_padded: (15, 15), out_height: 14, out_width: 14
inp_padded: (15, 15), out_height: 14, out_width: 14
inp_padded: (15, 15), out_height: 14, out_width: 14
inp_padded: (15, 15), out_height: 14, out_width: 14
inp_padded: 

KeyboardInterrupt: 

In [13]:
class Model(Module):
    def __init__(self) -> None:
        self.conv = Conv2D(filters=3, kernel_size=5, padding='valid')
        self.maxpool = MaxPool2D(pool_size=2, strides=1)
        self.flatten = FlattenLayer()
        self.linear1 = FCLayer(output_dim=20)
        self.linear2 = FCLayer(output_dim=10)

    def forward(self, inputs: Tensor, training=True) -> Tensor:
        x = self.conv(inputs, training=training)
        x1 = self.maxpool(x)
        x2 = self.flatten(x1, training=training)
        x3 = self.linear1(x2, training=training)
        x4 = tanh(x3)
        x5 = self.linear2(x4, training=training)
        x6 = tanh(x5)
        x7 = softmax(x6)
        return x7

optimizer = SGD(learning_rate=0.001)
batch_size = 32
model = Model()
x_train = Tensor(x_train)
y_train = Tensor(y_train)
i = 0
# print(x_train.shape)
# train_data = x_train.shape[0]
train_data = 1000

for epoch in range(50):
    epoch_loss = 0.0
    # print(f'before: {x_train.shape}')
    for start in range(0, train_data, batch_size):
        end = start + batch_size

        model.zero_grad()

        inputs = x_train[start:end]

        predicted = model(inputs)
        actual = y_train[start:end]
        # print(predicted.shape, actual.shape)
        loss = categorical_crossentropy(y_true=actual, y_pred=predicted)
        if i==0:
            draw_computation_graph(loss)
        i+=1
        # for param in model.parameters():
        #     print(param.name, param.id)
        # print(start, loss)
        loss.backward()
        epoch_loss += loss.data

        optimizer.step(model)
    # print(f'after: {x_train.shape}')

    print(epoch, epoch_loss)

IndexError: too many indices for array: array is 0-dimensional, but 1 were indexed

# RNN

In [2]:
np.random.seed(42)

def create_dataset(num_sequences, sequence_length):
    return np.array([np.arange(start, start + sequence_length) for start in range(num_sequences)])

# Helper function to create inputs and targets from the sequences
def create_inputs_targets(data):
    X = data[:, :-1]  # all but the last item in each sequence as input
    Y = data[:, -1]  # all but the first item in each sequence as target (for sequence prediction)
    return X, Y

# Helper function to create batches
def create_batches(X, Y, batch_size):
    num_batches = len(X) // batch_size
    X_batches = np.array(np.array_split(X, num_batches))
    Y_batches = np.array(np.array_split(Y, num_batches))
    return X_batches, Y_batches

# Generate dataset
num_sequences = 1000  # The number of sequences you want
sequence_length = 4  # The length of each sequence
batch_size = 32  # The size of each batch

# Create dataset
dataset = create_dataset(num_sequences, sequence_length)

# Split dataset into training and testing sets (80-20 split)
train_size = int(num_sequences * 0.8)
train_set, test_set = dataset[:train_size], dataset[train_size:]

# Create inputs (X) and targets (Y) for training and testing
x_train, y_train = create_inputs_targets(train_set)
x_test, y_test = create_inputs_targets(test_set)
x_train = np.expand_dims(x_train, axis=-1)
x_test = np.expand_dims(x_test, axis=-1)

print(x_train.shape, y_train.shape)
# Shuffle training data
indices = np.arange(train_size)
np.random.shuffle(indices)

# Apply shuffled indices to create shuffled training data
x_train_shuffled = x_train[indices]
y_train_shuffled = y_train[indices]
# Normalize inputs
# x_train_max = np.max(x_train_shuffled)
# x_train_shuffled = x_train_shuffled / x_train_max
# x_test = x_test / x_train_max  # use the same scale as train set

# print(x_train_shuffled.shape, y_train_shuffled.shape)
# Create batches from the training and testing data
# x_train_batches, y_train_batches = create_batches(x_train_shuffled, y_train_shuffled, batch_size)
# x_test_batches, y_test_batches = create_batches(x_test, y_test, batch_size)


# Example: Print the first training batch
# print("First training batch (x_train, y_train):")
# print(x_train_batches.shape, y_train_batches.shape)
# x_train_batches[0], y_train_batches[0]

(800, 3, 1) (800,)


In [7]:
class Model(Module):
    def __init__(self) -> None:
        # self.rnn = RNN(hidden_size=20,output_size=30, return_sequences=False, bidirectional=False, init='glorot_uniform')
        self.lstm = LSTM(hidden_size=20, return_sequences=False, bidirectional=False)
        # self.rnn2 = RNN(hidden_size=20,output_size=30, return_sequences=False, init='glorot_uniform')
        self.flatten = FlattenLayer()
        # self.linear1 = FCLayer(output_dim=20)
        self.linear2 = FCLayer(output_dim=1)

    def forward(self, inputs: Tensor) -> Tensor:
        x = self.lstm(inputs)
        # x = self.rnn2(x)
        x2 = self.flatten(x)
        # x3 = self.linear1(x2)
        x4 = self.linear2(x2)
        return x4

optimizer = Adam(learning_rate=0.001)
batch_size = 32
model = Model()
x_train = Tensor(x_train_shuffled)
y_train = Tensor(np.expand_dims(y_train_shuffled,axis=-1))
i = 0
for epoch in range(10000):
    epoch_loss = 0.0

    for start in range(0, len(x_train_shuffled), batch_size):
        end = start + batch_size

        model.zero_grad()

        inputs = x_train[start:end]

        predicted = model(inputs)
        actual = y_train[start:end]
        # print(predicted.shape, actual.shape)
        loss = mse(y_true=actual, y_pred=predicted)
        if i==0:
            draw_computation_graph(loss)
        i+=1
        # for param in model.parameters():
        #     print(param.name, param.id)
        # print(start, loss)
        loss.backward()
        epoch_loss += loss.data

        optimizer.step(model)

    print(epoch, epoch_loss)

0 5385602.477084881
1 5384210.345911151
2 5382820.0918907365
3 5381430.623631913
4 5380041.73605945
5 5378653.353125469
6 5377265.437945912
7 5375877.9695326565
8 5374490.934548538
9 5373104.323776632
10 5371718.130456414
11 5370332.3493547905
12 5368946.976246537
13 5367562.00761595
14 5366177.440453294
15 5364793.2720965715
16 5363409.500176141
17 5362026.122527823
18 5360643.137142521
19 5359260.542143184
20 5357878.335759179
21 5356496.516310977
22 5355115.082186067
23 5353734.031829917
24 5352353.363741623
25 5350973.076456165
26 5349593.168549546
27 5348213.638634015
28 5346834.485349766
29 5345455.7073628865
30 5344077.30336952
31 5342699.272062712
32 5341321.612166389
33 5339944.322433597
34 5338567.401629453
35 5337190.848518929
36 5335814.66189331
37 5334438.840562567
38 5333063.3833341105
39 5331688.289032168
40 5330313.556492995
41 5328939.184566761
42 5327565.172111637
43 5326191.517999379
44 5324818.221111466
45 5323445.280332527
46 5322072.694556141
47 5320700.462694519


In [9]:
test = np.array([[10,11,12], [11,12,13]])
test = np.expand_dims(test, axis=-1)
test = Tensor(test)
print(test.shape)
out = model(test)
np.round(out.data)

(2, 3, 1)


array([[-89.],
       [-83.]])

In [7]:
def learning_rate_decay(epoch, optimizer):
    if epoch != 0 and epoch % 50 == 0:
        optimizer.learning_rate *= 0.99



epoch 1: loss=0.05099273701819455
epoch 2: loss=13249.740727372679
epoch 3: loss=14749.407963355394
epoch 4: loss=15250.752942708701
epoch 5: loss=15492.481381005673
epoch 6: loss=15622.466162419676
epoch 7: loss=15695.41284485934
epoch 8: loss=15737.109844625535
epoch 9: loss=15761.141082024957
epoch 10: loss=15775.042803035232
epoch 11: loss=15783.09837473718
epoch 12: loss=15787.76982950266
epoch 13: loss=15790.479695590704
epoch 14: loss=15792.051856900975
epoch 15: loss=15792.963996259414
epoch 16: loss=15793.493200779878
epoch 17: loss=15793.800228715167
epoch 18: loss=15793.978352795524
epoch 19: loss=15794.081690347573
epoch 20: loss=15794.141639857253
epoch 21: loss=15794.176417997249
epoch 22: loss=15794.19659337004
epoch 23: loss=15794.208297317833
epoch 24: loss=15794.215086847486
epoch 25: loss=15794.219025468892
epoch 26: loss=15794.221310260786
epoch 27: loss=15794.222635661932
epoch 28: loss=15794.223404521004
epoch 29: loss=15794.22385053153
epoch 30: loss=15794.224109

In [8]:
test = np.array([[10,11,12]])
test = np.expand_dims(test, axis=-1)
np.round(net_conv_adam.predict(test))

array([[[777.]]])

In [9]:
net_conv_adam.summary()

Unnamed: 0,type,input_shape,output_shape,fc_layer_shape,kernels_shape,number_of_params
0,RNN,"(3, 1)","(3, 40)",,,880
1,RNN,"(3, 40)","(40,)",,,2440
2,FlattenLayer,"(40,)","(1, 40)",,,0
3,FCLayer,"(1, 40)","(1, 1)","(40, 1)",,41
4,Total number of params,,,,,3361


In [13]:
net_conv_sgd = Network()
net_conv_sgd.set_optimizer(SGD())
net_conv_sgd.add(RNN(hidden_size=20,output_size=30, return_sequences=False))
net_conv_sgd.add(FlattenLayer())
net_conv_sgd.add(FCLayer(output_dim=1))
# net_conv_adam.add(SoftmaxLayer())

# train on 1000 samples
# we didn't implemented mini-batch GD
net_conv_sgd.use(mse, mse_prime)
net_conv_sgd.fit(x_train=x_train_shuffled, y_train=y_train_shuffled, epochs=100)

  dX[i, :] = dht_dxt


epoch 1: loss=67342.6897958377
epoch 2: loss=66715.57087041353
epoch 3: loss=66715.57087041353
epoch 4: loss=66715.57087041353
epoch 5: loss=66715.57087041352
epoch 6: loss=66715.57087041352
epoch 7: loss=66715.57087041353
epoch 8: loss=66715.57087041353
epoch 9: loss=66715.57087041353
epoch 10: loss=66715.57087041353
epoch 11: loss=66715.57087041352
epoch 12: loss=66715.57087041353
epoch 13: loss=66715.57087041353
epoch 14: loss=66715.57087041352
epoch 15: loss=66715.57087041353
epoch 16: loss=66715.57087041353
epoch 17: loss=66715.57087041353
epoch 18: loss=66715.57087041353
epoch 19: loss=66715.57087041353
epoch 20: loss=66715.57087041353
epoch 21: loss=66715.57087041353
epoch 22: loss=66715.57087041353
epoch 23: loss=66715.57087041353
epoch 24: loss=66715.57087041353
epoch 25: loss=66715.57087041353
epoch 26: loss=66715.57087041353
epoch 27: loss=66715.57087041353
epoch 28: loss=66715.57087041353
epoch 29: loss=66715.57087041353
epoch 30: loss=66715.57087041353
epoch 31: loss=66715