In [None]:
import subprocess
import torch
import numpy as np
import matplotlib.pyplot as plt
import math
import random
import torch.nn as nn
import mnist
# NOTE(review): presumably prepares the local MNIST files that mnist.load()
# reads in a later cell — confirm against the mnist helper module.
mnist.init()
%matplotlib inline


class BFNN(torch.nn.Module):
    """MLP basis expansion combined with a learnable coefficient matrix.

    A five-layer perceptron (four ReLU hidden layers of width 128) maps each
    input row of length ``d`` to ``L`` basis values.  The forward pass returns
    ``theta @ Psi.T`` where ``theta`` is a trainable ``(n, L)`` matrix.

    Parameters
    ----------
    d : int
        Number of input features per row.
    L : int
        Number of basis functions (output width of the MLP).
    n : int
        Number of rows of ``theta``.
    V : int
        Stored on the instance as ``self.V``; not used by this class itself.
    """

    def __init__(self, d, L, n, V):
        super().__init__()
        self.d, self.L, self.n, self.V = d, L, n, V

        # theta is created first, so it consumes random numbers before any
        # Linear layer is initialised.
        self.theta = torch.nn.Parameter(torch.randn(n, L))

        # MLP that produces the basis values.
        hidden_width = 128
        self.layer1 = torch.nn.Linear(d, hidden_width)
        self.layer2 = torch.nn.Linear(hidden_width, hidden_width)
        self.layer3 = torch.nn.Linear(hidden_width, hidden_width)
        self.layer4 = torch.nn.Linear(hidden_width, hidden_width)
        self.layerL = torch.nn.Linear(hidden_width, L)

        # Shared activation for all hidden layers.
        self.relu = torch.nn.ReLU()

    def forward(self, X):
        """Return ``theta @ Psi.T``; also caches ``self.Psi`` and ``self.yhat``."""
        hidden = X
        for layer in (self.layer1, self.layer2, self.layer3, self.layer4):
            hidden = self.relu(layer(hidden))

        # The output layer is linear (no activation).
        self.Psi = self.layerL(hidden)
        self.yhat = torch.matmul(self.theta, self.Psi.t())
        return self.yhat


class NeuralNetwork(nn.Module):
    """Single-hidden-layer perceptron: Linear(in_dim, 200) -> ReLU -> Linear(200, out_dim)."""

    def __init__(self, in_dim, out_dim):
        super().__init__()
        hidden_width = 200
        stages = [
            nn.Linear(in_dim, hidden_width),
            nn.ReLU(),
            nn.Linear(hidden_width, out_dim),
        ]
        self.body = nn.Sequential(*stages)

    def forward(self, x):
        """Apply the stacked layers to ``x`` and return the raw (unactivated) output."""
        return self.body(x)


def generate_sign(n = 2000, size = 28, length_min = 13, length_max = 20, width_min = 3, width_max = 5):
    """Draw ``n`` noisy plus/minus sign images on a ``size`` x ``size`` canvas.

    Each image gets a random sign, bar length, bar width and horizontal start
    position.  A minus sign is a single horizontal bar; a plus sign is that
    bar together with a vertical bar through its middle.  Lit pixels are set
    to ``0.9`` plus Gaussian noise (standard deviation 0.05); all other
    pixels stay at zero.

    Returns
    -------
    sign_img : numpy.ndarray, shape (n, size**2)
        Flattened images, one per row.
    sign : numpy.ndarray, shape (n,)
        ``-1`` for a minus sign, ``1`` for a plus sign.
    """
    center = math.floor(size / 2)

    # Random draws, in a fixed order: sign, length, width, start.
    sign = np.random.choice((-1, 1), size = n)
    length = np.random.choice(np.arange(length_min, length_max + 1), size = n)
    width = np.random.choice(np.arange(width_min, width_max + 1), size = n)
    start = np.random.choice(np.arange(4, size - length_max - 1), size = n) - center

    # Centred pixel coordinates of the flattened canvas, shaped (1, size**2).
    cols = (np.tile(np.arange(size), size) - center)[np.newaxis, :]
    rows = (np.repeat(np.arange(size), size) - center)[np.newaxis, :]

    # Per-image quantities shaped (n, 1) so every comparison broadcasts to
    # an (n, size**2) mask.
    left = start[:, np.newaxis]
    span = length[:, np.newaxis]
    thick = width[:, np.newaxis]

    # Horizontal bar (present in both signs).
    bar_top = -np.floor(thick / 2)
    horizontal = ((cols >= left) & (cols <= left + span) &
                  (rows >= bar_top) & (rows <= bar_top + thick))

    # Vertical bar through the middle of the horizontal one (plus sign only).
    middle = (span + 2 * left) / 2
    stem_top = -np.floor(span / 2)
    vertical = ((cols >= np.floor(middle - thick / 2)) &
                (cols <= np.floor(middle + thick / 2)) &
                (rows >= stem_top) & (rows <= stem_top + span))

    is_minus = (sign == -1)[:, np.newaxis]
    sign_img = np.where(is_minus, horizontal, horizontal | vertical).astype(float)

    # Replace every lit pixel by 0.9 plus a small amount of Gaussian noise.
    lit = sign_img > 0
    sign_img[lit] = 0.9 * sign_img[lit] + np.random.normal(scale = 0.05, size = np.sum(lit))
    return sign_img, sign


class Dataset(torch.utils.data.Dataset):
    """Row-aligned (predictor, label) pairs for use with a DataLoader.

    Both containers are indexed as ``[index, :]``, so each needs at least two
    dimensions; the number of samples is taken from the first dimension of
    ``labels``.
    """

    def __init__(self, predictors, labels):
        self.predictors = predictors
        self.labels = labels

    def __len__(self):
        """Number of samples, i.e. the size of the first label dimension."""
        return self.labels.shape[0]

    def __getitem__(self, index):
        """Return the ``(X, y)`` pair stored at row ``index``."""
        return self.predictors[index, :], self.labels[index, :]


import torch.nn as nn

class CNN(nn.Module):
    """Small convolutional binary classifier with a sigmoid output.

    Two ``Conv2d(5x5, padding 2) -> ReLU -> MaxPool2d(2)`` stages (16 then 32
    channels) feed a single linear unit whose output is squashed by a sigmoid.
    The linear layer expects ``32 * 7 * 7`` features, which is what a
    ``1 x 28 x 28`` input yields after the two pooling steps.
    """

    def __init__(self):
        super().__init__()
        self.conv1 = nn.Sequential(
            nn.Conv2d(1, 16, kernel_size=5, stride=1, padding=2),
            nn.ReLU(),
            nn.MaxPool2d(2),
        )
        self.conv2 = nn.Sequential(
            nn.Conv2d(in_channels=16, out_channels=32, kernel_size=5, stride=1, padding=2),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2),
        )
        # Fully connected layer with ONE output (a single logit, not 10 classes).
        self.linear = nn.Linear(32 * 7 * 7, 1)
        self.sigmoid = nn.Sigmoid()

    def forward(self, x):
        """Return values in (0, 1) with shape ``(batch_size, 1)``."""
        feature_maps = self.conv2(self.conv1(x))
        # Flatten to (batch_size, 32 * 7 * 7) before the linear layer.
        flat = feature_maps.view(feature_maps.size(0), -1)
        return self.sigmoid(self.linear(flat))


class DNN_FB1(nn.Module):
    """Dense baseline with one hidden layer: 2352 -> 64 -> 784.

    The hidden layer is followed by ReLU and Dropout(0.3); the output layer
    is linear.
    """

    def __init__(self):
        super().__init__()
        in_features, hidden_width, out_features = 28 * 28 * 3, 64, 784
        self.linear1 = nn.Linear(in_features, hidden_width)
        self.linear2 = nn.Linear(hidden_width, out_features)
        self.relu = nn.ReLU()
        self.dropout = nn.Dropout(0.3)

    def forward(self, x):
        """Map ``(batch, 2352)`` inputs to ``(batch, 784)`` outputs."""
        hidden = self.dropout(self.relu(self.linear1(x)))
        return self.linear2(hidden)


class DNN_FB2(nn.Module):
    """Dense baseline with three hidden layers: 2352 -> 256 -> 256 -> 256 -> 784.

    Every hidden layer is followed by ReLU and Dropout(0.3); the output layer
    is linear.
    """

    def __init__(self):
        super().__init__()
        in_features, hidden_width, out_features = 28 * 28 * 3, 256, 784
        self.linear1 = nn.Linear(in_features, hidden_width)
        self.linear2 = nn.Linear(hidden_width, hidden_width)
        self.linear3 = nn.Linear(hidden_width, hidden_width)
        self.linear4 = nn.Linear(hidden_width, out_features)
        self.relu = nn.ReLU()
        self.dropout = nn.Dropout(0.3)

    def forward(self, x):
        """Map ``(batch, 2352)`` inputs to ``(batch, 784)`` outputs."""
        hidden = x
        for layer in (self.linear1, self.linear2, self.linear3):
            hidden = self.dropout(self.relu(layer(hidden)))
        return self.linear4(hidden)


class DNN_FB3(nn.Module):
    """Dense baseline with four hidden layers of width 256: 2352 -> 256 x 4 -> 784.

    Every hidden layer is followed by ReLU and Dropout(0.3); the output layer
    is linear.
    """

    def __init__(self):
        super().__init__()
        in_features, hidden_width, out_features = 28 * 28 * 3, 256, 784
        self.linear1 = nn.Linear(in_features, hidden_width)
        self.linear2 = nn.Linear(hidden_width, hidden_width)
        self.linear3 = nn.Linear(hidden_width, hidden_width)
        self.linear4 = nn.Linear(hidden_width, hidden_width)
        self.linear5 = nn.Linear(hidden_width, out_features)
        self.relu = nn.ReLU()
        self.dropout = nn.Dropout(0.3)

    def forward(self, x):
        """Map ``(batch, 2352)`` inputs to ``(batch, 784)`` outputs."""
        hidden = x
        for layer in (self.linear1, self.linear2, self.linear3, self.linear4):
            hidden = self.dropout(self.relu(layer(hidden)))
        return self.linear5(hidden)


class CNN_FB1(nn.Module):
    """Convolutional baseline: three conv/pool stages (64, 64, 32 channels) and a 64-unit head.

    The first linear layer expects 960 flattened features, which is what a
    ``1 x 28 x 84`` input gives after three 2x2 poolings (32 channels x 3 x 10).
    The head is Linear -> ReLU -> Dropout(0.3) -> Linear(64, 784).
    """

    @staticmethod
    def _conv_stage(in_channels, out_channels):
        """3x3 'same' convolution, ReLU, then 2x2 max pooling."""
        return nn.Sequential(
            nn.Conv2d(in_channels, out_channels, kernel_size=3, stride=1, padding="same"),
            nn.ReLU(),
            nn.MaxPool2d(2),
        )

    def __init__(self):
        super().__init__()
        self.conv1 = self._conv_stage(1, 64)
        self.conv2 = self._conv_stage(64, 64)
        self.conv3 = self._conv_stage(64, 32)
        self.linear1 = nn.Linear(960, 64)
        self.linear2 = nn.Linear(64, 784)
        self.relu1 = nn.ReLU()
        self.dropout = nn.Dropout(0.3)

    def forward(self, x):
        """Map ``(batch, 1, H, W)`` images to ``(batch, 784)`` outputs."""
        feature_maps = self.conv3(self.conv2(self.conv1(x)))
        flat = feature_maps.view(feature_maps.size(0), -1)
        hidden = self.dropout(self.relu1(self.linear1(flat)))
        return self.linear2(hidden)


class CNN_FB2(nn.Module):
    """Convolutional baseline: four conv/pool stages (64, 32, 32, 32 channels) and a 64-unit head.

    The first linear layer expects 160 flattened features, which is what a
    ``1 x 28 x 84`` input gives after four 2x2 poolings (32 channels x 1 x 5).
    The head is Linear -> ReLU -> Dropout(0.3) -> Linear(64, 784).
    """

    @staticmethod
    def _conv_stage(in_channels, out_channels):
        """3x3 'same' convolution, ReLU, then 2x2 max pooling."""
        return nn.Sequential(
            nn.Conv2d(in_channels, out_channels, kernel_size=3, stride=1, padding="same"),
            nn.ReLU(),
            nn.MaxPool2d(2),
        )

    def __init__(self):
        super().__init__()
        self.conv1 = self._conv_stage(1, 64)
        self.conv2 = self._conv_stage(64, 32)
        self.conv3 = self._conv_stage(32, 32)
        self.conv4 = self._conv_stage(32, 32)
        self.linear1 = nn.Linear(160, 64)
        self.linear2 = nn.Linear(64, 784)
        self.relu1 = nn.ReLU()
        self.dropout = nn.Dropout(0.3)

    def forward(self, x):
        """Map ``(batch, 1, H, W)`` images to ``(batch, 784)`` outputs."""
        feature_maps = x
        for stage in (self.conv1, self.conv2, self.conv3, self.conv4):
            feature_maps = stage(feature_maps)
        flat = feature_maps.view(feature_maps.size(0), -1)
        hidden = self.dropout(self.relu1(self.linear1(flat)))
        return self.linear2(hidden)


class CNN_FB3(nn.Module):
    """Convolutional baseline: four conv/pool stages (128, 64, 32, 32 channels) and a 128-unit head.

    The first linear layer expects 160 flattened features, which is what a
    ``1 x 28 x 84`` input gives after four 2x2 poolings (32 channels x 1 x 5).
    The head is Linear -> ReLU -> Dropout(0.3) -> Linear(128, 784).
    """

    @staticmethod
    def _conv_stage(in_channels, out_channels):
        """3x3 'same' convolution, ReLU, then 2x2 max pooling."""
        return nn.Sequential(
            nn.Conv2d(in_channels, out_channels, kernel_size=3, stride=1, padding="same"),
            nn.ReLU(),
            nn.MaxPool2d(2),
        )

    def __init__(self):
        super().__init__()
        self.conv1 = self._conv_stage(1, 128)
        self.conv2 = self._conv_stage(128, 64)
        self.conv3 = self._conv_stage(64, 32)
        self.conv4 = self._conv_stage(32, 32)
        self.linear1 = nn.Linear(160, 128)
        self.linear2 = nn.Linear(128, 784)
        self.relu1 = nn.ReLU()
        self.dropout = nn.Dropout(0.3)

    def forward(self, x):
        """Map ``(batch, 1, H, W)`` images to ``(batch, 784)`` outputs."""
        feature_maps = x
        for stage in (self.conv1, self.conv2, self.conv3, self.conv4):
            feature_maps = stage(feature_maps)
        flat = feature_maps.view(feature_maps.size(0), -1)
        hidden = self.dropout(self.relu1(self.linear1(flat)))
        return self.linear2(hidden)

In [None]:
# Load the MNIST arrays and rescale both image sets by 1/255.
train_img, train_label, test_img, test_label = mnist.load()
train_img, test_img = train_img / 255, test_img / 255

In [None]:
# Positions of the digits 1, 2 and 3 in the MNIST train and test label arrays.
train_label_1_idx = np.nonzero(train_label == 1)[0]
train_label_2_idx = np.nonzero(train_label == 2)[0]
train_label_3_idx = np.nonzero(train_label == 3)[0]
test_label_1_idx = np.nonzero(test_label == 1)[0]
test_label_2_idx = np.nonzero(test_label == 2)[0]
test_label_3_idx = np.nonzero(test_label == 3)[0]

# n sign images are generated per experiment; the first n_train are used for
# training and the rest for testing.
n = 2000
n_train = 1000
n_test = 1000

# One row per experiment and 12 columns: six baseline models, each with a
# train column followed by a test column.
num_exp = 50
result_mse = np.zeros((num_exp, 12))
result_acc = np.zeros((num_exp, 12))
L = 50
# Use the GPU when one is available; otherwise run the same code on the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")


def build_split(count, twos, sign_imgs, ones, signs, pool_img, pool_idx_1, pool_idx_3):
    """Assemble the predictors and outcomes of one data split.

    Each predictor is the 28 x 84 panel ``[digit 2 | sign | digit 1]``
    flattened to length 2352.  The outcome is a randomly chosen image of the
    digit 1 when the sign is a minus and of the digit 3 when it is a plus.

    Parameters
    ----------
    count : int
        Number of samples to build.
    twos, sign_imgs, ones : numpy.ndarray, shape (>= count, 784)
        Flattened left, middle and right images of the panel.
    signs : numpy.ndarray
        ``-1`` (minus) or ``1`` (plus) for every sample.
    pool_img : numpy.ndarray
        Image array the outcome images are drawn from.
    pool_idx_1, pool_idx_3 : numpy.ndarray
        Row indices of the digits 1 and 3 inside ``pool_img``.

    Returns
    -------
    (predictors, outcomes, outcome_labels) with shapes
    ``(count, 2352)``, ``(count, 784)`` and ``(count,)``.
    """
    predictors = np.zeros((count, 28*28*3))
    outcomes = np.zeros((count, 28*28))
    outcome_labels = np.zeros(count)
    for i in range(count):
        panel = np.hstack((twos[i, :].reshape((28, 28)),
                           sign_imgs[i, :].reshape((28, 28)),
                           ones[i, :].reshape((28, 28))))
        if signs[i] == -1:
            candidates, digit = pool_idx_1, 1
        else:
            candidates, digit = pool_idx_3, 3
        # Exactly one random draw per sample, so the random stream is
        # consumed in a fixed order.
        outcomes[i, :] = pool_img[np.random.choice(candidates, size = 1, replace = False), :]
        outcome_labels[i] = digit
        predictors[i, :] = panel.reshape(-1)
    return predictors, outcomes, outcome_labels


def fit_model(model, inputs, targets, loss_fn, num_epochs, lr, batch_size = 64):
    """Train ``model`` in place with Adam on shuffled mini-batches.

    ``inputs`` and ``targets`` must already live on the model's device.  The
    model is switched to evaluation mode before returning so that layers such
    as Dropout are inactive whenever it is used for prediction afterwards.
    """
    optimizer = torch.optim.Adam(model.parameters(), lr = lr)
    loader = torch.utils.data.DataLoader(Dataset(inputs, targets), batch_size = batch_size, shuffle = True)

    model.train()
    for _ in range(num_epochs):
        for X_batch, y_batch in loader:
            loss = loss_fn(model(X_batch), y_batch)
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
    model.eval()
    return model


def predict_images(model, inputs):
    """Return the model output as ``(N, 1, 28, 28)`` images without tracking gradients."""
    model.eval()
    with torch.no_grad():
        return model(inputs).reshape((-1, 1, 28, 28))


def score_predictions(classifier, pred_imgs, true_imgs, true_labels):
    """Return ``(accuracy, mse)`` of predicted outcome images.

    Accuracy: share of predictions that ``classifier`` assigns to the correct
    digit (probability above 0.5 means "3", otherwise "1").
    MSE: mean squared pixel difference to ``true_imgs``.
    """
    classifier.eval()
    with torch.no_grad():
        prob_three = classifier(pred_imgs).to("cpu").numpy().reshape(-1)
    accuracy = np.mean((prob_three > 0.5) == (true_labels == 3))
    mse = np.mean((pred_imgs.detach().to("cpu").numpy().reshape((-1, 784)) - true_imgs)**2)
    return accuracy, mse


for exp in range(num_exp):
    random.seed(exp)
    torch.manual_seed(exp)
    np.random.seed(exp)
    #####################################################################
    # generate images
    sign_img, sign = generate_sign(n = n)
    train_sign = sign[0:n_train]
    train_sign_img = sign_img[0:n_train, :]
    test_sign = sign[n_train:n]
    test_sign_img = sign_img[n_train:n, :]
    train_1s = train_img[np.random.choice(train_label_1_idx, size = n_train, replace = False), :]
    train_2s = train_img[np.random.choice(train_label_2_idx, size = n_train, replace = False), :]
    # The test split is sized by n_test (not n_train).
    test_1s = test_img[np.random.choice(test_label_1_idx, size = n_test, replace = False), :]
    test_2s = test_img[np.random.choice(test_label_2_idx, size = n_test, replace = False), :]

    train_predictors, train_outcomes, train_outcomes_label = build_split(
        n_train, train_2s, train_sign_img, train_1s, train_sign,
        train_img, train_label_1_idx, train_label_3_idx)
    test_predictors, test_outcomes, test_outcomes_label = build_split(
        n_test, test_2s, test_sign_img, test_1s, test_sign,
        test_img, test_label_1_idx, test_label_3_idx)

    # Written on every iteration; the files always hold the latest experiment.
    np.savetxt("train_predictors.txt", train_predictors)
    np.savetxt("test_predictors.txt", test_predictors)
    np.savetxt("train_outcomes.txt", train_outcomes)
    np.savetxt("test_outcomes.txt", test_outcomes)

    #####################################################################
    # train cnn mnist classifier (digit 1 -> target 0, digit 3 -> target 1)
    cnn = CNN().to(device)
    cnnX = torch.tensor(train_outcomes, dtype = torch.float32).reshape((n_train, 1, 28, 28)).to(device)
    cnny = torch.tensor(train_outcomes_label == 3, dtype = torch.float32).reshape((n_train, 1)).to(device)
    fit_model(cnn, cnnX, cnny, nn.functional.binary_cross_entropy, num_epochs = 100, lr = 0.001)

    #####################################################################
    # DNN1-3: dense baselines on the flattened 28 x 84 panel.
    # Results go to columns 0-5 (train, test per model).
    dnn_train_predictors = torch.tensor(train_predictors, dtype = torch.float32).reshape((n_train, 28*84)).to(device)
    dnn_test_predictors = torch.tensor(test_predictors, dtype = torch.float32).reshape((n_test, 28*84)).to(device)
    dnn_train_outcomes = torch.tensor(train_outcomes, dtype = torch.float32).reshape((n_train, 784)).to(device)

    for model_idx, dnn_cls in enumerate((DNN_FB1, DNN_FB2, DNN_FB3)):
        dnn_fb = dnn_cls().to(device)
        fit_model(dnn_fb, dnn_train_predictors, dnn_train_outcomes, nn.functional.mse_loss, num_epochs = 50, lr = 1e-3)

        # Scoring happens in eval mode: with Dropout still active the
        # predictions (and therefore the metrics) would be randomly perturbed.
        dnn_train_pred = predict_images(dnn_fb, dnn_train_predictors)
        dnn_test_pred = predict_images(dnn_fb, dnn_test_predictors)

        col = 2 * model_idx
        result_acc[exp, col], result_mse[exp, col] = score_predictions(
            cnn, dnn_train_pred, train_outcomes, train_outcomes_label)
        result_acc[exp, col + 1], result_mse[exp, col + 1] = score_predictions(
            cnn, dnn_test_pred, test_outcomes, test_outcomes_label)

    #####################################################################
    # CNN1-3: convolutional baselines on the panel as a 1 x 28 x 84 image.
    # Results go to columns 6-11 (train, test per model).
    cnn_train_predictors = torch.tensor(train_predictors, dtype = torch.float32).reshape((n_train, 1, 28, 84)).to(device)
    cnn_test_predictors = torch.tensor(test_predictors, dtype = torch.float32).reshape((n_test, 1, 28, 84)).to(device)
    cnn_train_outcomes = torch.tensor(train_outcomes, dtype = torch.float32).reshape((n_train, 784)).to(device)

    for model_idx, cnn_cls in enumerate((CNN_FB1, CNN_FB2, CNN_FB3), start = 3):
        cnn_fb = cnn_cls().to(device)
        fit_model(cnn_fb, cnn_train_predictors, cnn_train_outcomes, nn.functional.mse_loss, num_epochs = 50, lr = 1e-3)

        cnn_train_pred = predict_images(cnn_fb, cnn_train_predictors)
        cnn_test_pred = predict_images(cnn_fb, cnn_test_predictors)

        col = 2 * model_idx
        result_acc[exp, col], result_mse[exp, col] = score_predictions(
            cnn, cnn_train_pred, train_outcomes, train_outcomes_label)
        result_acc[exp, col + 1], result_mse[exp, col + 1] = score_predictions(
            cnn, cnn_test_pred, test_outcomes, test_outcomes_label)

    print(exp)

np.savetxt("result_mse.txt", result_mse)
np.savetxt("result_acc.txt", result_acc)

In [None]:
# Number of trainable parameters of DNN_FB1.
model = DNN_FB1()
sum(p.numel() for p in model.parameters() if p.requires_grad)

201552

In [None]:
# Number of trainable parameters of DNN_FB2.
model = DNN_FB2()
sum(p.numel() for p in model.parameters() if p.requires_grad)

935440

In [None]:
# Number of trainable parameters of DNN_FB3.
model = DNN_FB3()
sum(p.numel() for p in model.parameters() if p.requires_grad)

1001232

In [None]:
# Number of trainable parameters of CNN_FB1.
model = CNN_FB1()
sum(p.numel() for p in model.parameters() if p.requires_grad)

168496

In [None]:
# Number of trainable parameters of CNN_FB2.
model = CNN_FB2()
sum(p.numel() for p in model.parameters() if p.requires_grad)

98864

In [None]:
# Number of trainable parameters of CNN_FB3.
model = CNN_FB3()
sum(p.numel() for p in model.parameters() if p.requires_grad)

224528

In [None]:
# Number of trainable parameters of BFNN for d=2, L=50, n=1000 (theta included).
model = BFNN(d=2, L=50, n=1000, V=28 * 28 * 3)
sum(p.numel() for p in model.parameters() if p.requires_grad)

106370

In [None]:
# Number of trainable parameters of NeuralNetwork with 50 inputs and 50 outputs.
model = NeuralNetwork(in_dim=50, out_dim=50)
sum(p.numel() for p in model.parameters() if p.requires_grad)

20250