In [1]:
import torch
from torch.utils.data import DataLoader
from torchvision import datasets, transforms
import torch.nn as nn
import matplotlib.pyplot as plt
from torch.optim import Adam
from tqdm import tqdm
from torchvision.transforms import Lambda
# torch.nn.Module
device = "cuda" if torch.cuda.is_available() else "cpu"
print(device)


cuda


(آ) لودکردن دیتاست

In [2]:

class MNISTDataLoader:
    def __init__(self , shuffle=True):
        transform = transforms.Compose([transforms.ToTensor(),transforms.Normalize((0.5,), (0.5,)), Lambda(lambda x: torch.flatten(x))])
        self.train_dataset = datasets.MNIST(root='./data', train=True, download=True, transform=transform)
        self.test_dataset = datasets.MNIST(root='./data', train=False, download=True, transform=transform)
        self.shuffle = shuffle

    def train_loader(self,batch_size ):
        return DataLoader(self.train_dataset, batch_size=batch_size, shuffle=self.shuffle)

    def test_loader(self,batch_size):
        return DataLoader(self.test_dataset, batch_size=batch_size, shuffle=False)




In [3]:

data_loader = MNISTDataLoader()
train_data= data_loader.train_loader(5000)
test_data= data_loader.test_loader(1000)

# print(len(train_data))
# print(len(test_data))

Downloading http://yann.lecun.com/exdb/mnist/train-images-idx3-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/train-images-idx3-ubyte.gz to ./data/MNIST/raw/train-images-idx3-ubyte.gz


100%|██████████| 9912422/9912422 [00:00<00:00, 148572765.44it/s]


Extracting ./data/MNIST/raw/train-images-idx3-ubyte.gz to ./data/MNIST/raw

Downloading http://yann.lecun.com/exdb/mnist/train-labels-idx1-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/train-labels-idx1-ubyte.gz to ./data/MNIST/raw/train-labels-idx1-ubyte.gz


100%|██████████| 28881/28881 [00:00<00:00, 117951016.38it/s]


Extracting ./data/MNIST/raw/train-labels-idx1-ubyte.gz to ./data/MNIST/raw

Downloading http://yann.lecun.com/exdb/mnist/t10k-images-idx3-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/t10k-images-idx3-ubyte.gz to ./data/MNIST/raw/t10k-images-idx3-ubyte.gz


100%|██████████| 1648877/1648877 [00:00<00:00, 18692962.45it/s]


Extracting ./data/MNIST/raw/t10k-images-idx3-ubyte.gz to ./data/MNIST/raw

Downloading http://yann.lecun.com/exdb/mnist/t10k-labels-idx1-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/t10k-labels-idx1-ubyte.gz to ./data/MNIST/raw/t10k-labels-idx1-ubyte.gz


100%|██████████| 4542/4542 [00:00<00:00, 17590515.94it/s]

Extracting ./data/MNIST/raw/t10k-labels-idx1-ubyte.gz to ./data/MNIST/raw






(ب)تولید داده

In [4]:
# make a random number that is not the excluded_number
def random_num(excluded_number):
    while True:
        number = torch.randint(low=0,high= 10, size=(1,)).item()
        if number != excluded_number:
            return number

# genarate wrong label
def Wrong_labels(y):
    y_neg_i = []
    for _, y_i in enumerate(y):
        num = random_num(y_i)
        y_neg_i.append(num)
    y_neg = torch.tensor(y_neg_i, device=device)
    return y_neg

# add y to the first 10 pixcel of each image
def combine_x_y(x, y, classes=10):
    x_new = x.clone()
    # add one hot vector to the first of each image
    x_new[:, :classes] *= 0.0
    x_new[range(x.shape[0]), y] = x.max()
    return x_new



(ج)پیاده سازی شبکه

In [5]:
class Layer(nn.Linear):
    def __init__(self, in_features, out_features, lr=0.03 , threshold = 2.0 ,num_epochs = 1000,
                 bias=True, device=None, dtype=None):
        super().__init__(in_features, out_features, bias, device, dtype)
        self.relu = torch.nn.ReLU()
        self.opt = Adam(self.parameters(), lr = lr)
        self.threshold = threshold
        self.num_epochs = num_epochs

    def forward(self, x):
        x_direction = x / (x.norm(2, 1, keepdim=True) + 1e-4)
        Linear_relu_out = self.relu(torch.mm(x_direction, self.weight.T) + self.bias.unsqueeze(0))
        return Linear_relu_out

    def train(self, x_pos, x_neg):
        for i in tqdm(range(self.num_epochs)):
            # compute the mean squared output of the model for
            # positive samples (x_pos) and negative samples (x_neg)
            # print('x_pos:',x_pos.shape)
            g_pos = self.forward(x_pos).pow(2).mean(1)
            g_neg = self.forward(x_neg).pow(2).mean(1)
            # print('g_pos:',g_pos.shape)
            # This loss pushes pos (neg) samples to
            # values larger (smaller) than the self.threshold
            loss = torch.log1p( torch.exp(torch.cat([
                -g_pos + self.threshold,
                g_neg - self.threshold]))).mean()

            # clear the gradients of all optimized torch.Tensors
            self.opt.zero_grad()
            # gradients of the loss with respect to the model parameters
            loss.backward()
            #  update the parameters of the model based on the gradients
            self.opt.step()
            # detach() function is used to prevent further computation of gradients
            next_x_pos = self.forward(x_pos).detach()
            next_x_neg = self.forward(x_neg).detach()

        return next_x_pos, next_x_neg


In [6]:
class Net():
    def __init__(self, dims):
        self.layers = []
        for indx in range(len(dims) - 1):
            self.layers += [Layer(dims[indx ], dims[indx + 1]).cuda()]

    def predict(self, x):
        goodness_per_label = []
        for label in range(10):
            h = combine_x_y(x, label)
            goodness = []
            for layer in self.layers:
                h = layer(h)
                goodness += [h.pow(2).mean(1)]
            goodness_per_label += [sum(goodness).unsqueeze(1)]
        goodness_per_label = torch.cat(goodness_per_label, 1)
        return goodness_per_label.argmax(1)

    def train(self, x_pos, x_neg):
        h_pos, h_neg = x_pos, x_neg
        for i, layer in enumerate(self.layers):
            print('training layer', i, '...')
            h_pos, h_neg = layer.train(h_pos, h_neg)

(د)گزارش نتایج

In [8]:
torch.manual_seed(2)
net = Net([784, 500, 500])
x_train = iter(train_data)
for x_data in x_train:
    x_t , y_t = x_data
    x_t , y_t = x_t.to(device) , y_t.to(device)
    # build the data with labels inside
    x_pos = combine_x_y(x_t, y_t)
    y_neg = Wrong_labels(y_t)
    x_neg = combine_x_y(x_t, y_neg)
    # train the network
    net.train(x_pos, x_neg)

training layer 0 ...


100%|██████████| 1000/1000 [00:11<00:00, 88.93it/s]


training layer 1 ...


100%|██████████| 1000/1000 [00:05<00:00, 168.08it/s]


training layer 0 ...


100%|██████████| 1000/1000 [00:08<00:00, 121.21it/s]


training layer 1 ...


100%|██████████| 1000/1000 [00:05<00:00, 167.43it/s]


training layer 0 ...


100%|██████████| 1000/1000 [00:08<00:00, 119.77it/s]


training layer 1 ...


100%|██████████| 1000/1000 [00:06<00:00, 166.40it/s]


training layer 0 ...


100%|██████████| 1000/1000 [00:08<00:00, 118.14it/s]


training layer 1 ...


100%|██████████| 1000/1000 [00:06<00:00, 164.63it/s]


training layer 0 ...


100%|██████████| 1000/1000 [00:08<00:00, 116.48it/s]


training layer 1 ...


100%|██████████| 1000/1000 [00:06<00:00, 163.56it/s]


training layer 0 ...


100%|██████████| 1000/1000 [00:08<00:00, 114.58it/s]


training layer 1 ...


100%|██████████| 1000/1000 [00:06<00:00, 161.77it/s]


training layer 0 ...


100%|██████████| 1000/1000 [00:08<00:00, 112.36it/s]


training layer 1 ...


100%|██████████| 1000/1000 [00:06<00:00, 159.53it/s]


training layer 0 ...


100%|██████████| 1000/1000 [00:09<00:00, 110.43it/s]


training layer 1 ...


100%|██████████| 1000/1000 [00:06<00:00, 159.86it/s]


training layer 0 ...


100%|██████████| 1000/1000 [00:08<00:00, 112.53it/s]


training layer 1 ...


100%|██████████| 1000/1000 [00:06<00:00, 162.49it/s]


training layer 0 ...


100%|██████████| 1000/1000 [00:08<00:00, 113.84it/s]


training layer 1 ...


100%|██████████| 1000/1000 [00:06<00:00, 162.93it/s]


training layer 0 ...


100%|██████████| 1000/1000 [00:08<00:00, 113.90it/s]


training layer 1 ...


100%|██████████| 1000/1000 [00:06<00:00, 163.04it/s]


training layer 0 ...


100%|██████████| 1000/1000 [00:08<00:00, 113.12it/s]


training layer 1 ...


100%|██████████| 1000/1000 [00:06<00:00, 162.29it/s]


In [9]:
x_train = iter(train_data)
First = True
for x_data in x_train:
    x_t , y_t = x_data
    x_t , y_t = x_t.to(device) , y_t.to(device)
    if First:
        accuracy = (net.predict(x_t) == y_t)
        First = False
    else:
        accuracy = torch.cat((accuracy , (net.predict(x_t) == y_t)), dim=0)

accuracy = accuracy.float().mean().item()
print('train accuracy:',accuracy)

train accuracy: 0.9444000124931335


In [10]:

x_test = iter(test_data)
First = True
for x_data in x_test:
    x_t , y_t = x_data
    x_t , y_t = x_t.to(device) , y_t.to(device)
    if First:
        accuracy = (net.predict(x_t) == y_t)
        First = False
    else:
        accuracy = torch.cat((accuracy , (net.predict(x_t) == y_t)), dim=0)

accuracy = accuracy.float().mean().item()
print('test accuracy:',accuracy)

test accuracy: 0.9401999711990356
