In [23]:
import torch    #Import torch module 
import torchvision #Import torchvision
import torchvision.transforms as transforms #Import transforms for data transformation

In [24]:
from torch.utils.data import Dataset # Import Dataset class

In [25]:
class MyDataset(Dataset):                  # Create a custom Dataset Class

  def __init__(self, device='cuda'):
    train_set = torchvision.datasets.FashionMNIST(       # Download fashionMNIST Dataset
        root='./data',
        train=True,
        download=True,
        transform = transforms.Compose([
            transforms.ToTensor()
        ])
    )
    self.data=[]
    self.data2=[]
    self.label_0=[]
    self.label_1=[]
    for i in train_set:
      # Create sample by combining each image with numbers 0-9 so for each image we will have 10 samples.
      comb_0 = [i[0] for j in range(10)]    # Image
      comb_1 =  [torch.tensor(j, dtype=torch.float32) for j in range(10)] # Image label
      # Create label with two things(image class number, image class number +(0-9)) 
      label_00 = [i[1] for j in range(10)]    
      label_11 = [torch.tensor(i[1]+j) for j in range(10)]         
      for train_i,train_j in  zip(comb_0,comb_1):
        self.data.append(train_i)
        self.data2.append(train_j)
      for l,l1 in zip(label_00, label_11):
        self.label_0.append(l)
        self.label_1.append(l1)
      

  def __getitem__(self,index):
    return self.data[index],self.data2[index], self.label_0[index], self.label_1[index]

  def __len__(self):
    return len(self.data)
  


In [26]:
import torch.nn as nn
import torch.nn.functional as F
class Network(nn.Module):
  def __init__(self):
    super().__init__()
    self.conv1 = nn.Conv2d(in_channels=1, out_channels=6, kernel_size=5)
    self.conv2 = nn.Conv2d(in_channels=6, out_channels=12, kernel_size=5)
    self.fc1 = nn.Linear(in_features=12*4*4, out_features=120)
    self.fc2  = nn.Linear(in_features=120, out_features=60)
    self.out = nn.Linear(in_features=70, out_features=10)
  
  def forward(self, t, t1):
    x = t          # Take first input -> Image
    x1 = t1         # Take second input -> random number
    x = self.conv1(x)    # Apply convolutions, relu and max pool on image input
    x= F.relu(x)
    x= F.max_pool2d(x, kernel_size=2, stride=2)

    x = self.conv2(x)
    x= F.relu(x)
    x= F.max_pool2d(x, kernel_size=2, stride=2)

    x = x.reshape(-1,12*4*4)

    x = self.fc1(x)
    x = F.relu(x)

    x = self.fc2(x)         # dimension is 60 for image data(first input)
    #print(x1)
    x2 = F.one_hot(x1.long(),10)   # One hot encode second input so we get vector of dimension 10
    x = torch.cat((x, x2), dim=1)  # combine first output and seconf output to form a single vector of length 70

    x = self.out(x)            # apply a linear layer to convert 70 features to 10 features
    x = F.softmax(x, dim=0)    # apply softmax
    out = torch.argmax(x,dim=1) # retrieve the image prediction no.
    return x, out+x1            # return softmax output, image prediction no + random no 
network = Network()


In [27]:
def get_num_correct(preds, labels):
  return preds.argmax(dim=1).eq(labels).sum().item()

In [28]:
import torch.optim as optim

In [29]:
if torch.cuda.is_available():
    device_name = torch.device("cuda")
else:
    device_name = torch.device('cpu')
print("Using {}.".format(device_name))

Using cuda.


In [None]:
network = Network()
network.to(device_name)
my_data = MyDataset()
train_loader = torch.utils.data.DataLoader(    # Initialize DataLoader to have batch size 64 
    my_data, 
    batch_size = 64, 
    shuffle=True
)
optimizer = optim.Adam(network.parameters(), lr = 0.01)   # Initialize optimizer with learning rate 0.01
for epoch in range(50):
  total_loss=0
  total_correct=0
  total = 0
  for batch in train_loader:
    sample,sample2, label0, label1=batch
    #print(label0, label1, sample2)
    sample=sample.to(device_name)
    sample2=sample2.to(device_name)
    label0=label0.to(device_name)
    label1=label1.to(device_name)
    total = total + 1
    pred=network(sample, sample2)
    loss = F.cross_entropy(pred[0], label0)  # Use loss to compare only image prediction . On basis of that we will change the gradient
    optimizer.zero_grad()
    loss.backward()
    optimizer.step()
    total_loss += loss.item()
    total_correct += get_num_correct(pred[0], label0)
  print("total_loss: ", total_loss/total, "total_correct: ",total_correct)


total_loss:  2.304561171849569 total_correct:  62922
total_loss:  2.3041157370758056 total_correct:  62722
total_loss:  2.298062359873454 total_correct:  67202
total_loss:  2.2931556158447264 total_correct:  69152
total_loss:  2.2928664020284018 total_correct:  69763
total_loss:  2.302566183827718 total_correct:  62406
total_loss:  2.3113403741200766 total_correct:  59008
total_loss:  2.3077816186269122 total_correct:  59860
total_loss:  2.3069199922943113 total_correct:  60141
total_loss:  2.308131017735799 total_correct:  59625
total_loss:  2.309306469599406 total_correct:  59937
total_loss:  2.3080953948465983 total_correct:  60805
total_loss:  2.3157895592753093 total_correct:  60643
total_loss:  2.3204255466715495 total_correct:  58774
total_loss:  2.317178121210734 total_correct:  56019
total_loss:  2.307277942682902 total_correct:  60593
total_loss:  2.3074359098815918 total_correct:  63477
total_loss:  2.3088055672709147 total_correct:  61244
total_loss:  2.3096833536020913 tot

In [None]:
()