In [None]:
!sudo du -sh /content/drive/MyDrive/ModelNet10_jpg

^C


In [1]:
import os
import multiprocessing
import numpy as np
import cv2
import time
import shutil

import torch
import torch.nn as nn
import torch.optim as optim
import torchvision
from torchvision import transforms

In [2]:
if torch.cuda.is_available():  
  dev = "cuda:0" 
else:  
  dev = "cpu"  

device = torch.device(dev)
print(device)

cuda:0


In [3]:
!nvidia-smi

Sun Apr 25 04:01:46 2021       
+-----------------------------------------------------------------------------+
| NVIDIA-SMI 465.19.01    Driver Version: 460.32.03    CUDA Version: 11.2     |
|-------------------------------+----------------------+----------------------+
| GPU  Name        Persistence-M| Bus-Id        Disp.A | Volatile Uncorr. ECC |
| Fan  Temp  Perf  Pwr:Usage/Cap|         Memory-Usage | GPU-Util  Compute M. |
|                               |                      |               MIG M. |
|   0  Tesla P100-PCIE...  Off  | 00000000:00:04.0 Off |                    0 |
| N/A   34C    P0    27W / 250W |      2MiB / 16280MiB |      0%      Default |
|                               |                      |                  N/A |
+-------------------------------+----------------------+----------------------+
                                                                               
+-----------------------------------------------------------------------------+
| Proces

In [14]:
#https://machinelearningmastery.com/pytorch-tutorial-develop-deep-learning-models/
class Model_3d(nn.Module):
    '''
    A model to use on 3d objects
    '''
    def __init__(self, fc_nodes, no_views, no_classes):
      super(Model_3d, self).__init__()
      self.resnet = torchvision.models.resnet50(pretrained=True) #https://pytorch.org/tutorials/beginner/transfer_learning_tutorial.html

      num_features = self.resnet.fc.out_features

      self.fcs = nn.ModuleList()

      for idx in range(no_views):
        self.fcs.append(nn.Linear(num_features, fc_nodes))

      self.final_dense = nn.Linear(fc_nodes * no_views, no_classes)
    
    def forward(self, x):
      features = []

      for i, fc in enumerate(self.fcs):
        sample = x[:, i, ...]
        r_out = self.resnet(sample)

        fc_out = fc(r_out)
        features.append(fc_out)
      
      stack = torch.stack(features, 1)

      dense_out = self.final_dense(stack.view([x.shape[0], -1]))

      return torch.nn.functional.softmax(dense_out)

In [None]:
#https://discuss.pytorch.org/t/best-way-to-load-a-lot-of-training-data/80847
class Data_3d(torch.utils.data.Dataset):
   def __init__(self, root, views, transform=None, train=True, ram_cache = True):
        self.root = root
        self.classes = os.listdir(root) # take all files in the root directory

        self.sample_path_list = []
        self.labels = []

        self.class_names = []

        if train:
          ds_folder_name = 'train'
        else:
          ds_folder_name = 'test'

        for i, this_class in enumerate(self.classes):
          
          class_path = os.path.join(self.root, this_class, ds_folder_name)
          
          this_class_samples = os.listdir(class_path)
          for sample in this_class_samples:
            self.sample_path_list.append(os.path.join(class_path,sample))

          this_class_labels = [i] * len(this_class_samples)
          self.labels.extend(this_class_labels)

          self.class_names.append(this_class)
        
        self.jpg = TurboJPEG()#https://learnopencv.com/efficient-image-loading/
        self.views = views

        self.transform = transform

        if ram_cache:
          self.cache = []
          for idx in range(len(self.sample_path_list)):
            self.cache.append(self.get_sample(idx))
        


   def __len__(self):
        return len(self.labels)
   def __getitem__(self, idx):
        if ram_cache:
          return self.cache[idx], self.labels[idx]
        else:
          sample = self.get_sample(idx)
          label = self.labels[idx]
          return sample, label
   def get_sample(self, idx):
        sample_full_path = self.sample_path_list[idx]
        this_example_name = sample_full_path.split(os.sep)[-1]

        sample_list = []
        for view in self.views:
            view_name = view + '_' + this_example_name + '.jpg'
            this_view_path = os.path.join(sample_full_path,  view_name)
            with open(this_view_path, 'rb') as in_file:
              rgb_array = self.jpg.decode(in_file.read(), pixel_format=0)
            #chan_first = rgb_array.transpose([2, 0, 1])
            if self.transform:
              sample = self.transform(rgb_array)
              sample_list.append(sample)
            else:
              sample_list.append(rgb_array)
        sample = torch.stack(sample_list, axis=0)
        
        
        
        return sample

    
        


In [5]:
class Date_3d_Cached(torch.utils.data.Dataset):
    def __init__(self, x, y):
      self.x = torch.from_numpy(x)
      self.y = torch.from_numpy(y)
    
    def __len__(self):
      return self.x.shape[0]

    def __getitem__(self, idx):
      return self.x[idx] /255.0, self.y[idx]

In [6]:
views = ['bottom', 'side_1', 'side_2', 'side_3', 'side_4', 'top']
dir = '/content/drive/MyDrive/ModelNet10_jpg'
#training_data = Data_3d(dir, views, transform=transform)

In [7]:
x = np.load('/content/drive/MyDrive/Cache/x.npy')
y = np.load('/content/drive/MyDrive/Cache/y.npy')

training_data = Date_3d_Cached(x, y)


In [8]:
print(x.max())

255


In [9]:
#90/10 train val split
train_len = int(len(training_data) * .9)
val_len = len(training_data) - train_len

In [10]:
train_set, val_set = torch.utils.data.random_split(training_data, [train_len, val_len],torch.Generator().manual_seed(42))

In [11]:
batch_size = 16

In [28]:
model = Model_3d(1024, len(views), 10)
model.to(device)
print(device)
training_loader = torch.utils.data.DataLoader(train_set, batch_size= batch_size, shuffle=True, num_workers=os.cpu_count())
val_loader = torch.utils.data.DataLoader(val_set, batch_size= batch_size, shuffle=True, num_workers=os.cpu_count())

cuda:0


In [16]:
print(os.cpu_count())
print(train_len)
print(val_len)

2
3591
400


In [17]:
#https://medium.com/analytics-vidhya/saving-and-loading-your-model-to-resume-training-in-pytorch-cb687352fa61
def save_ckp(state, model_path):
    torch.save(state, model_path)

In [29]:
#https://pytorch.org/tutorials/beginner/blitz/cifar10_tutorial.html
criterion = nn.CrossEntropyLoss()
optimizer = optim.SGD(model.parameters(), lr=0.001, momentum=0.9)

low_val_loss = 30000000
best_acc = -1
epoch = 0

training_loss_list = []
training_acc_list = []

val_loss_list = []
val_acc_list = []

for epoch in range(1, 301):
    is_best = False
    running_loss = 0.0
    correct = 0
    total = 0

    model.train()

    t0 = time.time()
    for i, data in enumerate(training_loader):
        # get the inputs; data is a list of [inputs, labels]
        inputs, labels = data
        inputs = inputs.to(device)
        labels = labels.to(device)

        # zero the parameter gradients
        optimizer.zero_grad()

        # forward + backward + optimize
        outputs = model(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        # print statistics
        correct += (outputs.argmax(1) == labels).float().sum() #https://stackoverflow.com/questions/51503851/calculate-the-accuracy-every-epoch-in-pytorch
        total += labels.shape[0]
        running_loss += loss.item()
    
    print('{} seconds'.format(time.time() - t0))
    print('loss', running_loss)
    training_loss_list.append(running_loss)
    running_loss = 0.0

    accuracy = 100 * correct / total
    print("Accuracy = {}".format(accuracy))
    training_acc_list.append(torch.IntTensor.item(accuracy))
    correct = 0
    total= 0

    running_loss = 0.0
    correct = 0
    total = 0

    optimizer.zero_grad()
    model.eval()
    t0 = time.time()
    with torch.no_grad():
        for i, data in enumerate(val_loader):
            
            # get the inputs; data is a list of [inputs, labels]
            inputs, labels = data
            inputs = inputs.to(device)
            labels = labels.to(device)            


            # forward + backward + optimize
            outputs = model(inputs)
            loss = criterion(outputs, labels)

            correct += (outputs.argmax(1) == labels).float().sum() #https://stackoverflow.com/questions/51503851/calculate-the-accuracy-every-epoch-in-pytorch
            total += labels.shape[0]
            running_loss += loss.item()

    print('{} seconds'.format(time.time() - t0))
    print('val loss', running_loss)
    val_loss_list.append(running_loss)
    

    accuracy = 100 * correct / total
    print(" Val Accuracy = {}".format(accuracy))
    val_acc_list.append(torch.IntTensor.item(accuracy))

    model_name = 'ep_' + str(epoch) + '_loss_' + str(running_loss) + '_acc_' + str(accuracy) + '.pt'
    full_model_path = os.path.join('/content/drive/MyDrive/Models_R50', model_name)
    checkpoint = {
        'epoch': epoch + 1,
        'state_dict': model.state_dict(),
        'optimizer': optimizer.state_dict()
    }
    if running_loss < low_val_loss:
      if os.path.exists(val_loss_path):
        os.remove(val_loss_path)
      val_loss_path = full_model_path.replace('.pt', 'val_loss_best.pt')
      save_ckp(checkpoint, val_loss_path)
      low_val_loss = running_loss
      print('new low loss')

    elif accuracy > best_acc:
      if os.path.exists(acc_path):
        os.remove(acc_path)
      acc_path = full_model_path.replace('.pt', 'acc_best.pt')
      save_ckp(checkpoint, acc_path)
      best_acc = accuracy
      print('new best acc')
    full_model_path = os.path.join('/content/drive/MyDrive/Models_R50', 'last.pt')
    save_ckp(checkpoint, full_model_path)
        

    

    correct = 0
    total= 0
    running_loss = 0.0
print('Finished Training')



124.71062994003296 seconds
loss 391.8428883552551
Accuracy = 73.73990631103516
4.888025522232056 seconds
val loss 40.08485996723175
 Val Accuracy = 87.0
new low loss
new best acc
124.98024606704712 seconds
loss 357.98404717445374
Accuracy = 87.83069610595703
4.893049716949463 seconds
val loss 39.61577796936035
 Val Accuracy = 87.75
new low loss
new best acc
124.94101071357727 seconds
loss 353.59936559200287
Accuracy = 89.36229705810547
4.871597766876221 seconds
val loss 39.45723581314087
 Val Accuracy = 89.0
new low loss
new best acc
125.00335645675659 seconds
loss 350.1404342651367
Accuracy = 90.89390563964844
4.861590147018433 seconds
val loss 38.68194937705994
 Val Accuracy = 91.75
new low loss
new best acc
125.07532358169556 seconds
loss 342.03172397613525
Accuracy = 95.07101440429688
4.851800441741943 seconds
val loss 38.15747821331024
 Val Accuracy = 94.0
new low loss
new best acc
125.19798827171326 seconds
loss 336.1336096525192
Accuracy = 97.41019439697266
4.849382162094116 sec

In [30]:
import pandas as pd
     
# dictionary of lists  
this_dict = {'training loss': training_loss_list, 'training acc': training_acc_list, 'val loss': val_loss_list, 'val acc':val_acc_list}  
       
df = pd.DataFrame(this_dict) 
    
# saving the dataframe 
df.to_csv('/content/drive/MyDrive/Models_R50/logs.csv') 