In [1]:
from google.colab import drive
# Mount Google Drive at /content/drive; the cached arrays and the model output
# directory used by later cells are addressed through this mount point.
drive.mount('/content/drive')

Mounted at /content/drive


In [2]:
# Root output directory. The training cell creates one sub-folder per view
# under it and writes that view's CSV log and saved models there.
root_dir = '/content/drive/MyDrive/Models_exp2.9'

In [3]:
import os
import multiprocessing
import numpy as np
import cv2
import time
import shutil

import torch

import keras
from tensorflow.keras.applications import EfficientNetB0

In [4]:
# Use the first CUDA GPU when PyTorch can see one, otherwise fall back to the CPU.
dev = "cuda:0" if torch.cuda.is_available() else "cpu"

device = torch.device(dev)
print(device)

cuda:0


In [5]:
!nvidia-smi

Fri Apr 30 14:53:17 2021       
+-----------------------------------------------------------------------------+
| NVIDIA-SMI 465.19.01    Driver Version: 460.32.03    CUDA Version: 11.2     |
|-------------------------------+----------------------+----------------------+
| GPU  Name        Persistence-M| Bus-Id        Disp.A | Volatile Uncorr. ECC |
| Fan  Temp  Perf  Pwr:Usage/Cap|         Memory-Usage | GPU-Util  Compute M. |
|                               |                      |               MIG M. |
|   0  Tesla P100-PCIE...  Off  | 00000000:00:04.0 Off |                    0 |
| N/A   37C    P0    26W / 250W |      2MiB / 16280MiB |      0%      Default |
|                               |                      |                  N/A |
+-------------------------------+----------------------+----------------------+
                                                                               
+-----------------------------------------------------------------------------+
| Proces

In [6]:
#https://keras.io/examples/vision/image_classification_efficientnet_fine_tuning/
#efficient nets are a lot easier to work with in keras
from keras import layers

def build_model(num_classes, IMG_SIZE):
    """Build and compile a frozen-backbone EfficientNetB0 classifier.

    Args:
        num_classes: Number of units in the final softmax layer.
        IMG_SIZE: Height and width of the square, 3-channel input images.

    Returns:
        A compiled ``keras.Model`` named "EfficientNet" that is trained with
        SGD (learning rate 0.001, momentum 0.9) on categorical cross-entropy
        and reports accuracy.
    """
    image_input = layers.Input(shape=(IMG_SIZE, IMG_SIZE, 3))

    # ImageNet-pretrained feature extractor without its classification head.
    # It is frozen, so only the layers added below are trainable.
    backbone = EfficientNetB0(include_top=False, input_tensor=image_input, weights="imagenet")
    backbone.trainable = False

    # New head: global pooling -> batch norm -> softmax.
    # No dropout layer is applied in front of the classifier.
    pooled = layers.GlobalAveragePooling2D(name="avg_pool")(backbone.output)
    normalized = layers.BatchNormalization()(pooled)
    predictions = layers.Dense(num_classes, activation="softmax", name="pred")(normalized)

    classifier = keras.Model(image_input, predictions, name="EfficientNet")
    classifier.compile(
        optimizer=keras.optimizers.SGD(learning_rate=.001, momentum=0.9),
        loss="categorical_crossentropy",
        metrics=["accuracy"],
    )
    return classifier

In [7]:
class Date_3d_Cached(torch.utils.data.Dataset):
    """Dataset that serves samples straight from two in-memory NumPy arrays.

    Both arrays are converted to tensors once, at construction time. Indexing
    is forwarded to the tensors, so an index may be anything a tensor accepts
    (an int, a slice, a list of ints, ...).
    """

    def __init__(self, x, y):
        """Store ``x`` (inputs) and ``y`` (targets) as tensors.

        Args:
            x: NumPy array of inputs; its first axis enumerates the samples.
            y: NumPy array of targets, indexed the same way as ``x``.
        """
        self.x = torch.from_numpy(x)
        self.y = torch.from_numpy(y)

    def __len__(self):
        """Return the number of samples (size of the first axis of ``x``)."""
        return self.x.size(0)

    def __getitem__(self, idx):
        """Return the ``(input, target)`` pair selected by ``idx``."""
        return self.x[idx], self.y[idx]

In [8]:
# View names, in the order the training cell walks them; the position in this
# list is also the index used on axis 1 of the image array.
views = [
    'bottom',
    'side_1', 'side_2', 'side_3', 'side_4',
    'top', 'top_1', 'top_2', 'top_3', 'top_4',
    'bot_1', 'bot_2', 'bot_3', 'bot_4',
]

# Cached images are stored under the archive's default key 'arr_0'.
with np.load('/content/drive/MyDrive/Cache/x_compressed.npz') as archive:
    x = archive['arr_0']

# Cached labels.
y = np.load('/content/drive/MyDrive/Cache/y.npy')

In [9]:
# Move axis 2 to the end: (d0, d1, d2, d3, d4) -> (d0, d1, d3, d4, d2).
# Presumably this turns channels-first images into the channels-last layout
# that build_model's (IMG_SIZE, IMG_SIZE, 3) input expects -- TODO confirm
# against the cached array's actual shape.
reshaped_x = x.transpose([0, 1, 3, 4, 2])

In [10]:
# One-hot encode the labels into 10 classes; the models below are built with
# 10 outputs and trained with categorical cross-entropy.
one_hot_y = keras.utils.to_categorical(y, 10)

In [11]:
# Wrap the transposed images and the one-hot labels in a single dataset.
training_data = Date_3d_Cached(reshaped_x, one_hot_y)
#90/10 train val split
train_len = int(len(training_data) * .9)
# Validation takes the remainder, so the two lengths always sum to the dataset size.
val_len = len(training_data) - train_len

In [12]:
# Randomly partition the dataset into train/val subsets; the generator is
# seeded with 42 so the same split is produced on every run.
train_set, val_set = torch.utils.data.random_split(training_data, [train_len, val_len],torch.Generator().manual_seed(42))

In [13]:
# Sanity check: available CPU cores and the sizes of the two splits.
print(os.cpu_count())
print(train_len)
print(val_len)

4
3591
400


In [14]:
# Pull the whole training subset out as NumPy arrays; model.fit in the
# training cell is fed these arrays rather than the torch subset.
# NOTE: this rebinds `x` and `y`, which until now held the full arrays loaded
# from the cache, so earlier cells cannot simply be re-run after this one.
x, y = train_set[:]
x = x.numpy()
y = y.numpy()

In [15]:
# Same extraction for the validation subset; used as validation_data in model.fit.
val_x, val_y = val_set[:]
val_x = val_x.numpy()
val_y = val_y.numpy()


In [16]:
# Drop the dataset wrappers; only the NumPy arrays extracted from them are used below.
del train_set
del val_set

del training_data

In [17]:
# Drop the remaining intermediate arrays, which later cells do not reference.
del one_hot_y
del reshaped_x

In [21]:
#https://stackoverflow.com/questions/60567679/save-keras-model-weights-directly-to-bytes-memory
class Save_Best_Weights(keras.callbacks.Callback):
    """Keep an in-memory copy of the weights from the best epoch seen so far.

    After training, ``best_weights`` holds the weights of the best epoch and
    ``name`` holds a label of the form ``ep_<epoch>_<metric>_<value>``. Both
    stay ``None`` if the monitored metric never improved or was never reported.

    Args:
        min: If True, a lower metric value is better; if False, a higher one is.
        metric: Key to read from the ``logs`` dict Keras passes at epoch end.
    """

    def __init__(self, min=True, metric='val_loss'):
        super(Save_Best_Weights, self).__init__()
        self.metric = metric
        self.min = min
        # Start from the worst possible value so the first reported epoch wins.
        # (np.inf rather than np.Inf: the capitalised alias was removed in NumPy 2.0.)
        self.best = np.inf if min else -np.inf
        # Defined up front so reading them never raises AttributeError.
        self.best_weights = None
        self.name = None

    def on_epoch_end(self, epoch, logs=None):
        """Snapshot the weights when the monitored metric improves."""
        current_metric = (logs or {}).get(self.metric)
        if current_metric is None:
            # Metric not reported this epoch (e.g. no validation data); nothing to compare.
            return

        is_better = np.less if self.min else np.greater
        if is_better(current_metric, self.best):
            self.best = current_metric
            # Use the model Keras attached to this callback, not a notebook global,
            # so the snapshot always comes from the model being trained.
            self.best_weights = self.model.get_weights()
            self.name = 'ep_' + str(epoch) + '_' + self.metric + "_" + str(self.best)
            print('saved new ' + self.metric)


In [23]:
# Train one independent EfficientNetB0 classifier per view and save, for each
# view, the weights with the lowest val_loss and with the highest val_accuracy.
batch_size = 32
epochs = 100

os.makedirs(root_dir, exist_ok=True)

for idx, view in enumerate(views):
    # A fresh model is built for every view; clear Keras' global state first so
    # the previous views' graphs do not pile up in memory across iterations.
    keras.backend.clear_session()

    this_dir = os.path.join(root_dir, view + '_' + str(idx))
    os.makedirs(this_dir, exist_ok=True)

    # `model` stays a notebook-level name on purpose: other cells refer to it.
    model = build_model(10, 224)
    print(view)

    # One tracker per criterion: lowest val_loss and highest val_accuracy.
    loss_cp = Save_Best_Weights()
    acc_cp = Save_Best_Weights(metric="val_accuracy", min=False)
    logger = keras.callbacks.CSVLogger(os.path.join(this_dir, 'logs.csv'))

    # Axis 1 of the image arrays selects the view.
    batch_x = x[:, idx, ...]
    batch_val_x = val_x[:, idx, ...]

    model.fit(
        batch_x,
        y,
        batch_size=batch_size,
        epochs=epochs,
        callbacks=[loss_cp, acc_cp, logger],
        validation_data=(batch_val_x, val_y),
    )

    # Restore each best snapshot in turn and write it out under its own name
    # (loss first, then accuracy; the model ends up holding the accuracy weights).
    for best in (loss_cp, acc_cp):
        model.set_weights(best.best_weights)
        model.save(os.path.join(this_dir, best.name))

print('Finished Training')

bottom
Epoch 1/100
saved new val_loss
saved new val_accuracy
Epoch 2/100
saved new val_loss
saved new val_accuracy
Epoch 3/100
saved new val_loss
Epoch 4/100
saved new val_loss
saved new val_accuracy
Epoch 5/100
saved new val_loss
Epoch 6/100
saved new val_loss
saved new val_accuracy
Epoch 7/100
saved new val_loss
Epoch 8/100
saved new val_loss
Epoch 9/100
Epoch 10/100
saved new val_loss
saved new val_accuracy
Epoch 11/100
saved new val_accuracy
Epoch 12/100
saved new val_loss
Epoch 13/100
Epoch 14/100
Epoch 15/100
saved new val_accuracy
Epoch 16/100
Epoch 17/100
saved new val_loss
saved new val_accuracy
Epoch 18/100
saved new val_accuracy
Epoch 19/100
saved new val_loss
Epoch 20/100
saved new val_loss
Epoch 21/100
saved new val_accuracy
Epoch 22/100
Epoch 23/100
saved new val_loss
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
Epoch 28/100
Epoch 29/100
Epoch 30/100
Epoch 31/100
Epoch 32/100
Epoch 33/100
Epoch 34/100
Epoch 35/100
Epoch 36/100
Epoch 37/100
Epoch 38/100
saved new va

In [None]:
import pandas as pd
     
# NOTE(review): training_loss_list, training_acc_list, val_loss_list and
# val_acc_list are not assigned in any cell shown in this notebook, so this
# cell raises NameError unless they are defined elsewhere -- confirm it is
# still needed.
# dictionary of lists  
this_dict = {'training loss': training_loss_list, 'training acc': training_acc_list, 'val loss': val_loss_list, 'val acc':val_acc_list}  
       
df = pd.DataFrame(this_dict) 
    
# saving the dataframe 
# Written directly under root_dir (not inside a per-view sub-folder).
df.to_csv(os.path.join(root_dir, 'logs.csv'))

In [None]:
# Confirmation message for the CSV export.
print('saved csv')

In [None]:
# NOTE(review): at notebook level `inputs` is only assigned inside the PyTorch
# training loop further down, so running the cells in order would raise
# NameError here -- confirm whether this debugging cell is still needed.
print(inputs.shape)

In [None]:
# Reload the most recent checkpoint ('last.pt' under root_dir) to resume training.
# NOTE(review): `optimizer` is not defined in any cell shown here, and the
# `model` built in the cells above is a Keras model, whereas load_state_dict is
# the PyTorch API -- confirm which model/optimizer this cell is meant to restore.
cp = torch.load(os.path.join(root_dir, 'last.pt'))
model.load_state_dict(cp['state_dict'])
optimizer.load_state_dict(cp['optimizer'])
# The training loop stores 'epoch' as (finished epoch + 1), so subtracting 1
# recovers the index of the last epoch that completed.
last_epoch_done = cp['epoch'] - 1

In [None]:
#continue training
# Resume the PyTorch training loop after the epoch recorded in
# `last_epoch_done`, writing one CSV row per epoch to logs2.csv and keeping
# the best-val-loss, best-val-accuracy and most recent checkpoints on disk.
# NOTE(review): training_loader, val_loader, criterion, optimizer and save_ckp
# are not defined in any cell shown here, and `model` must be a torch module
# (the cells above build a Keras model) -- confirm before running.
#https://stackoverflow.com/questions/8078330/csv-writing-within-loop
import csv

os.makedirs(root_dir, exist_ok=True)

# Best-so-far trackers. They are reset unconditionally here, so the first
# epoch after a resume always counts as a new best.
low_val_loss = 30000000
best_acc = -1

val_loss_path = None
acc_path = None

# `last_epoch_done` already finished, so continue with the epoch after it
# instead of training it a second time.
start_epoch = last_epoch_done + 1

with open(os.path.join(root_dir, 'logs2.csv'), 'w', newline='') as file_csv:
    writer = csv.writer(file_csv, delimiter=',', lineterminator='\n')
    writer.writerow(['epoch', 'loss', 'acc', 'val_loss', 'val_acc'])

    for epoch in range(start_epoch, 101):
        row = [epoch]

        # ---- training pass ----
        running_loss = 0.0
        correct = 0
        total = 0

        model.train()
        t0 = time.time()
        for inputs, labels in training_loader:
            inputs = inputs.to(device)
            labels = labels.to(device)

            # zero the parameter gradients
            optimizer.zero_grad()

            # forward + backward + optimize
            outputs = model(inputs)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()

            # Accumulate plain Python numbers (not tensors) so accuracy prints,
            # logs and file names below contain a bare number.
            #https://stackoverflow.com/questions/51503851/calculate-the-accuracy-every-epoch-in-pytorch
            correct += (outputs.argmax(1) == labels).sum().item()
            total += labels.shape[0]
            running_loss += loss.item()

        print('epoch', epoch)
        print('{} seconds'.format(time.time() - t0))
        print('loss', running_loss)
        row.append(running_loss)

        accuracy = 100 * correct / total
        print("Accuracy = {}".format(accuracy))
        row.append(accuracy)

        # ---- validation pass ----
        running_loss = 0.0
        correct = 0
        total = 0

        # Clear gradients left over from the last training step before evaluating.
        optimizer.zero_grad()
        model.eval()
        t0 = time.time()
        with torch.no_grad():
            for inputs, labels in val_loader:
                inputs = inputs.to(device)
                labels = labels.to(device)

                # forward only
                outputs = model(inputs)
                loss = criterion(outputs, labels)

                correct += (outputs.argmax(1) == labels).sum().item()
                total += labels.shape[0]
                running_loss += loss.item()

        print('{} seconds'.format(time.time() - t0))
        print('val loss', running_loss)
        row.append(running_loss)

        accuracy = 100 * correct / total
        print(" Val Accuracy = {}".format(accuracy))
        row.append(accuracy)

        # ---- checkpointing ----
        # `accuracy` is a plain float here, so the file name no longer embeds
        # a "tensor(..., device=...)" repr.
        model_name = 'ep_' + str(epoch) + '_loss_' + str(running_loss) + '_acc_' + str(accuracy) + '.pt'
        full_model_path = os.path.join(root_dir, model_name)
        checkpoint = {
            'epoch': epoch + 1,
            'state_dict': model.state_dict(),
            'optimizer': optimizer.state_dict()
        }

        if running_loss < low_val_loss:
            if val_loss_path and os.path.exists(val_loss_path):
                open(val_loss_path, 'w').close() #overwrite and make the file blank instead - ref: https://stackoverflow.com/a/4914288/3553367
                os.remove(val_loss_path)
            val_loss_path = full_model_path.replace('.pt', 'val_loss_best.pt')
            # NOTE(review): the best checkpoints pass `model` while 'last.pt'
            # passes the `checkpoint` dict; save_ckp is not visible here --
            # confirm it accepts both.
            save_ckp(model, val_loss_path)
            low_val_loss = running_loss
            print('new low loss')

        if accuracy > best_acc:
            if acc_path and os.path.exists(acc_path):
                open(acc_path, 'w').close() #overwrite and make the file blank instead - ref: https://stackoverflow.com/a/4914288/3553367
                os.remove(acc_path)
            acc_path = full_model_path.replace('.pt', 'acc_best.pt')
            save_ckp(model, acc_path)
            best_acc = accuracy
            print('new best acc')

        # Always refresh the resumable checkpoint.
        full_model_path = os.path.join(root_dir, 'last.pt')
        save_ckp(checkpoint, full_model_path)

        writer.writerow(row)
        # Flush each row so the log survives an interrupted session.
        file_csv.flush()
    print('Finished Training')