In [1]:
# Report the visible GPU, driver/CUDA versions and its current memory use and
# utilisation before any tensors are allocated by this notebook.
!nvidia-smi

Mon Jul 11 10:41:48 2022       
+-----------------------------------------------------------------------------+
| NVIDIA-SMI 515.43.04    Driver Version: 515.43.04    CUDA Version: 11.7     |
|-------------------------------+----------------------+----------------------+
| GPU  Name        Persistence-M| Bus-Id        Disp.A | Volatile Uncorr. ECC |
| Fan  Temp  Perf  Pwr:Usage/Cap|         Memory-Usage | GPU-Util  Compute M. |
|                               |                      |               MIG M. |
|   0  NVIDIA GeForce ...  On   | 00000000:AF:00.0 Off |                  N/A |
| 90%   70C    P2   307W / 350W |  16238MiB / 24576MiB |     95%      Default |
|                               |                      |                  N/A |
+-------------------------------+----------------------+----------------------+
                                                                               
+---------------------------------------------------------------------------

# Mounting GDrive

In [2]:
# from google.colab import drive
# drive.mount('/content/drive')

# Setup: dependencies, imports and hyperparameters

In [3]:
# Use the %pip magic (not !pip) so the packages are installed into the
# environment of the running kernel rather than whatever `pip` is first on PATH.
# NOTE(review): versions are not pinned; pin them once known for reproducibility.
%pip install torchinfo ipywidgets

Looking in indexes: https://pypi.org/simple, https://pypi.ngc.nvidia.com




In [4]:
import os
import h5py
import numpy as np
from tqdm.auto import tqdm
import random
import matplotlib.pyplot as plt
import matplotlib.animation as animation

import torch
import torch.optim as optim
from torchinfo import summary
from torch.utils.data import DataLoader

In [5]:
# GITHUB PROJECT FOLDER
# import sys

# project_path = '/content/drive/MyDrive/diss/3D-GAN'
# src_path = f'{project_path}/3D-GAN'
# sys.path.append(src_path) # adding path to sys
from src.LRCN import LRCN, weights_init
# from src.LRCN_batch import LRCN, weights_init
# from LRCN_no_LSTM import LRCN, weights_init

In [6]:
# Root folder of the dataset, located one level above the notebook directory.
data_path = os.path.join(os.pardir, 'data', 'shapenet-lamp')

In [7]:
# # importing from h5 file
# f = h5py.File(f'{data_path}/{data_filename}.h5', 'r')
# dataset = torch.from_numpy(np.array(f[list(f.keys())[0]]).reshape(-1, 1, input_dim, input_dim, input_dim)).to(torch.float)
# print('dataset shape:', dataset.shape)

In [8]:
### HYPERPARAMETERS ###
# LRCN
input_dim = 64    # edge length of the cubic input voxel grid
output_dim = 128  # edge length of the cubic target voxel grid
c = 5             # passed to LRCN as `c`; NOTE(review): meaning is defined in src.LRCN - confirm

# OPTIMIZER (Adam)
lr = 1e-4
beta1 = 0.5
beta2 = 0.999

num_epochs = 100   # total number of epochs to train for
num_models = 1000  # maximum number of models read from disk
batch_size = 4     # the network is updated once per batch of this size
workers = 0        # DataLoader worker processes (0 = load in the main process)
run_parallel = False

print('batch size:', batch_size)

# Set random seed for reproducibility
manualSeed = 42
#manualSeed = random.randint(1, 10000) # use if you want new results
print("Random Seed: ", manualSeed)
# Seed every RNG this notebook has imported, not only `random` and `torch`,
# so that NumPy-based randomness is reproducible as well.
random.seed(manualSeed)
torch.manual_seed(manualSeed)
np.random.seed(manualSeed)

batch size: 4
Random Seed:  42


<torch._C.Generator at 0x7fc6e8b40a50>

# Importing data

In [9]:
# importing from binvox files
from src import binvox_rw


def _load_binvox_models(directory, filenames, desc=None):
    """Read the given binvox files from `directory`.

    Args:
        directory: folder that contains the binvox files.
        filenames: names of the files to read, relative to `directory`.
        desc: optional label shown on the progress bar.

    Returns:
        A list with, for every file and in the order of `filenames`, the
        `.data` attribute of the object returned by
        `binvox_rw.read_as_3d_array`.
    """
    models = []
    for filename in tqdm(filenames, desc=desc):
        with open(os.path.join(directory, filename), 'rb') as f:
            models.append(binvox_rw.read_as_3d_array(f).data)
    return models


# input models of resolution `input_dim`
input_data_filename = f'shapenet-lamp-binvox-{input_dim}'
input_dir = os.path.join(data_path, input_data_filename)
# Keep only binvox files so stray entries (hidden files, checkpoint folders)
# in the directory cannot break the reader; sort for a deterministic order.
binvox_files = sorted(
    name for name in os.listdir(input_dir) if name.endswith('.binvox')
)[:num_models]
if not binvox_files:
    raise FileNotFoundError(f'no .binvox files found in {input_dir}')
print('number of binvox files:', len(binvox_files))
print(binvox_files[0], binvox_files[-1])
lst_binvox_input = _load_binvox_models(input_dir, binvox_files, desc='input models')

# target models of resolution `output_dim`;
# use the same filenames for input and target models so the pairs line up
target_data_filename = f'shapenet-lamp-binvox-{output_dim}'
target_dir = os.path.join(data_path, target_data_filename)
lst_binvox_target = _load_binvox_models(target_dir, binvox_files, desc='target models')

number of binvox files: 1000
model_0.binvox model_1898.binvox


  0%|          | 0/1000 [00:00<?, ?it/s]

  0%|          | 0/1000 [00:00<?, ?it/s]

In [10]:
# Stack the per-model voxel grids and insert a singleton second axis, giving
# arrays of shape (N, 1, dim, dim, dim) for the input and target resolutions.
arr_input = np.array(lst_binvox_input).reshape((-1, 1) + (input_dim,) * 3)
arr_target = np.array(lst_binvox_target).reshape((-1, 1) + (output_dim,) * 3)

# Convert to float32 tensors, the dtype the loss is computed in.
input_tensors = torch.from_numpy(arr_input).float()
target_tensors = torch.from_numpy(arr_target).float()

print(input_tensors.shape, target_tensors.shape, sep='\n')

torch.Size([1000, 1, 64, 64, 64])
torch.Size([1000, 1, 128, 128, 128])


In [11]:
# for i in range(1, 10):
#   input_sample = arr_input[i][0]
#   target_sample = arr_target[i][0]

#   print('input', i)
#   ax = plt.figure(figsize=(10, 10)).add_subplot(projection='3d')
#   ax.voxels(input_sample)
#   plt.show()

#   print('target', i)
#   ax = plt.figure(figsize=(10, 10)).add_subplot(projection='3d')
#   ax.voxels(target_sample)
#   plt.show()

In [12]:
# Split on the number of models that were actually loaded rather than on the
# `num_models` constant: the data folder may hold fewer files than requested,
# which would otherwise produce a wrong (possibly empty) validation split.
num_samples = len(input_tensors)
if len(target_tensors) != num_samples:
    raise ValueError(
        f'input/target count mismatch: {num_samples} vs {len(target_tensors)}'
    )
num_train = int(num_samples * 0.8)  # first 80% train, remaining 20% validation

train_dataset = torch.utils.data.TensorDataset(input_tensors[:num_train], target_tensors[:num_train])
val_dataset = torch.utils.data.TensorDataset(input_tensors[num_train:], target_tensors[num_train:])

# Both loaders share one configuration.
# NOTE(review): shuffling is disabled for the training loader as well, so every
# epoch visits the batches in the same order - confirm this is intended.
loader_kwargs = dict(batch_size=batch_size, shuffle=False, num_workers=workers)
train_dataloader = DataLoader(train_dataset, **loader_kwargs)
val_dataloader = DataLoader(val_dataset, **loader_kwargs)

print('number of train batches:', len(train_dataloader))
print('number of validation batches:', len(val_dataloader))

number of train batches: 200
number of validation batches: 50


# Initialise LRCN

In [13]:
# Shape of a single input batch: (batch_size, 1, input_dim, input_dim, input_dim)
input_shape = (batch_size, 1) + (input_dim,) * 3
print('input shape:', input_shape)

# Run on the GPU when one is available, otherwise fall back to the CPU.
device = 'cpu'
if torch.cuda.is_available():
    device = 'cuda'
print('device:', device)

# Build the network on the selected device.
net = LRCN(
    input_dim=input_dim,
    kernel_size=3,
    c=c,
    output_dim=output_dim,
    hidden_size=1000,
).to(device)

# Adam optimiser and L1 reconstruction loss
# (BCELoss and MSELoss were tried as alternatives).
opt = optim.Adam(net.parameters(), lr=lr, betas=(beta1, beta2))
criterion = torch.nn.L1Loss()

# Optional: print a layer-by-layer summary with
# summary(net, input_shape)

# Apply the project's weight initialisation to every submodule; as the last
# expression of the cell this also displays the network structure.
net.apply(weights_init)

input shape: (4, 1, 64, 64, 64)
device: cuda


LRCN(
  (in_conv1): Sequential(
    (0): Conv3d(1, 64, kernel_size=(3, 3, 3), stride=(2, 2, 2), padding=(1, 1, 1), bias=False)
    (1): BatchNorm3d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (2): LeakyReLU(negative_slope=0.2, inplace=True)
  )
  (in_conv2): Sequential(
    (0): Conv3d(64, 128, kernel_size=(3, 3, 3), stride=(2, 2, 2), padding=(1, 1, 1), bias=False)
    (1): BatchNorm3d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (2): LeakyReLU(negative_slope=0.2, inplace=True)
  )
  (in_conv3): Sequential(
    (0): Conv3d(128, 256, kernel_size=(3, 3, 3), stride=(2, 2, 2), padding=(1, 1, 1), bias=False)
    (1): BatchNorm3d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (2): LeakyReLU(negative_slope=0.2, inplace=True)
  )
  (fc1): Linear(in_features=16384, out_features=200, bias=True)
  (lstm): LSTM(200, 1000, batch_first=True)
  (linear): Linear(in_features=1000, out_features=262144, bias=True)
  (out_co

In [14]:
# Folder in which the network checkpoints are stored; create it on first use.
weights_path = 'weights/LRCN'
if not os.path.isdir(weights_path):
    os.makedirs(weights_path, exist_ok=True)

In [15]:
# Resume training from a saved checkpoint.
start_epoch = 55  # epoch number of the checkpoint to load; 0 starts from scratch
if start_epoch > 0:
    file_net = os.path.join(weights_path, f'net_r{input_dim}_r{output_dim}_e{start_epoch}_weights.pth')
    # Deserialise onto the CPU: without map_location the tensors are restored
    # to the device they were saved from, which fails on a CPU-only machine and
    # needs extra GPU memory. load_state_dict then copies the values into the
    # network's existing parameters.
    net.load_state_dict(torch.load(file_net, map_location='cpu'))
    print('Loaded network weights successfully')
    # From here on num_epochs holds the number of *remaining* epochs.
    # NOTE(review): this mutates num_epochs in place, so running this cell
    # twice subtracts start_epoch twice; re-run the hyperparameter cell first.
    num_epochs = num_epochs - start_epoch

Loaded network weights successfully


In [16]:
# Training loop: one optimiser step per training batch, each followed by a
# quick validation pass on a single fixed validation batch.
print("Starting Training Loop...")
lst_loss = []      # training loss (float) of every optimiser step
lst_val_loss = []  # validation loss (float) recorded after every optimiser step
end_epoch = start_epoch + num_epochs  # exclusive upper bound of the epoch range

# The validation loader does not shuffle, so next(iter(...)) always yields the
# same first batch; fetch it and move it to the device once, not every step.
val_input, val_target = next(iter(val_dataloader))
val_input = val_input.to(device)
val_target = val_target.to(device)

# For each epoch
for epoch in tqdm(range(start_epoch, end_epoch)):
    # For each batch in the dataloader
    for i, (input_data, target_data) in enumerate(train_dataloader):
        net.train()  # validation below switches to eval mode; switch back
        input_data = input_data.to(device)
        # Compute the loss on the device instead of copying the (much larger)
        # network output back to the CPU on every step.
        target_data = target_data.to(device)

        opt.zero_grad()  # clear gradients left over from the previous step
        output = net(input_data)
        err = criterion(output, target_data)
        err.backward()
        opt.step()
        train_loss = err.item()
        lst_loss.append(train_loss)

        # Validation in eval mode, so BatchNorm uses its running statistics and
        # does not update them with validation data.
        net.eval()
        with torch.no_grad():
            val_err = criterion(net(val_input), val_target)
        val_loss = val_err.item()  # store a float, consistent with lst_loss
        lst_val_loss.append(val_loss)

        # Output training stats every 20 batches
        if i % 20 == 0:
            print(f'[{epoch}/{end_epoch}] [{i}/{len(train_dataloader)}]\tLoss: {round(train_loss, 4)}\tVal loss: {round(val_loss, 4)}')

    # Save the network weights every 5th epoch
    if epoch % 5 == 0 and epoch != 0:
        net_filename = os.path.join(weights_path, f'net_r{input_dim}_r{output_dim}_e{epoch}_weights.pth')
        torch.save(net.state_dict(), net_filename)
        print('saved network weights', net_filename)


# torch.save(net.state_dict(), f'{weights_path}/net_e{epoch}_r{input_dim}_r_{output_dim}_weights.pth')

Starting Training Loop...


  0%|          | 0/45 [00:00<?, ?it/s]

RuntimeError: CUDA out of memory. Tried to allocate 1000.00 MiB (GPU 0; 23.70 GiB total capacity; 4.88 GiB already allocated; 128.44 MiB free; 5.90 GiB reserved in total by PyTorch) If reserved memory is >> allocated memory try setting max_split_size_mb to avoid fragmentation.  See documentation for Memory Management and PYTORCH_CUDA_ALLOC_CONF

In [None]:
# num_epochs = 100

# # Training Loop
# print("Starting Training Loop...")
# lst_loss = []
# lst_val_loss = []
# # For each epoch
# for epoch in tqdm(range(num_epochs)):
#     # For each batch in the dataloader
#     for i, input_data_all in enumerate(train_input_dataloader, 0): # batch
#         lst_batch_loss = []
#         target_data_all = next(iter(train_target_dataloader))
#         val_input_data = next(iter(val_input_dataloader)).to(device)
#         val_target_data = next(iter(val_target_dataloader))
        
#         # mini batch
#         input_data_split = torch.split(input_data_all, mini_batch_size)
#         target_data_split = torch.split(target_data_all, mini_batch_size)

#         opt.zero_grad()

#         for j in range(len(input_data_split)):
#             input_data = input_data_split[j].to(device)
#             target_data = target_data_split[j]
#             output_data = net(input_data)
#             # Calculate loss
#             err = criterion(output_data.cpu(), target_data) / len(input_data_split)
#             lst_batch_loss.append(err.item())
#             err.backward() # err grad to opt
#             if j==len(input_data_split)-1: # if the end of mini-batch iteration (one batch done)
#                 opt.step()  # update the weights only after accumulating k mini batches
#                 opt.zero_grad()  # reset gradients for accumulation for the next large batch
#         lst_loss.append(np.sum(lst_batch_loss)) # average of losses in one batch
        
#         # validation
#         with torch.no_grad():
#           val_output_data = net(val_input_data)
#         val_err = criterion(val_output_data.cpu(), val_target_data)
#         lst_val_loss.append(val_err)
            
#         # Output training stats
#         if i % 5 == 0:
#             print(f'[{epoch}/{num_epochs}][{i}/{len(train_input_dataloader)}]\tLoss: {round(err.item(), 4)}\tVal loss: {round(val_err.item(), 4)}')
            
#         # iters += 1
    
#     if epoch % 5 == 0 and epoch != 0:
#       # plot_convergence(G_losses, D_real_losses, D_fake_losses, real_accuracies, fake_accuracies)
#       # save network weights
#       net_filename = os.path.join(weights_path, f'net_r{input_dim}_r{output_dim}_e{epoch}_weights.pth')
#       torch.save(net.state_dict(), net_filename)
#       print('saved network weights', net_filename)


# # torch.save(net.state_dict(), f'{weights_path}/net_e{epoch}_r{input_dim}_r_{output_dim}_weights.pth')

In [None]:
# weights_path = f'{project_path}/weights/LRCN'
# os.makedirs(weights_path, exist_ok=True)
# file_net = f'{weights_path}/net_r{input_dim}_r{output_dim}_e{epoch}_weights.pth'
# torch.save(net.state_dict(), file_net)

In [None]:
# file_net = os.path.join(weights_path, 'net_r64_r128_e35_weights.pth')
# net.load_state_dict(torch.load(file_net))

In [None]:
# Visualisation for testing: run the network on the first training batch and
# plot every input voxel grid next to the thresholded network output.
VOXEL_THRESHOLD = 0.3  # outputs above this value are drawn as filled voxels
MAX_SAMPLES = 10       # upper bound on the number of samples that are plotted


def _show_voxels(voxels, label):
    """Print `label` and draw `voxels` as a 3D voxel plot."""
    print(label)
    fig = plt.figure(figsize=(10, 10))
    ax = fig.add_subplot(projection='3d')
    ax.voxels(voxels)
    plt.show()
    plt.close(fig)  # release the figure so repeated plots do not pile up


input_data, target_data = next(iter(train_dataloader))

# Predict in eval mode, then restore whatever mode the network was in.
was_training = net.training
net.eval()
with torch.no_grad():
    output_data = net(input_data.to(device))
net.train(was_training)

input_data = input_data.cpu().numpy()
# A single threshold suffices: re-thresholding the 0/1 result at 0.5 is a no-op.
output_data = (output_data.cpu().numpy() > VOXEL_THRESHOLD).astype(int)

# A batch holds only batch_size samples, so never index past its end.
for i in range(min(MAX_SAMPLES, len(input_data))):
    _show_voxels(input_data[i][0], f'input {i}')
    _show_voxels(output_data[i][0], f'output {i}')