In [None]:
# custom dataset that fetches image pairs from a given scene
# DEPRECATED, use h5Dataset for performance improvements
class ImagePairDataset(Dataset):
    '''
    Map-style dataset that yields (rgb, depth) image pairs from one scene.

    The i-th file of the rgb folder is paired with the i-th file of the depth
    folder after both listings are sorted by file name, so the two folders
    are expected to use matching, identically ordered file names.
    '''

    # raw depth values are divided by this when an image is loaded
    DEPTH_SCALE = 65536

    def __init__(self, rgb_path, depth_path, transform=None):
        '''
        rgb_path: path to actual rgb photos (.../rgb)
        depth_path: path to actual depth photos (.../depth)
        transform: transforms to perform. at minimum, must convert to tensor
        '''
        # local import: only `listdir` is known to be imported at file level
        import os

        # listdir() returns names in arbitrary order, so sort both listings;
        # otherwise index i of the two lists need not be the same frame
        self.rgb_paths = [os.path.join(rgb_path, f)
                          for f in sorted(listdir(rgb_path))]
        self.depth_paths = [os.path.join(depth_path, f)
                            for f in sorted(listdir(depth_path))]

        # rgb vs depth sizes might mismatch, so take min
        size = min(len(self.rgb_paths), len(self.depth_paths))

        # truncate the lists so every image has a pair, and drop the final
        # two pairs because the tail of a capture may be corrupted
        keep = max(size - 2, 0)
        self.rgb_paths = self.rgb_paths[:keep]
        self.depth_paths = self.depth_paths[:keep]

        self.transform = transform

    def _load_pair(self, index):
        '''Open the rgb/depth files at `index` as numpy arrays (depth scaled down).'''
        rgb = np.array(Image.open(self.rgb_paths[index]))
        depth = np.array(Image.open(self.depth_paths[index])) / self.DEPTH_SCALE
        return rgb, depth

    def __getitem__(self, index):
        '''
        Return the (rgb, depth) pair at `index`, with `transform` applied to
        both when one was given (depth is additionally squeezed).

        If the rgb image is not a 3-channel (H, W, 3) array, the pair at the
        previous index is used instead, stepping back until a usable image is
        found. Raises RuntimeError if no image in the dataset qualifies.
        '''
        rgb, depth = self._load_pair(index)

        # a few pictures come out with an unexpected shape (e.g. no channel
        # axis); check the shape explicitly instead of catching every error
        retries = 0
        while rgb.ndim != 3 or rgb.shape[2] != 3:
            retries += 1
            if retries >= len(self):
                raise RuntimeError(
                    "no rgb image with shape (H, W, 3) found in dataset")
            index -= 1  # may go negative and wrap to the end of the lists
            rgb, depth = self._load_pair(index)

        if self.transform:
            rgb = self.transform(rgb)
            depth = self.transform(depth).squeeze()

        return rgb, depth

    def __len__(self):
        '''Number of (rgb, depth) pairs available.'''
        return min(len(self.rgb_paths), len(self.depth_paths))



class Baseline1(nn.Module):
    '''
    Baseline network: four stacked stride-1 convolutions (3 -> 20 -> 40 ->
    20 -> 1 channels) with "same" padding and no activations in between.
    '''

    def __init__(self):
        super().__init__()

        # (in_channels, out_channels, kernel_size); padding is kernel_size // 2
        layer_specs = (
            (3, 20, 11),
            (20, 40, 11),
            (40, 20, 7),
            (20, 1, 7),
        )

        layers = [
            nn.Conv2d(c_in, c_out, kernel_size=k, padding=k // 2)
            for c_in, c_out, k in layer_specs
        ]
        self.conv = nn.Sequential(*layers)

    def forward(self, x):
        '''Run `x` through the convolution stack.'''
        out = self.conv(x)
        return out

class Model1(nn.Module):
    '''
    Four stride-2 convolutions followed by four stride-2 transposed
    convolutions, with no activations.

    Sizes noted for a 640x480 input: 320x240 -> 160x120 -> 80x60 -> 40x30
    after `conv`, then back up to 640x480 after `trans_conv`.
    '''

    def __init__(self):
        super().__init__()

        # (in_channels, out_channels, kernel_size); padding is kernel_size // 2
        down_specs = ((3, 40, 11), (40, 80, 7), (80, 160, 5), (160, 320, 3))
        up_specs = ((320, 160, 3), (160, 80, 5), (80, 40, 7), (40, 1, 11))

        # each stage halves the spatial size
        down = [
            nn.Conv2d(c_in, c_out, kernel_size=k, padding=k // 2, stride=2)
            for c_in, c_out, k in down_specs
        ]
        self.conv = nn.Sequential(*down)

        # each stage doubles the spatial size
        up = [
            nn.ConvTranspose2d(c_in, c_out, kernel_size=k, padding=k // 2,
                               stride=2, output_padding=1)
            for c_in, c_out, k in up_specs
        ]
        self.trans_conv = nn.Sequential(*up)

    def forward(self, x):
        '''Downsample `x` with `conv`, then upsample with `trans_conv`.'''
        encoded = self.conv(x)
        return self.trans_conv(encoded)

class Model2(nn.Module):
    '''
    Four stride-2 convolutions and four stride-2 transposed convolutions with
    SELU between stages and a final Sigmoid.

    Sizes noted for a 640x480 input: 320x240 -> 160x120 -> 80x60 -> 40x30
    after `conv`, then back up to 640x480 after `trans_conv`.
    '''

    def __init__(self):
        super().__init__()

        # (in_channels, out_channels, kernel_size); padding is kernel_size // 2
        down_specs = ((3, 40, 21), (40, 80, 11), (80, 160, 7), (160, 320, 3))
        up_specs = ((320, 160, 3), (160, 80, 7), (80, 40, 11), (40, 1, 21))

        down = []
        for stage, (c_in, c_out, k) in enumerate(down_specs):
            down.append(nn.Conv2d(c_in, c_out, kernel_size=k, padding=k // 2,
                                  stride=2))
            # no activation after the last (40x30) convolution
            if stage < len(down_specs) - 1:
                down.append(nn.SELU())
        self.conv = nn.Sequential(*down)

        up = []
        for stage, (c_in, c_out, k) in enumerate(up_specs):
            up.append(nn.ConvTranspose2d(c_in, c_out, kernel_size=k,
                                         padding=k // 2, stride=2,
                                         output_padding=1))
            # the output stage ends in Sigmoid, the others in SELU
            is_output = stage == len(up_specs) - 1
            up.append(nn.Sigmoid() if is_output else nn.SELU())
        self.trans_conv = nn.Sequential(*up)

    def forward(self, x):
        '''Downsample `x` with `conv`, then upsample with `trans_conv`.'''
        encoded = self.conv(x)
        return self.trans_conv(encoded)

class Model3(nn.Module):
    '''
    Three stride-2 convolutions and three stride-2 transposed convolutions
    with SELU between stages and a final Sigmoid.

    Sizes noted for a 640x480 input: 320x240 -> 160x120 -> 80x60 after
    `conv`, then back up to 640x480 after `trans_conv`.
    '''

    def __init__(self):
        super().__init__()

        # (in_channels, out_channels, kernel_size); padding is kernel_size // 2
        down_specs = ((3, 100, 5), (100, 200, 3), (200, 500, 3))
        up_specs = ((500, 200, 3), (200, 100, 3), (100, 1, 5))

        down = []
        for stage, (c_in, c_out, k) in enumerate(down_specs):
            down.append(nn.Conv2d(c_in, c_out, kernel_size=k, padding=k // 2,
                                  stride=2))
            # no activation after the last (80x60) convolution
            if stage < len(down_specs) - 1:
                down.append(nn.SELU())
        self.conv = nn.Sequential(*down)

        up = []
        for stage, (c_in, c_out, k) in enumerate(up_specs):
            up.append(nn.ConvTranspose2d(c_in, c_out, kernel_size=k,
                                         padding=k // 2, stride=2,
                                         output_padding=1))
            # the output stage ends in Sigmoid, the others in SELU
            is_output = stage == len(up_specs) - 1
            up.append(nn.Sigmoid() if is_output else nn.SELU())
        self.trans_conv = nn.Sequential(*up)

    def forward(self, x):
        '''Downsample `x` with `conv`, then upsample with `trans_conv`.'''
        encoded = self.conv(x)
        return self.trans_conv(encoded)

class Model4(nn.Module):
    '''
    Same layout as a four-stage stride-2 conv / transposed-conv stack with
    SELU between stages and a final Sigmoid, using large kernels
    (51, 21, 11, 3 going down; 3, 11, 21, 51 going up).

    Sizes noted for a 640x480 input: 320x240 -> 160x120 -> 80x60 -> 40x30
    after `conv`, then back up to 640x480 after `trans_conv`.
    '''

    def __init__(self):
        super().__init__()

        # (in_channels, out_channels, kernel_size); padding is kernel_size // 2
        down_specs = ((3, 40, 51), (40, 80, 21), (80, 160, 11), (160, 320, 3))
        up_specs = ((320, 160, 3), (160, 80, 11), (80, 40, 21), (40, 1, 51))

        down = []
        for stage, (c_in, c_out, k) in enumerate(down_specs):
            down.append(nn.Conv2d(c_in, c_out, kernel_size=k, padding=k // 2,
                                  stride=2))
            # no activation after the last (40x30) convolution
            if stage < len(down_specs) - 1:
                down.append(nn.SELU())
        self.conv = nn.Sequential(*down)

        up = []
        for stage, (c_in, c_out, k) in enumerate(up_specs):
            up.append(nn.ConvTranspose2d(c_in, c_out, kernel_size=k,
                                         padding=k // 2, stride=2,
                                         output_padding=1))
            # the output stage ends in Sigmoid, the others in SELU
            is_output = stage == len(up_specs) - 1
            up.append(nn.Sigmoid() if is_output else nn.SELU())
        self.trans_conv = nn.Sequential(*up)

    def forward(self, x):
        '''Downsample `x` with `conv`, then upsample with `trans_conv`.'''
        encoded = self.conv(x)
        return self.trans_conv(encoded)

class Model5(nn.Module):
    '''
    Four-stage stride-2 conv / transposed-conv stack with large kernels,
    SELU activations, a final Sigmoid, and BatchNorm2d around the stages.

    In `conv` every convolution is preceded by BatchNorm2d on its input and
    the last convolution is followed by BatchNorm2d instead of SELU. In
    `trans_conv` every stage except the last is followed by SELU and then
    BatchNorm2d.

    Sizes noted for a 640x480 input: 320x240 -> 160x120 -> 80x60 -> 40x30
    after `conv`, then back up to 640x480 after `trans_conv`.
    '''

    def __init__(self):
        super().__init__()

        # (in_channels, out_channels, kernel_size); padding is kernel_size // 2
        down_specs = ((3, 40, 51), (40, 80, 21), (80, 160, 11), (160, 320, 3))
        up_specs = ((320, 160, 3), (160, 80, 11), (80, 40, 21), (40, 1, 51))

        down = []
        for stage, (c_in, c_out, k) in enumerate(down_specs):
            down.append(nn.BatchNorm2d(c_in))
            down.append(nn.Conv2d(c_in, c_out, kernel_size=k, padding=k // 2,
                                  stride=2))
            if stage < len(down_specs) - 1:
                down.append(nn.SELU())
            else:
                # the last convolution is normalised rather than activated
                down.append(nn.BatchNorm2d(c_out))
        self.conv = nn.Sequential(*down)

        up = []
        for stage, (c_in, c_out, k) in enumerate(up_specs):
            up.append(nn.ConvTranspose2d(c_in, c_out, kernel_size=k,
                                         padding=k // 2, stride=2,
                                         output_padding=1))
            if stage < len(up_specs) - 1:
                up.append(nn.SELU())
                up.append(nn.BatchNorm2d(c_out))
            else:
                # output stage: Sigmoid only, no normalisation
                up.append(nn.Sigmoid())
        self.trans_conv = nn.Sequential(*up)

    def forward(self, x):
        '''Downsample `x` with `conv`, then upsample with `trans_conv`.'''
        encoded = self.conv(x)
        return self.trans_conv(encoded)


# DEPRECATED
def load_dataset_older(root_path, batch_size=16, ratios=(.7,.15,.15)):
    '''
    Build train/val/test loaders by pooling every scene under `root_path`
    and splitting the pooled samples at random.

    root_path: folder containing one sub-folder per scene, each with
        "rgb/" and "depth/" inside; must end with a path separator because
        paths are built by string concatenation
    batch_size: used by data loaders
    ratios: fractions of the pooled samples for train and val (first two
        entries); test receives whatever is left, the third entry is not read

    Prints the three split sizes and returns
    (train_loader, val_loader, test_loader).
    '''
    scene_sets = [
        ImagePairDataset(
            rgb_path=root_path + scene + "/rgb/",
            depth_path=root_path + scene + "/depth/",
            transform=transforms.ToTensor(),
        )
        for scene in listdir(root_path)
    ]

    pooled = ConcatDataset(scene_sets)
    total = len(pooled)
    n_train = int(total * ratios[0])
    n_val = int(total * ratios[1])
    n_test = total - n_train - n_val  # remainder, so the sizes always sum up

    parts = random_split(pooled, (n_train, n_val, n_test))
    print(*(len(part) for part in parts))

    def as_loader(part):
        # the split is already random, so the loaders themselves don't shuffle
        return DataLoader(part, batch_size=batch_size,
                          shuffle=False, num_workers=0)

    train_loader, val_loader, test_loader = (as_loader(part) for part in parts)
    return train_loader, val_loader, test_loader

# returns train, val, test loaders when you provide a path to the data
# DEPRECATED
def load_dataset_old(root_path, batch_size=16, num_folders=(6, 2, 2)):
    '''
    Build train/val/test loaders by assigning whole scene folders to a split.

    root_path: path to data folder, which must contain scene folders inside.
        for example, provide path to ".../Data" for this structure:
            /Data
                /computer_room
                    /rgb
                    /depth
                /dentist_office
                    /rgb
                    /depth
        must end with a path separator because paths are built by string
        concatenation
    batch_size: used by data loaders
    num_folders: of the x number of scene folders (we have 10 right now),
        list how many folders should go to train, val, and test respectively.
        only the first two entries are read; every remaining folder goes
        to test

    Entries of `root_path` whose name contains a "." (stray files such as
    ".DS_Store") are skipped and do not count towards `num_folders`.
    Folders are taken in the order listdir() returns them.

    Returns (train_loader, val_loader, test_loader), each shuffling.
    '''
    # filter BEFORE numbering so a stray file neither becomes a dataset nor
    # uses up one of the train/val slots
    scenes = [scene for scene in listdir(root_path) if "." not in scene]

    train_end = num_folders[0]
    val_end = train_end + num_folders[1]

    train_sub_datasets = []
    val_sub_datasets = []
    test_sub_datasets = []

    for i, scene in enumerate(scenes):
        scene_data = ImagePairDataset(
            rgb_path = root_path + scene + "/rgb/",
            depth_path = root_path + scene + "/depth/",
            transform = transforms.ToTensor()
        )

        if i < train_end:
            train_sub_datasets.append(scene_data)
        elif i < val_end:
            val_sub_datasets.append(scene_data)
        else:
            test_sub_datasets.append(scene_data)

    train_data = ConcatDataset(train_sub_datasets)
    val_data = ConcatDataset(val_sub_datasets)
    test_data = ConcatDataset(test_sub_datasets)

    train_loader = DataLoader(train_data, batch_size=batch_size,
                              shuffle=True, num_workers=0)
    val_loader = DataLoader(val_data, batch_size=batch_size,
                              shuffle=True, num_workers=0)
    test_loader = DataLoader(test_data, batch_size=batch_size,
                              shuffle=True, num_workers=0)

    return train_loader, val_loader, test_loader

def mem_train(model, batch_size=16, batch_multiplier = 1, learning_rate=0.01, 
              num_epochs=1, ratios=(.7, .15, .15)):
    '''
    Train `model` in place with an RMSE loss and Adam, accumulating gradients
    over `batch_multiplier` batches and plotting train/val loss every batch.

    model: network to train; called as model(rgb.float())
    batch_size: passed to load_dataset()
    batch_multiplier: an optimizer step is taken every `batch_multiplier`
        batches; each batch's loss is divided by this value
    learning_rate: learning rate given to Adam
    num_epochs: number of passes over the training loader
    ratios: passed to load_dataset()

    Returns nothing. Relies on load_dataset, darie_path, batch_loss and
    PlotLosses, which are defined outside this function.
    '''

    plotlosses = PlotLosses()

    print("loading data...")
    # only the first two loaders returned by load_dataset() are used
    train_loader, val_loader = load_dataset(
        file_path=darie_path,
        batch_size=batch_size,
        ratios=ratios
        )[:2]
    print("data loading complete")

    # seeded after load_dataset() has run, so it cannot influence whatever
    # load_dataset() does
    torch.manual_seed(69)

    criterion = nn.MSELoss()
    optimizer = optim.Adam(model.parameters(), lr=learning_rate)
    # scheduler.step() is called once per batch below, so `patience` counts
    # batches rather than epochs
    scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer=optimizer, \
                                        factor=0.5, patience=20, verbose=True)

    for epoch in range(num_epochs):

        for i, (rgb, depth) in enumerate(train_loader):

            if torch.cuda.is_available():
                rgb = rgb.cuda()
                depth = depth.cuda()
            
            # apply the gradients accumulated by the previous batches, then
            # reset them; on the very first batch no backward() has run yet.
            # there is no step after the loops, so gradients accumulated
            # after the last step of the final epoch are never applied
            if i % batch_multiplier == 0:
                optimizer.step()
                optimizer.zero_grad()

            pred = model(rgb.float())
            del rgb

            # RMSE of this batch, scaled down so gradients summed over
            # batch_multiplier batches form an average
            loss = torch.sqrt(criterion(depth.squeeze(), pred.squeeze())) \
                    / batch_multiplier
            del pred, depth

            gc.collect()

            # validation loss is recomputed for every training batch
            val_loss = batch_loss(model, nn.MSELoss(), val_loader) \
                        / batch_multiplier
            scheduler.step(val_loss)
            # NOTE(review): backward() runs after the validation pass, so this
            # batch's autograd graph is presumably still held while
            # batch_loss() executes - confirm whether the order is intended
            loss.backward()

            # multiply back by batch_multiplier to plot unscaled values
            plotlosses.update({
                'loss': loss.item() * batch_multiplier,
                'val_loss': val_loss * batch_multiplier
            })
            plotlosses.send()

            if torch.cuda.is_available():
                torch.cuda.synchronize()


# calculate loss for all validation data (without training network)
# DEPRECATED, use batch_loss()
def validation_loss(model, criterion, val_loader, batch_size):
    '''
    Return the mean RMSE of `model` over every sample of `val_loader`,
    without training the network.

    model: network to evaluate; called as model(rgb)
    criterion: loss whose square root is taken per batch (e.g. nn.MSELoss())
    val_loader: loader yielding (rgb, depth) batches
    batch_size: no longer read (the batch count comes from len(val_loader));
        kept so existing callers keep working

    Each batch's RMSE is weighted by the number of samples in that batch, so
    the result does not depend on the batch size and a smaller final batch
    is handled correctly.

    Raises ValueError if the loader yields no samples.
    '''
    num_batches = len(val_loader)
    use_cuda = torch.cuda.is_available()

    weighted_loss = 0.0
    samples_seen = 0

    # evaluation only: don't build autograd graphs for these forward passes
    with torch.no_grad():
        for i, (rgb, depth) in enumerate(val_loader):

            print("\rValidation Batches: {}/{}".format(i+1, num_batches),
                  end="")

            if use_cuda:
                rgb = rgb.cuda()
                depth = depth.cuda()

            pred = model(rgb)

            loss = torch.sqrt(criterion(depth, pred.squeeze()))

            batch_samples = rgb.size(0)
            weighted_loss += loss.item() * batch_samples
            samples_seen += batch_samples

    if samples_seen == 0:
        raise ValueError("val_loader has no samples")

    return weighted_loss / samples_seen

In [None]:
# print every name currently in scope together with getsizeof() of its object
# (presumably sys.getsizeof, i.e. the shallow size in bytes - confirm import)
# snapshot the items with list() first: the loop below binds `var` and `obj`
# in this same namespace, which would otherwise change it during iteration
local_vars = list(locals().items())
for var, obj in local_vars:
    print(var, getsizeof(obj))