In [1]:
import torch
import torchvision
import torchvision.transforms as transforms

In [2]:
# Number of images per batch drawn from the loaders below.
batch_size = 4

# Convert each image to a tensor, then apply (x - 0.5) / 0.5 to every channel.
transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize(mean=(0.5, 0.5, 0.5), std=(0.5, 0.5, 0.5)),
])

In [3]:
import os

# Where CIFAR-10 is stored / downloaded. Set the CIFAR_ROOT environment variable to
# run the notebook on another machine; when it is unset the original local path is used.
cifar_root = os.environ.get('CIFAR_ROOT', '/Users/cabe0006/Projects/monash/data/cifar')

# Training split with the normalisation defined in `transform`, served in shuffled
# mini-batches of `batch_size` by two worker processes.
trainset = torchvision.datasets.CIFAR10(root=cifar_root, train=True,
                                        download=True, transform=transform)
trainloader = torch.utils.data.DataLoader(trainset, batch_size=batch_size,
                                          shuffle=True, num_workers=2)

Files already downloaded and verified


In [4]:
i, l = next(iter(trainloader))

In [5]:
i.shape

torch.Size([4, 3, 32, 32])

In [9]:
torch.max(i)

tensor(0.9765)

In [12]:
torch.min(i/2 + 0.5)

tensor(0.)

In [None]:
import matplotlib.pyplot as plt
import numpy as np

# functions to show an image


def imshow(img):
    """Display a channel-first image tensor with matplotlib.

    The tensor is mapped back with ``img / 2 + 0.5`` (the inverse of a
    mean=0.5 / std=0.5 normalisation), converted to NumPy and its first axis
    is moved to the end before plotting.
    """
    restored = img / 2 + 0.5     # unnormalize
    channels_last = np.transpose(restored.numpy(), (1, 2, 0))
    plt.imshow(channels_last)
    plt.show()


# get some random training images
dataiter = iter(trainloader)
# Use the built-in next(): DataLoader iterators implement __next__, and the
# Python-2 style `.next()` method is not available on current PyTorch versions.
images, labels = next(dataiter)

# show images
imshow(torchvision.utils.make_grid(images))
# print labels
# print(' '.join('%5s' % classes[labels[j]] for j in range(batch_size)))

In [None]:
from models_vae.encoder import build_encoder
from models_vae.decoder import build_decoder
import torch
from torchsummary import summary


In [None]:
# Build the encoder and push one random 3-channel 32x32 input through it to inspect
# the shape of what it returns.
# NOTE(review): `args` is not assigned anywhere above this cell in the visible
# notebook (its first assignment appears in a later cell), so a fresh
# "Restart & Run All" would raise NameError here — confirm and move the Args
# definition above this cell.
encoder = build_encoder(args)
encoder(torch.rand(1, 3, 32, 32)).shape

In [None]:
# decoder = build_decoder()
# decoder(torch.rand(1, 2048, 1, 1)).shape

In [None]:
from models_vae.vae import build

In [None]:
vae = build({})[0]

In [None]:
vae(torch.rand(1, 3, 32, 32))[0].shape

In [None]:
from models_vae.vae_resnet import build
from torchsummary import summary
from torch.utils.tensorboard import SummaryWriter
import torch
from torchsummary import summary


In [None]:
class Args:
    """Plain attribute container handed to the model builders (`build`, `build_encoder`)."""

    backbone: str = 'resnet50'
    dilation: bool = False


args = Args()

In [None]:
model = build(args)[0]

In [None]:
# Trace the model on a random batch of two 3x542x1024 inputs and log the graph for
# TensorBoard. The context manager closes the writer even if tracing raises, so
# the event file is not left open.
with SummaryWriter('runs/experiment_1') as writer:
    writer.add_graph(model, torch.rand(2, 3, 542, 1024))

In [None]:
# summary(model, (3, 542, 1024))

In [None]:
from models_vae.encoder import build_encoder
from models_vae.decoder import build_decoder
import torch
from torchsummary import summary


In [None]:
encoder = build_encoder(args)
encoder(torch.rand(1, 3, 542, 1024))[0]

In [None]:
sum(p.numel() for p in encoder.parameters() if p.requires_grad)

In [None]:
decoder = build_decoder()

In [None]:
decoder(torch.rand(1, 2048, 17, 32)).shape

In [None]:
sum(p.numel() for p in decoder.parameters() if p.requires_grad)

In [None]:
from torchvision import models
from torchsummary import summary
# vgg = models.resnet50()
# summary(vgg, (3, 542, 1042))

"""
The following is an import of PyTorch libraries.
"""
import torch
import torch.nn as nn
import torch.nn.functional as F
import torchvision
from torchvision import datasets, transforms
from torchvision.utils import save_image
import matplotlib.pyplot as plt
import numpy as np
import random
from tqdm import tqdm_notebook
from tqdm import trange
import util.misc as utils


In [None]:
# Shape experiment: a stride-2 convolution followed by a stride-2 transposed
# convolution, checking that `output_size` restores the original (odd) width.
# The tensor is deliberately not called `input`, which would shadow the builtin
# for the rest of the kernel session.
sample = torch.randn(1, 3, 512, 271)
print(sample.size())
downsample = nn.Conv2d(3, 16, 3, stride=2, padding=1)
upsample = nn.ConvTranspose2d(16, 3, 3, stride=2, padding=1)
same = nn.Conv2d(16, 16, 3, padding=1)

h = downsample(sample)
print(h.size())
s = same(h)
print(s.size())
# output_size resolves the ambiguity of the transposed convolution's output shape.
output = upsample(s, output_size=sample.size())
print(output.size())

In [None]:
# A 3x3 convolution with padding=1 and the default stride keeps the spatial size.
# The tensor is not called `input` so the builtin of that name stays usable.
sample = torch.randn(1, 3, 512, 271)
same = nn.Conv2d(3, 3, 3, padding=1)
h = same(sample)
print(h.size())

In [None]:
"""
Determine if any GPUs are available
"""
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

In [None]:
"""
A Convolutional Variational Autoencoder
"""
class VAE(nn.Module):
    """Fully-convolutional variational autoencoder.

    The encoder halves the spatial resolution three times with stride-2
    convolutions and predicts a mean and a log-variance map (16 channels each).
    The decoder upsamples a latent sample back to image resolution with three
    stride-2 transposed convolutions followed by a sigmoid.

    Args:
        imgChannels: number of channels of the input and of the reconstruction.
        featureDim: unused by the convolutional layers; kept so existing calls
            that pass it keep working.
        zDim: unused by the convolutional layers; kept for the same reason.
    """

    # Decoder target sizes used when none are supplied. They are the sizes the
    # three encoder stages see for a 542x1024 input (136x256, 271x512, 542x1024).
    DEFAULT_OUTPUT_SIZES = ((136, 256), (271, 512), (542, 1024))

    def __init__(self, imgChannels=3, featureDim=32*20*20, zDim=256):
        super(VAE, self).__init__()

        # Encoder: three stride-2 convolutions.
        self.down1 = nn.Conv2d(imgChannels, 16, 3, stride=2, padding=1)
        self.down2 = nn.Conv2d(16, 16, 3, stride=2, padding=1)
        self.down3 = nn.Conv2d(16, 16, 3, stride=2, padding=1)

        # Size-preserving heads that produce mu and logVar.
        self.same1 = nn.Conv2d(16, 16, 3, padding=1)
        self.same2 = nn.Conv2d(16, 16, 3, padding=1)

        # Decoder: stride-2 transposed convolutions. `up2` is not used by
        # decoder(); it is kept so parameter names and creation order are unchanged.
        self.up1 = nn.ConvTranspose2d(16, 16, 3, stride=2, padding=1)
        self.up2 = nn.ConvTranspose2d(16, 16, 3, stride=2, padding=1)
        self.up4 = nn.ConvTranspose2d(16, 32, 3, stride=2, padding=1)
        self.up3 = nn.ConvTranspose2d(32, imgChannels, 3, stride=2, padding=1)

    @staticmethod
    def _downsampled(size):
        """Output length of a kernel-3, stride-2, padding-1 convolution for input length `size`."""
        return (size - 1) // 2 + 1

    def encoder(self, x):
        """Return `(mu, logVar)` feature maps for the image batch `x`."""
        x = F.relu(self.down1(x))
        x = F.relu(self.down2(x))
        x = F.relu(self.down3(x))
        mu = self.same1(x)
        logVar = self.same2(x)
        return mu, logVar

    def reparameterize(self, mu, logVar):
        """Sample `mu + std * eps` with `eps ~ N(0, 1)` and `std = exp(logVar / 2)`."""
        std = torch.exp(logVar/2)
        eps = torch.randn_like(std)
        return mu + std * eps

    def decoder(self, z, output_sizes=None):
        """Upsample the latent map `z` to an image with values in (0, 1).

        Args:
            z: latent feature map with 16 channels.
            output_sizes: three `(height, width)` pairs, the target size of each
                upsampling stage from coarsest to finest. Defaults to
                `DEFAULT_OUTPUT_SIZES`, i.e. the sizes for a 542x1024 image.
        """
        if output_sizes is None:
            output_sizes = self.DEFAULT_OUTPUT_SIZES
        size1, size2, size3 = output_sizes

        # output_size picks between the two lengths a stride-2 transposed
        # convolution can produce, so odd input sizes are restored exactly.
        x = F.relu(self.up1(z, output_size=size1))
        x = F.relu(self.up4(x, output_size=size2))
        x = torch.sigmoid(self.up3(x, output_size=size3))
        return x

    def forward(self, x):
        """Run encoder -> reparameterization -> decoder.

        Returns:
            `(out, mu, logVar)`: the reconstruction (same shape as `x`) and the
            two encoder outputs needed for the loss.
        """
        mu, logVar = self.encoder(x)
        z = self.reparameterize(mu, logVar)

        # Derive the decoder's target sizes from the input so that any input
        # resolution is reconstructed at its own size.
        full = tuple(x.shape[-2:])
        half = tuple(self._downsampled(n) for n in full)
        quarter = tuple(self._downsampled(n) for n in half)

        out = self.decoder(z, output_sizes=(quarter, half, full))
        return out, mu, logVar

In [None]:
net = VAE().to(device)
summary(net, (3, 542, 1024))


In [None]:
from datasets import build_dataset, get_coco_api_from_dataset
import matplotlib.pyplot as plt
%matplotlib inline
from torch.utils.data import DataLoader, DistributedSampler


In [None]:
class Args:
    """Plain attribute container passed to `build_dataset`."""

    dataset_file: str = 'vae_ant'
    data_path: str = '/Users/cabe0006/Projects/monash/cvpr_data/detection_dataset/local_env'
    masks: bool = False


args = Args()

In [None]:
# Build the dataset that the sampler / loader cells below draw from.
# NOTE(review): the variable is called `dataset_train` but image_set='test' is
# requested here — confirm which split is intended.
dataset_train = build_dataset(image_set='test', args=args)
# img1 = dataset_train[100][0].numpy()
# img = np.moveaxis(img1, [0, 1 ,2 ], [2, 0, 1])
# plt.imshow(img)

In [None]:
sampler_train = torch.utils.data.RandomSampler(dataset_train)

In [None]:
batch_sampler_train = torch.utils.data.BatchSampler(
        sampler_train, 8, drop_last=True)

In [None]:
data_loader_train = DataLoader(dataset_train, batch_sampler=batch_sampler_train,
                                   collate_fn=utils.collate_fn, num_workers=0)

In [None]:
a = next(iter(data_loader_train))

In [None]:
a[0].tensors.shape

# Dataset testing

In [None]:
import torch
import util.misc as utils
from datasets import build_dataset
from torch.utils.data import DataLoader, DistributedSampler
import matplotlib.pyplot as plt
%matplotlib inline
import numpy as np
from torchvision import transforms


In [None]:
class Args:
    """Plain attribute container passed to `build_dataset`."""

    dataset_file: str = 'vae_ant'
    data_path: str = '/Users/cabe0006/Projects/monash/cvpr_data/detection_dataset/local_env'
    masks: bool = False


args = Args()
# Inverse of a per-channel mean/std normalisation, done in two steps:
# first divide by 1/std (i.e. multiply by std), then subtract -mean (i.e. add mean).
invTrans = transforms.Compose([
    transforms.Normalize(
        mean=[0., 0., 0.],
        std=[1/0.229, 1/0.224, 1/0.225],
    ),
    transforms.Normalize(
        mean=[-0.485, -0.456, -0.406],
        std=[1., 1., 1.],
    ),
])

In [None]:
# Train split, visited in random order and grouped into batches of two;
# an incomplete final batch is dropped. Loading happens in the main process.
dataset_train = build_dataset(image_set='train', args=args)
sampler_train = torch.utils.data.RandomSampler(dataset_train)
batch_sampler_train = torch.utils.data.BatchSampler(
    sampler=sampler_train, batch_size=2, drop_last=True)
data_loader_train = DataLoader(
    dataset=dataset_train,
    batch_sampler=batch_sampler_train,
    num_workers=0,
)

In [None]:
imgs = next(iter(data_loader_train))


In [None]:
torch.max(imgs)

In [None]:
# Take the first image of a fresh batch, undo the normalisation, and move the
# leading (channel) axis to the end so matplotlib can draw it.
imgs = next(iter(data_loader_train))
inv_img = invTrans(imgs[0])
s = np.transpose(inv_img.numpy(), (1, 2, 0))
print(s.shape)

plt.imshow(s)

# VAE Testing

In [None]:
import torch
q = torch.distributions.Normal(2, 4)
z = q.rsample()
print(z)

In [None]:
# Evaluate the sample z under the standard-normal prior p and under the
# distribution q it was drawn from.
p = torch.distributions.Normal(0, 1)
log_pz = p.log_prob(z)
log_qzx = q.log_prob(z)
print(f'log prob pz: {log_pz}, prob: {torch.exp(log_pz)}')
# Label corrected: this line reports the density under q, not under p.
print(f'log prob qzx: {log_qzx}, prob: {torch.exp(log_qzx)}')

# ResNet model

In [None]:
import pytorch_lightning as pl
from torch import nn
import torch
from pl_bolts.models.autoencoders.components import (
    resnet18_decoder,
    resnet18_encoder,
)



In [None]:
class VAE(nn.Module):
    """Variational autoencoder built from the ResNet-18 encoder / decoder components.

    The backbone networks are stored as `enc_net` and `dec_net`. They must not
    share a name with the `encoder` / `decoder` methods: `nn.Module` keeps
    sub-modules outside the instance `__dict__`, so a same-named method wins the
    attribute lookup and `self.encoder(x)` inside `encoder` would call itself
    until RecursionError.

    Args:
        enc_out_dim: size of the feature vector produced by the encoder network.
        latent_dim: size of the latent vector.
        input_height: forwarded to `resnet18_decoder`.
            NOTE(review): the decoder is configured from a single height value —
            confirm it reconstructs the non-square 542x1024 inputs used elsewhere
            in this notebook.
    """

    def __init__(self,  enc_out_dim=512, latent_dim=256, input_height=542):
        super(VAE, self).__init__()
        # The two positional flags are presumably first_conv / maxpool1, matching
        # the keyword arguments given to the decoder — TODO confirm.
        self.enc_net = resnet18_encoder(False, False)
        self.dec_net = resnet18_decoder(
            latent_dim=latent_dim,
            input_height=input_height,
            first_conv=False,
            maxpool1=False
        )
        # Linear heads mapping encoder features to the latent mean and log-variance.
        self.fc_mu = nn.Linear(enc_out_dim, latent_dim)
        self.fc_var = nn.Linear(enc_out_dim, latent_dim)

    def encoder(self, x):
        """Encode `x` and return `(mu, log_var)` from the two linear heads."""
        x_encoded = self.enc_net(x)
        mu, log_var = self.fc_mu(x_encoded), self.fc_var(x_encoded)
        return mu, log_var

    def reparameterize(self, mu, logVar):
        """Sample `mu + std * eps` with `eps ~ N(0, 1)` and `std = exp(logVar / 2)`."""
        std = torch.exp(logVar / 2)
        eps = torch.randn_like(std)
        return mu + std * eps

    def decoder(self, z):
        """Decode the latent sample `z` with the decoder network."""
        x = self.dec_net(z)
        return x

    def forward(self, x):
        """Run encoder -> reparameterization -> decoder.

        Returns:
            `(out, mu, logVar)`: the decoder output and the two encoder outputs
            needed for the loss.
        """
        mu, logVar = self.encoder(x)
        z = self.reparameterize(mu, logVar)
        out = self.decoder(z)
        return out, mu, logVar

In [None]:
from torchsummary import summary
# vgg = models.resnet50()
# summary(vgg, (3, 542, 1042))
vae = VAE()


In [None]:
# summary(vae, (3, 28, 28))

In [None]:
encoder = resnet18_encoder(False, False)

In [None]:
inp = torch.rand(1, 3, 28, 28)

In [None]:
encoder(inp).shape