In [1]:
from __future__ import print_function
#%matplotlib inline
import argparse
import os
import random
import torch
import torch.nn as nn
import torch.nn.parallel
import torch.backends.cudnn as cudnn
import torch.optim as optim
import torch.utils.data
import torchvision.datasets as dset
import torchvision.transforms as transforms
import torchvision.utils as vutils
import numpy as np
import matplotlib.pyplot as plt
import matplotlib.animation as animation
from IPython.display import HTML
from PIL import Image


# Set random seed for reproducibility
manualSeed = 999
#manualSeed = random.randint(1, 10000) # use if you want new results
print("Random Seed: ", manualSeed)
random.seed(manualSeed)
torch.manual_seed(manualSeed)

Random Seed:  999


<torch._C.Generator at 0x7f88941c8410>

In [2]:
# use argparse set all the parameter here
import argparse

parser = argparse.ArgumentParser()
parser.add_argument("--img_size", type=int, default=32, help="size of each image dimension")
parser.add_argument(
        '-f',
        '--file',
        help='Path for input file. First line should contain number of lines to search in'
    )
parser.add_argument("--n_epochs", type=int, default=200, help="number of epochs of training")
parser.add_argument("--batch_size", type=int, default=256, help="size of the batches")
parser.add_argument("--lr", type=float, default=0.0002, help="adam: learning rate")
parser.add_argument("--b1", type=float, default=0.5, help="adam: decay of first order momentum of gradient")
parser.add_argument("--b2", type=float, default=0.999, help="adam: decay of first order momentum of gradient")
parser.add_argument("--n_cpu", type=int, default=8, help="number of cpu threads to use during batch generation")
parser.add_argument("--latent_dim", type=int, default=100, help="dimensionality of the latent space")
parser.add_argument("--n_classes", type=int, default=29, help="number of classes for dataset")
parser.add_argument("--channels", type=int, default=3, help="number of image channels")
parser.add_argument("--sample_interval", type=int, default=400, help="interval between image sampling")
x = '--img_size 100'.split()
opt = parser.parse_args(" --img_size 64".split())
print(opt)

Namespace(b1=0.5, b2=0.999, batch_size=256, channels=3, file=None, img_size=64, latent_dim=100, lr=0.0002, n_classes=29, n_cpu=8, n_epochs=200, sample_interval=400)


In [3]:
test_prefix_path = './asl_alphabet_test/asl_alphabet_test/'
train_prefix_path = './asl_alphabet_train/asl_alphabet_train/'
_ = os.listdir(test_prefix_path)

test_path = [test_prefix_path + x for x in _]
#print(test_path[:3])

tmp = [train_prefix_path + x for x in os.listdir(train_prefix_path)]
from glob import glob
train_path = []
for i in tmp:
    # _ = os.listdir(i)
    # _ == ['./asl_alphabet_train/asl_alphabet_train/A/A1.jpg etc.', ... ]
    for j in glob(i + '/*.jpg'):
        train_path.append(j)
print(train_path[:20], len(train_path))

['./asl_alphabet_train/asl_alphabet_train/A/A1493.jpg', './asl_alphabet_train/asl_alphabet_train/A/A1891.jpg', './asl_alphabet_train/asl_alphabet_train/A/A1901.jpg', './asl_alphabet_train/asl_alphabet_train/A/A1954.jpg', './asl_alphabet_train/asl_alphabet_train/A/A2243.jpg', './asl_alphabet_train/asl_alphabet_train/A/A2517.jpg', './asl_alphabet_train/asl_alphabet_train/A/A2625.jpg', './asl_alphabet_train/asl_alphabet_train/A/A2810.jpg', './asl_alphabet_train/asl_alphabet_train/A/A2897.jpg', './asl_alphabet_train/asl_alphabet_train/A/A2946.jpg', './asl_alphabet_train/asl_alphabet_train/A/A1.jpg', './asl_alphabet_train/asl_alphabet_train/A/A10.jpg', './asl_alphabet_train/asl_alphabet_train/A/A100.jpg', './asl_alphabet_train/asl_alphabet_train/A/A1000.jpg', './asl_alphabet_train/asl_alphabet_train/A/A1001.jpg', './asl_alphabet_train/asl_alphabet_train/A/A1002.jpg', './asl_alphabet_train/asl_alphabet_train/A/A1003.jpg', './asl_alphabet_train/asl_alphabet_train/A/A1004.jpg', './asl_alphabet

In [4]:
print(train_path[0].split('/'))

['.', 'asl_alphabet_train', 'asl_alphabet_train', 'A', 'A1493.jpg']


In [5]:
class hand_dataset(object):
    #input the path to get the data
    def __init__(self, data_path, transforms):
        self.data_path = data_path
        self.transforms = transforms

    def __getitem__(self, idx):
        img_path = self.data_path[idx]
        img = Image.open(img_path)
        #get img
        #and get label from path
        label = img_path.split('/')[-2]
        #If we have transform, do it.
        #trans english label to number
        if label == 'space':
            label = 26
        elif label == 'del':
            label = 27
        elif label == 'nothing':
            label = 28
        else :
            label = ord(label)-65

        if transforms:
            img = self.transforms(img)

        return img,label
        
    def __len__(self):
        return len(self.data_path)

trans = transforms.Compose([transforms.Resize(opt.img_size), transforms.ToTensor(), transforms.Normalize([0.5], [0.5])])

hand_train = hand_dataset(train_path, trans)
hand_test = hand_dataset(test_path, trans)

In [6]:
hand_train.__getitem__(0)[0]

tensor([[[-0.7725, -0.5294, -0.4980,  ..., -0.7333, -0.6706, -0.7020],
         [-0.6000,  0.0118,  0.0510,  ..., -0.3020, -0.1765, -0.3569],
         [-0.5294,  0.0510, -0.0118,  ..., -0.0745, -0.1059, -0.3490],
         ...,
         [-0.2941,  0.6941,  0.6863,  ...,  0.3647,  0.4039,  0.0980],
         [-0.3098,  0.6706,  0.6627,  ...,  0.3647,  0.3804,  0.0824],
         [-0.4745,  0.2706,  0.2627,  ...,  0.0431,  0.0510, -0.1765]],

        [[-0.8510, -0.6078, -0.6000,  ..., -0.8196, -0.7333, -0.7490],
         [-0.6471, -0.2157, -0.1686,  ..., -0.4431, -0.2471, -0.3882],
         [-0.6314, -0.1765, -0.2314,  ..., -0.2314, -0.2157, -0.4118],
         ...,
         [-0.2706,  0.7412,  0.7490,  ...,  0.3098,  0.3569,  0.0275],
         [-0.2706,  0.7490,  0.7647,  ...,  0.3176,  0.3647,  0.0196],
         [-0.4431,  0.3412,  0.3569,  ..., -0.0118,  0.0196, -0.2314]],

        [[ 0.6784,  0.3412,  0.3333,  ...,  0.1843,  0.2627,  0.5137],
         [ 0.3098, -0.3961, -0.4039,  ..., -0

In [7]:
(hand_train.__getitem__(0)[1])

0

In [8]:
test_data = torch.utils.data.DataLoader(
 hand_test, batch_size=2, shuffle=True, num_workers=0)
train_data = torch.utils.data.DataLoader(
 hand_train, batch_size=opt.batch_size, shuffle=True, num_workers=0)

In [9]:
def weights_init_normal(m):
    classname = m.__class__.__name__
    if classname.find("Conv") != -1:
        torch.nn.init.normal_(m.weight.data, 0.0, 0.02)
    elif classname.find("BatchNorm2d") != -1:
        torch.nn.init.normal_(m.weight.data, 1.0, 0.02)
        torch.nn.init.constant_(m.bias.data, 0.0)

In [10]:
class Generator(nn.Module):
    def __init__(self):
        super(Generator, self).__init__()

        self.label_emb = nn.Embedding(opt.n_classes, opt.latent_dim)

        self.init_size = opt.img_size // 4  # Initial size before upsampling
        self.l1 = nn.Sequential(nn.Linear(opt.latent_dim, 128 * self.init_size ** 2))

        self.conv_blocks = nn.Sequential(
            nn.BatchNorm2d(128),
            nn.Upsample(scale_factor=2),
            nn.Conv2d(128, 128, 3, stride=1, padding=1),
            nn.BatchNorm2d(128, 0.8),
            nn.LeakyReLU(0.2, inplace=True),
            nn.Upsample(scale_factor=2),
            nn.Conv2d(128, 64, 3, stride=1, padding=1),
            nn.BatchNorm2d(64, 0.8),
            nn.LeakyReLU(0.2, inplace=True),
            nn.Conv2d(64, opt.channels, 3, stride=1, padding=1),
            nn.Tanh(),
        )

    def forward(self, noise, labels):
        gen_input = torch.mul(self.label_emb(labels), noise)
        out = self.l1(gen_input)
        out = out.view(out.shape[0], 128, self.init_size, self.init_size)
        img = self.conv_blocks(out)
        return img


In [11]:
class Discriminator(nn.Module):
    def __init__(self):
        super(Discriminator, self).__init__()

        def discriminator_block(in_filters, out_filters, bn=True):
            """Returns layers of each discriminator block"""
            block = [nn.Conv2d(in_filters, out_filters, 3, 2, 1), nn.LeakyReLU(0.2, inplace=True), nn.Dropout2d(0.25)]
            if bn:
                block.append(nn.BatchNorm2d(out_filters, 0.8))
            return block

        self.conv_blocks = nn.Sequential(
            *discriminator_block(opt.channels, 16, bn=False),
            *discriminator_block(16, 32),
            *discriminator_block(32, 64),
            *discriminator_block(64, 128),
        )

        # The height and width of downsampled image
        ds_size = opt.img_size // 2 ** 4

        # Output layers
        self.adv_layer = nn.Sequential(nn.Linear(128 * ds_size ** 2, 1), nn.Sigmoid())
        self.aux_layer = nn.Sequential(nn.Linear(128 * ds_size ** 2, opt.n_classes), nn.Softmax())

    def forward(self, img):
        out = self.conv_blocks(img)
        out = out.view(out.shape[0], -1)
        validity = self.adv_layer(out)
        label = self.aux_layer(out)

        return validity, label

In [12]:
# Loss functions
adversarial_loss = torch.nn.BCELoss()
auxiliary_loss = torch.nn.CrossEntropyLoss()

# Initialize generator and discriminator
generator = Generator()
discriminator = Discriminator()


In [13]:
cuda = True if torch.cuda.is_available() else False

if cuda:
    generator.cuda()
    discriminator.cuda()
    adversarial_loss.cuda()
    auxiliary_loss.cuda()

In [14]:
# Initialize weights
generator.apply(weights_init_normal)
discriminator.apply(weights_init_normal)

Discriminator(
  (conv_blocks): Sequential(
    (0): Conv2d(3, 16, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1))
    (1): LeakyReLU(negative_slope=0.2, inplace=True)
    (2): Dropout2d(p=0.25, inplace=False)
    (3): Conv2d(16, 32, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1))
    (4): LeakyReLU(negative_slope=0.2, inplace=True)
    (5): Dropout2d(p=0.25, inplace=False)
    (6): BatchNorm2d(32, eps=0.8, momentum=0.1, affine=True, track_running_stats=True)
    (7): Conv2d(32, 64, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1))
    (8): LeakyReLU(negative_slope=0.2, inplace=True)
    (9): Dropout2d(p=0.25, inplace=False)
    (10): BatchNorm2d(64, eps=0.8, momentum=0.1, affine=True, track_running_stats=True)
    (11): Conv2d(64, 128, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1))
    (12): LeakyReLU(negative_slope=0.2, inplace=True)
    (13): Dropout2d(p=0.25, inplace=False)
    (14): BatchNorm2d(128, eps=0.8, momentum=0.1, affine=True, track_running_stats=True)
  )
  (ad

In [15]:
# Optimizers
optimizer_G = torch.optim.Adam(generator.parameters(), lr=opt.lr, betas=(opt.b1, opt.b2))
optimizer_D = torch.optim.Adam(discriminator.parameters(), lr=opt.lr, betas=(opt.b1, opt.b2))

FloatTensor = torch.cuda.FloatTensor if cuda else torch.FloatTensor
LongTensor = torch.cuda.LongTensor if cuda else torch.LongTensor


In [16]:
def sample_image(n_row, batches_done):
    """Saves a grid of generated digits ranging from 0 to n_classes"""
    # Sample noise
    z = Variable(FloatTensor(np.random.normal(0, 1, (n_row ** 2, opt.latent_dim))))
    # Get labels ranging from 0 to n_classes for n rows
    labels = np.array([num for _ in range(n_row) for num in range(n_row)])
    labels = Variable(LongTensor(labels))
    gen_imgs = generator(z, labels)
    save_image(gen_imgs.data, "images/%d.png" % batches_done, nrow=n_row, normalize=True)

In [None]:
from torch.autograd import Variable
from torchvision.utils import save_image

for epoch in range(opt.n_epochs):
    for i, (imgs, labels) in enumerate(train_data):

        batch_size = imgs.shape[0]

        # Adversarial ground truths
        valid = Variable(FloatTensor(batch_size, 1).fill_(1.0), requires_grad=False)
        fake = Variable(FloatTensor(batch_size, 1).fill_(0.0), requires_grad=False)

        # Configure input
        real_imgs = Variable(imgs.type(FloatTensor))
        labels = Variable(labels.type(LongTensor))

        # -----------------
        #  Train Generator
        # -----------------

        optimizer_G.zero_grad()

        # Sample noise and labels as generator input
        z = Variable(FloatTensor(np.random.normal(0, 1, (batch_size, opt.latent_dim))))
        gen_labels = Variable(LongTensor(np.random.randint(0, opt.n_classes, batch_size)))

        # Generate a batch of images
        gen_imgs = generator(z, gen_labels)

        # Loss measures generator's ability to fool the discriminator
        validity, pred_label = discriminator(gen_imgs)
        g_loss = 0.5 * (adversarial_loss(validity, valid) + auxiliary_loss(pred_label, gen_labels))

        g_loss.backward()
        optimizer_G.step()

        # ---------------------
        #  Train Discriminator
        # ---------------------

        optimizer_D.zero_grad()

        # Loss for real images
        real_pred, real_aux = discriminator(real_imgs)
        d_real_loss = (adversarial_loss(real_pred, valid) + auxiliary_loss(real_aux, labels)) / 2

        # Loss for fake images
        fake_pred, fake_aux = discriminator(gen_imgs.detach())
        d_fake_loss = (adversarial_loss(fake_pred, fake) + auxiliary_loss(fake_aux, gen_labels)) / 2

        # Total discriminator loss
        d_loss = (d_real_loss + d_fake_loss) / 2

        # Calculate discriminator accuracy
        pred = np.concatenate([real_aux.data.cpu().numpy(), fake_aux.data.cpu().numpy()], axis=0)
        gt = np.concatenate([labels.data.cpu().numpy(), gen_labels.data.cpu().numpy()], axis=0)
        d_acc = np.mean(np.argmax(pred, axis=1) == gt)

        d_loss.backward()
        optimizer_D.step()

        if i%20 == 0:
            print(
            "[Epoch %d/%d] [Batch %d/%d] [D loss: %f, acc: %d%%] [G loss: %f]"
            % (epoch, opt.n_epochs, i, len(train_data), d_loss.item(), 100 * d_acc, g_loss.item())
        )
        batches_done = epoch * len(train_data) + i
        if batches_done % opt.sample_interval == 0:
            sample_image(n_row=10, batches_done=batches_done)

  input = module(input)


[Epoch 0/200] [Batch 0/340] [D loss: 2.030270, acc: 2%] [G loss: 2.026789]
[Epoch 0/200] [Batch 20/340] [D loss: 2.029470, acc: 4%] [G loss: 1.990261]
[Epoch 0/200] [Batch 40/340] [D loss: 2.029001, acc: 3%] [G loss: 2.048695]
[Epoch 0/200] [Batch 60/340] [D loss: 2.026309, acc: 5%] [G loss: 2.019625]
[Epoch 0/200] [Batch 80/340] [D loss: 2.021957, acc: 4%] [G loss: 2.055563]
[Epoch 0/200] [Batch 100/340] [D loss: 2.018302, acc: 9%] [G loss: 2.001978]
[Epoch 0/200] [Batch 120/340] [D loss: 2.021887, acc: 7%] [G loss: 2.021203]
[Epoch 0/200] [Batch 140/340] [D loss: 2.018646, acc: 9%] [G loss: 2.033036]
[Epoch 0/200] [Batch 160/340] [D loss: 2.000196, acc: 6%] [G loss: 2.052599]
[Epoch 0/200] [Batch 180/340] [D loss: 1.989007, acc: 8%] [G loss: 2.064776]
[Epoch 0/200] [Batch 200/340] [D loss: 1.991482, acc: 8%] [G loss: 2.036966]
[Epoch 0/200] [Batch 220/340] [D loss: 1.992297, acc: 8%] [G loss: 2.044227]
[Epoch 0/200] [Batch 240/340] [D loss: 1.981819, acc: 11%] [G loss: 2.086098]
[Epo

In [None]:
with open()