In [1]:
!echo Installing Library to Display gifs:
!pip install moviepy
!echo Downloading pre-trained weights
!wget --load-cookies /tmp/cookies.txt "https://docs.google.com/uc?export=download&confirm=$(wget --quiet --save-cookies /tmp/cookies.txt --keep-session-cookies --no-check-certificate 'https://docs.google.com/uc?export=download&id=1mk9JdmJH79_vtQkl8zk-jDxa7xUXpck-' -O- | sed -rn 's/.*confirm=([0-9A-Za-z_]+).*/\1\n/p')&id=1mk9JdmJH79_vtQkl8zk-jDxa7xUXpck-" -O state_normal81000.ckpt && rm -rf /tmp/cookies.txt

Installing Library to Display gifs:
Downloading pre-trained weights


'wget' is not recognized as an internal or external command,
operable program or batch file.


In [2]:
import torch
import torch.nn as nn
import numpy as np
from moviepy.editor import ImageSequenceClip
from IPython.display import Image

def genSamples(g,n=8):
    with torch.no_grad():
        s=g(torch.rand((n**2,100),device='cuda')*2-1).cpu().detach().numpy()
        
    out = np.zeros((3,16,64*n,64*n))
    
    for j in range(n):
        for k in range(n):
            out[:,:,64*j:64*(j+1),64*k:64*(k+1)] = s[j*n+k,:,:,:,:]
    
    out = out.transpose((1,2,3,0))
    out=(out+1)/2 *255
    out.astype(int)
    clip=ImageSequenceClip(list(out),fps=20)
    clip.write_gif('sample.gif',fps=20)
    Image(open('sample.gif', 'rb').read())

In [3]:
device='cuda'

In [4]:
class TemporalGenerator(nn.Module):
    def __init__(self):
        super().__init__()
        #generate 16 by 100 tensor, 16 represent the temporal dimension
        self.model=nn.Sequential(
        nn.ConvTranspose1d(100,512,kernel_size=1,stride=1,padding=0),
        nn.BatchNorm1d(512),
        nn.ReLU(),
        nn.ConvTranspose1d(512,256,kernel_size=4,stride=2,padding=1),
        nn.BatchNorm1d(256),
        nn.ReLU(),
        nn.ConvTranspose1d(256,128,kernel_size=4,stride=2,padding=1),
        nn.BatchNorm1d(128),
        nn.ReLU(),
        nn.ConvTranspose1d(128,128,kernel_size=4,stride=2,padding=1),
        nn.BatchNorm1d(128),
        nn.ReLU(), 
        nn.ConvTranspose1d(128,100,kernel_size=4,stride=2,padding=1),
        nn.Tanh()
        )
        self.model.apply(self.init_weights)
        
    def init_weights(self,m):
        if type(m)==nn.ConvTranspose1d:
            nn.init.xavier_uniform_(m.weight,gain=2**0.5)
            
    def forward(self,x):
        x=x.view(-1,100,1)
        x=self.model(x).transpose(1,2)
        return x

In [5]:
class VideoGenerator(nn.Module):
    def __init__(self):
        super().__init__()
        self.temp=TemporalGenerator()
        
        self.fast= nn.Sequential(
        nn.Linear(100,256*4*4,bias=False),
        nn.BatchNorm1d(256*4*4),
        nn.ReLU()
        )
        
        self.slow =nn.Sequential(
        nn.Linear(100,256*4*4,bias=False),
        nn.BatchNorm1d(256*4*4),
        nn.ReLU()
        )
        
        self.model=nn.Sequential(
        nn.ConvTranspose2d(512,256,kernel_size=4,stride=2,padding=1,bias=False),
        nn.BatchNorm2d(256),
        nn.ReLU(),
        nn.ConvTranspose2d(256,128,kernel_size=4,stride=2,padding=1,bias=False),
        nn.BatchNorm2d(128),
        nn.ReLU(),
        nn.ConvTranspose2d(128,64,kernel_size=4,stride=2,padding=1,bias=False),
        nn.BatchNorm2d(64),
        nn.ReLU(),
        nn.ConvTranspose2d(64,32,kernel_size=4,stride=2,padding=1,bias=False),
        nn.BatchNorm2d(32),
        nn.ReLU(),
        nn.ConvTranspose2d(32,3,kernel_size=3,stride=1,padding=1),
        nn.Tanh()
        )
        
        self.fast.apply(self.init_weights)
        self.slow.apply(self.init_weights)
        self.model.apply(self.init_weights)
        
    def init_weights(self,m):
        if type(m)==nn.ConvTranspose2d or type(m)== nn.Linear:
            nn.init.uniform_(m.weight,a=-0.01,b=0.01)
    
    def forward(self,x):
        z_fast=self.temp(x).contiguous()
        z_fast=z_fast.view(-1,100)
        
        z_fast=self.fast(z_fast).view(-1,256,4,4)
        z_slow=self.slow(x).view(-1,256,4,4).unsqueeze(1)
        z_slow=torch.cat([z_slow]*16,dim=1).view(-1,256,4,4)
        
        z=torch.cat([z_slow,z_fast],dim=1)
        
        out =self.model(z)
        return out.view(-1,16,3,64,64).transpose(1,2)

In [6]:
class VideoDiscriminator(nn.Module):
    def __init__(self):
        super().__init__()
        self.model3d=nn.Sequential(
        nn.Conv3d(3,64,kernel_size=4,padding=1,stride=2),
        nn.LeakyReLU(0.2),
        nn.Conv3d(64,128,kernel_size=4,padding=1,stride=2),
        nn.BatchNorm3d(128),
        nn.LeakyReLU(0.2),
        nn.Conv3d(128,256,kernel_size=4,padding=1,stride=2),
        nn.BatchNorm3d(256),
        nn.LeakyReLU(0.2),
        nn.Conv3d(256,512,kernel_size=4,padding=1,stride=2),
        nn.BatchNorm3d(512),
        nn.LeakyReLU(0.2)
        )
        self.conv2d =nn.Conv2d(512,1,kernel_size=4,stride=1,padding=0)
        
        self.model3d.apply(self.init_weights)
        self.init_weights(self.conv2d)
        
    def init_weights(self,m):
        if type(m)==nn.Conv3d or type(m)==nn.Conv2d:
            nn.init.xavier_normal_(m.weight,gain=2**0.5)
    
    def forward(self,x):
        h=self.model3d(x)
        h=torch.reshape(h,(-1,512,4,4))
        h=self.conv2d(h)
        
        return h
    

In [7]:
!pip install av



In [8]:
import torch

from torchvision import transforms
from torchvision.datasets import UCF101


In [9]:
ucf_data_dir = "P:\\dataset\\UCF-101\\sample"
ucf_label_dir = "P:\\dataset\\UCF-101\\sample"
frames_per_clip = 16
step_between_clips = 1
batch_size = 16

In [10]:
tfs = transforms.Compose([
            # TODO: this should be done by a video-level transfrom when PyTorch provides transforms.ToTensor() for video
            # scale in [0, 1] of type float
            transforms.Lambda(lambda x: x / 255.),
            # reshape into (C, T, H, W) 
            transforms.Lambda(lambda x: x.permute(3, 0, 1, 2)),
            # rescale to the most common size
            transforms.Lambda(lambda x: nn.functional.interpolate(x, (64, 64))),
])

In [11]:
def custom_collate(batch):
    filtered_batch = []
    for video, _, label in batch:
        if video.shape[1]!= 16:
            print("error in shape", video.shape)
            continue
        filtered_batch.append((video, label))
    return torch.utils.data.dataloader.default_collate(filtered_batch)

In [12]:
train_dataset = UCF101(ucf_data_dir, ucf_label_dir, frames_per_clip=frames_per_clip,
                       step_between_clips=step_between_clips, train=True, transform=tfs)
train_loader = torch.utils.data.DataLoader(train_dataset, batch_size=batch_size, shuffle=True,
                                           collate_fn=custom_collate)

  0%|          | 0/18 [00:00<?, ?it/s]

In [13]:
v,l=next(iter(train_loader))

In [14]:
criterion=nn.BCEWithLogitsLoss()
display_step=500

In [15]:
gen=VideoGenerator().to(device)
gen_opt=torch.optim.Adam(gen.parameters())
disc=VideoDiscriminator().to(device)
disc_opt=torch.optim.Adam(disc.parameters())

In [None]:
n_epochs=5
cur_step=0
mean_generator_loss=0
mean_discriminator_loss=0

for epoch in range(n_epochs):
    for i,(real,l) in enumerate(train_loader):
        
#         print(l)
        real=real.to(device)
        
        disc_opt.zero_grad()
        fake_noise=torch.rand((batch_size, 100), device='cuda')*2-1
        fake=gen(fake_noise)
#         print(fake.size)
        disc_fake_pred=disc(fake.detach())
        disc_fake_loss=criterion(disc_fake_pred,torch.zeros_like(disc_fake_pred))
        disc_real_pred=disc(real)
        disc_real_loss=criterion(disc_real_pred,torch.ones_like(disc_real_pred))
        disc_loss =(disc_fake_loss + disc_real_loss)/2
        
        
        mean_discriminator_loss+=disc_loss.item()/display_step
        disc_loss.backward(retain_graph=True)
        disc_opt.step()
        
        
        gen_opt.zero_grad()
        fake_noise_2=torch.rand((batch_size, 100), device='cuda')*2-1
        fake_2=gen(fake_noise_2)
        disc_fake_pred=disc(fake_2)
        gen_loss=criterion(disc_fake_pred,torch.ones_like(disc_fake_pred))
        gen_loss.backward()
        gen_opt.step()
        
        mean_generator_loss+=gen_loss.item()/display_step
        if cur_step % display_step == 0 and cur_step > 0:
            print(f"Step / Epoch : {cur_step} / {epoch} :Generator loss: {mean_generator_loss}, discriminator loss: {mean_discriminator_loss}")
#             show_tensor_images(fake)
#             show_tensor_images(real)
            genSamples(gen)
            mean_generator_loss = 0
            mean_discriminator_loss = 0
        cur_step += 1
    
        
        
        

Step / Epoch : 500 / 0 :Generator loss: 8.696979818344117, discriminator loss: 0.09445764821554803


t:  18%|████████████▎                                                         | 3/17 [00:00<00:00, 17.44it/s, now=None]

MoviePy - Building file sample.gif with imageio.


                                                                                                                       

Step / Epoch : 1000 / 0 :Generator loss: 8.251449469089513, discriminator loss: 0.09155838824692553


t:  12%|████████▏                                                             | 2/17 [00:00<00:00, 18.18it/s, now=None]

MoviePy - Building file sample.gif with imageio.


                                                                                                                       

Step / Epoch : 1500 / 0 :Generator loss: 8.282766531467452, discriminator loss: 0.08713800461659381


t:  12%|████████▏                                                             | 2/17 [00:00<00:00, 17.09it/s, now=None]

MoviePy - Building file sample.gif with imageio.


                                                                                                                       

Step / Epoch : 2000 / 0 :Generator loss: 8.502601240992549, discriminator loss: 0.08810883671340342


t:  12%|████████▏                                                             | 2/17 [00:00<00:00, 19.80it/s, now=None]

MoviePy - Building file sample.gif with imageio.


                                                                                                                       

Step / Epoch : 2500 / 0 :Generator loss: 8.506793653964994, discriminator loss: 0.07316077891332776


t:  12%|████████▏                                                             | 2/17 [00:00<00:00, 16.02it/s, now=None]

MoviePy - Building file sample.gif with imageio.


                                                                                                                       

Step / Epoch : 3000 / 1 :Generator loss: 8.444769371271141, discriminator loss: 0.0689439903603287


t:  12%|████████▏                                                             | 2/17 [00:00<00:01, 13.15it/s, now=None]

MoviePy - Building file sample.gif with imageio.


                                                                                                                       

Step / Epoch : 3500 / 1 :Generator loss: 8.727850784540177, discriminator loss: 0.08255410178151666


t:  18%|████████████▎                                                         | 3/17 [00:00<00:00, 17.14it/s, now=None]

MoviePy - Building file sample.gif with imageio.


                                                                                                                       

Step / Epoch : 4000 / 1 :Generator loss: 9.094063885807989, discriminator loss: 0.09024563953784395


t:  12%|████████▏                                                             | 2/17 [00:00<00:00, 17.24it/s, now=None]

MoviePy - Building file sample.gif with imageio.


                                                                                                                       

Step / Epoch : 4500 / 1 :Generator loss: 9.019547147631652, discriminator loss: 0.07799030894794674


t:   0%|                                                                              | 0/17 [00:00<?, ?it/s, now=None]

MoviePy - Building file sample.gif with imageio.


                                                                                                                       

Step / Epoch : 5000 / 1 :Generator loss: 7.700207958936685, discriminator loss: 0.056378637095482624


t:  12%|████████▏                                                             | 2/17 [00:00<00:00, 16.26it/s, now=None]

MoviePy - Building file sample.gif with imageio.


                                                                                                                       

Step / Epoch : 5500 / 1 :Generator loss: 8.778859921693803, discriminator loss: 0.06628730890257929


t:  18%|████████████▎                                                         | 3/17 [00:00<00:00, 16.66it/s, now=None]

MoviePy - Building file sample.gif with imageio.


                                                                                                                       

Step / Epoch : 6000 / 2 :Generator loss: 9.066775604724876, discriminator loss: 0.052326812163989454


t:  12%|████████▏                                                             | 2/17 [00:00<00:00, 17.56it/s, now=None]

MoviePy - Building file sample.gif with imageio.


                                                                                                                       

Step / Epoch : 6500 / 2 :Generator loss: 10.116045177936549, discriminator loss: 0.06814337020918632


t:  12%|████████▏                                                             | 2/17 [00:00<00:00, 18.01it/s, now=None]

MoviePy - Building file sample.gif with imageio.


                                                                                                                       