In [1]:
import os
import numpy as np
import torch
import torch.nn as nn
import torch.nn.functional as F
import torchvision.models as models
import torchvision.transforms as transforms
import torch.utils.data as data
import torchvision
from torch.autograd import Variable
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import OneHotEncoder, LabelEncoder
from sklearn.metrics import accuracy_score
import pickle
from PIL import Image
from tensorboardX import SummaryWriter

In [2]:
# for CRNN
class Dataset_CRNN(data.Dataset):
    "Characterizes a dataset for PyTorch"
    def __init__(self, data_path, frame_length=10, transform=None):
        "Initialization"
        self.transform = transform
        #self.frames = frames
        self.folders = data_path
        self.frames = frame_length #For our case since we are computing 10 frames always

    def __len__(self):
        "Denotes the total number of samples"
        return len(os.listdir(self.folders))

    def read_images(self, data_path, use_transform):
        X = []
        file_name = ""
        for i in os.listdir(data_path):
            file_name = i
            image = Image.open(os.path.join(data_path,i))
            
            #print(image.shape)
            if use_transform is not None:
                image = use_transform(image)
                #print(image.size)
            image = torch.from_numpy(np.asarray(image))
            X.append(image)
        X = torch.stack(X, dim=0)

        return X, file_name

    def __getitem__(self, index):
        data_path = os.path.join(self.folders,os.listdir(self.folders)[index])
              
        # Load data
        X, file_name = self.read_images(data_path, self.transform)                     # (input) spatial images
        
        y = np.ones(self.frames)
        if 'real' in data_path:
            y = np.zeros(self.frames)
        #print("Folder is {}".format(data_path))
        #print(X.shape)
        return X, torch.from_numpy(y).type(torch.LongTensor)

In [3]:
TRANSFORM_IMG = transforms.Compose([
    transforms.Resize((256,256)),
    transforms.CenterCrop(256),
    #transforms.ToTensor()
    #transforms.Normalize(mean=[0.4246, 0.4144, 0.4114],
                         #std=[0.2265, 0.2208, 0.2215])
    ])

In [4]:
train_path = '/home/chinmay/datatset/deepfake_split/train'
train_data = Dataset_CRNN(train_path, transform=TRANSFORM_IMG, frame_length=30 )
# for step, (x, y) in enumerate(data):
#     print(x.shape)
val_path = '/home/chinmay/datatset/deepfake_split/val'
val_data = Dataset_CRNN(val_path, transform=TRANSFORM_IMG, frame_length=30)

In [5]:
batch_size = 32
epochs = 100
log_interval = 20
learning_rate = 1e-4

In [6]:
# use same encoder CNN saved!
#CNN_fc_hidden1, CNN_fc_hidden2 = 1024, 768
# We can change this to 2 dimensions since that is the output of MesoNet
#CNN_embed_dim = 2   # latent dim extracted by 2D CNN 

res_size = 224        # ResNet image size
dropout_p = 0.6       # dropout probability being high as it seems to overfit

# use same decoder RNN saved!
RNN_hidden_layers = 2
RNN_hidden_nodes = 64
RNN_FC_dim = 16

In [7]:
def new_output_shape(num_of_maxpool_2,shape):
    return int(shape/(2**num_of_maxpool_2))

In [8]:
class Meso4_modified(nn.Module):
    def __init__(self,in_channel=3, img_shape = 256, number_of_classes=1):
        super(Meso4_modified,self).__init__()
        self.conv1 = nn.Conv2d(in_channel,8, kernel_size=(3,3), stride = 1, padding= 1)
        self.batch_norm_1 = nn.BatchNorm2d(8)
        self.relu = nn.ReLU()
        self.conv2 = nn.Conv2d(8,8, kernel_size=(5,5),stride=1, padding=2)
        self.batch_norm_2 = nn.BatchNorm2d(8)
        self.conv3 = nn.Conv2d(8,16, kernel_size=(5,5),stride=1, padding=2)
        self.batch_norm_3 = nn.BatchNorm2d(16)
        self.conv4 = nn.Conv2d(16,16, kernel_size=(5,5),stride=1, padding=2)
        self.batch_norm_4 = nn.BatchNorm2d(16)
        self.max_pool_2 = nn.MaxPool2d(kernel_size=(2, 2), stride=2)
        self.max_pool_4 = nn.MaxPool2d(kernel_size=(4, 4), stride=4)
        out_shape = new_output_shape(num_of_maxpool_2=4, shape = img_shape)
        self.fc_conv = nn.Conv2d(16,number_of_classes,kernel_size=(out_shape,out_shape))
        self.dropout = nn.Dropout2d(p=0.4) #This is more than usual as it seems overfitting
        
    def forward(self,x_3d):
        cnn_embed_seq = []
        x_3d = x_3d.permute(0,1,4,2,3)# Required to match shapes
        x_3d = x_3d.type(torch.cuda.FloatTensor) #Converting to Float Tensor from Byte Tensor
        for t in range(x_3d.size(1)):
            x = self.conv1(x_3d[:, t, :, :, :])
            x = self.batch_norm_1(x)
            x = self.relu(x)
            # x = self.dropout(x) Avoid in the first layer
            # Segment 2
            x = self.max_pool_2(x)
            x = self.conv2(x)
            x = self.batch_norm_2(x)
            x = self.relu(x)
            x = self.dropout(x)
            # Segment 3
            x = self.max_pool_2(x)
            x = self.conv3(x)
            x = self.batch_norm_3(x)
            x = self.relu(x)
            #x = self.dropout(x)
            # Segment 4
            x = self.max_pool_2(x)
            x = self.conv4(x)
            x = self.batch_norm_4(x)
            x = self.relu(x)
            x = self.dropout(x)
            # Going for the last layer
            x = self.max_pool_2(x)
            #print("current shape after maxpool {}".format(x.shape))
            # doing a max_pool of 4 to get most important features
            x = self.max_pool_4(x)
            # x = self.fc_conv(x) Not using the conv layer retaining spatial info
            #print("Shape of x is {}".format(x.shape))
            x = x.view(x.shape[0], -1)
            
            cnn_embed_seq.append(x)
        cnn_embed_seq = torch.stack(cnn_embed_seq, dim=0).transpose_(0, 1)
        return cnn_embed_seq

In [9]:
# 2D CNN encoder using ResNet-152 pretrained
import torchvision.models as models
class ResCNNEncoder(nn.Module):
    def __init__(self):
        """Load the pretrained ResNet-152 and replace top fc layer."""
        super(ResCNNEncoder, self).__init__()

        # self.model = Meso4_modified()
        self.model = models.vgg11_bn(pretrained=True)
        
    def forward(self, x_3d):
        x_3d = x_3d.permute(0,1,4,2,3)# Required to match shapes
        x_3d = x_3d.type(torch.cuda.FloatTensor) #Converting to Float Tensor from Byte Tensor
        cnn_embed_seq = []
        with torch.no_grad():
            for t in range(x_3d.size(1)):
                x = self.model.features(x_3d[:, t, :, :, :])
                x = x.view(x.shape[0], -1)
                cnn_embed_seq.append(x)
            cnn_embed_seq = torch.stack(cnn_embed_seq, dim=0).transpose_(0, 1)
            # print("shaps is {}".format(cnn_embed_seq.shape))
        return cnn_embed_seq


class DecoderRNN(nn.Module):
    def __init__(self, CNN_embed_dim=256, h_RNN_layers=3, h_RNN=256, h_FC_dim=128, drop_p=0.3, num_classes=2):
        super(DecoderRNN, self).__init__()

        self.RNN_input_size = CNN_embed_dim
        self.h_RNN_layers = h_RNN_layers   # RNN hidden layers
        self.h_RNN = h_RNN                 # RNN hidden nodes
        self.h_FC_dim = h_FC_dim
        self.drop_p = drop_p
        self.num_classes = num_classes

        self.LSTM = nn.LSTM(
            input_size=self.RNN_input_size,
            hidden_size=self.h_RNN,        
            num_layers=h_RNN_layers,       
            batch_first=True,       # input & output will has batch size as 1s dimension. e.g. (batch, time_step, input_size)
        )

        self.fc1 = nn.Linear(self.h_RNN, self.h_FC_dim)
        self.fc2 = nn.Linear(self.h_FC_dim, self.num_classes)

    def forward(self, x_RNN):
        
        self.LSTM.flatten_parameters()
        RNN_out, (h_n, h_c) = self.LSTM(x_RNN, None)  
        """ h_n shape (n_layers, batch, hidden_size), h_c shape (n_layers, batch, hidden_size) """ 
        """ None represents zero initial hidden state. RNN_out has shape=(batch, time_step, output_size) """
        # print("shape is {}".format(RNN_out.shape))
        # FC layers
        x = self.fc1(RNN_out[:, -1, :])   # choose RNN_out at the last time step
        x = F.relu(x)
        x = F.dropout(x, p=self.drop_p) #, training=self.training
        x = self.fc2(x)
        #print("output shape is {}".format(x.shape))
        return x

## ---------------------- end of CRNN module ---------------------- ##

In [10]:
def find_median(numpy_array = []): #This is expected to take an array of array. So,
    #print("Input array is {}".format(numpy_array))
    output = []
    confidence_scores = []
    for array in numpy_array:
        counts = np.bincount(array)
        output.append(np.argmax(counts))
        # Let us compute the confidence of the scores
        # since frames are independent, our confidence is purely based on the number
        # of frames our model thinks is belonging to a specific category
        # the confidence of individual frame prediction is not taken into consideration
        # and this portion is debatable....
        frame_set_pred = np.sort(counts)[-1]
        confidence = frame_set_pred/sum(counts)
        confidence_scores.append(confidence)
    return torch.from_numpy(np.asarray(output)).type(torch.LongTensor), torch.from_numpy(np.asarray(confidence_scores)).type(torch.FloatTensor)

In [11]:
# Detect devices
use_cuda = torch.cuda.is_available()                   # check if GPU exists
device = torch.device("cuda" if use_cuda else "cpu")  
params = {'batch_size': batch_size, 'shuffle': True, 'num_workers': 4, 'pin_memory': True} if use_cuda else {}
train_loader = data.DataLoader(train_data, **params)
valid_loader = data.DataLoader(val_data, **params)

In [12]:
# reload CRNN model
cnn_encoder = ResCNNEncoder().to(device) #Since we have a GPU already
rnn_decoder = DecoderRNN(CNN_embed_dim=256, h_RNN_layers=RNN_hidden_layers, h_RNN=RNN_hidden_nodes, 
                         h_FC_dim=RNN_FC_dim, drop_p=dropout_p, num_classes=2).to(device)

In [13]:
# Combine all EncoderCNN + DecoderRNN parameters
crnn_params = list(cnn_encoder.model.parameters()) + list(rnn_decoder.parameters())

    
optimizer = torch.optim.Adam(crnn_params, lr=learning_rate)

In [14]:
writer_train = SummaryWriter('/home/chinmay/training-results/res_cnn_meso2/train')
writer_test = SummaryWriter('/home/chinmay/training-results/res_cnn_meso2/test')
save_model_path = "/home/chinmay/model_weights/res_cnn_meso2/"
def train(log_interval, model, device, train_loader, optimizer, epoch):
    cnn_encoder, rnn_decoder = model
    cnn_encoder.train() # Put the model in training mode
    rnn_decoder.train() # Put the model in training mode
    
    losses = []
    N_count = 0   # counting total trained sample in one epoch
    scores = []
    #single_iter_loss = []
    for batch_idx, (X, y) in enumerate(train_loader):
        # distribute data to device
        X, y = X.to(device), y.to(device)
        N_count += X.size(0)

        optimizer.zero_grad()
        output = rnn_decoder(cnn_encoder(X))    # output has dim = (batch, number of classes)
        #print(output.shape)
        y, _ = find_median(y) #This is necessary as now only single label output for entire frame
        y = y.to(device)
        #print(y)
        #print(y.shape)
        loss = F.cross_entropy(output, y)
        losses.append(loss.item())

        # to compute accuracy
        y_pred = torch.max(output, 1)[1]  # y_pred != output
        step_score = accuracy_score(y.cpu().data.squeeze().numpy(), y_pred.cpu().data.squeeze().numpy())
        scores.append(step_score)         # computed on CPU

        loss.backward()
        optimizer.step()
        
                 
        if (batch_idx + 1) % log_interval == 0:
            print('Train Epoch: {} [{}/{} ({:.0f}%)]\tLoss: {:.6f}, Accu: {:.2f}%'.format(
                epoch + 1, N_count, len(train_loader.dataset), 100. * (batch_idx + 1) / len(train_loader), loss.item(), 100 * step_score))
         
    return np.mean(losses), np.mean(scores)

In [15]:
def validation(model, device, optimizer, test_loader):
    # set model as testing mode
    cnn_encoder, rnn_decoder = model
    cnn_encoder.eval()
    rnn_decoder.eval()
    
    test_loss = 0
    all_y = []
    all_y_pred = []
    test_loss = []
    with torch.no_grad():
        for X, y  in test_loader:
            # distribute data to device
            X, y = X.to(device), y.to(device)

            output = rnn_decoder(cnn_encoder(X))
            y, _ = find_median(y) #This is necessary as now only single label output for entire frame
            y = y.to(device)
            loss = F.cross_entropy(output, y)
            test_loss.append(loss.item())                 # sum up batch loss
            y_pred = output.max(1, keepdim=True)[1]  # (y_pred != output) get the index of the max log-probability
            
            # collect all y and y_pred in all batches
            all_y.extend(y)
            all_y_pred.extend(y_pred)

    # No need for this line. Our dataloader is a bit different. Simply use append idea
    #test_loss /= len(test_loader.dataset)
    test_loss = np.mean(test_loss)
    # compute accuracy
    all_y = torch.stack(all_y, dim=0)
    all_y_pred = torch.stack(all_y_pred, dim=0)
    test_score = accuracy_score(all_y.cpu().data.squeeze().numpy(), all_y_pred.cpu().data.squeeze().numpy())

    # show information
    print('\nTest set ({:d} samples): Average loss: {:.4f}, Accuracy: {:.2f}%\n'.format(len(all_y), test_loss, 100* test_score))

    # save Pytorch models of best record
    torch.save(cnn_encoder.state_dict(), os.path.join(save_model_path, 'cnn_encoder_epoch{}.pth'.format(epoch + 1)))  # save spatial_encoder
    torch.save(rnn_decoder.state_dict(), os.path.join(save_model_path, 'rnn_decoder_epoch{}.pth'.format(epoch + 1)))  # save motion_encoder
    torch.save(optimizer.state_dict(), os.path.join(save_model_path, 'optimizer_epoch{}.pth'.format(epoch + 1)))      # save optimizer
    print("Epoch {} model saved!".format(epoch + 1))


    return test_loss, test_score

In [16]:
def adjust_learning_rate(optimizer, learning_rate, epoch):
    """Sets the learning rate to the initial LR decayed by 10 every 20 epochs"""
    lr = learning_rate * (0.1 ** (epoch // 15))
    for param_group in optimizer.param_groups:
        param_group['lr'] = lr

In [17]:
epoch_train_losses = []
epoch_train_scores = []
epoch_test_losses = []
epoch_test_scores = []
for epoch in range(epochs):
    # train, test model
    train_losses, train_scores = train(log_interval, [cnn_encoder, rnn_decoder], device, train_loader, optimizer, epoch)
    epoch_test_loss, epoch_test_score = validation([cnn_encoder, rnn_decoder], device, optimizer, valid_loader)
    # Reduce learning-rate by a factor of 1/10 after every 10 epochs
    # avoid this step as Adam is being used
    #adjust_learning_rate(optimizer=optimizer, learning_rate=learning_rate, epoch=epoch)
    
    # save results
    writer_train.add_scalar('loss',train_losses,epoch+1)
    writer_train.add_scalar('score',train_scores,epoch+1)
    writer_test.add_scalar('loss',epoch_test_loss,epoch+1)
    writer_test.add_scalar('score',epoch_test_score,epoch+1)
    epoch_train_losses.append(train_losses)
    epoch_train_scores.append(train_scores)
    epoch_test_losses.append(epoch_test_loss)
    epoch_test_scores.append(epoch_test_score)
    #Empty the cache
    torch.cuda.empty_cache()



Test set (235 samples): Average loss: 0.6860, Accuracy: 60.43%

Epoch 1 model saved!

Test set (235 samples): Average loss: 0.6864, Accuracy: 60.00%

Epoch 2 model saved!

Test set (235 samples): Average loss: 0.6804, Accuracy: 60.00%

Epoch 3 model saved!

Test set (235 samples): Average loss: 0.6799, Accuracy: 60.00%

Epoch 4 model saved!

Test set (235 samples): Average loss: 0.6731, Accuracy: 60.00%

Epoch 5 model saved!

Test set (235 samples): Average loss: 0.6655, Accuracy: 60.00%

Epoch 6 model saved!

Test set (235 samples): Average loss: 0.6586, Accuracy: 60.00%

Epoch 7 model saved!

Test set (235 samples): Average loss: 0.6466, Accuracy: 60.00%

Epoch 8 model saved!

Test set (235 samples): Average loss: 0.6317, Accuracy: 62.98%

Epoch 9 model saved!

Test set (235 samples): Average loss: 0.6082, Accuracy: 73.19%

Epoch 10 model saved!

Test set (235 samples): Average loss: 0.5759, Accuracy: 73.19%

Epoch 11 model saved!

Test set (235 samples): Average loss: 0.5442, Accur

Process Process-240:
Traceback (most recent call last):
  File "/home/chinmay/anaconda3/lib/python3.5/multiprocessing/process.py", line 252, in _bootstrap
    self.run()
  File "/home/chinmay/anaconda3/lib/python3.5/multiprocessing/process.py", line 93, in run
    self._target(*self._args, **self._kwargs)
  File "/home/chinmay/anaconda3/lib/python3.5/site-packages/torch/utils/data/dataloader.py", line 96, in _worker_loop
    r = index_queue.get(timeout=MANAGER_STATUS_CHECK_INTERVAL)
  File "/home/chinmay/anaconda3/lib/python3.5/multiprocessing/queues.py", line 104, in get
    if timeout < 0 or not self._poll(timeout):
  File "/home/chinmay/anaconda3/lib/python3.5/multiprocessing/connection.py", line 257, in poll
    return self._poll(timeout)
  File "/home/chinmay/anaconda3/lib/python3.5/multiprocessing/connection.py", line 414, in _poll
    r = wait([self], timeout)
  File "/home/chinmay/anaconda3/lib/python3.5/multiprocessing/connection.py", line 911, in wait
    ready = selector.sel

Traceback (most recent call last):
  File "/home/chinmay/anaconda3/lib/python3.5/site-packages/IPython/core/interactiveshell.py", line 2961, in run_code
    exec(code_obj, self.user_global_ns, self.user_ns)
  File "<ipython-input-17-30adc2b4bdd6>", line 8, in <module>
    epoch_test_loss, epoch_test_score = validation([cnn_encoder, rnn_decoder], device, optimizer, valid_loader)
  File "<ipython-input-15-b5859fd28d85>", line 12, in validation
    for X, y  in test_loader:
  File "/home/chinmay/anaconda3/lib/python3.5/site-packages/torch/utils/data/dataloader.py", line 330, in __next__
    idx, batch = self._get_batch()
  File "/home/chinmay/anaconda3/lib/python3.5/site-packages/torch/utils/data/dataloader.py", line 309, in _get_batch
    return self.data_queue.get()
  File "/home/chinmay/anaconda3/lib/python3.5/queue.py", line 164, in get
    self.not_empty.wait()
  File "/home/chinmay/anaconda3/lib/python3.5/threading.py", line 293, in wait
    waiter.acquire()
KeyboardInterrupt

Durin

KeyboardInterrupt: 