In [1]:
from __future__ import print_function 
from __future__ import division

FASTPART=False
if FASTPART:
    num_frames = 4
else:
    num_frames = 16
    
is_alchemy_used = True
from datetime import datetime
import pandas as pd
import torch
import torch.nn as nn
import torch.optim as optim
import numpy as np
import torchvision
from torchvision import datasets, models, transforms
import matplotlib.pyplot as plt
import time
import os
import copy
from skimage import io, transform
import torch
from torch.utils import data
from torch.utils.data import DataLoader, SubsetRandomSampler,Dataset
from random import randint
from tqdm import tqdm
from PIL import Image
from random import shuffle
if is_alchemy_used:
    from catalyst.dl import SupervisedAlchemyRunner as SupervisedRunner
else:
    from catalyst.dl import SupervisedRunner

import random
from scipy import ndimage
import torch.nn as nn
import torch.nn.functional as F
from network.models import model_selection
import math

import cv2
from albumentations import Compose, RandomCrop, Normalize, HorizontalFlip, Resize, RandomResizedCrop, CenterCrop,PadIfNeeded
from albumentations.pytorch import ToTensor
from alchemy import Logger
token = "d1dd16f08d518293bcbeddd313b49aa4"

print("PyTorch Version: ",torch.__version__)
print("Torchvision Version: ",torchvision.__version__)

def seed_everything(seed=12345):
    random.seed(seed)
    os.environ['PYTHONHASHSEED'] = str(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    torch.cuda.manual_seed(seed)
    torch.backends.cudnn.deterministic = True
# seed_everything()

  _np_qint8 = np.dtype([("qint8", np.int8, 1)])
  _np_quint8 = np.dtype([("quint8", np.uint8, 1)])
  _np_qint16 = np.dtype([("qint16", np.int16, 1)])
  _np_quint16 = np.dtype([("quint16", np.uint16, 1)])
  _np_qint32 = np.dtype([("qint32", np.int32, 1)])
  np_resource = np.dtype([("resource", np.ubyte, 1)])


PyTorch Version:  1.3.1
Torchvision Version:  0.4.2


In [2]:
from typing import Callable, List, Tuple 

import os
import torch
import catalyst

from catalyst.dl import utils

print(f"torch: {torch.__version__}, catalyst: {catalyst.__version__}")

# os.environ["CUDA_VISIBLE_DEVICES"] = "0"  # "" - CPU, "0" - 1 GPU, "0,1" - MultiGPU

SEED = 42
utils.set_global_seed(SEED)
utils.prepare_cudnn(deterministic=True)

torch: 1.3.1, catalyst: 20.02.3


In [3]:
BASE_DIR = f'/home/{os.environ["USER"]}/projects/dfdc'
DATA_DIR = os.path.join(BASE_DIR, 'data/dfdc-videos')
HDF5_DIR = f'/home/{os.environ["USER"]}/projects/dfdc/data/dfdc-crops/hdf5'
IMG_DIR = f'/home/{os.environ["USER"]}/projects/dfdc/data/dfdc-crops/webp'

# Models to choose from [resnet, alexnet, vgg, squeezenet, densenet, inception]
model_name = "resnet"

# Number of classes in the dataset
num_classes = 2

# Batch size for training (change depending on how much memory you have)
batch_size = 24#24#32

# Number of epochs to train for 
num_epochs = 10

# Flag for feature extracting. When False, we finetune the whole model, 
#   when True we only update the reshaped layer params
feature_extract = False

In [4]:
def set_parameter_requires_grad(model, feature_extracting):
    if feature_extracting:
        for param in model.parameters():
            param.requires_grad = False
def initialize_model(model_name, num_classes, feature_extract, use_pretrained=True):
    # Initialize these variables which will be set in this if statement. Each of these
    #   variables is model specific.
    model_ft = None
    input_size = 0

    if model_name == "resnet":
        """ Resnet18
        """
        model_ft = models.resnet18(pretrained=use_pretrained)
        set_parameter_requires_grad(model_ft, feature_extract)
        num_ftrs = model_ft.fc.in_features
        model_ft.fc = nn.Linear(num_ftrs, num_classes)
        input_size = 224

    elif model_name == "alexnet":
        """ Alexnet
        """
        model_ft = models.alexnet(pretrained=use_pretrained)
        set_parameter_requires_grad(model_ft, feature_extract)
        num_ftrs = model_ft.classifier[6].in_features
        model_ft.classifier[6] = nn.Linear(num_ftrs,num_classes)
        input_size = 224

    elif model_name == "vgg":
        """ VGG11_bn
        """
        model_ft = models.vgg11_bn(pretrained=use_pretrained)
        set_parameter_requires_grad(model_ft, feature_extract)
        num_ftrs = model_ft.classifier[6].in_features
        model_ft.classifier[6] = nn.Linear(num_ftrs,num_classes)
        input_size = 224

    elif model_name == "squeezenet":
        """ Squeezenet
        """
        model_ft = models.squeezenet1_0(pretrained=use_pretrained)
        set_parameter_requires_grad(model_ft, feature_extract)
        model_ft.classifier[1] = nn.Conv2d(512, num_classes, kernel_size=(1,1), stride=(1,1))
        model_ft.num_classes = num_classes
        input_size = 224

    elif model_name == "densenet":
        """ Densenet
        """
        model_ft = models.densenet121(pretrained=use_pretrained)
        set_parameter_requires_grad(model_ft, feature_extract)
        num_ftrs = model_ft.classifier.in_features
        model_ft.classifier = nn.Linear(num_ftrs, num_classes) 
        input_size = 224

    elif model_name == "inception":
        """ Inception v3 
        Be careful, expects (299,299) sized images and has auxiliary output
        """
        model_ft = models.inception_v3(pretrained=use_pretrained)
        set_parameter_requires_grad(model_ft, feature_extract)
        # Handle the auxilary net
        num_ftrs = model_ft.AuxLogits.fc.in_features
        model_ft.AuxLogits.fc = nn.Linear(num_ftrs, num_classes)
        # Handle the primary net
        num_ftrs = model_ft.fc.in_features
        model_ft.fc = nn.Linear(num_ftrs,num_classes)
        input_size = 299

    else:
        print("Invalid model name, exiting...")
        exit()
    
    return model_ft, input_size

def my_initialize_model(file_checkpoint, model_name, feature_extract, emb_len):
    

    model, input_size = initialize_model(model_name, 2, feature_extract, use_pretrained=True)
#     model = model.to(device)
    if file_checkpoint != None:
        print(f'Loading checkpoint {file_checkpoint}')
        checkpoint = torch.load(file_checkpoint)#, map_location=device)
        model.load_state_dict(checkpoint['model_state_dict'])
    
    _ = model.eval()
    
    if file_checkpoint != None:
        del checkpoint

#     emb_len = 128
    if emb_len > 2:
        num_ftrs = model.fc.in_features
        model.fc = nn.Linear(num_ftrs, emb_len)
    return model, input_size

In [5]:
# model, input_size = initialize_model(model_name, num_classes, feature_extract, use_pretrained=True)

In [6]:
def create_filter(kernel_size = 7, sigma = 3, channels = 3):
    # Create a x, y coordinate grid of shape (kernel_size, kernel_size, 2)
    x_cord = torch.arange(kernel_size)
    x_grid = x_cord.repeat(kernel_size).view(kernel_size, kernel_size)
    y_grid = x_grid.t()
    xy_grid = torch.stack([x_grid, y_grid], dim=-1)

    mean = (kernel_size - 1)/2.
    variance = sigma**2.

    # Calculate the 2-dimensional gaussian kernel which is
    # the product of two gaussian distributions for two different
    # variables (in this case called x and y)
    gaussian_kernel = (1./(2.*math.pi*variance)) *\
                      torch.exp(
                          -torch.sum((xy_grid - mean)**2., dim=-1) /\
                          (2*variance)
                      )

    # Make sure sum of values in gaussian kernel equals 1.
    gaussian_kernel = gaussian_kernel / torch.sum(gaussian_kernel)

    # Reshape to 2d depthwise convolutional weight
    gaussian_kernel = gaussian_kernel.view(1, 1, kernel_size, kernel_size)
    gaussian_kernel = gaussian_kernel.repeat(channels, 1, 1, 1)

    gaussian_filter = nn.Conv2d(in_channels=channels, out_channels=channels,
                                kernel_size=kernel_size, groups=channels, bias=False
                                , padding=(int(kernel_size/2),int(kernel_size/2))
                               )

    gaussian_filter.weight.data = gaussian_kernel
    gaussian_filter.weight.requires_grad = False
    return gaussian_filter


In [7]:
def k_to_ij(num_frames, k):
    num_in_row = int(np.sqrt(num_frames))
    q = 0
    for i in range(num_in_row):
        for j in range(num_in_row):
            if q == k:
                return (i, j)
            q += 1
def ij_to_k(num_frames, i_in, j_in):
    num_in_row = int(np.sqrt(num_frames))
    q = 0
    for i in range(num_in_row):
        for j in range(num_in_row):
            if i == i_in and j == j_in:
                return q
            q += 1
    

In [8]:


class NetLstm(nn.Module):
    def __init__(self, checkpoint_file, model_name, emb_len, hidden_dim):
        super(NetLstm, self).__init__()
        self.backbone, self.input_size = my_initialize_model(checkpoint_file, model_name, False, emb_len)
        self.lstm = nn.LSTM(emb_len, hidden_dim)
        self.hidden2tag = nn.Linear(hidden_dim, 2)
#         self.out2tag = nn.Linear(self.input_size, 2)
        self.filter = create_filter(kernel_size = 7, sigma = 3, channels = 3)

    def forward(self, sentences):
        self.lstm.flatten_parameters()
        
        tag_scores_list = torch.zeros((sentences.shape[0], 2), dtype=torch.float32 ).cuda()
#         print(tag_scores_list.shape)
        for i, sentence in enumerate(sentences):   
  
            sentence = sentence.permute(0, 3, 1, 2)
            embeds = self.backbone(sentence - self.filter(sentence))
#             print(embeds.shape)
            lstm_out, _ = self.lstm(embeds.view(len(sentence), 1, -1))
#             print(lstm_out.shape)
            tag_space = self.hidden2tag(lstm_out.view(len(sentence), -1))     
#             print(tag_space.shape)
            tag_scores_list[i] = tag_space[-1,:]
        return tag_scores_list
    
class NetRes(nn.Module):
    def __init__(self, checkpoint_file, model_name):
        super(NetRes, self).__init__()
        self.backbone, self.input_size = my_initialize_model(checkpoint_file, model_name, False, 2)
 
        self.filter = create_filter(kernel_size = 7, sigma = 3, channels = 3)
        self.norm = transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
        

    def forward(self, sentences):
        
        tag_scores_list = torch.zeros((sentences.shape[0], 2), dtype=torch.float32 ).cuda()
#         print(tag_scores_list.shape)
        for i, sentence in enumerate(sentences):  
                    
            sentence = sentence.permute(0, 3, 1, 2)
            sentence = sentence - self.filter(sentence)
            for j in range(sentence.shape[0]):
                sentence[j] = self.norm(sentence[j])
            embeds = self.backbone(sentence)
#             print(embeds.shape)
            tag_scores_list[i] = embeds.mean(axis=0)
        return tag_scores_list

class NetResThr(nn.Module):
    def __init__(self, checkpoint_file, model_name, emb_len, num_frames=4 ):
        super(NetResThr, self).__init__()
        self.backbone, self.input_size = my_initialize_model(checkpoint_file, model_name, False, emb_len)
        self.emb_len = emb_len
        self.num_frames = num_frames
        self.img_in_row = int(np.sqrt(self.num_frames))
        self.sz_in_row = int(self.input_size/self.img_in_row)
#         self.filter = create_filter(kernel_size = 7, sigma = 3, channels = 3)
        self.norm = transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
        self.fc =   nn.Linear(self.num_frames * emb_len, 2)

    def forward(self, sentences):
        
        tag_scores_list = torch.zeros((sentences.shape[0], 2), dtype=torch.float32 ).cuda()
        
        for k, sentence in enumerate(sentences): 
            
            sentence = sentence.permute(0, 3, 1, 2)
            sentence_generated = torch.zeros(sentence.shape, dtype=torch.float32).cuda()

            for frame_out in range(num_frames):
                for frame_in in range(num_frames):
                    for pt_out in range(num_frames):            

                        i_in, j_in = k_to_ij(self.num_frames, frame_out)
                        i_out, j_out = k_to_ij(self.num_frames, pt_out)
                        sentence_generated[frame_out,:,i_out*self.sz_in_row:(i_out+1)*self.sz_in_row, j_out*self.sz_in_row:(j_out+1)*self.sz_in_row] = \
                            sentence[frame_in, :, i_in*self.sz_in_row:(i_in+1)*self.sz_in_row, j_in*self.sz_in_row:(j_in+1)*self.sz_in_row]                    
                sentence_generated[frame_out] = self.norm(sentence_generated[frame_out] )
            embeds = self.backbone(sentence_generated)
            embeds = torch.flatten(embeds)
            embeds = self.fc( embeds )
            tag_scores_list[k] = embeds#.mean(axis=0)
        return tag_scores_list    

# model = NetLstm('/home/kb/Documents/best0.pth', 'resnet', 16, 16)
# model = NetLstm(None, 'resnet', 4, 4)
# model = NetRes(None, 'resnet')
emb_len = 32
model = NetResThr(None, 'resnet', emb_len, num_frames)
input_size = model.input_size




In [9]:
import math
# import os
import gc
import sys
import time

from pathlib import Path

from functools import partial
from typing import Callable, Dict, Iterator, List, Optional, Tuple, Union

# from tqdm.notebook import tqdm

import cv2
import h5py
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

import torch
import torchvision
from torch import Tensor

In [10]:
sys.path.insert(0, os.path.join(BASE_DIR, 'src'))
from dataset.utils import read_labels
from prepare_data import get_file_list

In [11]:
def show_images(images, cols = 1, titles = None):
    """Display a list of images in a single figure with matplotlib.
    
    Parameters
    ---------
    images: List of np.arrays compatible with plt.imshow.
    
    cols (Default = 1): Number of columns in figure (number of rows is 
                        set to np.ceil(n_images/float(cols))).
    
    titles: List of titles corresponding to each image. Must have
            the same length as titles.
    """
    assert((titles is None)or (len(images) == len(titles)))
    n_images = len(images)
    if titles is None: titles = ['Image (%d)' % i for i in range(1,n_images + 1)]
    fig = plt.figure()
    for n, (image, title) in enumerate(zip(images, titles)):
        a = fig.add_subplot(cols, np.ceil(n_images/float(cols)), n + 1)
        if image.ndim == 2:
            plt.gray()
        plt.imshow(image)
        a.set_title(title)
    fig.set_size_inches(np.array(fig.get_size_inches()) * n_images)
    plt.show()

In [12]:
def check_len_hdf5(path):
    lens = dict()
    for name in os.listdir(path):
        full_path = os.path.join(path, name)
        if os.path.isfile(full_path):
            with h5py.File(full_path, 'r+') as f:
                lens[name] = len(f)
    return lens


def check_len_images(path):
    lens = dict()
    for name in os.listdir(path):
        full_path = os.path.join(path, name)
        if os.path.isdir(full_path):
            lens[name] = len(os.listdir(full_path))
    return lens

In [13]:
def sparse_frames(n: int, total: int) -> np.ndarray:
    idxs = np.linspace(0, total, min(n, total), dtype=int, endpoint=False)
    rnd_shift = np.random.randint(0, (total - idxs[-1]))
    return idxs + rnd_shift


def rnd_slice_frames(n: int, total: int, stride=1.) -> np.ndarray:
    idxs = np.arange(0, total, stride)[:n].astype(np.uint16)
    rnd_shift = np.random.randint(0, (total - idxs[-1]))
    return idxs + rnd_shift


def create_mask(idxs: np.ndarray, total: int) -> np.ndarray:
    mask = np.zeros(total, dtype=np.bool)
    mask[idxs] = 1
    return mask


def pad(frames: np.ndarray, amount: int, where :str='start') -> np.ndarray:
    dims = np.zeros((frames.ndim, 2), dtype=np.int8)
    pad_dim = 1 if where == 'end' else 0
    dims[0, pad_dim] = amount
    return np.pad(frames, dims, 'constant')

In [14]:
class FrameSampler():
    def __init__(self, num_frames: int, real_fake_ratio: float, 
                 p_sparse: float):
        self.num_frames = num_frames
        self.real_fake_ratio = real_fake_ratio
        self.p_sparse = p_sparse
        
    def __call__(self, label: Tuple[int, bool]) -> Callable[[int], np.ndarray]:
        dice = np.random.rand()
        if dice < self.p_sparse:
            return partial(sparse_frames, self.num_frames)
        else:
            # Stored frames: fake - 30, real - 150, 
            # the real_fake_ratio should be set to 150 / 30 = 5
            # stride for fake: 5 - (4 * 1) = 1
            # stride for real: 5 - (4 * 0) = 5
            n = self.real_fake_ratio
            stride = n - ((n-1) * int(label))
            return partial(rnd_slice_frames, self.num_frames, stride=stride)

In [15]:
# sampler = FrameSampler(num_frames=15, real_fake_ratio=100/30, p_sparse=1.)


In [16]:
class ImagesDataset(torch.utils.data.Dataset):
    def __init__(self, base_path: str, size: Tuple[int, int], 
                 sampler: FrameSampler, 
                 sub_dirs: Optional[List[str]]=None):
        self.base_path = base_path
        self.size = size
        self.sampler = sampler
        self.df = ImagesDataset._read_annotations(base_path, sub_dirs)
        
    @staticmethod
    def _read_annotations(base_path: str, 
                          sub_dirs: Optional[List[str]]) -> pd.DataFrame:
        if not os.path.isdir(base_path):
            raise RuntimeError('Unable to access %s' % base_path)
        parts = []
        load_all = sub_dirs is None
        if load_all:
            sub_dirs = os.listdir(base_path)
        for chunk_dir in sub_dirs:
            chunk_path = Path(base_path)/chunk_dir
            if not chunk_path.is_dir():
                if not load_all:
                    print('Invalid dir: %s' % str(chunk_path))
                continue
            files = os.listdir(chunk_path)
            df = pd.DataFrame(files, columns=['video'])
            df['label'] = df['video'].str.endswith('_1')
            df['dir'] = chunk_dir
            parts.append(df)
        if len(parts) < 1:
            raise AttributeError('No images were found')
        return pd.concat(parts).reset_index()
    
    @staticmethod
    def read_image_folder(path: str, num_frames: int, size: int,
                          sample_fn: Callable[[int], np.ndarray]) -> np.ndarray:
        img_size = (size, size)
        images = []
        files = sorted(os.listdir(path))
        total_frames = len(files)
        if total_frames > 0:
            idxs = sample_fn(total_frames)
            pick = create_mask(idxs, total_frames)
            for i, file in enumerate(files):
                if pick[i]:
                    img_path = os.path.join(path, file)
                    img = cv2.imread(img_path, cv2.IMREAD_COLOR)
                    img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
                    if img.shape[0] > input_size:
                        img = img[int(img.shape[0]/2)-int(input_size/2):int(img.shape[0]/2)+int(input_size/2),:,:]
                    if img.shape[1] > input_size:
                        img = img[:, int(img.shape[1]/2)-int(input_size/2):int(img.shape[1]/2)+int(input_size/2),:]
                    img = PadIfNeeded(min_height=input_size, min_width=input_size)(image=img)['image']
                    
#                     img = cv2.resize(img, img_size, 
#                                      interpolation=cv2.INTER_NEAREST)
                    images.append(img)
            return np.stack(images)
        else:
            return np.empty((0, size, size, 3), dtype=np.uint8)
        
    def __len__(self) :
        return len(self.df)
    
    def __getitem__(self, idx) -> Tuple[np.ndarray, int]:
        num_frames, size = self.size
        meta = self.df.iloc[idx]
        label = int(meta.label)
        path = os.path.join(self.base_path, meta.dir, meta.video)
        
        if os.path.isdir(path):
            sample_fn = self.sampler(meta.label)
            frames = ImagesDataset.read_image_folder(
                path, num_frames, size, sample_fn=sample_fn)
        else:
            print('Dir not found: {}'.format(path))
            frames = np.zeros((num_frames, size, size, 3), dtype=np.uint8)
        
        if len(frames) > 0:
            pad_amount = num_frames - len(frames)
            if pad_amount > 0:
                frames = pad(frames, pad_amount, 'start')
        else:
            print('Empty file {}'.format(path))
            frames = np.zeros((num_frames, size, size, 3), dtype=np.uint8)
            
        frames = np.array(frames, dtype=np.float32)
        tr = Compose([
            
            CenterCrop(170, 80),
            Resize(input_size, input_size, interpolation=3, p=1),
#             RandomResizedCrop(input_size, input_size, scale=(0.2, 0.3), 
#                                          ratio=(0.8, 1.2), 
#                                          interpolation=3, always_apply=True, p=1.0),
            
            Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225]),
#             ToTensor()

        ])
        frames =np.asarray([tr(image=frame)['image'] for frame in frames ], dtype=np.float32)

#         print(frames.shape)
#         kernel = np.array([[-1, -1, -1],
#                    [-1,  8, -1],
#                    [-1, -1, -1]])
#         for i in range(frames.shape[0]):
#             for j in range(3):
#                 frames[i,:,:,j] = ndimage.convolve(frames[i,:,:,j], kernel)
            
        
        return frames, label

In [17]:
def shuffled_idxs(values: np.ndarray, val: int) -> List[int]:
    idxs = (values == val).nonzero()[0]
    idxs = np.random.permutation(idxs)
    return idxs


class BalancedSampler(torch.utils.data.RandomSampler):
    def __init__(self, data_source, replacement=False, num_samples=None):
        
        super().__init__(data_source, replacement, num_samples)
        if not hasattr(data_source, 'df'):
            raise ValueError("DataSource must have a 'df' property")
            
        if not 'label' in data_source.df: 
            raise ValueError("DataSource.df must have a 'label' column")
    
    def __iter__(self):
        df = self.data_source.df
        all_labels = df['label'].values
        uniq_labels, label_freq = np.unique(all_labels, return_counts=True)
        rev_freq = (len(all_labels) / label_freq)
        
        idxs = []
        for freq, label in zip(rev_freq, uniq_labels):
            fraction, times = np.modf(freq)
            label_idxs = (all_labels == label).nonzero()[0]
            for _ in range(int(times)):
                label_idxs = np.random.permutation(label_idxs)
                idxs.append(label_idxs)
            if fraction > 0.05:
                label_idxs = np.random.permutation(label_idxs)
                chunk = int(len(label_idxs) * fraction)
                idxs.append(label_idxs[:chunk])
        idxs = np.concatenate(idxs)
        idxs = np.random.permutation(idxs)[:self.num_samples]
        return iter(idxs.tolist())

In [18]:


def get_loader(num_frames=15, real_fake_ratio=1, p_sparse=0.5, input_size=input_size, img_dir=None, sub_dirs=None):
    
    sampler = FrameSampler(num_frames, real_fake_ratio=real_fake_ratio, p_sparse=p_sparse)
    ds = ImagesDataset(img_dir, size=(num_frames, input_size), sampler=sampler,
                       sub_dirs =sub_dirs)
    print(len(ds))
    s = BalancedSampler(ds)
    batch_sampler = torch.utils.data.BatchSampler(
        BalancedSampler(ds), 
        batch_size=batch_size, 
        drop_last=True
        
    )
    dl = torch.utils.data.DataLoader(ds, batch_sampler=batch_sampler)
    return dl
    
loaders = {}
loaders['train'] = get_loader(num_frames=num_frames, real_fake_ratio=100/30, p_sparse=1.0, input_size=input_size, 
                              img_dir='/home/kb/projects/dfdc/data/dfdc-crops/webp',
                              sub_dirs= ['dfdc_train_part_%d' % i for i in [1,5,10,15,20,25,30,35]]
                             )
loaders['valid'] = get_loader(num_frames=num_frames, real_fake_ratio=100/30, p_sparse=1.0, input_size=input_size, 
                              img_dir='/home/kb/projects/dfdc/data/dfdc-crops/webp',
                              sub_dirs= ['dfdc_train_part_%d' % i for i in range(40,50)]
                             )
# loaders['test'] = get_loader(num_frames=num_frames, real_fake_ratio=100/30, p_sparse=1.0, input_size=input_size, 
#                               img_dir='/home/kb/projects/dfdc/data/dfdc-crops/webp')

19114
Invalid dir: /home/kb/projects/dfdc/data/dfdc-crops/webp/dfdc_train_part_42
Invalid dir: /home/kb/projects/dfdc/data/dfdc-crops/webp/dfdc_train_part_43
Invalid dir: /home/kb/projects/dfdc/data/dfdc-crops/webp/dfdc_train_part_44
Invalid dir: /home/kb/projects/dfdc/data/dfdc-crops/webp/dfdc_train_part_47
Invalid dir: /home/kb/projects/dfdc/data/dfdc-crops/webp/dfdc_train_part_48
Invalid dir: /home/kb/projects/dfdc/data/dfdc-crops/webp/dfdc_train_part_49
8394


In [19]:
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

In [None]:


project = 'dfdc_v2_resnet'
num_epochs = 25

group = datetime.now().strftime("%m_%d_%Y__%H_%M_%S")

if FASTPART:
    group = f'fast_{group}'
    
expnum = 0
experiment = f"exp{expnum}"
logdir = f"/home/kb/hdd/logs/deepfake/{project}/{group}/{experiment}"


model = model.to(device)
params_to_update = model.parameters()
if feature_extract:
    params_to_update = []
    for name,param in model.named_parameters():
        if param.requires_grad == True:
            params_to_update.append(param)
else:
    for name,param in model.named_parameters():
        if param.requires_grad == True:
            pass


criterion = torch.nn.CrossEntropyLoss()
optimizer = optim.AdamW(params=model.parameters(), lr=0.00001)
scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer)

# model runner
runner = SupervisedRunner()





print(f'----------------Experiment: {experiment}')
logger = Logger(
    token=token,
    experiment=experiment,
    group=group,
    project=project,
)

logger.close()

runner.train(
    model=model,
    criterion=criterion,
    optimizer=optimizer,
    scheduler=scheduler,
    loaders=loaders,
    logdir=logdir,
    num_epochs=num_epochs,
    verbose=True,
    monitoring_params={
        "token": token,
        "project": project,
        "experiment": experiment,
        "group": group,
    }
)

----------------Experiment: exp0
1/25 * Epoch (train): 100% 796/796 [51:56<00:00,  3.92s/it, loss=0.507]
1/25 * Epoch (valid): 100% 349/349 [20:36<00:00,  3.54s/it, loss=0.684]
[2020-03-09 12:27:47,754] 
1/25 * Epoch 1 (train): _base/lr=1.000e-05 | _base/momentum=0.9000 | _timers/_fps=6.7718 | _timers/batch_time=3.5455 | _timers/data_time=0.9787 | _timers/model_time=2.5668 | loss=0.6262
1/25 * Epoch 1 (valid): _base/lr=1.000e-05 | _base/momentum=0.9000 | _timers/_fps=6.7853 | _timers/batch_time=3.5381 | _timers/data_time=1.0160 | _timers/model_time=2.5220 | loss=0.6964
2/25 * Epoch (train): 100% 796/796 [52:19<00:00,  3.94s/it, loss=0.434]
2/25 * Epoch (valid):  20% 69/349 [04:08<16:36,  3.56s/it, loss=0.669]

In [None]:
# num_frames = num_frames
# img_in_row = int(np.sqrt(num_frames))
# sz_in_row = int(input_size/img_in_row)

In [None]:
# for sentences, labels in loaders['train']:
    
#     for k, sentence in enumerate(sentences): 
#         print(f'showing {k}-th video')
#         sentence = sentence.permute(0, 3, 1, 2)
#         sentence_generated = torch.zeros(sentence.shape, dtype=torch.float32).cuda()
        
#         for frame_out in range(num_frames):
#             for frame_in in range(num_frames):
#                 for pt_out in range(num_frames):            
                
#                     i_in, j_in = k_to_ij(num_frames, frame_out)
#                     i_out, j_out = k_to_ij(num_frames, pt_out)
#                     sentence_generated[frame_out,:,i_out*sz_in_row:(i_out+1)*sz_in_row, j_out*sz_in_row:(j_out+1)*sz_in_row] = \
#                         sentence[frame_in, :, i_in*sz_in_row:(i_in+1)*sz_in_row, j_in*sz_in_row:(j_in+1)*sz_in_row]

                
      
    
    
#         for j in range(sentence_generated.shape[0]):
#             print(f'---showing {j}-th frame')
            
#             plt.figure()
#             img = sentence_generated[j,:,:].permute(1,2,0).cpu().numpy() 
#             img -= img.min()
#             img /= img.max() / 255.
#             img = np.array(img, dtype=np.uint8)
#             print(f'max {img[j,:,:].max()}, min {img[j,:,:].min()}')
#             plt.imshow(  img)
#             plt.show()
        
    
#         break
#     break

In [None]:
# sentence_generated.shape

In [None]:
# sentence_generated[0].max()

In [None]:
# sentence[0].min()