In [58]:
import time
import torch
import torch.nn as nn
import torch.optim as optim

from models import Autoencoder

In [52]:
import cv2
import itertools
import json
import numpy as np
import os.path
import torch

In [60]:
class CapIter:
    """Iterator over at most ``n_frames`` frames read from a capture object.

    ``cap`` only needs a ``read()`` method returning an ``(ok, frame)`` pair
    (the notebook passes a ``cv2.VideoCapture``). Iteration ends at the first
    failed read or once ``n_frames`` frames have been yielded, whichever
    comes first. ``n_frames`` may be ``float("inf")`` to read until the
    stream is exhausted.
    """

    def __init__(self, cap, n_frames):
        self.cap = cap
        self.n_frames = n_frames
        self.i = 0  # number of frames yielded so far

    def __iter__(self):
        return self

    def __next__(self):
        # Check the budget *before* touching the capture: reading first would
        # decode (and throw away) one extra frame every time the limit is hit.
        if self.i >= self.n_frames:
            raise StopIteration
        ok, frame = self.cap.read()
        if not ok:
            raise StopIteration
        self.i += 1
        return frame

class DeepfakeDataset(torch.utils.data.Dataset):
    """Dataset yielding ``(frames, label)`` for every video listed in the given folders.

    Each folder must contain a ``metadata.json`` file mapping video file
    names to a metadata dict; the dict's ``'label'`` entry is returned
    alongside the decoded frames.

    Args:
        folders: iterable of directory paths, each holding ``metadata.json``.
        n_frames: maximum number of leading frames decoded per video
            (default: no limit).
    """

    def __init__(self, folders, n_frames=float("inf")):
        self.n_frames = n_frames
        self.videos = []  # list of (video_path, metadata) pairs
        for folder in folders:
            with open(os.path.join(folder, 'metadata.json')) as f:
                entries = json.load(f)
            self.videos.extend(
                (os.path.join(folder, name), metadata)
                for name, metadata in entries.items()
            )

    def __process_frame(self, frame):
        """Convert one BGR frame to an RGB, channels-first tensor divided by 255."""
        frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
        # Move the last (channel) axis to the front.
        frame = torch.tensor(frame).permute(2, 0, 1)
        return frame / 255.

    def __getitem__(self, n):
        """Decode video ``n`` and return ``(frames, label)``.

        ``frames`` stacks the processed frames along a new leading dimension;
        ``label`` is ``metadata['label']`` for that video.

        Raises:
            RuntimeError: if no frame could be read from the video.
        """
        video, metadata = self.videos[n]
        cap = cv2.VideoCapture(video)
        try:
            frames = [self.__process_frame(frame) for frame in CapIter(cap, self.n_frames)]
        finally:
            # Always release the capture, even if a frame fails to convert.
            cap.release()
        if not frames:
            # torch.stack would also raise RuntimeError on an empty list, but
            # without saying which file was the problem.
            raise RuntimeError(f'no frames could be read from {video!r}')
        return (torch.stack(frames), metadata['label'])

    def __len__(self):
        """Number of videos listed across all folders."""
        return len(self.videos)

In [None]:
# Folders to index; DeepfakeDataset reads a metadata.json from each of them.
train_folders = ['train/dfdc_train_part_0']

# n_frames=1 decodes only the first frame of every video; leave the argument
# out to decode every frame instead.
train_dataset = DeepfakeDataset(train_folders, n_frames=1)

# Smoke test: fetch the first sample so the cell displays it.
train_dataset[0]

In [59]:
# Training hyper-parameters.
num_epochs = 5
batch_size = 1

# Network, Adam optimizer with its default settings, and mean-squared-error loss.
model = Autoencoder()
optimizer = optim.Adam(model.parameters())
criterion = nn.MSELoss()

In [57]:
# Upper bound on the number of videos used for training in each epoch.
max_videos_per_epoch = 10

dataloader = torch.utils.data.DataLoader(train_dataset, batch_size=batch_size, shuffle=True)

# Batches needed to cover max_videos_per_epoch (ceiling division).
batches_per_epoch = (max_videos_per_epoch + batch_size - 1) // batch_size

for epoch in range(num_epochs):
    loss = None  # stays None if the dataloader yields nothing
    # islice stops *before* requesting the next batch, so no extra video is
    # decoded only to be discarded (which an enumerate + break would do).
    for data, _ in itertools.islice(dataloader, batches_per_epoch):
        # Merge the batch and frame dimensions:
        # (batch, frames, C, H, W) -> (batch * frames, C, H, W).
        data = data.flatten(0, 1)
        optimizer.zero_grad()
        output = model(data)
        # Reconstruction objective: the model output is compared to its input.
        loss = criterion(output, data)
        loss.backward()
        optimizer.step()
    if loss is None:
        print(f'epoch: {epoch}, no batches were produced by the dataloader')
    else:
        # Reports the loss of the last batch of the epoch, not an epoch mean.
        print(f'epoch: {epoch}, loss: {loss}')

# The file name embeds the current Unix time.
torch.save(model.state_dict(), f'autoencoder{time.time()}.pt')

epoch: 0, loss: 0.09596087783575058


KeyboardInterrupt: 

In [3]:
import glob
import os

# Find the most recently changed checkpoint among the .pt files in the
# current working directory and print its path.
list_of_files = glob.glob('./*.pt')  # every *.pt file directly under the current directory
# Picks the path with the largest os.path.getctime value; max() raises
# ValueError when no .pt file exists.
latest_file = max(list_of_files, key=os.path.getctime)
print(latest_file)

./autoencoder1584556558.0113933.pt


In [14]:
# Fetch sample 114 so the cell displays the returned (frames, label) tuple.
# NOTE(review): the saved output of this cell starts with "number of frames 300",
# which the DeepfakeDataset defined in this notebook does not print -- the
# output looks stale; re-run after a kernel restart to refresh it.
train_dataset[114]

number of frames 300


(tensor([[[[0.4667, 0.4824, 0.4824,  ..., 0.5373, 0.5412, 0.5451],
           [0.4627, 0.4706, 0.4706,  ..., 0.5373, 0.5412, 0.5451],
           [0.4471, 0.4588, 0.4588,  ..., 0.5412, 0.5451, 0.5451],
           ...,
           [0.2941, 0.2784, 0.2667,  ..., 0.6549, 0.6314, 0.6235],
           [0.2627, 0.2588, 0.2471,  ..., 0.6392, 0.6235, 0.6157],
           [0.2392, 0.2353, 0.2392,  ..., 0.6314, 0.6196, 0.6157]],
 
          [[0.5098, 0.5255, 0.5255,  ..., 0.5961, 0.6000, 0.6039],
           [0.5059, 0.5137, 0.5137,  ..., 0.5961, 0.6000, 0.6039],
           [0.4902, 0.5020, 0.5020,  ..., 0.6000, 0.6039, 0.6039],
           ...,
           [0.2392, 0.2235, 0.2118,  ..., 0.5725, 0.5608, 0.5529],
           [0.2078, 0.2039, 0.1922,  ..., 0.5569, 0.5529, 0.5451],
           [0.1843, 0.1804, 0.1843,  ..., 0.5490, 0.5490, 0.5451]],
 
          [[0.3882, 0.4039, 0.4039,  ..., 0.3882, 0.3922, 0.3961],
           [0.3843, 0.3922, 0.3922,  ..., 0.3882, 0.3922, 0.3961],
           [0.3686, 0.38