In [0]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import matplotlib.pyplot as plt
from tqdm import tqdm
import time

In [26]:
import torchvision
import torchvision.transforms as transforms
from torch.utils.data.sampler import SubsetRandomSampler
import torch.optim as optim
import numpy as np

!unzip -q "/content/deep-fake-detection-knu-2020.zip"

BATCH_SIZE = 64

replace submission_sample.csv? [y]es, [n]o, [A]ll, [N]one, [r]ename: 

In [27]:
train_data_path = '/content/train/train/'

VAL_SIZE = 0.15  # fraction of the data held out for validation
SPLIT_SEED = 42  # fixes the train/validation partition across kernel restarts

# Convert every image to a single grayscale channel, turn it into a tensor and
# shift/scale it with mean 0.5 and std 0.5.
transform = transforms.Compose(
        [transforms.Grayscale(num_output_channels=1),
         transforms.ToTensor(),
         transforms.Normalize((0.5,), (0.5,))])

# One ImageFolder backs both loaders; the samplers below decide which
# indices each loader is allowed to draw.
train_dataset = torchvision.datasets.ImageFolder(
        root=train_data_path,
        transform=transform)

# Seeded permutation of all indices: the first `split` entries become the
# validation set, the rest the training set. A dedicated generator is used so
# the split is the same on every run and does not depend on global RNG state.
num_train = len(train_dataset)
indices = np.random.default_rng(SPLIT_SEED).permutation(num_train).tolist()
split = int(np.floor(VAL_SIZE * num_train))
train_idx, val_idx = indices[split:], indices[:split]

train_sampler = SubsetRandomSampler(train_idx)
val_sampler = SubsetRandomSampler(val_idx)

# Creating dataloaders for training and validation
train_loader = torch.utils.data.DataLoader(
        train_dataset,
        batch_size = BATCH_SIZE,
        num_workers=0,
        sampler=train_sampler)

val_loader = torch.utils.data.DataLoader(
        train_dataset,
        batch_size = BATCH_SIZE,
        num_workers=0,
        sampler=val_sampler)

print(f"Length train: {len(train_idx)}")
print(f"Length valid: {len(val_idx)}")


Length train: 102000
Length valid: 18000


In [0]:
from torchvision import datasets, transforms
# Use the GPU only when one is actually available, so the notebook also runs
# on CPU-only runtimes instead of failing at the first `.to(device)`.
use_cuda = torch.cuda.is_available()

device = torch.device("cuda" if use_cuda else "cpu")

# NOTE(review): the three settings below are not referenced by any cell visible
# in this notebook (the loaders use BATCH_SIZE); kept for compatibility.
train_batch_size=64
test_batch_size=1000
kwargs = {'num_workers': 1, 'pin_memory': True} if use_cuda else {}

In [0]:
class Net(nn.Module):
    """Small two-stage CNN that emits log-probabilities over two classes.

    Layout: conv(1->20, k=5) -> ReLU -> 2x2 max-pool -> batch-norm ->
    conv(20->50, k=5) -> ReLU -> 2x2 max-pool -> batch-norm ->
    linear(800->500) -> ReLU -> linear(500->2) -> log-softmax.

    The first linear layer expects 4*4*50 features per sample, which is what
    the convolution/pooling stack produces for a 1x28x28 input.
    """

    def __init__(self):
        super().__init__()
        self.conv1 = nn.Conv2d(1, 20, 5)
        self.bnm1 = nn.BatchNorm2d(20)
        self.conv2 = nn.Conv2d(20, 50, 5)
        self.bnm2 = nn.BatchNorm2d(50)
        self.fc1 = nn.Linear(4*4*50, 500)
        self.fc2 = nn.Linear(500, 2)

    def forward(self, x):
        """Return per-sample log-probabilities of shape (batch, 2).

        Raises:
            RuntimeError: if the input's spatial size does not yield 4*4*50
                features per sample (raised by the first linear layer).
        """
        x = F.relu(self.conv1(x))
        x = F.max_pool2d(x, 2, 2)
        x = self.bnm1(x)
        x = F.relu(self.conv2(x))
        x = F.max_pool2d(x, 2, 2)
        x = self.bnm2(x)
        # Flatten per sample and keep the batch dimension fixed. A
        # `view(-1, 800)` would silently reshuffle samples into extra rows
        # whenever the feature count differs from 800; flattening from dim 1
        # makes such a mismatch fail loudly in fc1 instead.
        x = torch.flatten(x, 1)
        x = F.relu(self.fc1(x))
        x = self.fc2(x)
        return F.log_softmax(x, dim=1)

In [0]:
import torch.optim as optim

# Hyperparameters for SGD with momentum.
lr, momentum = 0.01, 0.5

# Instantiate the network, move it to the selected device, and hand its
# parameters to the optimiser.
model = Net()
model = model.to(device)
optimizer = optim.SGD(params=model.parameters(), momentum=momentum, lr=lr)

In [0]:
def train(model, device, train_loader, optimizer, epoch,log_interval):
    """Run one training epoch and return the mean per-sample NLL loss.

    Args:
        model: network whose forward pass returns log-probabilities.
        device: torch device the batches are moved to.
        train_loader: iterable of (data, target) batches.
        optimizer: optimiser stepping the model's parameters.
        epoch: epoch number, used only in the progress line.
        log_interval: print a progress line every `log_interval` batches.

    Returns:
        Sum of per-sample losses divided by the number of samples actually
        processed (0.0 if the loader yielded nothing).
    """
    model.train()

    # When the loader draws from a sampler (e.g. a subset), the sampler length
    # is the real epoch size; len(dataset) would overstate it.
    try:
        epoch_size = len(getattr(train_loader, 'sampler', None))
    except TypeError:
        epoch_size = len(train_loader.dataset)

    total_loss = 0.0
    seen = 0
    for batch_idx, (data, target) in enumerate(train_loader):
        data, target = data.to(device), target.to(device)
        optimizer.zero_grad() # zero the gradient buffers
        output = model(data)
        loss = F.nll_loss(output, target)
        loss.backward()
        optimizer.step() # Does the update

        # `loss` is the batch mean, so mean * batch size is the batch sum;
        # this avoids evaluating the loss a second time.
        total_loss += loss.item() * len(data)
        seen += len(data)

        if batch_idx % log_interval == 0:
          print('Train Epoch: {} [{:5.0f}/{} ({:.0f}%)]\tLoss: {:.6f}'.format(
              epoch, batch_idx * len(data), epoch_size,
              100. * batch_idx / len(train_loader), loss.item()))

    # Normalise by the samples seen, not by the size of the backing dataset.
    return total_loss / seen if seen else 0.0

def test(model, device, test_loader):
    model.eval()
    val_loss = 0
    correct = 0
    with torch.no_grad():
        for data, target in val_loader:
            data, target = data.to(device), target.to(device)
            output = model(data)
            val_loss += F.nll_loss(output, target, reduction='sum').item() # sum up batch loss
            pred = output.argmax(dim=1, keepdim=True) # get the index of the max log-probability
            correct += pred.eq(target.view_as(pred)).sum().item()

    val_loss /= len(val_loader.dataset)

    print('\nTest set: Average loss: {:.4f}, Accuracy: {}/{} ({:.1f}%)\n'.format(
        val_loss, correct, len(val_loader.dataset),
        100. * correct / len(val_loader.dataset)))
    accuracy = 100. * correct / len(val_loader.dataset)
    return val_loss,accuracy

In [0]:
# Settings for the training run.
epochs, log_interval = 7, 47  # passes over the training split; batches between progress lines
save_model = False            # when True, the weights are written to disk after training

In [33]:
# Per-epoch history: training loss, validation loss, validation accuracy.
train_losses = []
test_losses = []
accuracy_list = []

# After each of these epochs the learning rate is divided by 1.2.
lr_decay_epochs = (3, 7, 10, 13)

for epoch in range(1, epochs + 1):
    trn_loss = train(model, device, train_loader, optimizer, epoch,log_interval)
    test_loss,accuracy = test(model, device, val_loader)
    train_losses.append(trn_loss)
    test_losses.append(test_loss)
    accuracy_list.append(accuracy)
    if epoch in lr_decay_epochs:
      lr=lr/1.2
      # Lower the rate on the existing optimizer. Building a new SGD instance
      # here would throw away the accumulated momentum buffers.
      for param_group in optimizer.param_groups:
          param_group['lr'] = lr

if (save_model):
    torch.save(model.state_dict(),"mnist_cnn.pt")


Test set: Average loss: 0.0223, Accuracy: 16973/120000 (14.1%)


Test set: Average loss: 0.0140, Accuracy: 17385/120000 (14.5%)


Test set: Average loss: 0.0104, Accuracy: 17542/120000 (14.6%)


Test set: Average loss: 0.0095, Accuracy: 17585/120000 (14.7%)


Test set: Average loss: 0.0071, Accuracy: 17710/120000 (14.8%)



In [0]:
# Score every image in the test folder and write the submission CSV.
model.eval()

from PIL import Image
import os
import csv

# Named `test_dir` rather than `dir` so the builtin `dir()` is not shadowed.
test_dir = "/content/test/test"
# Sorted so the rows of the output file come out in a deterministic order.
files = sorted(os.listdir(test_dir))
results = []

# Inference only: skip autograd bookkeeping for every forward pass.
with torch.no_grad():
    for f in files:
        # The context manager closes each file handle once the tensor is built.
        with Image.open(os.path.join(test_dir, f)) as image:
            normalized_image = transform(image).unsqueeze(0).to(device)
        # The model already returns log-probabilities, so exp() recovers the
        # class probabilities directly.
        log_probs = model(normalized_image)[0]
        # NOTE(review): index 1 is reported as "is_fake"; confirm against
        # train_dataset.class_to_idx that class 1 is the fake class.
        probability = log_probs.exp()[1].item()
        results.append((f, probability))

with open('results.csv', 'w', newline='') as file:
    writer = csv.writer(file)
    writer.writerow(["imagename", "is_fake"])
    for el in results:
        writer.writerow([el[0], el[1]])