In [166]:
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
@author: chingis
"""
import matplotlib.pyplot as plt
import numpy as np
import os
class Visualizer(object):
    def __ini__(self):
        super(Visualizer,self).__init__()
        
    def line_graph(self, x, y,color, title):
        plt.plot(x,y,color,label=title)
    def label_graph(self, xlabel, ylabel):
        plt.xlabel(xlabel)
        plt.ylabel(ylabel)
        plt.legend()
    
    def show(self):
        plt.show()
        
    def save(self, directory, name):
        if not os.path.isdir(directory):
            os.mkdir(directory)
        plt.savefig('{}/{}.png'.format(directory, name), format='png')
        plt.close()
            

In [167]:
class train_stats:
    def __init__(self, epochs=30):
        self.epochs = epochs
        self.train_losses = []
        self.val_losses = []
        self.train_correct = []
        self.val_correct = []
    def update_loss(self, train_loss=None, val_loss=None):
        if train_loss is not None:
            self.train_losses.append(train_loss)
        if val_loss is not None:
            self.val_losses.append(val_loss)
    def update_accuracy(self, train_acc=None, val_acc=None):
        if train_acc is not None:
            self.train_correct.append(train_acc)
        if val_acc is not None:
            self.val_correct.append(val_acc)

In [168]:
import torch
import time
from tqdm import tqdm
# NEVER mix train data, validation data, or test data
def trainer(optimizer, net, criterion, train_stats, train_loader, val_loader, device, scheduler):
    start_time = time.time()
    
    epochs = train_stats.epochs
    # Necessary for FP16
    scaler = torch.cuda.amp.GradScaler()
    
    for i in tqdm(range(epochs)):
        trn_corr = 0
        val_corr = 0
        net.train()
       # print("========================Training=================================")
        # Run the training batches
        data = 0
        losses = 0
        for b, (X_train, y_train, idx) in enumerate(train_loader):
            b+=1
            data += X_train.shape[0]
            X_train, y_train = X_train.to(device), y_train.to(device)
            #print(X_train.device, y_train.device)
            # Apply the model
            with torch.cuda.amp.autocast():
                y_pred = net(X_train)  # we don't flatten X-train here
                loss = criterion(y_pred, y_train)
     
            # the number of correct predictions
            predicted = torch.max(y_pred.data, 1)[1]
            batch_corr = (predicted == y_train).sum()
            trn_corr += batch_corr.item()
            losses += loss.item()
            # Update parameters
            optimizer.zero_grad()
            scaler.scale(loss).backward()
            scaler.step(optimizer)
            scaler.update()
            #optimizer.zero_grad()
            #loss.backward()
            #optimizer.step()
            
            
        
        train_stats.update_loss(train_loss=losses/b)
        train_stats.update_accuracy(train_acc=trn_corr/data)
        net.eval()
      #  print("========================Validation=================================")
        # Run the testing batches
        data = 0
        losses = 0
        with torch.no_grad():
            for b, (X_val, y_val, idx) in enumerate(val_loader):
                b+=1
                data += X_val.shape[0]
                X_val, y_val = X_val.to(device), y_val.to(device)
                # Apply the model
                y_pred = net(X_val)
                
                loss = criterion(y_pred, y_val)
                # the number of correct predictions
                predicted = torch.max(y_pred.data, 1)[1] 
                val_corr += (predicted == y_val).sum().item()
                losses += loss.item()


        train_stats.update_loss(val_loss=losses/b)
        train_stats.update_accuracy(val_acc=val_corr/data)
        if scheduler:
            scheduler.step()
        if (i + 1) % (epochs // 10) == 0:
            trainloss = train_stats.train_losses[-1]
            valloss = train_stats.val_losses[-1]
            trainacc = train_stats.train_correct[-1]
            valacc = train_stats.val_correct[-1]
            print('[%d/%d] TrainLoss: %.3f, ValLoss: %.3f | TrainAcc: %.2f, ValAcc: %.2f' % (i + 1, epochs, trainloss, valloss, trainacc, valacc))
            
            
    print(f'\nDuration: {time.time() - start_time:.0f} seconds') # print the time elapsed   

In [189]:
# -*- coding: utf-8 -*-
"""
Created on Thu Apr 22 14:09:38 2021

@author: nuvilabs
"""
import torch.nn as nn
from torch.utils.data import DataLoader
from torchvision import datasets, transforms
from torch.utils.data.sampler import SubsetRandomSampler
import random
import os
from glob import glob
os.environ["CUDA_DEVICE_ORDER"]="PCI_BUS_ID"
os.environ["CUDA_VISIBLE_DEVICES"]="0,1"
SEED = 42
os.environ['PYTHONHASHSEED'] = str(SEED)
random.seed(SEED)
np.random.seed(SEED)
torch.manual_seed(SEED)


train_transform = transforms.Compose([
        transforms.RandomRotation(20),      # rotate +/- 10 degrees
        transforms.RandomHorizontalFlip(),  # reverse 50% of images
        transforms.Resize(112),             # resize shortest side to 224 pixels
        transforms.CenterCrop(112),         # crop longest side to 224 pixels at center
        transforms.ToTensor(),
        transforms.Normalize(0.5, 0.5),
    ])
test_transform = transforms.Compose([
        transforms.Resize(112),             # resize shortest side to 224 pixels
        transforms.CenterCrop(112),         # crop longest side to 224 pixels at center
        transforms.ToTensor(),
        transforms.Normalize(0.5, 0.5),
    ])
device = 'cuda' if torch.cuda.is_available() else 'cpu'




In [190]:
males = glob("Male_Character_Face/*.jpg")
females = glob("animefaces256cleaner_female/*.jpg")

In [191]:
len(males)

4656

In [192]:
len(females)

27377

In [193]:
females = females[:len(males)]
assert len(females) == len(males)

In [194]:
dataset = [(img, 1) for img in males]
for img in females:
    dataset.append((img, 0 ))
random.shuffle(dataset)

In [195]:
dataset[:5]

[('Male_Character_Face/4851.jpg', 1),
 ('Male_Character_Face/585.jpg', 1),
 ('Male_Character_Face/15385.jpg', 1),
 ('animefaces256cleaner_female/88033140_result.jpg', 0),
 ('Male_Character_Face/8805.jpg', 1)]

In [196]:
from torch.utils.data import Dataset
from PIL import Image
class AnimeDataset(Dataset):
    def __init__(self, dataset, indices, transform=None):
        self.data = dataset
        self.indices = indices
        self.transform = transform

    def __len__(self):
        return len(self.indices)

    def __getitem__(self, idx):
        img_path, target = self.data[idx] 
        image = Image.open(img_path)

        if self.transform:
            image = self.transform(image)
        return image, target, idx

In [197]:
## validation 
# Use SubsetRandomSamplers to split train and validation set
indices = list(range(len(dataset)))
np.random.shuffle(indices)
split = int(np.floor(0.9 * len(dataset)))
tr_idx, val_idx = indices[:split], indices[split:]
tr_sampler = SubsetRandomSampler(tr_idx)
val_sampler = SubsetRandomSampler(val_idx)
###

In [198]:
dataset_train = AnimeDataset(dataset, tr_idx, train_transform)
dataset_val = AnimeDataset(dataset, val_idx, test_transform)

In [199]:
train_loader = DataLoader(dataset_train, batch_size=256, shuffle=False, sampler=tr_sampler)
val_loader = DataLoader(dataset_val, batch_size=256, shuffle=False, sampler=val_sampler)

In [200]:
males = 0
females = 0
for b, (X_test, y_test, _) in enumerate(val_loader):
    males += (y_test == 1).sum()
    females += (y_test == 0).sum()
print(males)
print(females)

tensor(436)
tensor(496)


In [201]:
device

'cuda'

In [202]:
import torchvision.models as models
net = models.resnet18(pretrained=True)

In [203]:
net.fc = nn.Linear(in_features=512, out_features=2)

In [204]:
net

ResNet(
  (conv1): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
  (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (relu): ReLU(inplace=True)
  (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
  (layer1): Sequential(
    (0): BasicBlock(
      (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
      (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    )
    (1): BasicBlock(
      (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
  

In [205]:

epochs = 50
lr = 0.1
net = torch.nn.DataParallel(net, device_ids=[0, 1])
net.to(device)

criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.SGD(net.parameters(), lr=lr,
                      momentum=0.9, weight_decay=5e-4)

scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(optimizer, T_max=epochs)

criterion.to(device)

tracker = train_stats(epochs)

trainer(optimizer, net, criterion, tracker, train_loader, val_loader, device, scheduler)




 10%|██████▌                                                           | 5/50 [02:13<20:05, 26.79s/it]

[5/50] TrainLoss: 0.258, ValLoss: 0.229 | TrainAcc: 0.91, ValAcc: 0.92


 20%|█████████████                                                    | 10/50 [04:28<17:58, 26.97s/it]

[10/50] TrainLoss: 0.098, ValLoss: 0.086 | TrainAcc: 0.97, ValAcc: 0.97


 20%|█████████████                                                    | 10/50 [04:36<18:24, 27.60s/it]


KeyboardInterrupt: 

In [206]:
net.eval()
print("========================Testing=================================")
# Run the testing batches
data = 0
losses = 0
test_corr = 0
with torch.no_grad():
    for b, (X_test, y_test, idx) in enumerate(val_loader):
        b+=1
        data += X_test.shape[0]
        X_test, y_test = X_test.to(device), y_test.to(device)
        # Apply the model
        y_pred = net(X_test)
        
        # the number of correct predictions
        predicted = torch.max(y_pred.data, 1)[1] 
        test_corr += (predicted == y_test).sum().item()
print('TestAcc: %.2f' % (test_corr / data))

TestAcc: 0.97


In [207]:
torch.save(net.module, 'pseudo.pt')

In [208]:
male_noise = [(img, -1) for img in glob("Male_Character_Face_Noise/*.jpg")]

In [209]:
male_noise.__len__()

25456

In [210]:
male_noise_dataset = AnimeDataset(male_noise, range(len(male_noise)), test_transform)
n_loader = DataLoader(male_noise_dataset, batch_size=256, shuffle=False)

In [213]:
net.eval()
print("========================Testing=================================")
# Run the testing batches
data = 0
losses = 0
test_corr = 0
indices = []
with torch.no_grad():
    for X_test, y_test, idx in n_loader:
        data += X_test.shape[0]
        X_test, y_test = X_test.to(device), y_test.to(device)
        # Apply the model
        y_pred = net(X_test)
        y_pred = torch.softmax(y_pred, dim=-1)
        # the number of correct predictions
        score, predicted = torch.max(y_pred.data, 1) 
        
        store = idx[(predicted == 1) & (score > 0.99)]
        indices.extend(store.detach().cpu().numpy().tolist())



In [None]:
import shutil
for idx in indices:
    img, _ = male_noise[idx]
    shutil.move(img, img.replace('Male_Character_Face_Noise','Male_Character_pseudo')) 
    
    