In [1]:
import warnings
import glob
from torch.utils.data import Dataset, DataLoader,Subset
from torchvision import transforms
from sklearn import metrics
import pandas as pd
import numpy as np
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
import torch.optim as optim
import math
import random
import albumentations as A
from albumentations.pytorch import ToTensorV2
from sklearn.model_selection import StratifiedKFold



# Intended size of a combined positive + negative sample pool. No active code
# visible in this file reads it (negative sampling is disabled); it is kept so
# that anything else relying on the name continues to work.
total = 40000


def extract_samples(target, all_files):
    """Load the samples stored in the ``target``-th file and shuffle them.

    Only "positive" samples (those belonging to ``target``) are returned;
    no negative samples are drawn from the other files.

    Args:
        target: Zero-based position in ``all_files`` of the file to load.
        all_files: Iterable of paths that ``np.load`` can read.

    Returns:
        The array loaded from the selected file, with its first axis
        permuted using NumPy's global random state.

    Raises:
        IndexError: If ``target`` does not address an entry of ``all_files``.
    """
    print('**********************', target)

    files = list(all_files)
    # Fail with an explicit message instead of an AttributeError on ``None``
    # when the requested index has no corresponding file.
    if not 0 <= target < len(files):
        raise IndexError(
            f"target index {target} is out of range for {len(files)} file(s)"
        )

    positive_dataset = np.load(files[target])

    # Shuffle a list of indices and apply it once via fancy indexing; the
    # result is a shuffled copy and the loaded array itself is left untouched.
    positive_indices = list(range(len(positive_dataset)))
    np.random.shuffle(positive_indices)
    positive_data = positive_dataset[positive_indices]

    print("Finaly positive keystroke images Data shape is",positive_data.shape)

    return positive_data


class KeystrokesDataset(Dataset):
    """Map-style dataset that pairs sample arrays with their labels.

    Each item is produced by moving the sample's first axis to the end,
    passing it to ``transform`` as the keyword argument ``image`` and taking
    the ``'image'`` entry of the returned mapping (the calling convention
    used by albumentations pipelines).

    Args:
        samples: Indexable collection of 3-D NumPy arrays; presumably
            channel-first images (C, H, W) -- TODO confirm against the data.
        labels: Indexable collection of NumPy arrays, one per sample.
        transform: Callable invoked as ``transform(image=array)`` that
            returns a mapping containing the key ``'image'``.
    """

    def __init__(self, samples, labels, transform):
        self.samples, self.labels, self.transform = samples, labels, transform

    def __len__(self):
        return len(self.samples)

    def __getitem__(self, idx):
        # Samplers may hand over an index tensor; turn it into a Python value.
        index = idx.tolist() if torch.is_tensor(idx) else idx

        # Axis order (0, 1, 2) -> (1, 2, 0) before the transform is applied.
        channels_last = self.samples[index].transpose((1, 2, 0))
        transformed = self.transform(image=channels_last)

        target = torch.from_numpy(self.labels[index]).float()
        return transformed['image'], target

    
def _per_channel_stats(images):
    """Return per-channel ``(mean, std)`` lists for an ``(N, C, ...)`` array.

    For every sample the mean and standard deviation are taken over all
    values of a channel; those per-sample statistics are then averaged over
    the N samples.
    """
    flat = images.reshape(len(images), images.shape[1], -1)
    mean = flat.mean(axis=2).mean(axis=0)
    std = flat.std(axis=2).mean(axis=0)
    return mean.tolist(), std.tolist()


def encapsulate(positive_data):
    """Split the samples into 5 shuffled folds and wrap each in DataLoaders.

    Every sample is labelled ``1`` (only positive samples are used). For each
    fold the normalisation statistics are computed from that fold's training
    split only and applied to both the training and the test split.

    Args:
        positive_data: NumPy array indexed by sample along axis 0 with the
            channels on axis 1; presumably (N, C, H, W) -- TODO confirm.

    Returns:
        ``(train_dataloaders, test_dataloaders)``: two lists holding one
        ``DataLoader`` per fold, in fold order.
    """
    train_dataloaders = []
    test_dataloaders = []

    # try and use Cross Fold Val
    skf = StratifiedKFold(n_splits=5, random_state=None, shuffle=True)

    total_dataset = positive_data
    total_labels = np.ones((len(positive_data), 1))

    for train_index, test_index in skf.split(total_dataset, total_labels):
        train_dataset_numpy = total_dataset[train_index]
        test_dataset_numpy = total_dataset[test_index]
        train_labels_numpy = total_labels[train_index]
        test_labels_numpy = total_labels[test_index]

        # Statistics come from the training split only, so nothing about the
        # held-out samples leaks into the normalisation.
        mean, std = _per_channel_stats(train_dataset_numpy)

        train_transform = A.Compose([
            A.Normalize(mean=mean, std=std, max_pixel_value=1.0, p=1.0),
            A.CoarseDropout(p=0.5, max_height=3, max_width=3, fill_value=0),
            ToTensorV2(p=1.0),
        ], p=1.)

        # Same normalisation as for training, but without the augmentation.
        test_transform = A.Compose([
            A.Normalize(mean=mean, std=std, max_pixel_value=1.0, p=1.0),
            ToTensorV2(p=1.0),
        ], p=1.)

        train_dataset = KeystrokesDataset(train_dataset_numpy, train_labels_numpy, train_transform)
        test_dataset = KeystrokesDataset(test_dataset_numpy, test_labels_numpy, test_transform)

        # The loaders are created inside the loop so that every fold
        # contributes one entry to each returned list.
        train_dataloaders.append(DataLoader(train_dataset, batch_size=64, shuffle=True))
        test_dataloaders.append(DataLoader(test_dataset, batch_size=64, shuffle=True))

    return train_dataloaders, test_dataloaders
    
    


In [5]:
import torch
import torch.nn as nn
import torch.optim as optim
import torchvision
import torchvision.datasets as datasets
from torch.utils.data import DataLoader
import torchvision.transforms as transforms
from torch.utils.tensorboard import SummaryWriter  # to print to tensorboard


class Discriminator(nn.Module):
    """Label-conditioned convolutional critic.

    The class label is embedded into an ``img_size * img_size`` vector,
    reshaped into one extra image plane and concatenated to the input
    channels before the convolution stack is applied.

    Args:
        channels_img: Number of channels of the input images.
        in_features: Output channels of the first convolution; the following
            blocks use 2x, 4x and 8x this width.
        num_classes: Number of rows in the label embedding table.
        img_size: Side length used to reshape the label embedding into a
            square plane; it must match the spatial size of the inputs for
            the concatenation to succeed.
    """

    def __init__(self, channels_img, in_features, num_classes, img_size):
        super().__init__()
        self.img_size = img_size

        # Layers are instantiated in the order they run, so parameter
        # initialisation consumes the random stream in that same order.
        layers = [
            # +1 input channel for the label plane added in forward().
            nn.Conv2d(channels_img + 1, in_features, kernel_size=4, stride=2, padding=1),
            nn.LeakyReLU(0.2),
        ]
        for mult in (1, 2, 4):
            layers.append(self._block(in_features * mult, in_features * mult * 2, 4, 2, 1))
        # Final projection to a single, unbounded score channel.
        layers.append(nn.Conv2d(in_features * 8, 1, kernel_size=4, stride=2, padding=0))

        self.disc = nn.Sequential(*layers)
        self.embed = nn.Embedding(num_classes, img_size * img_size)

    def _block(self, in_channels, out_channels, kernel_size, stride, padding):
        """Return a bias-free conv -> affine InstanceNorm -> LeakyReLU(0.2) stage."""
        conv = nn.Conv2d(in_channels, out_channels, kernel_size, stride, padding, bias=False)
        norm = nn.InstanceNorm2d(out_channels, affine=True)
        return nn.Sequential(conv, norm, nn.LeakyReLU(.2))

    def forward(self, x, labels):
        """Score ``x`` conditioned on ``labels`` (one label per batch item)."""
        batch = labels.shape[0]
        label_plane = self.embed(labels.long()).view(batch, 1, self.img_size, self.img_size)
        conditioned = torch.cat([x, label_plane], dim=1)
        return self.disc(conditioned)


class Generator(nn.Module):
    """Label-conditioned transposed-convolution generator.

    The class label is embedded into an ``embed_size`` vector and appended
    to the noise vector along the channel axis. Five transposed convolutions
    then upsample the 1x1 input (1 -> 4 -> 8 -> 16 -> 32 -> 64) and a
    ``Tanh`` bounds the output to [-1, 1].

    Args:
        z_dim: Number of channels of the noise input.
        channels_img: Number of channels of the generated images.
        features_g: Base width; the stages use 16x, 8x, 4x and 2x this value.
        num_classes: Number of rows in the label embedding table.
        img_size: Stored on the instance; not otherwise used by this class.
        embed_size: Length of the label embedding vector.
    """

    def __init__(self, z_dim, channels_img, features_g, num_classes, img_size, embed_size):
        super().__init__()
        self.img_size = img_size

        # Stages are instantiated in execution order so that parameter
        # initialisation consumes the random stream in that same order.
        stages = [self._block(z_dim + embed_size, features_g * 16, 4, 1, 0)]
        for mult in (16, 8, 4):
            stages.append(self._block(features_g * mult, features_g * (mult // 2), 4, 2, 1))
        stages += [
            nn.ConvTranspose2d(features_g * 2, channels_img, kernel_size=4, stride=2, padding=1),
            nn.Tanh(),
        ]

        self.net = nn.Sequential(*stages)
        self.embed = nn.Embedding(num_classes, embed_size)

    def _block(self, in_channels, out_channels, kernel_size, stride, padding):
        """Return a bias-free transposed conv -> BatchNorm -> ReLU stage."""
        deconv = nn.ConvTranspose2d(in_channels, out_channels, kernel_size, stride, padding, bias=False)
        return nn.Sequential(deconv, nn.BatchNorm2d(out_channels), nn.ReLU())

    def forward(self, x, labels):
        """Generate images from noise ``x`` (N, z_dim, 1, 1) and ``labels`` (N,)."""
        # (N, embed_size) -> (N, embed_size, 1, 1) so it can be stacked onto the noise.
        label_vec = self.embed(labels.long())[:, :, None, None]
        return self.net(torch.cat([x, label_vec], dim=1))

def gradient_penalty(critic, labels, real, fake, device="cpu"):
    """Compute the WGAN-GP gradient penalty on random real/fake mixtures.

    Args:
        critic: Callable invoked as ``critic(images, labels)`` returning scores.
        labels: Labels forwarded unchanged to ``critic``.
        real: 4-D batch of real images (N, C, H, W).
        fake: Batch of generated images with the same shape as ``real``.
        device: Device the random mixing weights are moved to.

    Returns:
        Scalar tensor: the batch mean of ``(||grad||_2 - 1) ** 2``, where the
        gradient is that of the critic scores with respect to the mixed
        images. The result stays attached to the autograd graph.
    """
    batch_size, channels, height, width = real.shape

    # One mixing weight per sample, replicated over every channel and pixel.
    eps = torch.rand((batch_size, 1, 1, 1)).repeat(1, channels, height, width).to(device)
    mixed = real * eps + fake * (1 - eps)

    scores = critic(mixed, labels)

    # create_graph=True keeps the gradient itself differentiable so that the
    # penalty can be back-propagated into the critic's parameters.
    (grads,) = torch.autograd.grad(
        outputs=scores,
        inputs=mixed,
        grad_outputs=torch.ones_like(scores),
        create_graph=True,
        retain_graph=True,
    )

    per_sample_norm = grads.view(grads.shape[0], -1).norm(2, dim=1)
    return torch.mean((per_sample_norm - 1) ** 2)

def initialize_weights(model):
    """Re-draw selected weights of ``model`` in place from N(0, 0.02).

    Only the ``weight`` of ``Conv2d``, ``ConvTranspose2d`` and ``BatchNorm2d``
    sub-modules is touched (the scheme described in the DCGAN paper); biases
    and every other module type keep their current values.
    """
    targets = (nn.Conv2d, nn.ConvTranspose2d, nn.BatchNorm2d)
    for module in filter(lambda m: isinstance(m, targets), model.modules()):
        nn.init.normal_(module.weight.data, mean=0.0, std=0.02)

# ---------------------------------------------------------------------------
# Hyperparameters and global configuration
# ---------------------------------------------------------------------------

# Run on the GPU when one is available, otherwise fall back to the CPU.
device = "cpu"
if torch.cuda.is_available():
    device = "cuda"

# Optimisation
LEARNING_RATE = 1e-4       # learning rate of both Adam optimisers in train()
BATCH_SIZE = 64            # not read by the code visible here (loaders hard-code 64)
NUM_EPOCHS = 50            # passes over the training loader in train()
CRITIC_ITERATIONS = 5      # critic updates per generator update
LAMBDA_GP = 10             # weight of the gradient penalty in the critic loss

# Data / model geometry
IMAGE_SIZE = 64            # spatial size handed to Generator and Discriminator
CHANNELS_IMG = 5           # image channels handed to Generator and Discriminator
NUM_CLASSES = 2            # rows of the label embedding tables
Z_DIM = 100                # channels of the generator's noise input
GEN_EMBEDDING = 100        # label embedding length inside the generator
FEATURES_CRITIC = 16       # base width of the critic
FEATURES_GEN = 16          # base width of the generator

# NOTE(review): this rebinds the name ``transforms`` from the torchvision
# module to a Compose instance, so later code in the same session can no
# longer reach ``transforms.<Something>`` without re-importing the module.
# The resulting pipeline is not used by any code visible in this file.
transforms = transforms.Compose([
    transforms.Resize(IMAGE_SIZE),
    transforms.ToTensor(),
    # Per-channel mean 0.5 and std 0.5.
    transforms.Normalize([0.5] * CHANNELS_IMG, [0.5] * CHANNELS_IMG),
])

# dataset = datasets.MNIST(root="dataset/", transform=transforms, download=True)
# # comment mnist above and uncomment below for training on CelebA
# #dataset = datasets.ImageFolder(root="celeb_dataset", transform=transforms)
# loader = DataLoader(
#     dataset,
#     batch_size=BATCH_SIZE,
#     shuffle=True,
# )



def train(loader, MODEL_NUM, NUM):
    """Train a conditional WGAN-GP and save one batch of generated samples.

    A fresh generator and critic are created, trained for ``NUM_EPOCHS``
    epochs on ``loader[0]`` and discarded afterwards. On the first batch of
    the last epoch one batch of generated images is cropped to rows and
    columns 11..52 and written to
    ``/home/jupyter/src/Thesis_Project/CGAN_data/<NUM>/gen_data_<MODEL_NUM>.npy``.

    Args:
        loader: Indexable collection of DataLoaders; only ``loader[0]`` is
            used. Each batch must unpack to ``(images, labels)``.
        MODEL_NUM: String inserted into the output file name.
        NUM: String naming the output sub-directory.

    Returns:
        None. The function writes the ``.npy`` file and prints the shape of
        the saved array followed by ``HERE``.
    """
    import os  # local import: needed only to prepare the output directory

    # Rows/columns kept from each generated image before saving.
    crop = slice(11, 53)

    output_path = (
        r"/home/jupyter/src/Thesis_Project/CGAN_data/" + NUM + "/gen_data_" + MODEL_NUM + ".npy"
    )
    # Create the target directory before training so that a missing or
    # unwritable location is reported immediately, not after the last epoch.
    os.makedirs(os.path.dirname(output_path), exist_ok=True)

    # initialize gen and disc, note: discriminator should be called critic,
    # according to WGAN paper (since it no longer outputs between [0, 1])
    gen = Generator(Z_DIM, CHANNELS_IMG, FEATURES_GEN, NUM_CLASSES, IMAGE_SIZE, GEN_EMBEDDING).to(device)
    critic = Discriminator(CHANNELS_IMG, FEATURES_CRITIC, NUM_CLASSES, IMAGE_SIZE).to(device)
    initialize_weights(gen)
    initialize_weights(critic)

    opt_gen = optim.Adam(gen.parameters(), lr=LEARNING_RATE, betas=(0.0, 0.9))
    opt_critic = optim.Adam(critic.parameters(), lr=LEARNING_RATE, betas=(0.0, 0.9))

    gen.train()
    critic.train()

    saved = False

    for epoch in range(NUM_EPOCHS):
        for real, labels in loader[0]:
            real = real.to(device)
            cur_batch_size = real.shape[0]
            # Labels arrive as (N, 1); the embeddings expect a flat (N,) vector.
            labels = labels.view(labels.shape[0]).to(device)

            # Train Critic: max E[critic(real)] - E[critic(fake)]
            # equivalent to minimizing the negative of that
            for _ in range(CRITIC_ITERATIONS):
                noise = torch.randn((cur_batch_size, Z_DIM, 1, 1), device=device)
                fake = gen(noise, labels)
                critic_real = critic(real, labels).reshape(-1)
                critic_fake = critic(fake, labels).reshape(-1)
                gp = gradient_penalty(critic, labels, real, fake, device=device)
                loss_critic = (
                    -(torch.mean(critic_real) - torch.mean(critic_fake)) + LAMBDA_GP * gp
                )
                critic.zero_grad()
                # The generator step below reuses ``fake``, so the graph that
                # produced it must survive this backward pass.
                loss_critic.backward(retain_graph=True)
                opt_critic.step()

            # Train Generator: max E[critic(gen_fake)] <-> min -E[critic(gen_fake)]
            gen_fake = critic(fake, labels).reshape(-1)
            loss_gen = -torch.mean(gen_fake)
            gen.zero_grad()
            loss_gen.backward()
            opt_gen.step()

            # Save exactly once: on the first batch of the final epoch.
            if epoch + 1 == NUM_EPOCHS and not saved:
                # NOTE(review): the generator is still in train mode here, so
                # BatchNorm normalises with the statistics of this batch.
                with torch.no_grad():
                    generated = gen(noise, labels).cpu().detach().numpy()
                # (N, C, H, W) -> (N, C, 42, 42)
                samples = generated[:, :, crop, crop]
                np.save(output_path, samples)
                print(samples.shape, "HERE")
                saved = True
        
        

# Train one generator per data file for each padded-data variant.
for num in ('50', '75', '100'):
    path = r"/home/jupyter/src/Thesis_Project/Data_padded/" + num
    # Sorted so that index ``i`` refers to the same file on every run.
    all_files = sorted(glob.glob(path + "/*"))
    # NOTE(review): assumes every variant directory holds at least 75 files --
    # TODO confirm.
    for i in range(75):
        data = extract_samples(i, all_files)
        dataloader, dataloader2 = encapsulate(data)
        train(dataloader, str(i), num)
        
# torch.Size([64, 1, 64, 64])
# torch.Size([64])    
# torch.Size([64, 1, 64, 64])
# GEN:  torch.Size([64, 100, 1, 1]) torch.Size([64, 100, 1, 1])
# DISC:  torch.Size([64, 1, 64, 64]) torch.Size([64, 1, 64, 64])
# DISC:  torch.Size([64, 1, 64, 64]) torch.Size([64, 1, 64, 64])
# DISC:  torch.Size([64, 1, 64, 64]) torch.Size([64, 1, 64, 64])
# GEN:  torch.Size([64, 100, 1, 1]) torch.Size([64, 100, 1, 1])

********************** 0
Finaly positive keystroke images Data shape is (137, 5, 64, 64)


2023-04-12 21:38:37.651232: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  AVX2 FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.
2023-04-12 21:38:55.250877: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /opt/conda/lib/python3.7/site-packages/cv2/../../lib64:/usr/local/cuda/lib64:/usr/local/nccl2/lib:/usr/local/cuda/extras/CUPTI/lib64
2023-04-12 21:38:55.252241: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PAT

(64, 5, 42, 42) HERE
********************** 1
Finaly positive keystroke images Data shape is (104, 5, 64, 64)
(64, 5, 42, 42) HERE
********************** 2
Finaly positive keystroke images Data shape is (119, 5, 64, 64)
(64, 5, 42, 42) HERE
********************** 3
Finaly positive keystroke images Data shape is (140, 5, 64, 64)
(64, 5, 42, 42) HERE
********************** 4
Finaly positive keystroke images Data shape is (90, 5, 64, 64)
(64, 5, 42, 42) HERE
********************** 5
Finaly positive keystroke images Data shape is (112, 5, 64, 64)
(64, 5, 42, 42) HERE
********************** 6
Finaly positive keystroke images Data shape is (131, 5, 64, 64)
(64, 5, 42, 42) HERE
********************** 7
Finaly positive keystroke images Data shape is (110, 5, 64, 64)
(64, 5, 42, 42) HERE
********************** 8
Finaly positive keystroke images Data shape is (157, 5, 64, 64)
(64, 5, 42, 42) HERE
********************** 9
Finaly positive keystroke images Data shape is (109, 5, 64, 64)
(64, 5, 42,