In [19]:
import torch 
import torch.nn as nn


class Discriminator(nn.Module):
    """DCGAN-style discriminator: image batch in, one sigmoid score per image out.

    Layer stack (stored in ``self.disc``):
      * strided 4x4 convolution + LeakyReLU(0.2),
      * three conv/BatchNorm/LeakyReLU stages, each doubling the channel count,
      * a final 4x4 convolution down to one channel, followed by a sigmoid.
    """

    def __init__(self,channels_img, features_d):
        """
        Args:
            channels_img: number of channels of the input images.
            features_d: output channels of the first convolution; the later
                stages use 2x, 4x and 8x this width.
        """
        super().__init__()

        stem = [
            nn.Conv2d(channels_img, features_d, kernel_size=4, stride=2, padding=1),
            nn.LeakyReLU(0.2),
        ]
        # Stages are created in order so parameter initialisation consumes the
        # random stream exactly as a hand-written layer list would.
        stages = [
            self._block(features_d * width, features_d * width * 2, 4, 2, 1)
            for width in (1, 2, 4)
        ]
        head = [
            nn.Conv2d(features_d * 8, 1, kernel_size=4, stride=2, padding=0),
            nn.Sigmoid(),
        ]
        self.disc = nn.Sequential(*stem, *stages, *head)

    def _block(self, in_channels, out_channels, kernel_size, stride, padding):
        """Return one bias-free Conv2d -> BatchNorm2d -> LeakyReLU(0.2) stage."""
        conv = nn.Conv2d(
            in_channels, out_channels, kernel_size, stride, padding, bias=False
        )
        norm = nn.BatchNorm2d(out_channels)
        activation = nn.LeakyReLU(0.2)
        return nn.Sequential(conv, norm, activation)

    def forward(self,x):
        """Run ``x`` through the full stack and return the sigmoid scores."""
        scores = self.disc(x)
        return scores

In [20]:
class Generator(nn.Module):
    """DCGAN-style generator: latent tensor in, tanh-squashed image batch out.

    Layer stack (stored in ``self.gen``):
      * four ConvTranspose2d/BatchNorm/ReLU stages narrowing the channel count
        from ``features_g*16`` down to ``features_g*2``,
      * a final transposed convolution to ``channels_img`` channels + Tanh.
    """

    def __init__(self, z_dim, channels_img, features_g):
        """
        Args:
            z_dim: number of channels of the latent input.
            channels_img: number of channels of the generated images.
            features_g: base channel width; stages use 16x, 8x, 4x and 2x of it.
        """
        super().__init__()

        widths = (16, 8, 4, 2)
        # First stage uses stride 1 / padding 0; the rest use stride 2 / padding 1.
        stages = [self._block(z_dim, features_g * widths[0], 4, 1, 0)]
        for wide, narrow in zip(widths, widths[1:]):
            stages.append(self._block(features_g * wide, features_g * narrow, 4, 2, 1))

        to_image = nn.ConvTranspose2d(
            features_g * widths[-1], channels_img, kernel_size=4, stride=2, padding=1
        )
        self.gen = nn.Sequential(*stages, to_image, nn.Tanh())

    def _block(self, in_channels, out_channels, kernel_size, stride, padding):
        """Return one bias-free ConvTranspose2d -> BatchNorm2d -> ReLU stage."""
        deconv = nn.ConvTranspose2d(
            in_channels, out_channels, kernel_size, stride, padding, bias=False
        )
        norm = nn.BatchNorm2d(out_channels)
        activation = nn.ReLU()
        return nn.Sequential(deconv, norm, activation)

    def forward(self,x):
        """Run the latent batch ``x`` through the stack and return the images."""
        images = self.gen(x)
        return images

In [21]:
def initialize_weights(model):
    """Apply DCGAN-style initialisation to every conv and batch-norm layer.

    Convolution / transposed-convolution kernels are drawn from N(0, 0.02).
    BatchNorm scale parameters are drawn from N(1, 0.02) and their biases are
    zeroed: centring the scale on 0 would make each BatchNorm layer start out
    by squashing its activations to (almost) nothing.

    Args:
        model: an ``nn.Module``; all of its sub-modules are visited in place.
    """
    for m in model.modules():
        if isinstance(m, (nn.Conv2d, nn.ConvTranspose2d)):
            nn.init.normal_(m.weight.data, 0.0, .02)
        elif isinstance(m, nn.BatchNorm2d):
            # affine=False layers carry no learnable scale/bias to initialise.
            if m.weight is not None:
                nn.init.normal_(m.weight.data, 1.0, .02)
            if m.bias is not None:
                nn.init.constant_(m.bias.data, 0.0)
            
def test():
    """Shape smoke test for the two networks.

    Checks that the discriminator maps an (8, 3, 64, 64) batch to (8, 1, 1, 1)
    and that the generator maps an (8, 100, 1, 1) latent batch back to
    (8, 3, 64, 64). Prints "success" when both assertions hold.
    """
    batch, channels, height, width = 8, 3, 64, 64
    latent_dim = 100
    image_shape = (batch, channels, height, width)

    # Discriminator: image batch -> one score per image.
    images = torch.randn(image_shape)
    critic = Discriminator(channels, 8)
    initialize_weights(critic)
    scores = critic(images)
    assert scores.shape == (batch, 1, 1, 1)

    # Generator: latent batch -> image batch of the original shape.
    generator = Generator(latent_dim, channels, 8)
    initialize_weights(generator)
    latent = torch.randn((batch, latent_dim, 1, 1))
    generated = generator(latent)
    assert generated.shape == image_shape

    print("success")
    
    
# Run the shape smoke test as soon as this cell executes; it prints "success"
# when both network output shapes match.
test()
    

success


In [22]:
from torch.utils.data import Dataset, DataLoader
from torchvision import transforms
from sklearn import metrics
import pandas as pd
import numpy as np
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
import torch.optim as optim
import math
import random
import albumentations as A
from albumentations.pytorch import ToTensorV2
from sklearn.model_selection import StratifiedKFold

In [23]:
import glob

# Directory holding one array file per subject.
path = r"/home/jupyter/src/Thesis_Project/Data_padded/100"
# glob returns matches in arbitrary, filesystem-dependent order. Sorting makes
# the subject index used further down (position in `all_files`) reproducible
# across runs and machines.
all_files = sorted(glob.glob(path + "/*"))
print("Total number of subjects:", len(all_files))

Total number of subjects: 75


In [24]:
# Nominal dataset size used to scale how many negatives are taken per subject.
total = 40000
def extract_samples(target):
    """Build the positive / negative sample arrays for one subject.

    The file at position ``target`` in the module-level ``all_files`` list
    provides the positive samples (returned in shuffled order). Every other
    file contributes its first ``k`` samples as negatives, where ``k`` is
    ``len(file) / (total - n_positive) * n_positive`` rounded down or up
    according to a coin flip.

    Args:
        target: index into ``all_files`` of the subject treated as positive.

    Returns:
        ``(positive_data, negative_data)`` as NumPy arrays. ``negative_data``
        has zero rows when no negatives were selected.

    Raises:
        IndexError: if ``target`` is not a valid index into ``all_files``.
    """
    print('**********************', target)
    if not 0 <= target < len(all_files):
        raise IndexError(
            f"target {target!r} is out of range for {len(all_files)} subject files"
        )

    # Load the target subject directly instead of scanning every file for it.
    positive_dataset = np.load(all_files[target])
    positive_indices = list(range(len(positive_dataset)))
    np.random.shuffle(positive_indices)
    positive_data = positive_dataset[positive_indices]

    positive_length = len(positive_data)
    negative_length = total - positive_length

    # Collect the per-subject slices and concatenate once at the end rather
    # than re-copying the growing array on every iteration.
    negative_chunks = []
    for i, filename in enumerate(all_files):
        if i == target:
            continue
        negative_dataset = np.load(filename)
        nega_len = len(negative_dataset)
        quota = nega_len/negative_length * positive_length
        # Coin flip between floor and ceil of the fractional quota.
        if random.randint(0, 1) == 0:
            sample_len = math.floor(quota)
        else:
            sample_len = math.ceil(quota)
        # Negatives are taken from the front of each file (not shuffled).
        negative_chunks.append(negative_dataset[:sample_len])

    if negative_chunks:
        negative_data = np.concatenate(negative_chunks, axis=0)
    else:
        # Only one subject file exists: return an empty array with the same
        # trailing shape as the positives instead of None.
        negative_data = positive_data[:0]

    print("Finaly positive keystroke images Data shape is",positive_data.shape)
    print("Finaly negative keystroke images Data shape is", negative_data.shape)

    return positive_data, negative_data

In [25]:
class KeystrokesDataset(Dataset):
    """Map-style dataset pairing sample arrays with float label tensors.

    Each sample is moved from channel-first to channel-last layout and then
    handed to ``transform`` using the keyword-call convention
    ``transform(image=...)['image']``.
    """

    def __init__(self, samples, labels, transform):
        """
        Args:
            samples: indexable collection of arrays, one per item.
            labels: indexable collection of NumPy label arrays, aligned with
                ``samples``.
            transform: callable invoked as ``transform(image=x)`` that returns
                a mapping containing the key ``'image'``.
        """
        self.samples, self.labels, self.transform = samples, labels, transform

    def __len__(self):
        """Number of samples in the dataset."""
        return len(self.samples)

    def __getitem__(self, idx):
        """Return ``(transformed_sample, label_tensor)`` for position ``idx``."""
        # DataLoader samplers may hand over tensor indices; use plain Python ones.
        index = idx.tolist() if torch.is_tensor(idx) else idx

        channel_last = self.samples[index].transpose((1, 2, 0))
        transformed = self.transform(image=channel_last)['image']
        target = torch.from_numpy(self.labels[index]).float()
        return transformed, target


In [26]:
def encapsulate(positive_data, negative_data):
    """Wrap positive/negative samples into per-fold train and test DataLoaders.

    The two arrays are stacked (positives labelled 1, negatives labelled 0) and
    split with a shuffled 5-fold ``StratifiedKFold``. For every fold the
    per-channel mean/std are computed from that fold's training split only and
    used to normalise both its training and test data.

    Args:
        positive_data: array of positive samples, channel-first
            (the channel axis is axis 1).
        negative_data: array of negative samples with the same trailing shape.

    Returns:
        ``(train_dataloaders, test_dataloaders)``: two lists with one
        ``DataLoader`` per fold, aligned by position.
    """
    train_dataloaders = []
    test_dataloaders = []

    # try and use Cross Fold Val
    skf = StratifiedKFold(n_splits=5, random_state=None, shuffle=True)

    total_dataset = np.concatenate((positive_data, negative_data), axis=0)
    total_labels = np.concatenate((
        np.ones((len(positive_data), 1)),
        np.zeros((len(negative_data), 1))
    ), axis=0)

    for train_index, test_index in skf.split(total_dataset, total_labels):
        train_dataset_numpy, test_dataset_numpy = total_dataset[train_index], total_dataset[test_index]
        train_labels_numpy, test_labels_numpy = total_labels[train_index], total_labels[test_index]

        # Per-channel statistics: flatten each channel's pixels, take the
        # per-sample mean/std, then average over the training samples. The
        # channel count comes from the data rather than being hard-coded.
        n_train, n_channels = train_dataset_numpy.shape[0], train_dataset_numpy.shape[1]
        images = train_dataset_numpy.reshape(n_train, n_channels, -1)
        mean = images.mean(2).sum(0) / n_train
        std = images.std(2).sum(0) / n_train

        # Everything below must run once per fold so each fold gets loaders
        # normalised with its own statistics.
        train_transform = A.Compose([
            A.Normalize(mean=mean.tolist(), std=std.tolist(), max_pixel_value=1.0, p=1.0),
            A.CoarseDropout(p=0.5, max_height=3, max_width=3, fill_value=0),
            ToTensorV2(p=1.0),
        ], p=1.)

        # Test data gets the same normalisation but no dropout augmentation.
        test_transform = A.Compose([
            A.Normalize(mean=mean.tolist(), std=std.tolist(), max_pixel_value=1.0, p=1.0),
            ToTensorV2(p=1.0),
        ], p=1.)

        train_dataset = KeystrokesDataset(train_dataset_numpy, train_labels_numpy, train_transform)
        test_dataset = KeystrokesDataset(test_dataset_numpy, test_labels_numpy, test_transform)

        train_dataloader = DataLoader(train_dataset, batch_size=64, shuffle=True)
        test_dataloader = DataLoader(test_dataset, batch_size=64, shuffle=True)

        train_dataloaders.append(train_dataloader)
        test_dataloaders.append(test_dataloader)

    return train_dataloaders, test_dataloaders

In [None]:
import torch 
import torch.nn as nn
import torch.optim as optim
import torchvision
import torchvision.datasets as datasets
import torchvision.transforms as transforms
from torch.utils.data import DataLoader
from torch.utils.tensorboard import SummaryWriter

# Use the GPU when PyTorch can see one, otherwise fall back to the CPU.
device = torch.device("cuda"if torch.cuda.is_available() else "cpu")

# Training hyperparameters for this cell.
LEARNING_RATE = 2e-4
BATCH_SIZE = 64
IMAGE_SIZE = 64
CHANNELS_IMG = 5
NOISE_DIM = 100
NUM_EPOCHS = 5 
FEATURES_DISC = 64
FEATURES_GEN = 64

# NOTE: this rebinds the name `transforms` from the torchvision module to a
# Compose pipeline. The pipeline is not used by the data loading below (it was
# only referenced by the commented-out MNIST loader).
transforms = transforms.Compose(
    [
        transforms.Resize(IMAGE_SIZE),
        transforms.ToTensor(),
        transforms.Normalize(
            [.5 for _ in range(CHANNELS_IMG)], [.5 for _ in range(CHANNELS_IMG)]),
    ]
)

# dataset = datasets.MNIST(root="dataset/", train= True, transform = transforms, download = True)
# dataloader = DataLoader(dataset, batch_size=BATCH_SIZE, shuffle=True)
data = extract_samples(0)
# encapsulate() returns (train_dataloaders, test_dataloaders), i.e. two lists.
# Iterating that tuple directly cannot yield (image, label) batches, so take
# the first training DataLoader explicitly.
train_dataloaders, _ = encapsulate(data[0],data[1])
dataloader = train_dataloaders[0]


gen = Generator(NOISE_DIM, CHANNELS_IMG, FEATURES_GEN).to(device)
disc = Discriminator(CHANNELS_IMG, FEATURES_DISC).to(device)
initialize_weights(gen)
initialize_weights(disc)

opt_gen = optim.Adam(gen.parameters(), lr=LEARNING_RATE, betas = (.5, .999))
opt_disc = optim.Adam(disc.parameters(), lr=LEARNING_RATE, betas = (.5, .999))
criterion = nn.BCELoss()

# Fixed latent batch so logged samples are comparable between logging steps.
fixed_noise = torch.randn(32, NOISE_DIM, 1,1).to(device)
writer_real = SummaryWriter("logs/real")
writer_fake = SummaryWriter("logs/fake")
step = 0

gen.train()
disc.train()

# Number of batches per epoch.
print(len(dataloader))

for epoch in range(NUM_EPOCHS):
    for batch_idx, (real,_) in enumerate(dataloader):
        real = real.to(device)
        noise = torch.randn((BATCH_SIZE, NOISE_DIM, 1,1), device = device)
        fake = gen(noise)

        # Discriminator step: real batch labelled 1, generated batch labelled 0.
        # `fake` is detached because generator gradients from this loss would
        # be thrown away by gen.zero_grad() below anyway; detaching avoids
        # back-propagating through the generator and retaining its graph.
        disc_real = disc(real).reshape(-1)
        loss_disc_real = criterion(disc_real, torch.ones_like(disc_real))
        disc_fake = disc(fake.detach()).reshape(-1)
        loss_disc_fake = criterion(disc_fake, torch.zeros_like(disc_fake))
        loss_disc = (loss_disc_real + loss_disc_fake ) /2
        disc.zero_grad()
        loss_disc.backward()
        opt_disc.step()

        # Generator step: score the same fakes with the updated discriminator
        # and push them towards the "real" label.
        output = disc(fake).reshape(-1)
        loss_gen = criterion(output, torch.ones_like(output))
        gen.zero_grad()
        loss_gen.backward()
        opt_gen.step()
        
        # Print losses occasionally and print to tensorboard
        if batch_idx % 100 == 0:
            print(
                f"Epoch [{epoch}/{NUM_EPOCHS}] Batch {batch_idx}/{len(dataloader)} \
                  Loss D: {loss_disc:.4f}, loss G: {loss_gen:.4f}"
            )

            with torch.no_grad():
                fake = gen(fixed_noise)
                # take out (up to) 32 examples
                # NOTE(review): these grids have CHANNELS_IMG (=5) channels;
                # confirm that add_image accepts that, or log a channel subset.
                img_grid_real = torchvision.utils.make_grid(real[:32], normalize=True)
                img_grid_fake = torchvision.utils.make_grid(fake[:32], normalize=True)

                writer_real.add_image("Real", img_grid_real, global_step=step)
                writer_fake.add_image("Fake", img_grid_fake, global_step=step)

            step += 1
        
        

********************** 0


In [27]:
import torch 
import torch.nn as nn
import torch.optim as optim
import torchvision
import torchvision.datasets as datasets
import torchvision.transforms as transforms
from torch.utils.data import DataLoader
from torch.utils.tensorboard import SummaryWriter

# Use the GPU when PyTorch can see one, otherwise fall back to the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# --- Optimiser / schedule -------------------------------------------------
# NOTE(review): 2e-6 is 100x smaller than the 2e-4 used in the earlier
# training cell; confirm this is intentional.
LEARNING_RATE = 2e-6
BATCH_SIZE = 64
NUM_EPOCHS = 50

# --- Data / model dimensions ----------------------------------------------
IMAGE_SIZE = 64
CHANNELS_IMG = 5
NOISE_DIM = 100
FEATURES_DISC = 64
FEATURES_GEN = 64

# Resize -> tensor -> per-channel normalisation with mean 0.5 and std 0.5.
# NOTE: this rebinds the name `transforms` from the torchvision module to the
# composed pipeline.
transforms = transforms.Compose([
    transforms.Resize(IMAGE_SIZE),
    transforms.ToTensor(),
    transforms.Normalize([.5] * CHANNELS_IMG, [.5] * CHANNELS_IMG),
])

# dataset = datasets.MNIST(root="dataset/", train= True, transform = transforms, download = True)
# dataloader = DataLoader(dataset, batch_size=BATCH_SIZE, shuffle=True)


def train_GAN(dataloader,MODEL_NUM):
    """Train a fresh DCGAN and save a batch of generated samples to disk.

    A new generator/discriminator pair is built from the module-level
    hyperparameters, trained for ``NUM_EPOCHS`` epochs with BCE loss and Adam,
    and then 32 samples are generated from a fixed latent batch, cropped to
    rows/columns 11..52 and written as a ``.npy`` file.

    Args:
        dataloader: sequence whose first element is the DataLoader to train
            on; it must yield ``(images, labels)`` batches (labels are ignored).
        MODEL_NUM: string appended to the output file name
            (``gen_data_<MODEL_NUM>.npy``).
    """
    gen = Generator(NOISE_DIM, CHANNELS_IMG, FEATURES_GEN).to(device)
    disc = Discriminator(CHANNELS_IMG, FEATURES_DISC).to(device)
    initialize_weights(gen)
    initialize_weights(disc)

    opt_gen = optim.Adam(gen.parameters(), lr=LEARNING_RATE, betas = (.5, .999))
    opt_disc = optim.Adam(disc.parameters(), lr=LEARNING_RATE, betas = (.5, .999))
    criterion = nn.BCELoss()

    # Latent batch used for the samples that are saved after training.
    fixed_noise = torch.randn(32, NOISE_DIM, 1,1).to(device)

    gen.train()
    disc.train()
    for epoch in range(NUM_EPOCHS):
        for batch_idx, (real,_) in enumerate(dataloader[0]):
            real = real.to(device)
            noise = torch.randn((BATCH_SIZE, NOISE_DIM, 1,1), device = device)
            fake = gen(noise)

            # Discriminator step: real batch labelled 1, generated batch 0.
            # `fake` is detached because generator gradients from this loss
            # would be discarded by gen.zero_grad() below anyway; detaching
            # avoids back-propagating through the generator and retaining
            # its graph.
            disc_real = disc(real).reshape(-1)
            loss_disc_real = criterion(disc_real, torch.ones_like(disc_real))
            disc_fake = disc(fake.detach()).reshape(-1)
            loss_disc_fake = criterion(disc_fake, torch.zeros_like(disc_fake))
            loss_disc = (loss_disc_real + loss_disc_fake ) /2
            disc.zero_grad()
            loss_disc.backward()
            opt_disc.step()

            # Generator step: score the same fakes with the updated
            # discriminator and push them towards the "real" label.
            output = disc(fake).reshape(-1)
            loss_gen = criterion(output, torch.ones_like(output))
            gen.zero_grad()
            loss_gen.backward()
            opt_gen.step()

    # Sample only after ALL epochs and batches have run, so the saved data
    # comes from the fully trained generator. The generator is still in
    # train() mode here, so BatchNorm normalises with this batch's statistics.
    with torch.no_grad():
        fake = gen(fixed_noise).cpu().detach().numpy()

    # Keep rows and columns 11..52 of every channel (a 42x42 window).
    # NOTE(review): presumably this strips the padding of the "Data_padded"
    # inputs - confirm the offsets against the preprocessing code.
    samples = np.ascontiguousarray(fake[:, :, 11:53, 11:53])
    np.save(r"/home/jupyter/src/Thesis_Project/DCGAN_data/100/gen_data_"+MODEL_NUM+".npy",samples)
    print(samples.shape,"HERE")

        
# Train one GAN per subject file. The loop bound follows the number of files
# that were actually discovered instead of a hard-coded subject count.
for i in range(len(all_files)):
    data = extract_samples(i)
    # encapsulate() returns (train_dataloaders, test_dataloaders); only the
    # training loaders are used for GAN training.
    dataloader,dataloader2 = encapsulate(data[0],data[1])
    train_GAN(dataloader,str(i))
    

********************** 0
Finaly positive keystroke images Data shape is (55, 5, 64, 64)
Finaly negative keystroke images Data shape is (33, 5, 64, 64)
(32, 5, 42, 42) HERE
********************** 1
Finaly positive keystroke images Data shape is (47, 5, 64, 64)
Finaly negative keystroke images Data shape is (36, 5, 64, 64)
(32, 5, 42, 42) HERE
********************** 2
Finaly positive keystroke images Data shape is (51, 5, 64, 64)
Finaly negative keystroke images Data shape is (35, 5, 64, 64)
(32, 5, 42, 42) HERE
********************** 3
Finaly positive keystroke images Data shape is (52, 5, 64, 64)
Finaly negative keystroke images Data shape is (30, 5, 64, 64)
(32, 5, 42, 42) HERE
********************** 4
Finaly positive keystroke images Data shape is (52, 5, 64, 64)
Finaly negative keystroke images Data shape is (30, 5, 64, 64)
(32, 5, 42, 42) HERE
********************** 5
Finaly positive keystroke images Data shape is (51, 5, 64, 64)
Finaly negative keystroke images Data shape is (37, 