In [1]:
import sys
import os
sys.path.append(os.path.dirname(os.getcwd()))
import scipy.io as sio
from scipy.misc import imread
import tensorflow as tf
import numpy as np
import pandas as pd
import scipy.io
import tqdm 
from sklearn.model_selection import train_test_split
from sklearn.svm import SVC
from xgboost import XGBClassifier
import random
from numpy.random import choice
from sklearn.neural_network import MLPClassifier
from sklearn.metrics import accuracy_score, roc_curve
import sklearn.pipeline as pipeline
import sklearn.preprocessing as preprocessing
from sklearn.model_selection import train_test_split
from torch import nn
from collections import OrderedDict
import torch
from torch.utils.data import Dataset, TensorDataset, DataLoader, random_split
from torchvision import transforms as transforms
import argparse
import pathlib
from typing import Dict, Tuple, Any, Optional
from sklearn.preprocessing import LabelEncoder
from torch.nn import functional as F
from torch import optim
from visdom_logger.logger import VisdomLogger

import datasets.util as util
from featurelearning.data import TransformDataset
import featurelearning.models as models
from preprocessing.normalize import crop_center

In [2]:
# Define the architecture
class SigNet(nn.Module):
    """ SigNet model, from https://arxiv.org/abs/1705.05787
    """
    def __init__(self):
        super(SigNet, self).__init__()

        self.feature_space_size = 2048

        self.conv_layers = nn.Sequential(OrderedDict([
            ('conv1', conv_bn_relu(1, 96, 11, stride=4)),
            ('maxpool1', nn.MaxPool2d(3, 2)),
            ('conv2', conv_bn_relu(96, 256, 5, pad=2)),
            ('maxpool2', nn.MaxPool2d(3, 2)),
            ('conv3', conv_bn_relu(256, 384, 3, pad=1)),
            ('conv4', conv_bn_relu(384, 384, 3, pad=1)),
            ('conv5', conv_bn_relu(384, 256, 3, pad=1)),
            ('maxpool3', nn.MaxPool2d(3, 2)),
        ]))

        self.fc_layers = nn.Sequential(OrderedDict([
            ('fc1', linear_bn_relu(256 * 3 * 5, 2048)),
            ('fc2', linear_bn_relu(self.feature_space_size, self.feature_space_size)),
        ]))

    def forward(self, inputs):
        x = self.conv_layers(inputs)
        x = x.view(x.shape[0], 256 * 3 * 5)
        x = self.fc_layers(x)
        return x


def conv_bn_relu(in_channels, out_channels, kernel_size,  stride=1, pad=0):
    return nn.Sequential(OrderedDict([
        ('conv', nn.Conv2d(in_channels, out_channels, kernel_size, stride, pad, bias=False)),
        ('bn', nn.BatchNorm2d(out_channels)),
        ('relu', nn.ReLU()),
    ]))


def linear_bn_relu(in_features, out_features):
    return nn.Sequential(OrderedDict([
        ('fc', nn.Linear(in_features, out_features, bias=False)),  # Bias is added after BN
        ('bn', nn.BatchNorm1d(out_features)),
        ('relu', nn.ReLU()),
    ]))

In [3]:
class TransformDataset(Dataset):
    """
        Dataset that applies a transform on the data points on __get__item.
    """
    def __init__(self, dataset, transform, transform_index=0):
        self.dataset = dataset
        self.transform = transform
        self.transform_index = transform_index

    def __len__(self):
        return len(self.dataset)

    def __getitem__(self, item):
        data = self.dataset[item]
        img = data[self.transform_index]

        return tuple((self.transform(img), *data[1:]))

In [4]:
def train(base_model: torch.nn.Module,
          classification_layer: torch.nn.Module,
          forg_layer: torch.nn.Module,
          train_loader: torch.utils.data.DataLoader,
          val_loader: torch.utils.data.DataLoader,
          device: torch.device,
          callback: Optional[VisdomLogger],
          args: Any,
          logdir: Optional[pathlib.Path]):
    """ Trains a network using either SigNet or SigNet-F loss functions on
    https://arxiv.org/abs/1705.05787 (e.q. (1) and (4) on the paper)
    Parameters
    ----------
    base_model: torch.nn.Module
        The model architecture that "extract features" from signatures
    classification_layer: torch.nn.Module
        The classification layer (from features to predictions of which user
        wrote the signature)
    forg_layer: torch.nn.Module
        The forgery prediction layer (from features to predictions of whether
        the signature is a forgery). Only used in args.forg = True
    train_loader: torch.utils.data.DataLoader
        Iterable that loads the training set (x, y) tuples
    val_loader: torch.utils.data.DataLoader
        Iterable that loads the validation set (x, y) tuples
    device: torch.device
        The device (CPU or GPU) to use for training
    callback: VisdomLogger (optional)
        A callback to report the training progress
    args: Namespace
        Extra arguments for training: epochs, lr, lr_decay, lr_decay_times, momentum, weight_decay
    logdir: str
        Where to save the model and training curves
    Returns
    -------
    Dict (str -> tensors)
        The trained weights
    """

    # Collect all parameters that need to be optimizer
    parameters = list(base_model.parameters()) + list(classification_layer.parameters())
    if args.forg:
        parameters.extend(forg_layer.parameters())

    # Initialize optimizer and learning rate scheduler
    optimizer = optim.SGD(parameters, lr=args.lr, momentum=args.momentum,
                          nesterov=True, weight_decay=args.weight_decay)
    lr_scheduler = optim.lr_scheduler.StepLR(optimizer,
                                             args.epochs // args.lr_decay_times,
                                             args.lr_decay)

    best_acc = 0
    best_params = get_parameters(base_model, classification_layer, forg_layer)

    for epoch in range(args.epochs):
        # Train one epoch; evaluate on validation
        train_epoch(train_loader, base_model, classification_layer, forg_layer,
                    epoch, optimizer, lr_scheduler, callback, device, args)

        val_metrics = test(val_loader, base_model, classification_layer, device, args.forg, forg_layer)
        val_acc, val_loss, val_forg_acc, val_forg_loss = val_metrics

        # Save the best model only on improvement (early stopping)
        if val_acc >= best_acc:
            best_acc = val_acc
            best_params = get_parameters(base_model, classification_layer, forg_layer)
            if logdir is not None:
                torch.save(best_params, logdir / 'model_best.pth')

        if callback:
            callback.scalar('val_loss', epoch + 1, val_loss)
            callback.scalar('val_acc', epoch + 1, val_acc)

            if args.forg:
                callback.scalar('val_forg_loss', epoch + 1, val_forg_loss)
                callback.scalar('val_forg_acc', epoch + 1, val_forg_acc)

        if args.forg:
            print('Epoch {}. Val loss: {:.4f}, Val acc: {:.2f}%,'
                  'Val forg loss: {:.4f}, Val forg acc: {:.2f}%'.format(epoch, val_loss,
                                                                        val_acc * 100,
                                                                        val_forg_loss,
                                                                        val_forg_acc * 100))
        else:
            print('Epoch {}. Val loss: {:.4f}, Val acc: {:.2f}%'.format(epoch, val_loss, val_acc * 100))

        if logdir is not None:
            current_params = get_parameters(base_model, classification_layer, forg_layer)
            torch.save(current_params, logdir / 'model_last.pth')
            if callback:
                callback.save(logdir / 'train_curves.pickle')

    return best_params


def copy_to_cpu(weights: Dict[str, Any]):
    return OrderedDict([(k, v.cpu()) for k, v in weights.items()])


def get_parameters(base_model, classification_layer, forg_layer):
    best_params = (copy_to_cpu(base_model.state_dict()),
                   copy_to_cpu(classification_layer.state_dict()),
                   copy_to_cpu(forg_layer.state_dict()))
    return best_params


def train_epoch(train_loader: torch.utils.data.DataLoader,
                base_model: torch.nn.Module,
                classification_layer: torch.nn.Module,
                forg_layer: torch.nn.Module,
                epoch: int,
                optimizer: torch.optim.Optimizer,
                lr_scheduler: torch.optim.lr_scheduler._LRScheduler,
                callback: Optional[VisdomLogger],
                device: torch.device,
                args: Any):
    """ Trains the network for one epoch
        Parameters
        ----------
        train_loader: torch.utils.data.DataLoader
            Iterable that loads the training set (x, y) tuples
        base_model: torch.nn.Module
            The model architecture that "extract features" from signatures
        classification_layer: torch.nn.Module
            The classification layer (from features to predictions of which user
            wrote the signature)
        forg_layer: torch.nn.Module
            The forgery prediction layer (from features to predictions of whether
            the signature is a forgery). Only used in args.forg = True
        epoch: int
            The current epoch (used for reporting)
        optimizer: torch.optim.Optimizer
            The optimizer (already initialized)
        lr_scheduler: torch.optim.lr_scheduler._LRScheduler
            The learning rate scheduler
        callback: VisdomLogger (optional)
            A callback to report the training progress
        device: torch.device
            The device (CPU or GPU) to use for training
        args: Namespace
            Extra arguments used for training:
            args.forg: bool
                Whether forgeries are being used for training
            args.lamb: float
                The weight used for the forgery loss (training with forgeries only)
        Returns
        -------
        None
        """

    step = 0
    n_steps = len(train_loader)
    for batch in train_loader:
        x, y = batch[0], batch[1]
        x = torch.tensor(x, dtype=torch.float).to(device)
        y = torch.tensor(y, dtype=torch.long).to(device)
        yforg = torch.tensor(batch[2], dtype=torch.float).to(device)

        # Forward propagation
        features = base_model(x)

        if args.forg:
            if args.loss_type == 'L1':
                # Eq (3) in https://arxiv.org/abs/1705.05787
                logits = classification_layer(features)
                class_loss = F.cross_entropy(logits, y)

                forg_logits = forg_layer(features).squeeze()
                forg_loss = F.binary_cross_entropy_with_logits(forg_logits, yforg)

                loss = (1 - args.lamb) * class_loss
                loss += args.lamb * forg_loss
            else: 
                # Eq (4) in https://arxiv.org/abs/1705.05787
                logits = classification_layer(features[yforg == 0])
                class_loss = F.cross_entropy(logits, y[yforg == 0])

                forg_logits = forg_layer(features).squeeze()
                forg_loss = F.binary_cross_entropy_with_logits(forg_logits, yforg)

                loss = (1 - args.lamb) * class_loss
                loss += args.lamb * forg_loss
        else:
            # Eq (1) in https://arxiv.org/abs/1705.05787
            logits = classification_layer(features)
            loss = class_loss = F.cross_entropy(logits, y)

        # Back propagation
        optimizer.zero_grad()
        loss.backward()
        torch.nn.utils.clip_grad_value_(optimizer.param_groups[0]['params'], 10)

        # Update weights
        optimizer.step()

        # Logging
        if callback and step % 100 == 0:
            iteration = epoch + (step / n_steps)
            callback.scalar('class_loss', iteration, class_loss.detach())

            pred = logits.argmax(1)
            if args.loss_type == 'L1': acc = y.eq(pred).float().mean()
            else: acc = y[yforg == 0].eq(pred[yforg == 0]).float().mean()
            callback.scalar('train_acc', epoch + (step / n_steps), acc.detach())
            if args.forg:
                forg_pred = forg_logits > 0
                forg_acc = yforg.long().eq(forg_pred.long()).float().mean()
                callback.scalar('forg_loss', iteration, forg_loss.detach())
                callback.scalar('forg_acc', iteration, forg_acc.detach())

        step += 1
    lr_scheduler.step()


def test(val_loader: torch.utils.data.DataLoader,
         base_model: torch.nn.Module,
         classification_layer: torch.nn.Module,
         device: torch.device,
         is_forg: bool,
         forg_layer: Optional[torch.nn.Module] = None) -> Tuple[float, float, float, float]:
    """ Test the model in a validation/test set
    Parameters
    ----------
    val_loader: torch.utils.data.DataLoader
        Iterable that loads the validation set (x, y) tuples
    base_model: torch.nn.Module
        The model architecture that "extract features" from signatures
    classification_layer: torch.nn.Module
        The classification layer (from features to predictions of which user
        wrote the signature)
    device: torch.device
        The device (CPU or GPU) to use for training
    is_forg: bool
        Whether or not forgeries are being used for training/testing
    forg_layer: torch.nn.Module
            The forgery prediction layer (from features to predictions of whether
            the signature is a forgery). Only used in is_forg = True
    Returns
    -------
    float, float
        The valication accuracy and validation loss
    """
    val_losses = []
    val_accs = []

    val_forg_losses = []
    val_forg_accs = []
    for batch in val_loader:
        x, y, yforg = batch[0], batch[1], batch[2]
        x = torch.tensor(x, dtype=torch.float).to(device)
        y = torch.tensor(y, dtype=torch.long).to(device)
        yforg = torch.tensor(yforg, dtype=torch.float).to(device)

        with torch.no_grad():
            features = base_model(x)
            logits = classification_layer(features[yforg == 0])

            loss = F.cross_entropy(logits, y[yforg == 0])
            pred = logits.argmax(1)
            acc = y[yforg == 0].eq(pred).float().mean()

            if is_forg:
                forg_logits = forg_layer(features).squeeze()
                forg_loss = F.binary_cross_entropy_with_logits(forg_logits, yforg)
                forg_pred = forg_logits > 0
                forg_acc = yforg.long().eq(forg_pred.long()).float().mean()

                val_forg_losses.append(forg_loss.item())
                val_forg_accs.append(forg_acc.item())

        val_losses.append(loss.item())
        val_accs.append(acc.item())
    val_loss = np.mean(val_losses)
    val_acc = np.mean(val_accs)
    val_forg_loss = np.mean(val_forg_losses) if len(val_forg_losses) > 0 else np.nan
    val_forg_acc= np.mean(val_forg_accs) if len(val_forg_accs) > 0 else np.nan

    if is_forg: return val_acc.item(), val_loss.item(), val_forg_acc.item(), val_forg_loss.item()
    else : return val_acc.item(), val_loss.item(), val_forg_acc, val_forg_loss

def setup_data_loaders(data, batch_size, input_size):
    label_encoder = LabelEncoder()
    y = label_encoder.fit_transform(data[1])
    data = TensorDataset(torch.from_numpy(data[0]), torch.from_numpy(y), torch.from_numpy(data[2]))
    train_size = int(0.9 * len(data))
    sizes = (train_size, len(data) - train_size)
    train_set, test_set = random_split(data, sizes)
    train_transforms = transforms.Compose([
        transforms.ToPILImage(),
        transforms.RandomCrop(input_size),
        transforms.ToTensor(),
    ])
    train_set = TransformDataset(train_set, train_transforms)
    val_transforms = transforms.Compose([
        transforms.ToPILImage(),
        transforms.CenterCrop(input_size),
        transforms.ToTensor(),
    ])
    test_set = TransformDataset(test_set, val_transforms)
    train_loader = DataLoader(train_set, batch_size=batch_size, shuffle=True)
    val_loader = DataLoader(test_set, batch_size=batch_size)
    return train_loader, val_loader

def main(args):
    # Setup logging
    logdir = pathlib.Path(args.logdir)
    if not logdir.exists():
        logdir.mkdir()

    if args.visdomport is not None:
        logger = VisdomLogger(port=args.visdomport)
    else:
        logger = None

    device = torch.device('cuda', args.gpu_idx) if torch.cuda.is_available() else torch.device('cpu')
    print('Using device: {}'.format(device))

    torch.manual_seed(args.seed)
    if torch.cuda.is_available():
        torch.cuda.manual_seed_all(args.seed)

    print('Loading Data')

    x, y, yforg, usermapping, filenames = util.load_dataset(args.dataset_path)
    data = util.get_subset((x, y, yforg), subset=range(*args.users))
    if not args.forg:
        data = util.remove_forgeries(data, forg_idx=2)

    train_loader, val_loader = setup_data_loaders(data, args.batch_size, args.input_size)

    print('Initializing Model')

    n_classes = len(np.unique(data[1]))

    base_model = models.available_models[args.model]().to(device)
    classification_layer = nn.Linear(base_model.feature_space_size, n_classes).to(device)
    if args.forg:
        forg_layer = nn.Linear(base_model.feature_space_size, 1).to(device)
    else:
        forg_layer = nn.Module()  # Stub module with no parameters

    if args.test:
        print('Testing')
        base_model_params, classification_params, forg_params = torch.load(args.checkpoint)
        base_model.load_state_dict(base_model_params)

        classification_layer.load_state_dict(classification_params)
        if args.forg:
            forg_layer.load_state_dict(forg_params)
        val_acc, val_loss, val_forg_acc, val_forg_loss = test(val_loader, base_model, classification_layer,
                                                              device, args.forg, forg_layer)
        if args.forg:
            print('Val loss: {:.4f}, Val acc: {:.2f}%,'
                  'Val forg loss: {:.4f}, Val forg acc: {:.2f}%'.format(val_loss,
                                                                        val_acc * 100,
                                                                        val_forg_loss,
                                                                        val_forg_acc * 100))
        else:
            print('Val loss: {:.4f}, Val acc: {:.2f}%'.format(val_loss, val_acc * 100))

    else:
        print('Training')
        train(base_model, classification_layer, forg_layer, train_loader, val_loader,
              device, logger, args, logdir)
    return base_model

In [5]:
def feature_extraction(args, model):
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    print('Using device: {}'.format(device))
    npz_dir = args.extraction_path
    with np.load(npz_dir, allow_pickle=True) as data:
        x, y, yforg, user_mapping, filenames = data['x'], data['y'], data['yforg'], data['user_mapping'], data['filenames']
    xc = np.zeros((x.shape[0],1,150,220), dtype=np.uint8)
    for idx in tqdm.tqdm(np.arange(x.shape[0]),ascii=True):
        xc[idx,0,:,:] = crop_center(x[idx,0,:,:], (150,220))
    del x 
    visual_f=np.zeros((xc.shape[0],2048))
    with torch.no_grad():
        for idx,gpu_slice in enumerate(np.split(xc,250)):
            visual_f[idx*len(gpu_slice):(idx+1)*len(gpu_slice),:] = base_model(torch.tensor(gpu_slice).float().div(255).to(device)).cpu()
    visual_f = pd.DataFrame(visual_f)
    if npz_dir.endswith('gpds4k.npz'):
        sig_ids = np.array([int(fname.lower().split('-')[-1].split('.')[0]) for fname in np.squeeze(filenames)])
        user_ids = np.array([int(fname.lower().split('-')[1]) for fname in np.squeeze(filenames)])
    elif npz_dir.endswith('mcyt.npz'):
        user_ids = np.array([int(fname.lower().split('_')[-1].split('v')[0].split('f')[0]) for fname in np.squeeze(filenames)])
        sig_ids = np.array([int(fname.lower().split('_')[-1].split('v')[-1].split('f')[-1].split('.')[0])+1 for fname in np.squeeze(filenames)])
    filenames_ex = np.expand_dims(filenames,axis=1)
    yforg_ex = np.expand_dims(yforg,axis=1)
    sig_ids_ex = np.expand_dims(sig_ids,axis=1)
    user_ids_ex = np.expand_dims(user_ids,axis=1)    
    data_f=pd.DataFrame(np.concatenate((filenames_ex, user_ids_ex, sig_ids_ex, yforg_ex),axis=1), columns=['path', 'user_id', 'sig_id', 'fakeness'])
    return visual_f, data_f

In [6]:
# Define training params here.
class argstore:
    def __init__(self):
        self.dataset_path = 'C:\\Users\\Mert\\Documents\\GitHub\\sigver_v2\\npz\\gpds4k.npz'
        self.input_size = (150,220)
        self.users = (1201,4000)
        self.model = 'signet'
        self.batch_size = 32
        self.lr = 1e-3
        self.lr_decay = 0.1
        self.lr_decay_times = 3.0
        self.momentum = 0.9
        self.weight_decay = 1e-4
        self.epochs = 60
        '''
        self.checkpoint = []
        '''
        self.test = False
        self.seed = 42
        self.forg = False
        '''
        self.lamb = 0
        '''
        self.loss_type = 'L2'
        self.gpu_idx = 0
        self.logdir = 'default_logdir'
        self.visdomport = None
        self.extraction_path = 'C:\\Users\\Mert\\Documents\\GitHub\\sigver_v2\\npz\\gpds4k.npz'

In [7]:
args = argstore()
base_model = main(args)

Using device: cuda:0
Loading Data
Initializing Model
Training




Epoch 0. Val loss: 6.5265, Val acc: 2.02%
Epoch 1. Val loss: 5.6090, Val acc: 12.57%
Epoch 2. Val loss: 4.8099, Val acc: 24.98%
Epoch 3. Val loss: 4.0737, Val acc: 38.09%
Epoch 4. Val loss: 3.3784, Val acc: 50.57%
Epoch 5. Val loss: 2.7551, Val acc: 62.17%
Epoch 6. Val loss: 2.2081, Val acc: 71.90%
Epoch 7. Val loss: 1.7660, Val acc: 78.22%
Epoch 8. Val loss: 1.3649, Val acc: 83.85%
Epoch 9. Val loss: 1.0909, Val acc: 87.53%
Epoch 10. Val loss: 0.8700, Val acc: 90.73%
Epoch 11. Val loss: 0.7093, Val acc: 92.08%
Epoch 12. Val loss: 0.5961, Val acc: 93.96%
Epoch 13. Val loss: 0.5001, Val acc: 94.69%
Epoch 14. Val loss: 0.4347, Val acc: 95.34%
Epoch 15. Val loss: 0.3825, Val acc: 95.94%
Epoch 16. Val loss: 0.3450, Val acc: 96.28%
Epoch 17. Val loss: 0.3055, Val acc: 96.67%
Epoch 18. Val loss: 0.2770, Val acc: 97.01%
Epoch 19. Val loss: 0.2583, Val acc: 97.29%
Epoch 20. Val loss: 0.2314, Val acc: 97.78%
Epoch 21. Val loss: 0.2145, Val acc: 97.78%
Epoch 22. Val loss: 0.2041, Val acc: 97.88%

In [8]:
visual_f, data_f = feature_extraction(args, base_model)

Using device: cuda


100%|#######################################################################| 216000/216000 [00:03<00:00, 55249.36it/s]


In [43]:
# Preprocess to convert strings to integers.
data_f['user_id']=data_f['user_id'].astype('int32')
data_f['sig_id']=data_f['sig_id'].astype('int8')
data_f['fakeness']=data_f['fakeness'].astype('int8')

In [44]:
USER_KERNEL='rbf'
TRAIN_ITERATIONS = 1

In [45]:
dev_exp_ratio = 0.30
sorted_id_list = np.sort(data_f['user_id'].unique())
dev_exp_splitter=int(len(sorted_id_list)*dev_exp_ratio)
dev_val_user_ids = sorted_id_list[:dev_exp_splitter]
exp_user_ids = sorted_id_list[dev_exp_splitter:]

In [18]:
fakes_preds = []
gens_preds = []

for fold in np.arange(0,TRAIN_ITERATIONS):
    # assert len(dev_val_user_ids)==581
    np.random.shuffle(dev_val_user_ids)
    dev_user_ids = dev_val_user_ids[0:1000]
    validation_user_ids = dev_val_user_ids[1000:len(dev_val_user_ids)]
    train_idx, test_idx = train_test_split(np.arange(1,25), train_size=0.5, test_size=0.5)

    dev_df = data_f.loc[data_f['user_id'].isin(dev_user_ids)]
    dev_vf = visual_f.loc[dev_df.index]
    val_df = data_f.loc[data_f['user_id'].isin(validation_user_ids)]
    val_vf = visual_f.loc[val_df.index]

    dev_df_gen = dev_df.loc[dev_df['fakeness']==0]
    dev_df_fake = dev_df.loc[dev_df['fakeness']==1]
    dev_df_gen_12 = dev_df_gen.loc[dev_df_gen['sig_id'].isin(train_idx)]
    dev_df_valid_12 = dev_df_gen.loc[dev_df_gen['sig_id'].isin(test_idx)]
    
    train_idx, test_idx = train_test_split(np.arange(1,25), train_size=0.5)
    val_df_gen = val_df.loc[val_df['fakeness']==0]
    val_df_fake = val_df.loc[val_df['fakeness']==1]
    val_df_gen_12 = val_df_gen.loc[val_df_gen['sig_id'].isin(train_idx)]
    val_df_valid_gen_12 = val_df_gen.loc[val_df_gen['sig_id'].isin(test_idx)]

    for user_id in tqdm.tqdm(validation_user_ids, ascii=True):
        clf = SVC(C=1,gamma='scale',class_weight='balanced', probability=False, kernel=USER_KERNEL)
        y_train = (pd.concat([val_df_gen_12.loc[val_df_gen_12['user_id']==user_id],dev_df_gen.loc[dev_df_gen['user_id']!=user_id]]))['user_id']==user_id
        X_train = visual_f.loc[y_train.index]  
        clf.fit(X_train, y_train)
        y_valid_fakes = val_df_fake.loc[(val_df_fake['user_id']==user_id)]
        X_valid_f = visual_f.loc[y_valid_fakes.index]
        fakes_preds.append(clf.decision_function(X_valid_f))
        y_valid_gens = val_df_valid_gen_12.loc[val_df_valid_gen_12['user_id']==user_id]
        X_valid_g = visual_f.loc[y_valid_gens.index]
        gens_preds.append(clf.decision_function(X_valid_g))

100%|################################################################################| 200/200 [17:58<00:00,  6.11s/it]


In [19]:
flat_fakes_preds = np.expand_dims(np.array([item for sublist in fakes_preds for item in sublist]),axis=1)
flat_gens_preds = np.expand_dims(np.array([item for sublist in gens_preds for item in sublist]),axis=1)
all_preds = np.vstack((flat_fakes_preds,flat_gens_preds))
all_labels = np.vstack((np.zeros((flat_fakes_preds.shape[0],1)),np.ones((flat_gens_preds.shape[0],1))))

fpr,tpr,threshold = roc_curve(all_labels,all_preds)
fnr = 1 - tpr
EER = fpr[np.nanargmin(np.absolute((fnr - fpr)))]
eer_th = threshold[np.nanargmin(np.absolute((fnr - fpr)))]
print('EER_glob : ', EER*100,'\nEER_Threshold_glob : ', eer_th)
glob_th = eer_th

EER_glob :  7.449999999999999 
EER_Threshold_glob :  -0.07249462839458398


In [20]:
assert len(fakes_preds)==len(gens_preds)
EER_accum=0
for idx,val in enumerate(fakes_preds):
    user_fakes_preds = np.expand_dims(np.array(fakes_preds[idx]),axis=1)
    user_gens_preds = np.expand_dims(np.array(gens_preds[idx]),axis=1)
    all_user_preds = np.vstack((user_fakes_preds,user_gens_preds))
    all_user_labels = np.vstack((np.zeros((user_fakes_preds.shape[0],1)),np.ones((user_gens_preds.shape[0],1)))) 
    fpr,tpr,threshold = roc_curve(all_user_labels,all_user_preds)
    fnr = 1 - tpr
    EER = fpr[np.nanargmin(np.absolute((fnr - fpr)))]
    EER_accum += EER
    
print('EER_user : ', (EER_accum*100)/len(fakes_preds)) 

EER_user :  3.6166666666666667


In [21]:
print(glob_th)

-0.07249462839458398
