In [1]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import pandas as pd
from sklearn.preprocessing import StandardScaler
from sklearn.preprocessing import quantile_transform
from sklearn.model_selection import train_test_split
from sklearn.metrics import f1_score ,  accuracy_score , precision_score , recall_score, confusion_matrix
from catboost import CatBoostClassifier
from random import randrange
import numpy as np

In [2]:
class CNN_layer(nn.Module):
    def __init__(self,in_channels = 1 , out_channels=128, dropout=0.4,padding='same',kernel_size=7):
        super().__init__()
        self.conv1 = nn.Conv1d(in_channels=in_channels,out_channels=out_channels,kernel_size=kernel_size,padding=padding)
        self.conv2 = nn.Conv1d(in_channels=out_channels,out_channels=out_channels,kernel_size=kernel_size,padding=padding)
        self.dropout  = nn.Dropout(dropout)
        self.norm =  nn.BatchNorm1d(out_channels)
    def forward(self,x):
        x = F.elu(self.conv1(x))
        x= F.elu(self.conv2(x))
        x=self.dropout(x)
        x= self.norm(x)
        return x
    
class CNN_net(nn.Module):
    def __init__(self,dropout=0.4):
        super().__init__()
        self.layer1 = CNN_layer(in_channels=1,out_channels=128,dropout=dropout,kernel_size=7)
        self.layer2 = CNN_layer(in_channels=128,out_channels=64,dropout=dropout,kernel_size=5)
        self.flatten = nn.Flatten()
        self.dense1 = nn.Linear(31*64,120)
        self.dropout1 = nn.Dropout(dropout)
        self.dense2 = nn.Linear(120,60)
        self.dropout2 = nn.Dropout(dropout)
        self.dense3 = nn.Linear(60,30)
    def forward(self,x):
        x= self.layer1(x)
        x=self.layer2(x)
        x=self.flatten(x)
        x=F.elu(self.dense1(x))
        x=self.dropout1(x)
        x=F.elu(self.dense2(x))
        x=self.dropout2(x)
        x=F.elu(self.dense3(x))
        return x


In [3]:
def data_generation(dataset: pd.DataFrame):
    if not isinstance(dataset, pd.DataFrame):
        raise TypeError("You can olnly use Pandas Dataframe")
    sd_factor = [0.01,0.02,0.03,0.04,5]
    header = []
    sd=[]
    for i in dataset.columns:
        header.append(i)
        if i =="subject":
            continue
        sd.append(dataset[i].std())

    generated_data = []

    for i in range(len(dataset)):
        generated_data.append(dataset.loc[i].tolist())
        for j in range(4):
            row=[]
            counter = 0
            for k in dataset.columns:
                if k =="subject":
                    row.append(dataset.loc[i,k])
                    continue
                row.append(dataset.loc[i,k]+(sd[counter]*sd_factor[j]))
                counter=counter+1
            generated_data.append(row)
    generated_data = pd.DataFrame(generated_data)
    generated_data.columns = header
    return generated_data

def generate_batch(x,batchsize):
    batches =[]
    for i in range(0,len(x),batchsize):
        batch = x[i:i+batchsize]
        tensor = torch.tensor(batch,dtype=torch.float)
        tensor = tensor.unsqueeze(1)
        batches.append(tensor)
    return batches
            


In [4]:
data = pd.read_csv("DSL-StrongPasswordData.csv")
genuine  = data[data["subject"]=="s002"]
imposter = data[data["subject"]!="s002"] #users03 is imposter 
imposter = imposter.groupby("subject").head(8)
imposter = imposter.reset_index(drop=True)
genuine = genuine.reset_index(drop=True)

In [None]:
genuine = genuine.drop(["sessionIndex","rep"],axis=1)
imposter = imposter.drop(["sessionIndex","rep"],axis=1)
genuine = data_generation(genuine)
imposter = data_generation(imposter)
data2 = pd.concat([genuine,imposter],axis=0)
X= data2.drop("subject",axis=1)
Y = data2["subject"]
scaler = StandardScaler()
X = scaler.fit_transform(X)
X=quantile_transform(X,n_quantiles=10, random_state=0, copy=True)
Y = (Y != "s002").astype(int)

X_train, X_test , Y_train , Y_test = train_test_split(X,Y,test_size=0.3,random_state=0,stratify=Y)
X_train = generate_batch(X_train,8000)
X_test = generate_batch(X_test,8000)
X_train = X_train[0]
X_test = X_test[0]
print(X_test.shape)

torch.Size([240, 1, 31])


In [6]:
cnn  = CNN_net(dropout=0.4)
cnn.eval()
with torch.no_grad():
    test_result=cnn(X_test)
    train_result=cnn(X_train)
test_result = test_result.detach().numpy()
train_result = train_result.detach().numpy()
catboost = CatBoostClassifier(verbose=0, n_estimators=100)
catboost.fit(train_result,Y_train)
predicted = catboost.predict(test_result)

f1 = f1_score(Y_test,predicted)
accuracy = accuracy_score(Y_test,predicted)
precision = precision_score(Y_test,predicted)
recall = recall_score(Y_test,predicted)
cm = confusion_matrix(Y_test,predicted)
print(cm)



[[111   9]
 [ 12 108]]


In [7]:
print(f"Accuracy : {accuracy:.4f} - Precision : {precision:.4f} - f1 : {f1:.4f} - Recall : {recall:.4f}")

Accuracy : 0.9125 - Precision : 0.9231 - f1 : 0.9114 - Recall : 0.9000
