In [1]:
import pandas as pd
from matplotlib import pyplot
from sklearn.model_selection import train_test_split
import numpy as np
from sklearn.metrics import accuracy_score
from keras.layers import LSTM, Dense,Input,Layer
from keras import backend as K,Model
# demonstration of calculating metrics for a neural network model using sklearn
from sklearn.metrics import accuracy_score
from sklearn.metrics import precision_score
from sklearn.metrics import recall_score
from sklearn.metrics import f1_score
import os
import tensorflow as tf
import warnings
warnings.filterwarnings("ignore", category=Warning)


In [2]:
# Load the transactions and discard the 'Time' column straight away.
data = pd.read_csv('creditcard.csv', na_filter=True).drop(columns=['Time'])

# Columns kept for the "feature selected" experiments; 'Class' is the label.
selected_vars = [
    'V4', 'V14', 'V10', 'V16', 'V9', 'V12', 'V11', 'V17', 'V7', 'V21',
    'V18', 'V8', 'V1', 'V3', 'V6', 'V2', 'V27', 'V5', 'V26',
    'Class',
]

In [3]:
# Restrict the data to the selected columns (label included).
df1 = data.loc[:, selected_vars]
df1.shape

(284807, 20)

In [4]:
# Pull the label out, then remove it from both feature tables.
Target = df1['Class']
df1 = df1.drop(columns=['Class'])
data = data.drop(columns=['Class'])

In [5]:
# Results table: create_model() writes one row per experiment at index sampling_c.
sampling_res = pd.DataFrame(
    columns=['name', 'fraud percent', 'accuracy', 'precision', 'recall', 'f1-score']
)
sampling_c = 0

In [6]:
from sklearn.preprocessing import StandardScaler

# Standardise both feature tables; each one gets its own freshly fitted scaler
# and is replaced by the scaler's output.
# NOTE(review): the scalers are fitted on all rows, before any train/test split.
df1 = StandardScaler().fit(df1).transform(df1)
data = StandardScaler().fit(data).transform(data)

In [7]:
def create_model(name,k,X_train,y_train,X_test,y_test,d,epochs,batch_size,sampling_c):
    """Train a small LSTM classifier and record its test metrics.

    Each sample is presented to the network as a sequence of length one.
    The network is ``Input((1, d)) -> LSTM(50) -> Dense(1, sigmoid)``, compiled
    with binary cross-entropy and Adam. Test probabilities are thresholded at
    0.5, and accuracy, precision, recall and F1 are computed with scikit-learn.
    The metrics are written to the module-level ``sampling_res`` table at row
    ``sampling_c``.

    Parameters
    ----------
    name : label stored in the 'name' column of ``sampling_res``.
    k : value stored in the 'fraud percent' column of ``sampling_res``.
    X_train, X_test : 2-D feature tables (NumPy arrays or DataFrames),
        one row per sample.
    y_train, y_test : labels for the corresponding feature tables.
    d : number of features per sample; must equal the column count of the
        feature tables.
    epochs, batch_size : forwarded to ``model.fit``.
    sampling_c : row index in ``sampling_res`` to write to.

    Returns
    -------
    ``sampling_c + 1``, i.e. the next free row index.
    """
    # np.asarray leaves ndarrays untouched and lets DataFrames be passed
    # directly (DataFrames have no .reshape).
    train_LSTM_X = np.asarray(X_train)
    test_LSTM_X = np.asarray(X_test)

    # Reshape input to 3-D [samples, timesteps, features], the layout the LSTM
    # layer requires; every sample becomes a single timestep.
    train_LSTM_X = train_LSTM_X.reshape((train_LSTM_X.shape[0], 1, train_LSTM_X.shape[1]))
    test_LSTM_X = test_LSTM_X.reshape((test_LSTM_X.shape[0], 1, test_LSTM_X.shape[1]))

    # Network definition.
    inputs = Input((1, d))
    hidden = LSTM(50)(inputs)
    outputs = Dense(1, activation='sigmoid')(hidden)
    model = Model(inputs, outputs)

    model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['Precision','Recall'])
    model.fit(train_LSTM_X, y_train, epochs=epochs, batch_size=batch_size, verbose=0)

    # Predicted probabilities for the test set, reduced to 1-D and
    # thresholded at 0.5 to obtain class predictions.
    yhat_probs = model.predict(test_LSTM_X, verbose=0)
    yhat_classes = yhat_probs[:, 0] > 0.5

    # accuracy: (tp + tn) / (p + n)
    accuracy = accuracy_score(y_test, yhat_classes)
    # precision: tp / (tp + fp)
    precision = precision_score(y_test, yhat_classes)
    # recall: tp / (tp + fn)
    recall = recall_score(y_test, yhat_classes)
    f1 = f1_score(y_test, yhat_classes)

    sampling_res.loc[sampling_c] = [name, k, accuracy, precision, recall, f1]
    return sampling_c + 1

In [8]:
# Baseline on every standardised feature column. With shuffle=False the split
# is not randomised: the leading 70% of rows train, the trailing 30% test.
X_train, X_test, y_train, y_test = train_test_split(
    data, Target, test_size=0.3, shuffle=False, random_state=0
)
sampling_c = create_model(
    "original", 1, X_train, y_train, X_test, y_test,
    d=X_train.shape[1], epochs=10, batch_size=500, sampling_c=sampling_c,
)

In [9]:
# Same unshuffled 70/30 split, but on the selected feature columns only.
X_train, X_test, y_train, y_test = train_test_split(
    df1, Target, test_size=0.3, shuffle=False, random_state=0
)
sampling_c = create_model(
    "feature selected", 1, X_train, y_train, X_test, y_test,
    d=X_train.shape[1], epochs=5, batch_size=500, sampling_c=sampling_c,
)

In [10]:
# Metrics recorded so far: one row per create_model() call.
sampling_res

Unnamed: 0,name,fraud percent,accuracy,precision,recall,f1-score
0,original,1,0.999625,0.975,0.722222,0.829787
1,feature selected,1,0.999625,0.94186,0.75,0.835052


# Outlier detection

## LocalOutlierFactor

In [11]:
from sklearn.preprocessing import StandardScaler
from sklearn.neighbors import LocalOutlierFactor

In [12]:
# Wrap df1 in a DataFrame and Target in a Series so the boolean-mask
# filtering in the following cells can be applied.
df1 = pd.DataFrame(data=df1)
Target = pd.Series(data=Target)

In [13]:
# Separate the fraud rows (label 1) from the normal rows (label 0).
df1_fraud = df1.loc[Target.eq(1)]
df1_normal = df1.loc[Target.eq(0)]
(df1_fraud.shape, df1_normal.shape)

((492, 19), (284315, 19))

In [14]:
# Run LocalOutlierFactor with default settings on the normal rows only, then
# list the distinct labels it produced.
clf = LocalOutlierFactor()
res = clf.fit_predict(X=df1_normal)
np.unique(res)

array([-1,  1])

In [15]:
# Recode the detector's +1 label to 0 so the column reads as a class label;
# -1 keeps marking the rows the detector flagged.
res = np.where(res == 1, 0, res)

# assign() builds new frames instead of writing a column into frames that were
# obtained by filtering df1.
df1_normal = df1_normal.assign(Class=res)
df1_fraud = df1_fraud.assign(Class=1)

In [16]:
# Stack the normal rows (Class 0 or -1) and the fraud rows (Class 1).
frames = [df1_normal, df1_fraud]
result = pd.concat(frames, axis=0)

In [17]:
# Rows flagged by LocalOutlierFactor (Class == -1) go to lof_out, all others
# to lof_res; the last line shows how many rows were flagged.
lof_res = result.loc[result['Class'].ne(-1)]
lof_out = result.loc[result['Class'].eq(-1)]
lof_out.shape

(19155, 20)

## IsolationForest

In [18]:
from sklearn.ensemble import IsolationForest

In [19]:
# Rebuild the fraud / normal subsets from df1 (without a 'Class' column).
df1_fraud = df1.loc[Target.eq(1)]
df1_normal = df1.loc[Target.eq(0)]
(df1_fraud.shape, df1_normal.shape)

((492, 19), (284315, 19))

In [20]:
# Run IsolationForest (seeded) on the normal rows only, then list the distinct
# labels it produced.
clf = IsolationForest(random_state=0)
res = clf.fit_predict(X=df1_normal)
np.unique(res)

array([-1,  1])

In [21]:
# Recode the detector's +1 label to 0 so the column reads as a class label;
# -1 keeps marking the rows the detector flagged.
res = np.where(res == 1, 0, res)

# assign() builds new frames instead of writing a column into frames that were
# obtained by filtering df1.
df1_normal = df1_normal.assign(Class=res)
df1_fraud = df1_fraud.assign(Class=1)

In [22]:
# Stack the normal rows (Class 0 or -1) and the fraud rows (Class 1).
frames = [df1_normal, df1_fraud]
result = pd.concat(frames, axis=0)

In [23]:
# Rows flagged by IsolationForest (Class == -1) go to isf_out, all others to
# isf_res; the last line shows how many rows were flagged.
isf_res = result.loc[result['Class'].ne(-1)]
isf_out = result.loc[result['Class'].eq(-1)]
isf_out.shape

(12259, 20)

## EllipticEnvelope

In [24]:
from sklearn.covariance import EllipticEnvelope

In [25]:
# Rebuild the fraud / normal subsets from df1 (without a 'Class' column).
df1_fraud = df1.loc[Target.eq(1)]
df1_normal = df1.loc[Target.eq(0)]
(df1_fraud.shape, df1_normal.shape)

((492, 19), (284315, 19))

In [26]:
# Run EllipticEnvelope (seeded) on the normal rows only, then list the distinct
# labels it produced.
clf = EllipticEnvelope(random_state=0)
res = clf.fit_predict(X=df1_normal)
np.unique(res)

array([-1,  1])

In [27]:
# Recode the detector's +1 label to 0 so the column reads as a class label;
# -1 keeps marking the rows the detector flagged.
res = np.where(res == 1, 0, res)

# assign() builds new frames instead of writing a column into frames that were
# obtained by filtering df1.
df1_normal = df1_normal.assign(Class=res)
df1_fraud = df1_fraud.assign(Class=1)

In [28]:
# Stack the normal rows (Class 0 or -1) and the fraud rows (Class 1).
frames = [df1_normal, df1_fraud]
result = pd.concat(frames, axis=0)

In [29]:
# Rows flagged by EllipticEnvelope (Class == -1) go to ee_out, all others to
# ee_res; the last line shows how many rows were flagged.
ee_res = result.loc[result['Class'].ne(-1)]
ee_out = result.loc[result['Class'].eq(-1)]
ee_out.shape

(28431, 20)

## OneClassSVM

In [30]:
from sklearn.svm import OneClassSVM

In [31]:
# Rebuild the fraud / normal subsets from df1 (without a 'Class' column).
df1_fraud = df1.loc[Target.eq(1)]
df1_normal = df1.loc[Target.eq(0)]
(df1_fraud.shape, df1_normal.shape)

((492, 19), (284315, 19))

In [32]:
# Run OneClassSVM with default settings on the normal rows only, then list the
# distinct labels it produced.
clf = OneClassSVM()
res = clf.fit_predict(X=df1_normal)
np.unique(res)

array([-1,  1], dtype=int64)

In [33]:
# Recode the detector's +1 label to 0 so the column reads as a class label;
# -1 keeps marking the rows the detector flagged.
res = np.where(res == 1, 0, res)

# assign() builds new frames instead of writing a column into frames that were
# obtained by filtering df1.
df1_normal = df1_normal.assign(Class=res)
df1_fraud = df1_fraud.assign(Class=1)

In [34]:
# Stack the normal rows (Class 0 or -1) and the fraud rows (Class 1).
frames = [df1_normal, df1_fraud]
result = pd.concat(frames, axis=0)

In [35]:
# Rows flagged by OneClassSVM (Class == -1) go to osvm_out, all others to
# osvm_res; the last line shows how many rows were flagged.
osvm_res = result.loc[result['Class'].ne(-1)]
osvm_out = result.loc[result['Class'].eq(-1)]
osvm_out.shape

(142159, 20)

In [36]:
# For every detector, separate the 'Class' column (kept as a one-column
# DataFrame) from the features, for both the retained rows (*_res / *_t) and
# the flagged rows (*_out / *_ot). The label is read before the name is
# rebound; drop() returns a new frame, so the filtered slices of `result`
# are never mutated in place.

# LocalOutlierFactor
lof_t = lof_res[['Class']]
lof_res = lof_res.drop(columns=['Class'])
lof_ot = lof_out[['Class']]
lof_out = lof_out.drop(columns=['Class'])

# IsolationForest
isf_t = isf_res[['Class']]
isf_res = isf_res.drop(columns=['Class'])
isf_ot = isf_out[['Class']]
isf_out = isf_out.drop(columns=['Class'])

# EllipticEnvelope
ee_t = ee_res[['Class']]
ee_res = ee_res.drop(columns=['Class'])
ee_ot = ee_out[['Class']]
ee_out = ee_out.drop(columns=['Class'])

# OneClassSVM
osvm_t = osvm_res[['Class']]
osvm_res = osvm_res.drop(columns=['Class'])
osvm_ot = osvm_out[['Class']]
osvm_out = osvm_out.drop(columns=['Class'])

# Balancing

In [37]:
from imblearn.under_sampling import RandomUnderSampler 
from imblearn.under_sampling import AllKNN 
from imblearn.under_sampling import NearMiss 
from imblearn.under_sampling import TomekLinks


In [None]:
# Randomly under-sample the current training split (sampling_strategy 5/95),
# then train and score against the current test split.
# X_train / y_train / X_test / y_test come from whichever split ran last.
ros = RandomUnderSampler(sampling_strategy=5/95, random_state=0)
X_resampled, y_resampled = ros.fit_resample(X_train, y_train)
sampling_c = create_model(
    "RandomUnderSampler", 5, X_resampled, y_resampled, X_test, y_test,
    d=X_train.shape[1], epochs=10, batch_size=500, sampling_c=sampling_c,
)

In [None]:
# Randomly under-sample the current training split (sampling_strategy 10/90),
# then train and score against the current test split.
ros = RandomUnderSampler(sampling_strategy=10/90, random_state=0)
X_resampled, y_resampled = ros.fit_resample(X_train, y_train)
sampling_c = create_model(
    "RandomUnderSampler", 10, X_resampled, y_resampled, X_test, y_test,
    d=X_train.shape[1], epochs=100, batch_size=500, sampling_c=sampling_c,
)

In [None]:
# Randomly under-sample the current training split (sampling_strategy 15/85),
# then train and score against the current test split.
ros = RandomUnderSampler(sampling_strategy=15/85, random_state=0)
X_resampled, y_resampled = ros.fit_resample(X_train, y_train)
sampling_c = create_model(
    "RandomUnderSampler", 15, X_resampled, y_resampled, X_test, y_test,
    d=X_train.shape[1], epochs=100, batch_size=500, sampling_c=sampling_c,
)

In [None]:
# Resample the current training split with TomekLinks (default settings),
# then train and score against the current test split.
ros = TomekLinks()
X_resampled, y_resampled = ros.fit_resample(X_train, y_train)
sampling_c = create_model(
    "TomekLinks", "-", X_resampled, y_resampled, X_test, y_test,
    d=X_train.shape[1], epochs=10, batch_size=500, sampling_c=sampling_c,
)

In [None]:
# Resample the current training split with AllKNN (default settings),
# then train and score against the current test split.
ros = AllKNN()
X_resampled, y_resampled = ros.fit_resample(X_train, y_train)
sampling_c = create_model(
    "AllKNN", "-", X_resampled, y_resampled, X_test, y_test,
    d=X_train.shape[1], epochs=10, batch_size=500, sampling_c=sampling_c,
)

In [None]:
# Under-sample the current training split with NearMiss (sampling_strategy 5/95),
# then train and score against the current test split.
ros = NearMiss(sampling_strategy=5/95)
X_resampled, y_resampled = ros.fit_resample(X_train, y_train)
sampling_c = create_model(
    "NearMiss", 5, X_resampled, y_resampled, X_test, y_test,
    d=X_train.shape[1], epochs=10, batch_size=500, sampling_c=sampling_c,
)

In [None]:
# Under-sample the current training split with NearMiss (sampling_strategy 10/90),
# then train and score against the current test split.
ros = NearMiss(sampling_strategy=10/90)
X_resampled, y_resampled = ros.fit_resample(X_train, y_train)
sampling_c = create_model(
    "NearMiss", 10, X_resampled, y_resampled, X_test, y_test,
    d=X_train.shape[1], epochs=20, batch_size=500, sampling_c=sampling_c,
)

In [None]:
# Under-sample the current training split with NearMiss (sampling_strategy 15/85),
# then train and score against the current test split.
ros = NearMiss(sampling_strategy=15/85)
X_resampled, y_resampled = ros.fit_resample(X_train, y_train)
sampling_c = create_model(
    "NearMiss", 15, X_resampled, y_resampled, X_test, y_test,
    d=X_train.shape[1], epochs=30, batch_size=500, sampling_c=sampling_c,
)

RandomUnderSampler

In [None]:
# 70/30 split of the rows IsolationForest kept, then random under-sampling
# of the training part (sampling_strategy 5/95).
X_train, X_test, y_train, y_test = train_test_split(
    isf_res, isf_t, test_size=0.3, random_state=0
)
ros = RandomUnderSampler(sampling_strategy=5/95, random_state=0)
X_resampled, y_resampled = ros.fit_resample(X_train, y_train)

# Add the rows IsolationForest flagged to the training set, relabelled -1 -> 0.
X_resampled = pd.concat([X_resampled, isf_out])
y_resampled = pd.concat([y_resampled, isf_ot])
y_resampled[y_resampled == -1] = 0

sampling_c = create_model(
    "RandomUnderSampler + isf", 5,
    X_resampled.to_numpy(), y_resampled,
    X_test.to_numpy(), y_test,
    d=X_train.shape[1], epochs=35, batch_size=500, sampling_c=sampling_c,
)

In [None]:
# 70/30 split of the rows IsolationForest kept, then random under-sampling
# of the training part (sampling_strategy 10/90).
X_train, X_test, y_train, y_test = train_test_split(
    isf_res, isf_t, test_size=0.3, random_state=0
)
ros = RandomUnderSampler(sampling_strategy=10/90, random_state=0)
X_resampled, y_resampled = ros.fit_resample(X_train, y_train)

# Add the rows IsolationForest flagged to the training set, relabelled -1 -> 0.
# The IsolationForest outliers (isf_out / isf_ot) are used so that they match
# both the split above and the run label.
X_resampled = pd.concat([X_resampled, isf_out])
y_resampled = pd.concat([y_resampled, isf_ot])
y_resampled[y_resampled == -1] = 0

sampling_c = create_model(
    "RandomUnderSampler + isf", 10,
    X_resampled.to_numpy(), y_resampled,
    X_test.to_numpy(), y_test,
    d=X_train.shape[1], epochs=5, batch_size=500, sampling_c=sampling_c,
)

In [None]:
# 70/30 split of the rows IsolationForest kept, then random under-sampling
# of the training part (sampling_strategy 15/85).
X_train, X_test, y_train, y_test = train_test_split(
    isf_res, isf_t, test_size=0.3, random_state=0
)
ros = RandomUnderSampler(sampling_strategy=15/85, random_state=0)
X_resampled, y_resampled = ros.fit_resample(X_train, y_train)

# Add the rows IsolationForest flagged to the training set, relabelled -1 -> 0.
# The IsolationForest outliers (isf_out / isf_ot) are used so that they match
# both the split above and the run label.
X_resampled = pd.concat([X_resampled, isf_out])
y_resampled = pd.concat([y_resampled, isf_ot])
y_resampled[y_resampled == -1] = 0

sampling_c = create_model(
    "RandomUnderSampler + isf", 15,
    X_resampled.to_numpy(), y_resampled,
    X_test.to_numpy(), y_test,
    d=X_train.shape[1], epochs=5, batch_size=500, sampling_c=sampling_c,
)

In [None]:
# 70/30 split of the rows LocalOutlierFactor kept, then random under-sampling
# of the training part (sampling_strategy 5/95).
X_train, X_test, y_train, y_test = train_test_split(
    lof_res, lof_t, test_size=0.3, random_state=0
)
ros = RandomUnderSampler(sampling_strategy=5/95, random_state=0)
X_resampled, y_resampled = ros.fit_resample(X_train, y_train)

# Add the rows LocalOutlierFactor flagged to the training set, relabelled -1 -> 0.
X_resampled = pd.concat([X_resampled, lof_out])
y_resampled = pd.concat([y_resampled, lof_ot])
y_resampled[y_resampled == -1] = 0

sampling_c = create_model(
    "RandomUnderSampler + lof", 5,
    X_resampled.to_numpy(), y_resampled,
    X_test.to_numpy(), y_test,
    d=X_train.shape[1], epochs=10, batch_size=500, sampling_c=sampling_c,
)

In [None]:
# 70/30 split of the rows LocalOutlierFactor kept, then random under-sampling
# of the training part (sampling_strategy 10/90).
X_train, X_test, y_train, y_test = train_test_split(
    lof_res, lof_t, test_size=0.3, random_state=0
)
ros = RandomUnderSampler(sampling_strategy=10/90, random_state=0)
X_resampled, y_resampled = ros.fit_resample(X_train, y_train)

# Add the rows LocalOutlierFactor flagged to the training set, relabelled -1 -> 0.
X_resampled = pd.concat([X_resampled, lof_out])
y_resampled = pd.concat([y_resampled, lof_ot])
y_resampled[y_resampled == -1] = 0

sampling_c = create_model(
    "RandomUnderSampler + lof", 10,
    X_resampled.to_numpy(), y_resampled,
    X_test.to_numpy(), y_test,
    d=X_train.shape[1], epochs=5, batch_size=500, sampling_c=sampling_c,
)

In [None]:
# 70/30 split of the rows LocalOutlierFactor kept, then random under-sampling
# of the training part (sampling_strategy 15/85).
X_train, X_test, y_train, y_test = train_test_split(
    lof_res, lof_t, test_size=0.3, random_state=0
)
ros = RandomUnderSampler(sampling_strategy=15/85, random_state=0)
X_resampled, y_resampled = ros.fit_resample(X_train, y_train)

# Add the rows LocalOutlierFactor flagged to the training set, relabelled -1 -> 0.
X_resampled = pd.concat([X_resampled, lof_out])
y_resampled = pd.concat([y_resampled, lof_ot])
y_resampled[y_resampled == -1] = 0

sampling_c = create_model(
    "RandomUnderSampler + lof", 15,
    X_resampled.to_numpy(), y_resampled,
    X_test.to_numpy(), y_test,
    d=X_train.shape[1], epochs=5, batch_size=500, sampling_c=sampling_c,
)

In [None]:
# 70/30 split of the rows EllipticEnvelope kept, then random under-sampling
# of the training part (sampling_strategy 5/95).
X_train, X_test, y_train, y_test = train_test_split(
    ee_res, ee_t, test_size=0.3, random_state=0
)
ros = RandomUnderSampler(sampling_strategy=5/95, random_state=0)
X_resampled, y_resampled = ros.fit_resample(X_train, y_train)

# Add the rows EllipticEnvelope flagged to the training set, relabelled -1 -> 0.
X_resampled = pd.concat([X_resampled, ee_out])
y_resampled = pd.concat([y_resampled, ee_ot])
y_resampled[y_resampled == -1] = 0

sampling_c = create_model(
    "RandomUnderSampler + EllipticEnvelope", 5,
    X_resampled.to_numpy(), y_resampled,
    X_test.to_numpy(), y_test,
    d=X_train.shape[1], epochs=10, batch_size=500, sampling_c=sampling_c,
)

In [None]:
# 70/30 split of the rows EllipticEnvelope kept, then random under-sampling
# of the training part (sampling_strategy 10/90).
X_train, X_test, y_train, y_test = train_test_split(
    ee_res, ee_t, test_size=0.3, random_state=0
)
ros = RandomUnderSampler(sampling_strategy=10/90, random_state=0)
X_resampled, y_resampled = ros.fit_resample(X_train, y_train)

# Add the rows EllipticEnvelope flagged to the training set, relabelled -1 -> 0.
X_resampled = pd.concat([X_resampled, ee_out])
y_resampled = pd.concat([y_resampled, ee_ot])
y_resampled[y_resampled == -1] = 0

sampling_c = create_model(
    "RandomUnderSampler + EllipticEnvelope", 10,
    X_resampled.to_numpy(), y_resampled,
    X_test.to_numpy(), y_test,
    d=X_train.shape[1], epochs=5, batch_size=500, sampling_c=sampling_c,
)

In [None]:
# 70/30 split of the rows EllipticEnvelope kept, then random under-sampling
# of the training part (sampling_strategy 15/85).
X_train, X_test, y_train, y_test = train_test_split(
    ee_res, ee_t, test_size=0.3, random_state=0
)
ros = RandomUnderSampler(sampling_strategy=15/85, random_state=0)
X_resampled, y_resampled = ros.fit_resample(X_train, y_train)

# Add the rows EllipticEnvelope flagged to the training set, relabelled -1 -> 0.
X_resampled = pd.concat([X_resampled, ee_out])
y_resampled = pd.concat([y_resampled, ee_ot])
y_resampled[y_resampled == -1] = 0

sampling_c = create_model(
    "RandomUnderSampler + EllipticEnvelope", 15,
    X_resampled.to_numpy(), y_resampled,
    X_test.to_numpy(), y_test,
    d=X_train.shape[1], epochs=5, batch_size=500, sampling_c=sampling_c,
)

In [None]:
# 70/30 split of the rows OneClassSVM kept, then random under-sampling
# of the training part (sampling_strategy 5/95).
X_train, X_test, y_train, y_test = train_test_split(
    osvm_res, osvm_t, test_size=0.3, random_state=0
)
ros = RandomUnderSampler(sampling_strategy=5/95, random_state=0)
X_resampled, y_resampled = ros.fit_resample(X_train, y_train)

# Add the rows OneClassSVM flagged to the training set, relabelled -1 -> 0.
X_resampled = pd.concat([X_resampled, osvm_out])
y_resampled = pd.concat([y_resampled, osvm_ot])
y_resampled[y_resampled == -1] = 0

sampling_c = create_model(
    "RandomUnderSampler + ocsvm", 5,
    X_resampled.to_numpy(), y_resampled,
    X_test.to_numpy(), y_test,
    d=X_train.shape[1], epochs=10, batch_size=500, sampling_c=sampling_c,
)

In [None]:
# 70/30 split of the rows OneClassSVM kept, then random under-sampling
# of the training part (sampling_strategy 10/90).
X_train, X_test, y_train, y_test = train_test_split(
    osvm_res, osvm_t, test_size=0.3, random_state=0
)
ros = RandomUnderSampler(sampling_strategy=10/90, random_state=0)
X_resampled, y_resampled = ros.fit_resample(X_train, y_train)

# Add the rows OneClassSVM flagged to the training set, relabelled -1 -> 0.
X_resampled = pd.concat([X_resampled, osvm_out])
y_resampled = pd.concat([y_resampled, osvm_ot])
y_resampled[y_resampled == -1] = 0

sampling_c = create_model(
    "RandomUnderSampler + ocsvm", 10,
    X_resampled.to_numpy(), y_resampled,
    X_test.to_numpy(), y_test,
    d=X_train.shape[1], epochs=5, batch_size=500, sampling_c=sampling_c,
)

In [None]:
# 70/30 split of the rows OneClassSVM kept, then random under-sampling
# of the training part (sampling_strategy 15/85).
X_train, X_test, y_train, y_test = train_test_split(
    osvm_res, osvm_t, test_size=0.3, random_state=0
)
ros = RandomUnderSampler(sampling_strategy=15/85, random_state=0)
X_resampled, y_resampled = ros.fit_resample(X_train, y_train)

# Add the rows OneClassSVM flagged to the training set, relabelled -1 -> 0.
X_resampled = pd.concat([X_resampled, osvm_out])
y_resampled = pd.concat([y_resampled, osvm_ot])
y_resampled[y_resampled == -1] = 0

sampling_c = create_model(
    "RandomUnderSampler + ocsvm", 15,
    X_resampled.to_numpy(), y_resampled,
    X_test.to_numpy(), y_test,
    d=X_train.shape[1], epochs=5, batch_size=500, sampling_c=sampling_c,
)

NearMiss

In [None]:
# 70/30 split of the rows IsolationForest kept, then NearMiss under-sampling
# of the training part (sampling_strategy 5/95).
X_train, X_test, y_train, y_test = train_test_split(
    isf_res, isf_t, test_size=0.3, random_state=0
)
ros = NearMiss(sampling_strategy=5/95)
X_resampled, y_resampled = ros.fit_resample(X_train, y_train)

# Add the rows IsolationForest flagged to the training set, relabelled -1 -> 0.
X_resampled = pd.concat([X_resampled, isf_out])
y_resampled = pd.concat([y_resampled, isf_ot])
y_resampled[y_resampled == -1] = 0

sampling_c = create_model(
    "NearMiss + isf", 5,
    X_resampled.to_numpy(), y_resampled,
    X_test.to_numpy(), y_test,
    d=X_train.shape[1], epochs=50, batch_size=500, sampling_c=sampling_c,
)

In [None]:
# 70/30 split of the rows IsolationForest kept, then NearMiss under-sampling
# of the training part (sampling_strategy 10/90).
X_train, X_test, y_train, y_test = train_test_split(
    isf_res, isf_t, test_size=0.3, random_state=0
)
ros = NearMiss(sampling_strategy=10/90)
X_resampled, y_resampled = ros.fit_resample(X_train, y_train)

# Add the rows IsolationForest flagged to the training set, relabelled -1 -> 0.
# The IsolationForest outliers (isf_out / isf_ot) are used so that they match
# both the split above and the run label.
X_resampled = pd.concat([X_resampled, isf_out])
y_resampled = pd.concat([y_resampled, isf_ot])
y_resampled[y_resampled == -1] = 0

sampling_c = create_model(
    "NearMiss + isf", 10,
    X_resampled.to_numpy(), y_resampled,
    X_test.to_numpy(), y_test,
    d=X_train.shape[1], epochs=50, batch_size=500, sampling_c=sampling_c,
)

In [None]:
# 70/30 split of the rows IsolationForest kept, then NearMiss under-sampling
# of the training part (sampling_strategy 15/85).
X_train, X_test, y_train, y_test = train_test_split(
    isf_res, isf_t, test_size=0.3, random_state=0
)
ros = NearMiss(sampling_strategy=15/85)
X_resampled, y_resampled = ros.fit_resample(X_train, y_train)

# Add the rows IsolationForest flagged to the training set, relabelled -1 -> 0.
# The IsolationForest outliers (isf_out / isf_ot) are used so that they match
# both the split above and the run label.
X_resampled = pd.concat([X_resampled, isf_out])
y_resampled = pd.concat([y_resampled, isf_ot])
y_resampled[y_resampled == -1] = 0

sampling_c = create_model(
    "NearMiss + isf", 15,
    X_resampled.to_numpy(), y_resampled,
    X_test.to_numpy(), y_test,
    d=X_train.shape[1], epochs=50, batch_size=500, sampling_c=sampling_c,
)

In [None]:
# 70/30 split of the rows LocalOutlierFactor kept, then NearMiss under-sampling
# of the training part (sampling_strategy 5/95).
X_train, X_test, y_train, y_test = train_test_split(
    lof_res, lof_t, test_size=0.3, random_state=0
)
ros = NearMiss(sampling_strategy=5/95)
X_resampled, y_resampled = ros.fit_resample(X_train, y_train)

# Add the rows LocalOutlierFactor flagged to the training set, relabelled -1 -> 0.
X_resampled = pd.concat([X_resampled, lof_out])
y_resampled = pd.concat([y_resampled, lof_ot])
y_resampled[y_resampled == -1] = 0

sampling_c = create_model(
    "NearMiss + lof", 5,
    X_resampled.to_numpy(), y_resampled,
    X_test.to_numpy(), y_test,
    d=X_train.shape[1], epochs=10, batch_size=500, sampling_c=sampling_c,
)

In [None]:
# 70/30 split of the rows LocalOutlierFactor kept, then NearMiss under-sampling
# of the training part (sampling_strategy 10/90).
X_train, X_test, y_train, y_test = train_test_split(
    lof_res, lof_t, test_size=0.3, random_state=0
)
ros = NearMiss(sampling_strategy=10/90)
X_resampled, y_resampled = ros.fit_resample(X_train, y_train)

# Add the rows LocalOutlierFactor flagged to the training set, relabelled -1 -> 0.
X_resampled = pd.concat([X_resampled, lof_out])
y_resampled = pd.concat([y_resampled, lof_ot])
y_resampled[y_resampled == -1] = 0

sampling_c = create_model(
    "NearMiss + lof", 10,
    X_resampled.to_numpy(), y_resampled,
    X_test.to_numpy(), y_test,
    d=X_train.shape[1], epochs=10, batch_size=500, sampling_c=sampling_c,
)

In [None]:
# 70/30 split of the rows LocalOutlierFactor kept, then NearMiss under-sampling
# of the training part (sampling_strategy 15/85).
X_train, X_test, y_train, y_test = train_test_split(
    lof_res, lof_t, test_size=0.3, random_state=0
)
ros = NearMiss(sampling_strategy=15/85)
X_resampled, y_resampled = ros.fit_resample(X_train, y_train)

# Add the rows LocalOutlierFactor flagged to the training set, relabelled -1 -> 0.
X_resampled = pd.concat([X_resampled, lof_out])
y_resampled = pd.concat([y_resampled, lof_ot])
y_resampled[y_resampled == -1] = 0

sampling_c = create_model(
    "NearMiss + lof", 15,
    X_resampled.to_numpy(), y_resampled,
    X_test.to_numpy(), y_test,
    d=X_train.shape[1], epochs=10, batch_size=500, sampling_c=sampling_c,
)

In [None]:
# 70/30 split of the rows EllipticEnvelope kept, then NearMiss under-sampling
# of the training part (sampling_strategy 5/95).
X_train, X_test, y_train, y_test = train_test_split(
    ee_res, ee_t, test_size=0.3, random_state=0
)
ros = NearMiss(sampling_strategy=5/95)
X_resampled, y_resampled = ros.fit_resample(X_train, y_train)

# Add the rows EllipticEnvelope flagged to the training set, relabelled -1 -> 0.
X_resampled = pd.concat([X_resampled, ee_out])
y_resampled = pd.concat([y_resampled, ee_ot])
y_resampled[y_resampled == -1] = 0

# NOTE(review): batch_size is 5000 here while the other runs use 500 —
# confirm this is intended.
sampling_c = create_model(
    "NearMiss + EllipticEnvelope", 5,
    X_resampled.to_numpy(), y_resampled,
    X_test.to_numpy(), y_test,
    d=X_train.shape[1], epochs=50, batch_size=5000, sampling_c=sampling_c,
)

In [None]:
# 70/30 split of the rows EllipticEnvelope kept, then NearMiss under-sampling
# of the training part (sampling_strategy 10/90).
X_train, X_test, y_train, y_test = train_test_split(
    ee_res, ee_t, test_size=0.3, random_state=0
)
ros = NearMiss(sampling_strategy=10/90)
X_resampled, y_resampled = ros.fit_resample(X_train, y_train)

# Add the rows EllipticEnvelope flagged to the training set, relabelled -1 -> 0.
X_resampled = pd.concat([X_resampled, ee_out])
y_resampled = pd.concat([y_resampled, ee_ot])
y_resampled[y_resampled == -1] = 0

sampling_c = create_model(
    "NearMiss + EllipticEnvelope", 10,
    X_resampled.to_numpy(), y_resampled,
    X_test.to_numpy(), y_test,
    d=X_train.shape[1], epochs=20, batch_size=500, sampling_c=sampling_c,
)

In [None]:
# 70/30 split of the rows EllipticEnvelope kept, then NearMiss under-sampling
# of the training part (sampling_strategy 15/85).
X_train, X_test, y_train, y_test = train_test_split(
    ee_res, ee_t, test_size=0.3, random_state=0
)
ros = NearMiss(sampling_strategy=15/85)
X_resampled, y_resampled = ros.fit_resample(X_train, y_train)

# Add the rows EllipticEnvelope flagged to the training set, relabelled -1 -> 0.
X_resampled = pd.concat([X_resampled, ee_out])
y_resampled = pd.concat([y_resampled, ee_ot])
y_resampled[y_resampled == -1] = 0

sampling_c = create_model(
    "NearMiss + EllipticEnvelope", 15,
    X_resampled.to_numpy(), y_resampled,
    X_test.to_numpy(), y_test,
    d=X_train.shape[1], epochs=20, batch_size=500, sampling_c=sampling_c,
)

In [None]:
# 70/30 split of the rows OneClassSVM kept, then NearMiss under-sampling
# of the training part (sampling_strategy 5/95).
X_train, X_test, y_train, y_test = train_test_split(
    osvm_res, osvm_t, test_size=0.3, random_state=0
)
ros = NearMiss(sampling_strategy=5/95)
X_resampled, y_resampled = ros.fit_resample(X_train, y_train)

# Add the rows OneClassSVM flagged to the training set, relabelled -1 -> 0.
X_resampled = pd.concat([X_resampled, osvm_out])
y_resampled = pd.concat([y_resampled, osvm_ot])
y_resampled[y_resampled == -1] = 0

sampling_c = create_model(
    "NearMiss + ocsvm", 5,
    X_resampled.to_numpy(), y_resampled,
    X_test.to_numpy(), y_test,
    d=X_train.shape[1], epochs=10, batch_size=500, sampling_c=sampling_c,
)

In [None]:
# 70/30 split of the rows OneClassSVM kept, then NearMiss under-sampling
# of the training part. The ratio is 10/90, the same one every other
# "10" run in this notebook uses.
X_train, X_test, y_train, y_test = train_test_split(
    osvm_res, osvm_t, test_size=0.3, random_state=0
)
ros = NearMiss(sampling_strategy=10/90)
X_resampled, y_resampled = ros.fit_resample(X_train, y_train)

# Add the rows OneClassSVM flagged to the training set, relabelled -1 -> 0.
X_resampled = pd.concat([X_resampled, osvm_out])
y_resampled = pd.concat([y_resampled, osvm_ot])
y_resampled[y_resampled == -1] = 0

sampling_c = create_model(
    "NearMiss + ocsvm", 10,
    X_resampled.to_numpy(), y_resampled,
    X_test.to_numpy(), y_test,
    d=X_train.shape[1], epochs=5, batch_size=500, sampling_c=sampling_c,
)

In [None]:
# 70/30 split of the rows OneClassSVM kept, then NearMiss under-sampling
# of the training part (sampling_strategy 15/85).
X_train, X_test, y_train, y_test = train_test_split(
    osvm_res, osvm_t, test_size=0.3, random_state=0
)
ros = NearMiss(sampling_strategy=15/85)
X_resampled, y_resampled = ros.fit_resample(X_train, y_train)

# Add the rows OneClassSVM flagged to the training set, relabelled -1 -> 0.
X_resampled = pd.concat([X_resampled, osvm_out])
y_resampled = pd.concat([y_resampled, osvm_ot])
y_resampled[y_resampled == -1] = 0

# The recorded 'fraud percent' is 15, matching the 15/85 ratio used above.
sampling_c = create_model(
    "NearMiss + ocsvm", 15,
    X_resampled.to_numpy(), y_resampled,
    X_test.to_numpy(), y_test,
    d=X_train.shape[1], epochs=5, batch_size=500, sampling_c=sampling_c,
)

In [None]:
# Write the metrics gathered so far to disk (CSV and Excel), leaving out the
# row index.
sampling_res.to_csv(path_or_buf='LSTM-sampling.csv', index=False)
sampling_res.to_excel(excel_writer='LSTM-sampling.xlsx', index=False)

TomekLinks

In [None]:
# 70/30 split of the rows IsolationForest kept, then TomekLinks resampling
# (default settings) of the training part.
X_train, X_test, y_train, y_test = train_test_split(
    isf_res, isf_t, test_size=0.3, random_state=0
)
ros = TomekLinks()
X_resampled, y_resampled = ros.fit_resample(X_train, y_train)

# Add the rows IsolationForest flagged to the training set, relabelled -1 -> 0.
X_resampled = pd.concat([X_resampled, isf_out])
y_resampled = pd.concat([y_resampled, isf_ot])
y_resampled[y_resampled == -1] = 0

sampling_c = create_model(
    "TomekLinks + isf", "-",
    X_resampled.to_numpy(), y_resampled,
    X_test.to_numpy(), y_test,
    d=X_train.shape[1], epochs=10, batch_size=500, sampling_c=sampling_c,
)

In [None]:
# 70/30 split of the rows LocalOutlierFactor kept, then TomekLinks resampling
# (default settings) of the training part.
X_train, X_test, y_train, y_test = train_test_split(
    lof_res, lof_t, test_size=0.3, random_state=0
)
ros = TomekLinks()
X_resampled, y_resampled = ros.fit_resample(X_train, y_train)

# Add the rows LocalOutlierFactor flagged to the training set, relabelled -1 -> 0.
X_resampled = pd.concat([X_resampled, lof_out])
y_resampled = pd.concat([y_resampled, lof_ot])
y_resampled[y_resampled == -1] = 0

sampling_c = create_model(
    "TomekLinks + lof", "-",
    X_resampled.to_numpy(), y_resampled,
    X_test.to_numpy(), y_test,
    d=X_train.shape[1], epochs=10, batch_size=500, sampling_c=sampling_c,
)

In [None]:
# 70/30 split of the rows EllipticEnvelope kept, then TomekLinks resampling
# (default settings) of the training part.
X_train, X_test, y_train, y_test = train_test_split(
    ee_res, ee_t, test_size=0.3, random_state=0
)
ros = TomekLinks()
X_resampled, y_resampled = ros.fit_resample(X_train, y_train)

# Add the rows EllipticEnvelope flagged to the training set, relabelled -1 -> 0.
X_resampled = pd.concat([X_resampled, ee_out])
y_resampled = pd.concat([y_resampled, ee_ot])
y_resampled[y_resampled == -1] = 0

sampling_c = create_model(
    "TomekLinks + EllipticEnvelope", "-",
    X_resampled.to_numpy(), y_resampled,
    X_test.to_numpy(), y_test,
    d=X_train.shape[1], epochs=10, batch_size=500, sampling_c=sampling_c,
)

In [None]:
# 70/30 split of the rows OneClassSVM kept, then TomekLinks resampling
# (default settings) of the training part.
X_train, X_test, y_train, y_test = train_test_split(
    osvm_res, osvm_t, test_size=0.3, random_state=0
)
ros = TomekLinks()
X_resampled, y_resampled = ros.fit_resample(X_train, y_train)

# Add the rows OneClassSVM flagged to the training set, relabelled -1 -> 0.
X_resampled = pd.concat([X_resampled, osvm_out])
y_resampled = pd.concat([y_resampled, osvm_ot])
y_resampled[y_resampled == -1] = 0

sampling_c = create_model(
    "TomekLinks + ocsvm", "-",
    X_resampled.to_numpy(), y_resampled,
    X_test.to_numpy(), y_test,
    d=X_train.shape[1], epochs=10, batch_size=500, sampling_c=sampling_c,
)

AllKNN

In [None]:
# 70/30 split of the rows IsolationForest kept, then AllKNN resampling
# (default settings) of the training part.
X_train, X_test, y_train, y_test = train_test_split(
    isf_res, isf_t, test_size=0.3, random_state=0
)
ros = AllKNN()
X_resampled, y_resampled = ros.fit_resample(X_train, y_train)

# Add the rows IsolationForest flagged to the training set, relabelled -1 -> 0.
X_resampled = pd.concat([X_resampled, isf_out])
y_resampled = pd.concat([y_resampled, isf_ot])
y_resampled[y_resampled == -1] = 0

sampling_c = create_model(
    "AllKNN + isf", "-",
    X_resampled.to_numpy(), y_resampled,
    X_test.to_numpy(), y_test,
    d=X_train.shape[1], epochs=10, batch_size=500, sampling_c=sampling_c,
)

In [None]:
# 70/30 split of the rows LocalOutlierFactor kept, then AllKNN resampling
# (default settings) of the training part.
X_train, X_test, y_train, y_test = train_test_split(
    lof_res, lof_t, test_size=0.3, random_state=0
)
ros = AllKNN()
X_resampled, y_resampled = ros.fit_resample(X_train, y_train)

# Add the rows LocalOutlierFactor flagged to the training set, relabelled -1 -> 0.
X_resampled = pd.concat([X_resampled, lof_out])
y_resampled = pd.concat([y_resampled, lof_ot])
y_resampled[y_resampled == -1] = 0

sampling_c = create_model(
    "AllKNN + lof", "-",
    X_resampled.to_numpy(), y_resampled,
    X_test.to_numpy(), y_test,
    d=X_train.shape[1], epochs=10, batch_size=500, sampling_c=sampling_c,
)

In [None]:
# 70/30 split of the rows EllipticEnvelope kept, then AllKNN resampling
# (default settings) of the training part.
X_train, X_test, y_train, y_test = train_test_split(
    ee_res, ee_t, test_size=0.3, random_state=0
)
ros = AllKNN()
X_resampled, y_resampled = ros.fit_resample(X_train, y_train)

# Add the rows EllipticEnvelope flagged to the training set, relabelled -1 -> 0.
X_resampled = pd.concat([X_resampled, ee_out])
y_resampled = pd.concat([y_resampled, ee_ot])
y_resampled[y_resampled == -1] = 0

sampling_c = create_model(
    "AllKNN + EllipticEnvelope", "-",
    X_resampled.to_numpy(), y_resampled,
    X_test.to_numpy(), y_test,
    d=X_train.shape[1], epochs=10, batch_size=500, sampling_c=sampling_c,
)

In [None]:
# 70/30 split of the rows OneClassSVM kept, then AllKNN resampling
# (default settings) of the training part.
X_train, X_test, y_train, y_test = train_test_split(
    osvm_res, osvm_t, test_size=0.3, random_state=0
)
ros = AllKNN()
X_resampled, y_resampled = ros.fit_resample(X_train, y_train)

# Add the rows OneClassSVM flagged to the training set, relabelled -1 -> 0.
X_resampled = pd.concat([X_resampled, osvm_out])
y_resampled = pd.concat([y_resampled, osvm_ot])
y_resampled[y_resampled == -1] = 0

sampling_c = create_model(
    "AllKNN + ocsvm", "-",
    X_resampled.to_numpy(), y_resampled,
    X_test.to_numpy(), y_test,
    d=X_train.shape[1], epochs=10, batch_size=500, sampling_c=sampling_c,
)

In [None]:
# Write the complete metrics table to disk (CSV and Excel), leaving out the
# row index; this overwrites any earlier file with the same name.
sampling_res.to_csv(path_or_buf='LSTM-sampling.csv', index=False)
sampling_res.to_excel(excel_writer='LSTM-sampling.xlsx', index=False)