In [7]:
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
from pylab import rcParams
from sklearn.metrics import make_scorer
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.preprocessing import MinMaxScaler
from sklearn.svm import OneClassSVM

# Default figure size (width, height) applied to every matplotlib figure below.
rcParams['figure.figsize'] = (14, 8)
from sklearn.metrics import (confusion_matrix, precision_recall_curve, classification_report, recall_score)

# Preprocessing

In [8]:
def process_and_split_svm_data(sample, csv_path="../input/creditcard.csv"):
    """Load the transaction data and build a one-class train/test split.

    The ``Time`` column is dropped, the rows are optionally subsampled and
    then split into a training and a test part.  The features are min-max
    scaled with a scaler that is fitted on the training rows only, so no
    statistics of the held-out rows leak into the preprocessing.  Only the
    training rows with ``Class == 0`` are returned for training; the test
    part keeps every row.

    Parameters
    ----------
    sample : bool
        If truthy, work on a reproducible 20% sample of the rows
        (``random_state=5``) instead of the whole file.
    csv_path : str, optional
        Location of the CSV file.  It must contain ``Time`` and ``Class``
        columns.

    Returns
    -------
    X_train_good : pandas.DataFrame
        Scaled training features of the rows whose ``Class`` is 0.
    y_train_good : pandas.Series
        Labels belonging to ``X_train_good`` (all 0.0).
    X_test_ : pandas.DataFrame
        Scaled test features.  They are transformed with the training
        minimum/maximum, so individual values may fall outside [0, 1].
    y_test_ : pandas.Series
        Test labels as floats (0.0 / 1.0).
    """
    dataframe = pd.read_csv(csv_path).drop(columns='Time')
    if sample:
        dataframe = dataframe.sample(frac=0.2, random_state=5)

    # Labels are handed out as floats (0.0 / 1.0); they are never scaled.
    target = dataframe["Class"].astype(float)
    features = dataframe.drop(columns="Class")
    X_train_, X_test_, y_train_, y_test_ = train_test_split(
        features, target, test_size=0.33, random_state=42)

    # Fit the scaler on the training rows only and reuse it for the test rows;
    # fitting on the full table would leak test-set min/max into training.
    scaler = MinMaxScaler().fit(X_train_)
    X_train_ = pd.DataFrame(scaler.transform(X_train_),
                            columns=X_train_.columns, index=X_train_.index)
    X_test_ = pd.DataFrame(scaler.transform(X_test_),
                           columns=X_test_.columns, index=X_test_.index)

    # One-class training uses the normal (Class == 0) rows exclusively.
    is_normal = y_train_ == 0
    X_train_good = X_train_[is_normal]
    y_train_good = y_train_[is_normal]
    return X_train_good, y_train_good, X_test_, y_test_

In [9]:
# Work on the 20% sample (sample=True) for the model-selection steps below.
X_train, y_train, X_test, y_test = process_and_split_svm_data(sample=True)

# Building the model

In [11]:
def one_class_get_best_params(X_train_good, y_train_good):
    """Grid-search ``OneClassSVM`` hyper-parameters with 2-fold cross-validation.

    ``OneClassSVM.predict`` answers +1 for inliers and -1 for outliers, while
    the incoming labels use 0 for normal rows.  The labels are therefore
    translated to the +1/-1 convention before scoring.  Comparing raw 0/1
    labels with +1/-1 predictions can never produce a match, which makes
    every candidate score 0 and the selected parameters arbitrary.

    The score of a candidate is the recall of the inlier class (+1) on the
    held-out fold, i.e. the share of normal rows that the model accepts.

    Parameters
    ----------
    X_train_good : array-like of shape (n_samples, n_features)
        Training features.
    y_train_good : array-like of shape (n_samples,)
        Labels where 0 marks a normal row; any other value is treated as an
        outlier.

    Returns
    -------
    tuple
        ``(best_params_, best_estimator_)`` of the fitted ``GridSearchCV``;
        the estimator is refitted on all of ``X_train_good``.
    """
    params_grid = {'kernel': ['linear', 'rbf'],
                   'gamma': ['scale', 'auto'],
                   'nu': [0.01, 0.1, 0.5]}

    # Translate 0 (normal) -> +1 and everything else -> -1 so the labels live
    # in the same space as OneClassSVM's predictions.
    y_svm = np.where(np.asarray(y_train_good) == 0, 1, -1)

    grid_cv = GridSearchCV(OneClassSVM(),
                           params_grid,
                           scoring=make_scorer(recall_score, pos_label=1),
                           refit=True,
                           cv=2)

    grid_cv.fit(X_train_good, y_svm)
    return grid_cv.best_params_, grid_cv.best_estimator_

In [12]:
# Run the grid search on the sampled, normal-only training rows.
params, estimator = one_class_get_best_params(X_train, y_train)


KeyboardInterrupt



In [None]:
# Show the hyper-parameters selected by the grid search.
params

In [None]:
# Show the refitted best estimator returned by the grid search.
estimator

In [None]:
# Prediction and result visualization

In [10]:
def confusion_heatmap(y_test_param, prediction_param):
    """Draw an annotated 2x2 confusion-matrix heatmap for normal/fraud labels.

    Every cell shows its name (True Neg, False Pos, False Neg, True Pos), its
    absolute count and its share of all samples.  Rows are the actual
    classes, columns the predicted ones.

    Parameters
    ----------
    y_test_param : array-like
        True labels encoded as 0 (normal) / 1 (fraud); float values such as
        0.0 / 1.0 are accepted.
    prediction_param : array-like
        Predicted labels in the same encoding.
    """
    y_true = np.asarray(y_test_param).astype(int)
    y_pred = np.asarray(prediction_param).astype(int)

    # Pin the label set so the matrix is always 2x2; without it a class that
    # is absent from both inputs shrinks the matrix and the reshape below fails.
    cf_matrix = confusion_matrix(y_true, y_pred, labels=[0, 1])

    flat_counts = cf_matrix.flatten()
    total = flat_counts.sum()
    # Avoid a 0/0 division when there are no samples at all.
    shares = flat_counts / total if total else np.zeros(flat_counts.shape)

    group_names = ['True Neg', 'False Pos', 'False Neg', 'True Pos']
    group_counts = ["{0:0.0f}".format(value) for value in flat_counts]
    group_percentages = ["{0:.2%}".format(value) for value in shares]
    labels = [f"{v1}\n{v2}\n{v3}" for v1, v2, v3 in
              zip(group_names, group_counts, group_percentages)]
    labels = np.asarray(labels).reshape(2, 2)

    categories = ['Normal', 'Fraud']
    ax = sns.heatmap(cf_matrix, annot=labels, fmt='', cmap='Blues',
                     xticklabels=categories, yticklabels=categories)
    ax.set_xlabel('Predicted')
    ax.set_ylabel('Actual')

In [None]:
# OneClassSVM answers +1 (inlier) / -1 (outlier); (1 - p) / 2 remaps that
# to 0 = normal, 1 = fraud so it lines up with the dataset labels.
prediction2 = (1 - estimator.predict(X_test)) / 2
confusion_heatmap(y_test, prediction2)

In [None]:
# Rebuild the split from the complete dataset (no subsampling).
X_train_full, y_train_full, X_test_full, y_test_full = process_and_split_svm_data(sample=False)

In [None]:
# Refit on the full normal-only training rows using the selected parameters.
est = OneClassSVM(**params)
est.fit(X_train_full)

# Remap the +1 / -1 output to 0 = normal, 1 = fraud before reporting.
pred_new = est.predict(X_test_full)
pred2_new = (1 - pred_new) / 2
print(classification_report(y_test_full, pred2_new))

In [None]:
# Confusion matrix of the full-data model on the full test split.
confusion_heatmap(y_test_full, pred2_new)

In [None]:
def visualize_curve(y_test_param, y_pred_param):
    """Plot the precision-recall curve for the given labels and scores.

    Parameters
    ----------
    y_test_param : array-like
        True binary labels.
    y_pred_param : array-like
        Scores for the positive class, where larger means more likely
        positive.  Continuous scores give a full curve; hard 0/1 predictions
        are accepted but collapse the curve to a single operating point.
    """
    # The thresholds returned as the third value are not needed for the plot.
    precision, recall, _ = precision_recall_curve(y_test_param, y_pred_param)
    plt.plot(recall, precision, 'b', label='Precision-Recall curve')
    plt.title('Precision Recall Curve')
    plt.xlabel('Recall')
    plt.ylabel('Precision')
    # The label passed to plot() only becomes visible once a legend is drawn.
    plt.legend()
    plt.show()

In [None]:
# A precision-recall curve needs a continuous score to sweep thresholds over;
# the hard 0/1 labels in pred2_new would reduce it to a single operating point.
# decision_function is positive for inliers and negative for outliers, so it
# is negated to make larger values mean "more likely fraud".
fraud_scores = -np.ravel(est.decision_function(X_test_full))
visualize_curve(y_test_full, fraud_scores)