In [None]:
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
from sklearn.model_selection import StratifiedShuffleSplit
from sklearn.preprocessing import StandardScaler
from sklearn.svm import SVC
from sklearn.model_selection import cross_val_score
from sklearn.model_selection import GridSearchCV
from sklearn.metrics import roc_curve
from sklearn.metrics import roc_auc_score
from matplotlib.colors import ListedColormap
from statistics import mean
from sklearn.metrics import confusion_matrix
from sklearn.model_selection import train_test_split
from sklearn.metrics import precision_recall_curve
from sklearn.metrics import f1_score
from sklearn.metrics import auc
from sklearn.metrics import average_precision_score

def train_test_equal_split(df, target='Class', test_size=0.25, random_state=0):
    """Split a dataframe into stratified train and test sets.

    One stratified shuffle split is drawn on the ``target`` column, so the
    proportion of each class (e.g. fraud vs. genuine observations) is kept
    approximately the same in the train and the test set.

    Parameters
    ----------
    df : pandas.DataFrame
        Data containing the feature columns and the ``target`` column.
    target : str, default 'Class'
        Name of the label column to stratify on; it is removed from the
        returned feature frames.
    test_size : float or int, default 0.25
        Passed through to ``StratifiedShuffleSplit`` (share or number of
        rows placed in the test set).
    random_state : int or None, default 0
        Seed passed through to ``StratifiedShuffleSplit``.

    Returns
    -------
    X_train, X_test : pandas.DataFrame
        Feature rows of the train / test partition (original index kept).
    y_train, y_test : pandas.Series
        Matching ``target`` values.
    """
    X, y = df.drop(columns=[target]), df[target]
    sss = StratifiedShuffleSplit(n_splits=1, test_size=test_size,
                                 random_state=random_state)
    # n_splits=1 yields exactly one (train, test) pair of positional indices.
    train_index, test_index = next(sss.split(X, y))
    X_train, X_test = X.iloc[train_index], X.iloc[test_index]
    y_train, y_test = y.iloc[train_index], y.iloc[test_index]
    return X_train, X_test, y_train, y_test

def scale(X_train,X_test):
    """Standardise the 'Amount' and 'Time' columns of the train and test sets.

    The scaler is fitted on the training rows only and then applied to both
    sets, so no information from the test set enters the fit. All other
    columns are passed through untouched.

    Parameters
    ----------
    X_train, X_test : pandas.DataFrame
        Feature frames that both contain the columns 'Amount' and 'Time'.

    Returns
    -------
    scaled_X_train_df, scaled_X_test_df : pandas.DataFrame
        Copies of the inputs with the same index and the same column order,
        in which 'Amount' and 'Time' hold the standardised values. The inputs
        themselves are not modified.
    """
    cols_to_scale=['Amount', 'Time']
    std_scale=StandardScaler().fit(X_train[cols_to_scale])

    def _with_scaled_columns(frame):
        # Write each scaled column back under its own label. Concatenating the
        # scaled block in front of the other columns and then re-applying the
        # original column labels (as was done before) attaches the labels to
        # the wrong data whenever 'Amount', 'Time' are not the first columns.
        scaled=np.asarray(std_scale.transform(frame[cols_to_scale]))
        out=frame.copy()
        for pos,name in enumerate(cols_to_scale):
            out[name]=scaled[:,pos]
        return out

    scaled_X_train_df=_with_scaled_columns(X_train)
    scaled_X_test_df=_with_scaled_columns(X_test)
    return scaled_X_train_df,scaled_X_test_df

In [None]:
# Load the dataset from a zipped CSV at a hard-coded absolute path, using
# pandas' 'python' parser engine.
df= pd.read_csv(r'/home/jovyan/creditcard.csv.zip',engine='python')
# Stratified train/test split on the 'Class' column (see train_test_equal_split).
X_train,X_test,y_train,y_test=train_test_equal_split(df)
# Standardise 'Amount' and 'Time' with a scaler fitted on the training set only.
scaled_X_train_df,scaled_X_test_df=scale(X_train,X_test)
# np.unique returns the sorted distinct labels together with how often each occurs.
train_unique_label, train_counts_label = np.unique(y_train, return_counts=True)
test_unique_label, test_counts_label = np.unique(y_test, return_counts=True)
# Spacer line in the cell output.
print(' ')
### here you can look at the ratio of the classes
# Each printed array is count / number of rows, in sorted-label order.
# NOTE(review): the messages say "Geniune and fraud", which presumes the genuine
# label sorts before the fraud label (e.g. 0 and 1) — confirm against the data.
print("Ratio of Geniune and fraud observations in training set", train_counts_label/ len(y_train))
print("Ratio of Geniune and fraud observations in test set",test_counts_label/ len(y_test))

In [None]:
# Preview the first rows of the loaded frame (head() shows five rows by default).
df.head()

In [None]:
# Compare SVC kernels on the held-out test set.
# For each kernel: grid-search C and gamma on the training data, take the
# refitted best model, and record its test accuracy, precision, recall, F1,
# confusion-matrix counts and the area under its precision-recall curve.
kernels=['rbf','poly','sigmoid']
color=['magenta','red','green']
test_score,pre,rec,f1,conf,AUC=[],[],[],[],[],[]
# Every kernel is tuned over the same C/gamma grid; one separate dict per kernel.
# (The previous hand-written list was missing closing braces and did not parse.)
parameters=[{'C': [0.01,.1,1,10],'gamma': [0.01,.1,1,10]} for _ in kernels]

# Chance level of a precision-recall curve is the share of positive samples in
# the evaluation set, not 0.5 (0.5 is only right for perfectly balanced classes).
# NOTE(review): assumes the positive class is labelled 1 in y_test, which is also
# what precision_recall_curve assumes by default — confirm against the data.
no_skill=float(np.mean(np.asarray(y_test)==1))

# One panel per kernel; squeeze=False keeps `axes` 2-D even for a single kernel.
fig,axes=plt.subplots(1,len(kernels),squeeze=False)
for kernel,param_grid,c,ax in zip(kernels,parameters,color,axes[0]):
    # random_state pins the internal data shuffling used for the probability
    # estimates, so predict_proba is reproducible across runs.
    svm=SVC(kernel=kernel,probability=True,random_state=0)
    # NOTE(review): without a `scoring` argument the search ranks candidates by
    # the estimator's default score (accuracy for SVC). On heavily imbalanced
    # data a metric such as 'average_precision' may be a better choice.
    grid_search = GridSearchCV(svm, param_grid, cv=2)
    grid_search.fit(scaled_X_train_df, y_train)
    print('Kernel: ',kernel)
    print(grid_search.best_params_)
    # Best parameter combination, refitted on the whole training set.
    svm=grid_search.best_estimator_
    test_score.append(svm.score(scaled_X_test_df,y_test))
    y_test_hat=svm.predict(scaled_X_test_df)
    y_test_prob=svm.predict_proba(scaled_X_test_df)
    # Binary confusion matrix flattened row by row: TN, FP, FN, TP.
    TN, FP, FN, TP = confusion_matrix(y_test, y_test_hat).ravel()
    # NOTE(review): if the model predicts no positives at all, TP+FP is 0 and
    # PRE evaluates to nan (numpy division) — treat such rows with care.
    PRE,REC,F1=TP/(TP+FP),TP/(TP+FN),(2*TP)/(2*TP+FP+FN)
    # Column 1 of predict_proba is the probability of the second class label.
    precision, recall, thresholds = precision_recall_curve(y_test,y_test_prob[:,1])
    Auc = auc(recall, precision)
    AUC.append(Auc)
    conf.append([TN, FP, FN, TP])
    pre.append(PRE)
    rec.append(REC)
    f1.append(F1)
    ax.plot([0, 1], [no_skill, no_skill], linestyle='--')
    ax.plot(recall, precision, marker='.',color=c)
    ax.set_xlabel('recall')
    ax.set_ylabel('precision')
    ax.set_title(kernel)
fig.tight_layout()
plt.show()


# Per-kernel summary table: one row per kernel, one column per test-set metric.
# The confusion_matrix column holds the [TN, FP, FN, TP] list of each kernel.
d=pd.DataFrame(index=kernels).assign(**{
    'Precision': pre,
    'Recall': rec,
    'f1': f1,
    'Area under the curve': AUC,
    'confusion_matrix': conf,
})
