## Firstly, we need to understand what counts as an anomaly in a dataset. The anomaly can be viewed as a rare or unusual observation in the dataset. For example in the case credit card transaction dataset, the fraudulent transactions are an anomaly as the number of fraud cases is very few as compared to normal transactions in a large dataset.
## In anomaly detection, we try to identify observations that are statistically different from the rest of the observations.

# Import Libaries

In [None]:
!pip install pyod

In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.datasets.samples_generator import make_blobs
from sklearn.pipeline import make_pipeline, make_union,Pipeline
import seaborn as sns
from sklearn.preprocessing import StandardScaler,RobustScaler,LabelEncoder,Normalizer,Binarizer,MinMaxScaler,MaxAbsScaler,PolynomialFeatures
from sklearn.mixture import GaussianMixture
from sklearn.model_selection import train_test_split, cross_val_score
from sklearn import metrics
from sklearn.linear_model import LogisticRegression
from xgboost import XGBClassifier
from sklearn.svm import SVC
from imblearn.over_sampling import SMOTE
from imblearn.under_sampling import RandomUnderSampler
from sklearn.utils import shuffle
from sklearn.decomposition import PCA



# Import Data

In [None]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
df  = pd.read_csv('drive/MyDrive/Credit Card Fraud Detection/creditcard.csv')
df.head()

Unnamed: 0,Time,V1,V2,V3,V4,V5,V6,V7,V8,V9,V10,V11,V12,V13,V14,V15,V16,V17,V18,V19,V20,V21,V22,V23,V24,V25,V26,V27,V28,Amount,Class
0,0.0,-1.359807,-0.072781,2.536347,1.378155,-0.338321,0.462388,0.239599,0.098698,0.363787,0.090794,-0.5516,-0.617801,-0.99139,-0.311169,1.468177,-0.470401,0.207971,0.025791,0.403993,0.251412,-0.018307,0.277838,-0.110474,0.066928,0.128539,-0.189115,0.133558,-0.021053,149.62,0
1,0.0,1.191857,0.266151,0.16648,0.448154,0.060018,-0.082361,-0.078803,0.085102,-0.255425,-0.166974,1.612727,1.065235,0.489095,-0.143772,0.635558,0.463917,-0.114805,-0.183361,-0.145783,-0.069083,-0.225775,-0.638672,0.101288,-0.339846,0.16717,0.125895,-0.008983,0.014724,2.69,0
2,1.0,-1.358354,-1.340163,1.773209,0.37978,-0.503198,1.800499,0.791461,0.247676,-1.514654,0.207643,0.624501,0.066084,0.717293,-0.165946,2.345865,-2.890083,1.109969,-0.121359,-2.261857,0.52498,0.247998,0.771679,0.909412,-0.689281,-0.327642,-0.139097,-0.055353,-0.059752,378.66,0
3,1.0,-0.966272,-0.185226,1.792993,-0.863291,-0.010309,1.247203,0.237609,0.377436,-1.387024,-0.054952,-0.226487,0.178228,0.507757,-0.287924,-0.631418,-1.059647,-0.684093,1.965775,-1.232622,-0.208038,-0.1083,0.005274,-0.190321,-1.175575,0.647376,-0.221929,0.062723,0.061458,123.5,0
4,2.0,-1.158233,0.877737,1.548718,0.403034,-0.407193,0.095921,0.592941,-0.270533,0.817739,0.753074,-0.822843,0.538196,1.345852,-1.11967,0.175121,-0.451449,-0.237033,-0.038195,0.803487,0.408542,-0.009431,0.798278,-0.137458,0.141267,-0.20601,0.502292,0.219422,0.215153,69.99,0


# Split Data

In [None]:
X = df.iloc[:, df.columns != 'Class']
y = df.iloc[:, df.columns == 'Class']
len(y[y.Class ==1])


492

In [None]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.33, random_state=1, shuffle=True)

In [None]:
number_records_fraud = len (df[df.Class==1])
fraud_indices = np.array (df[df.Class==1].index)
normal_indices = np.array (df[df.Class==0].index)
random_normal_indices = np.random.choice (normal_indices, number_records_fraud, replace = False )
under_sample_indices = np.concatenate ([fraud_indices, random_normal_indices])
under_sample_data = df.iloc[under_sample_indices,:]
X_undersample = under_sample_data.iloc [:, under_sample_data.columns != 'Class'];
y_undersample = under_sample_data.iloc [:, under_sample_data.columns == 'Class'];

X_train_under, X_test_under, y_train_under, y_test_under = train_test_split (X_undersample,y_undersample, test_size = 0.3, random_state = 0)

# Train And Test

## GMM

### Gaussian mixture models are a probabilistic model for representing normally distributed subpopulations within an overall population. Mixture models in general don't require knowing which subpopulation a data point belongs to, allowing the model to learn the subpopulations automatically. Since subpopulation assignment is not known, this constitutes a form of unsupervised learning.

In [None]:
gm = GaussianMixture(n_components=2, covariance_type = 'diag', init_params='random', max_iter=100,random_state=0)
gm.fit(X_train)

GaussianMixture(covariance_type='diag', init_params='random', max_iter=100,
                means_init=None, n_components=2, n_init=1, precisions_init=None,
                random_state=0, reg_covar=1e-06, tol=0.001, verbose=0,
                verbose_interval=10, warm_start=False, weights_init=None)

In [None]:
metrics.roc_auc_score(y_train, gm.predict(X_train))

0.8859492522283219

In [None]:
metrics.roc_auc_score(y_test, gm.predict(X_test))

0.8721067106599775

### With UnderSampling

In [None]:
under_gm = GaussianMixture(n_components=2, covariance_type = 'diag', init_params='random', max_iter=100,random_state=0)
under_gm.fit(X_train_under)

GaussianMixture(covariance_type='diag', init_params='random', max_iter=100,
                means_init=None, n_components=2, n_init=1, precisions_init=None,
                random_state=0, reg_covar=1e-06, tol=0.001, verbose=0,
                verbose_interval=10, warm_start=False, weights_init=None)

In [None]:
metrics.roc_auc_score(y_train, under_gm.predict(X_train))

0.8995384024453792

In [None]:
metrics.roc_auc_score(y_test, under_gm.predict(X_test))

0.877847818993025

In [None]:
metrics.roc_auc_score(y_test_under, under_gm.predict(X_test_under))

0.8814545952609232

## Pyod

### COPOD class for Copula Based Outlier Detector. COPOD is a parameter-free, highly interpretable outlier detection algorithm based on empirical copula models.
#### Copulas allow one to easily model and estimate the distribution of random vectors by estimating marginals and copulae separately

In [None]:
from pyod.models.copod import COPOD
clf = COPOD() #Copula-Based Outlier Detection
clf.fit(X_train)

  import pandas.util.testing as tm


COPOD(contamination=0.1, n_jobs=1)

In [None]:
metrics.roc_auc_score(y_train, clf.predict(X_train))

0.8896559390745438

In [None]:
metrics.roc_auc_score(y_test, clf.predict(X_test))

0.8791920563771819

### With UnderSampling

In [None]:
under_copod = COPOD()
under_copod.fit(X_train_under)

COPOD(contamination=0.1, n_jobs=1)

In [None]:
metrics.roc_auc_score(y_train, under_copod.predict(X_train))

0.8976467217746286

In [None]:
metrics.roc_auc_score(y_test, under_copod.predict(X_test))

0.8885142695525372

In [None]:
metrics.roc_auc_score(y_test_under, under_copod.predict(X_test_under))

0.608843537414966

## Supervised

In [None]:
from sklearn.neural_network import MLPClassifier
MLPC = make_pipeline(
    StandardScaler(),
    PCA(n_components=11),
    MLPClassifier(hidden_layer_sizes=(200,), max_iter=10000,alpha = 0.01,random_state=42)
    )
MLPC.fit(X_train, y_train)

  y = column_or_1d(y, warn=True)


Pipeline(memory=None,
         steps=[('standardscaler',
                 StandardScaler(copy=True, with_mean=True, with_std=True)),
                ('pca',
                 PCA(copy=True, iterated_power='auto', n_components=11,
                     random_state=None, svd_solver='auto', tol=0.0,
                     whiten=False)),
                ('mlpclassifier',
                 MLPClassifier(activation='relu', alpha=0.01, batch_size='auto',
                               beta_1=0.9, beta_2=0.999, early_stopping=False,
                               epsilon=1e-08, hidden_layer_sizes=(200,),
                               learning_rate='constant',
                               learning_rate_init=0.001, max_fun=15000,
                               max_iter=10000, momentum=0.9,
                               n_iter_no_change=10, nesterovs_momentum=True,
                               power_t=0.5, random_state=42, shuffle=True,
                               solver='adam', tol=0.0001,

In [None]:
metrics.roc_auc_score(y_train, MLPC.predict(X_train))

0.8996069272232062

In [None]:
metrics.roc_auc_score(y_test, MLPC.predict(X_test))

0.8714937430210398

In [None]:
metrics.roc_auc_score(y_test_under, MLPC.predict(X_test_under))

0.8843537414965986

### With UnderSampling

In [None]:
under_MLPC = make_pipeline(
    StandardScaler(),
    MLPClassifier(hidden_layer_sizes=(200,), max_iter=10000,alpha = 0.01,random_state=42)
    )
under_MLPC.fit(X_train_under, y_train_under)

  y = column_or_1d(y, warn=True)


Pipeline(memory=None,
         steps=[('standardscaler',
                 StandardScaler(copy=True, with_mean=True, with_std=True)),
                ('mlpclassifier',
                 MLPClassifier(activation='relu', alpha=0.01, batch_size='auto',
                               beta_1=0.9, beta_2=0.999, early_stopping=False,
                               epsilon=1e-08, hidden_layer_sizes=(200,),
                               learning_rate='constant',
                               learning_rate_init=0.001, max_fun=15000,
                               max_iter=10000, momentum=0.9,
                               n_iter_no_change=10, nesterovs_momentum=True,
                               power_t=0.5, random_state=42, shuffle=True,
                               solver='adam', tol=0.0001,
                               validation_fraction=0.1, verbose=False,
                               warm_start=False))],
         verbose=False)

In [None]:
metrics.roc_auc_score(y_train, under_MLPC.predict(X_train))

0.9652589449101076

In [None]:
metrics.roc_auc_score(y_test, under_MLPC.predict(X_test))

0.9706726702405472

In [None]:
metrics.roc_auc_score(y_test_under, under_MLPC.predict(X_test_under))

0.9525635757658768