### I - Data manipulation

#### Import libraries

In [4]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score
from sklearn.preprocessing import MinMaxScaler
from sklearn.manifold import TSNE

from keras.layers import Input, Dense
from keras.models import Model, Sequential
from keras import regularizers

ModuleNotFoundError: No module named 'tensorflow'

#### Import data 

In [None]:
import os

# Folder that holds the dataset and the name of the CSV file inside it.
# NOTE(review): the folder value looks like a placeholder ("path/to/data
# folder" in French) -- replace it with the real location before running.
directory = 'chemin/vers/dossier_datas'
data = 'data.csv'

# Build the path from the folder AND the file name. `directory` used to be
# defined but never used, so the CSV was looked up in the current working
# directory instead of the data folder.
df = pd.read_csv(os.path.join(directory, data))

#### Explore data

In [None]:
# Quick look at the dataset. Only the last expression of a notebook cell is
# displayed automatically, so the intermediate results are printed explicitly;
# this also makes the cell produce output when run as a plain script.
print(df.head())      # first rows
df.info()             # column dtypes and non-null counts (prints by itself)
print(df.describe())  # summary statistics

#### Clean data

In [None]:
# Time? (open question left by the author)
# Split fraudulent and regular transactions into two separate dataframes: df_fraud, df_normal
# Adapt to potential hardware constraints (reduce the dataset size)

#### Use data

In [None]:
# Separate the features (X) from the label column (y).
label_column = 'col_qui_indique_fraude_ou_non'
y = df[label_column]
X = df.drop(columns=[label_column])

#### Visualize data

In [None]:
def data_plot(x, y):
    """Plot a 2-D t-SNE projection of `x`, coloured by the labels in `y`.

    Args:
        x: Feature matrix accepted by `TSNE.fit_transform` (one row per
            sample).
        y: Labels aligned with the rows of `x`. Samples labelled 0 are drawn
            as 'Normal' and samples labelled 1 as 'Fraud'; rows with any
            other label are not plotted.

    Returns:
        None. The figure is displayed with `plt.show()`.
    """
    sns.set(style='whitegrid')

    # Fixed seed so the embedding is the same from one run to the next.
    tsne = TSNE(n_components=2, random_state=0)
    X_transformed = tsne.fit_transform(x)

    # Flat NumPy labels give boolean masks that index the embedding directly,
    # whether `y` is a pandas Series or an array.
    labels = np.asarray(y).ravel()

    plt.figure(figsize=(10, 10))

    for value, colour, name in ((0, 'y', 'Normal'), (1, 'k', 'Fraud')):
        mask = labels == value
        plt.scatter(X_transformed[mask, 0],
                    X_transformed[mask, 1],
                    marker='o',
                    color=colour,
                    linewidth=1,  # numeric width; a string is not a valid value
                    alpha=0.8,
                    label=name)

    plt.legend(loc='best')
    plt.show()

In [None]:
# Plot the t-SNE projection of the raw features X, coloured by the labels y.
data_plot(X, y)

### II - Data processing

#### Scale data

In [None]:
# Rescale every feature with a MinMaxScaler fitted on the whole of X,
# then split the scaled rows by label (0 and 1).
scaler = MinMaxScaler()
X_scaled = scaler.fit_transform(X)

normal_mask = (y == 0)
fraud_mask = (y == 1)
X_normal_scaled = X_scaled[normal_mask]
X_fraud_scaled = X_scaled[fraud_mask]

#### Setup model

In [None]:
# Encoder half of the autoencoder: four tanh layers carrying an L1 activity
# penalty, narrowing down to a 6-unit relu bottleneck.
input_layer = Input(shape=(X.shape[1],))

encoded = input_layer
for units in (100, 50, 25, 12):
    encoded = Dense(units,
                    activation='tanh',
                    activity_regularizer=regularizers.l1(10e-5))(encoded)

# Bottleneck layer: no activity regularizer on this one.
encoded = Dense(6, activation='relu')(encoded)

In [None]:
# Decoder half: widen from the bottleneck back up through tanh layers, then
# project to the original number of features.
decoded = encoded
for units in (12, 25, 50, 100):
    decoded = Dense(units, activation='tanh')(decoded)

n_features = X.shape[1]
output_layer = Dense(n_features, activation='relu')(decoded)

#### Train model

In [None]:
# Assemble the autoencoder and train it to reconstruct its own input,
# using only the scaled rows whose label is 0.
autoencoder = Model(inputs=input_layer, outputs=output_layer)
autoencoder.compile(loss='mse', optimizer='adadelta')

fit_options = {
    'batch_size': 16,
    'epochs': 10,
    'shuffle': True,
    'validation_split': 0.20,
}
autoencoder.fit(X_normal_scaled, X_normal_scaled, **fit_options)

#### Extract encoder

In [None]:
# Rebuild the encoder as a standalone model that reuses the trained layers.
# `autoencoder.layers[0]` is the input layer, so the five encoder Dense layers
# (100, 50, 25, 12 and the 6-unit bottleneck) sit at indices 1 to 5. Stopping
# at index 4 would leave the bottleneck out and return the 12-unit
# activations instead of the 6-unit code.
n_encoder_layers = 5  # Dense layers from the input up to the bottleneck

hidden_representation = Sequential()
for layer in autoencoder.layers[:n_encoder_layers + 1]:  # +1: input layer
    hidden_representation.add(layer)

#### Encode data

In [None]:
# Run both groups of scaled rows through the extracted encoder.
normal_hidden_rep = hidden_representation.predict(X_normal_scaled)
fraud_hidden_rep = hidden_representation.predict(X_fraud_scaled)

# Matching label vectors: 0 for the normal rows, 1 for the fraud rows.
n_normal = normal_hidden_rep.shape[0]
n_fraud = fraud_hidden_rep.shape[0]
y_normal = np.zeros(n_normal)
y_fraud = np.ones(n_fraud)

# Stack encodings and labels in the same order (normal first, then fraud).
encoded_X = np.concatenate((normal_hidden_rep, fraud_hidden_rep), axis=0)
encoded_y = np.concatenate((y_normal, y_fraud))

#### Visualize data

In [None]:
# Plot the t-SNE projection of the encoded samples, coloured by their labels.
data_plot(encoded_X, encoded_y)

### III - Data analysis

#### Prepare data

In [None]:
# Hold out 20% of each dataset for testing. Both splits are seeded, like the
# t-SNE step earlier in the notebook, so the reported scores are reproducible,
# and stratified so train and test keep the same class proportions.
# NOTE: the two splits are drawn independently (encoded_X is ordered normal
# rows first, then fraud rows), so the two test sets do not hold the same rows.
X_train_encoded, X_test_encoded, y_train_encoded, y_test_encoded = train_test_split(
    encoded_X, encoded_y, test_size=0.2, random_state=0, stratify=encoded_y)
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=0, stratify=y)

#### Build Logistic Regression model

In [None]:
# Logistic regression fitted on the encoded training features, then used to
# predict the encoded test set.
lrclf = LogisticRegression().fit(X_train_encoded, y_train_encoded)
y_pred_lrclf = lrclf.predict(X_test_encoded)

#### Build Support Vector Classifier model

In [None]:
# Support vector classifier fitted on the split of X (not on the encoded
# features), then used to predict the matching test set.
svmclf = SVC().fit(X_train, y_train)
y_pred_svmclf = svmclf.predict(X_test)

#### Evaluate performances

In [None]:
# Report the accuracy of each classifier on its own held-out test set.
lr_accuracy = accuracy_score(y_test_encoded, y_pred_lrclf)
print(f'Logistic Regression Accuracy : {lr_accuracy!s}')

svm_accuracy = accuracy_score(y_test, y_pred_svmclf)
print(f'Support Vector Classifier Accuracy : {svm_accuracy!s}')