In [1]:
import numpy as np
import pandas as pd
import tensorflow as tf
from numpy import *

In [2]:
# Read the credit-card transactions CSV into the working frame and preview
# the first five rows.
df = pd.read_csv(filepath_or_buffer="../input/creditcardfraud/creditcard.csv")
df.head(5)

In [3]:
# Report the size of the loaded frame (rows x columns).
print(f"The dataframe has {df.shape[0]} rows and {df.shape[1]} columns.")

In [4]:
# Distinct values present in the label column.
df["Class"].unique()

In [5]:
# Non-null count of every column, broken down by label value.
df.groupby(by=['Class']).count()

In [6]:
# Number of rows per label value; `c` is reused by the bar plot further down.
c = df["Class"].value_counts()
c.loc[0], c.loc[1]

In [7]:
import seaborn as sn
import matplotlib.pyplot as plt

# Bar chart of the number of rows per Class value.
# `c` is the Class value_counts() Series: its index (the class labels) goes on
# the x axis and its values (the row counts) go on the y axis, so the axis
# labels below follow that orientation.
plt.figure(figsize=(10,5))
sn.barplot(x=c.index,y=c)
plt.xlabel("Class")
plt.ylabel("Count")
# No plt.legend() here: nothing drawn above carries a label, so a legend call
# would only emit a "no artists with labels" warning.

In [8]:
# Pairwise correlation of every column in the frame, rendered as a heatmap.
corr = df.corr()
plt.figure(figsize=(10, 10))
sn.heatmap(corr)
plt.title("Heatmap of correlation")

In [9]:
# Missing-value count per column.
df.isna().sum()

In [10]:
# Summary statistics for the columns of the loaded frame.
df.describe()

In [11]:
from sklearn.utils import resample

# Separate the rows by label: Class == 1 goes to `fraud`, Class == 0 to `nfraud`.
fraud = df.loc[df.Class == 1]
nfraud = df.loc[df.Class == 0]

# Draw from the Class == 1 rows with replacement until there are as many of
# them as there are Class == 0 rows; the fixed seed makes the draw repeatable.
overfraud = resample(
    fraud,
    n_samples=nfraud.shape[0],
    replace=True,
    random_state=42,
)

# Balanced frame: resampled Class == 1 rows followed by all Class == 0 rows.
df_over = pd.concat([overfraud, nfraud], axis=0)

In [12]:
# Rows per label value in the oversampled frame.
df_over["Class"].value_counts()

In [13]:
# Count plot of the label column of the oversampled frame.
# The column is passed as `x=` because recent seaborn releases treat the first
# positional argument as `data`, not as the x variable.
# `order=[0, 1]` pins the bar order so the tick labels below (first label for
# Class 0, second for Class 1) always line up with the right bar.
g = sn.countplot(x=df_over.Class, order=[0, 1])
g.set_xticklabels(['Non-Fradulent','Fraudulent'])

In [14]:
# Feature matrix: every column of the oversampled frame except the label.
# `columns=` is used instead of a positional axis argument, which pandas
# deprecated and then removed in 2.0 (the positional form raises TypeError there).
X = df_over.drop(columns=['Class'])
# Target vector: the label column.
y = df_over['Class']

In [15]:
from sklearn.model_selection import train_test_split
from sklearn.utils import resample

# Build the train/test sets from the un-resampled frame `df`, not from
# `df_over`. `df_over` contains each Class == 1 row many times (it was drawn
# with replacement up to the size of the Class == 0 group), so splitting it
# would place copies of the same transaction in both train and test and the
# test metrics would be measured on rows the model has already seen.
# Splitting first keeps the test set made of distinct, untouched rows with the
# real class ratio; the fixed seed makes the split repeatable.
train_df, test_df = train_test_split(
    df, test_size=0.3, stratify=df['Class'], random_state=42
)

# Oversample the minority class inside the training portion only.
train_fraud = train_df[train_df.Class == 1]
train_nfraud = train_df[train_df.Class == 0]
train_fraud_over = resample(train_fraud, replace=True,
                            n_samples=len(train_nfraud),
                            random_state=42)
train_over = pd.concat([train_fraud_over, train_nfraud])

# Same four names and container types (DataFrame features, Series labels)
# that the following cells consume.
X_train = train_over.drop(columns=['Class'])
y_train = train_over['Class']
X_test = test_df.drop(columns=['Class'])
y_test = test_df['Class']

In [16]:
# Sanity check: sizes of the training and test feature sets after the split.
X_train.shape, X_test.shape

In [17]:
from sklearn.preprocessing import MinMaxScaler

# Min-max scaler with its default target range of [0, 1], spelled out.
scaler = MinMaxScaler(feature_range=(0, 1))

In [18]:
# Fit the scaler on the training features only, then apply the already-fitted
# scaler to the test features so both are scaled with the training statistics.
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)
# Shapes after scaling, shown as the cell output.
X_train.shape, X_test.shape

In [19]:
# Convert the label containers to NumPy arrays for model.fit / the metrics.
# np.asarray accepts both a pandas Series and an existing ndarray, so this
# cell can be re-run safely; calling .to_numpy() a second time would fail
# because the names would already be bound to ndarrays.
y_train = np.asarray(y_train)
y_test = np.asarray(y_test)

In [20]:
# Append a trailing axis of length 1 so each sample has shape
# (n_features, 1); the Conv1D input layer below takes its input_shape from
# X_train[0].shape.
X_train = X_train.reshape(X_train.shape[0], X_train.shape[1],1)
X_test = X_test.reshape(X_test.shape[0],X_test.shape[1],1)

In [21]:
# 1-D convolutional binary classifier, assembled layer by layer.
model = tf.keras.Sequential()

# Convolution stack: three Conv1D layers (32 -> 64 -> 128 filters, kernel 3).
model.add(
    tf.keras.layers.Conv1D(
        32,
        kernel_size=3,
        activation='relu',
        input_shape=X_train[0].shape,  # per-sample shape of the reshaped features
    )
)
model.add(tf.keras.layers.MaxPooling1D(pool_size=1))
model.add(tf.keras.layers.Conv1D(64, kernel_size=3, activation='relu'))
model.add(tf.keras.layers.MaxPool1D(pool_size=2))
model.add(tf.keras.layers.Conv1D(128, kernel_size=3, activation='relu'))

# Classifier head: flatten, one hidden dense layer, single sigmoid output.
model.add(tf.keras.layers.Flatten())
model.add(tf.keras.layers.Dense(100, activation='sigmoid'))
model.add(tf.keras.layers.Dense(1, activation='sigmoid'))

In [22]:
# Binary cross-entropy objective with Adam; the accuracy metric is registered
# under the key 'acc', which the training-history plots read later.
model.compile(
    loss='binary_crossentropy',
    optimizer='adam',
    metrics=['acc'],
)

In [23]:
# Print the layer-by-layer summary of the compiled model.
model.summary()

In [24]:
# Train for 20 epochs on the training features and labels.
model.fit(x=X_train, y=y_train, epochs=20)

In [26]:
# Per-epoch training history as a DataFrame; the plotting cells below read
# its `loss` and `acc` columns.
stat = pd.DataFrame(model.history.history)
stat

In [36]:
# Training loss per epoch.
# `loss` must be bound before it is used to size the epoch axis; with the
# assignment placed after the range() call the cell raises NameError on a
# fresh kernel.
loss = stat.loss
# 1-based epoch numbers for the x axis; `e` is reused by the accuracy plot.
e = range(1, len(loss) + 1)
plt.figure(figsize=(9,9))
plt.plot(e, loss,color='r')
plt.xlabel("Epochs")
plt.ylabel("Loss")

In [39]:
# Training accuracy per epoch, drawn against the epoch axis `e` defined by
# the loss-plot cell.
plt.figure(figsize=(9, 9))
acc = stat["acc"]
plt.plot(e, acc, color='b')
plt.xlabel("Epochs")
plt.ylabel("Accuracy")

In [43]:
# Raw model outputs for the test features (the final layer is a single
# sigmoid unit).
i = model.predict(X_test)
# Round each output to the nearest integer to get hard class predictions;
# `around` comes from the `from numpy import *` at the top of the notebook.
pred = around(i)
pred

In [46]:
from sklearn.metrics import classification_report, confusion_matrix

# Per-class precision / recall / F1 of the rounded predictions on the test set.
print(classification_report(y_true=y_test, y_pred=pred))

In [58]:
# Confusion matrix of the test-set predictions, drawn as an annotated heatmap.
plt.figure(figsize=(10,10))
# confusion_matrix(y_true, y_pred): row i / column j holds the number of
# samples whose true label is i and whose predicted label is j.
x = confusion_matrix(y_test,pred)
# fmt='d' prints the cell counts as plain integers instead of the default
# float format, which switches to scientific notation for large counts.
sn.heatmap(x,annot=True,fmt='d',cmap='Blues_r')
# Heatmap columns are the matrix columns (predicted) and heatmap rows are the
# matrix rows (actual), so the axes are labelled accordingly.
plt.xlabel("Predicted")
plt.ylabel("Actual")
x