<a href="https://colab.research.google.com/github/jhammans/fraud_busters/blob/David/Deep_Learning_Model.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:

from google.colab import drive

# Mount Google Drive inside the Colab runtime; the dataset path used later in
# this notebook lives under this mount point.
DRIVE_MOUNT_POINT = '/content/drive'
drive.mount(DRIVE_MOUNT_POINT)


In [None]:

import os

# Update the file path based on where the CSV file is stored in your Google Drive
file_path = '/content/drive/My Drive/Resources/fraudTrain.csv'

# Fail fast with a clear error before any later cell tries to read the data.
# isfile (rather than exists) also rejects a directory sitting at this path,
# which could not be read as a CSV anyway.
if not os.path.isfile(file_path):
    raise FileNotFoundError(f"File not found: {file_path}")


In [None]:
import numpy as np
import pandas as pd
from sklearn.metrics import classification_report
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder, MinMaxScaler
from tensorflow.keras.layers import Input, Dense
from tensorflow.keras.models import Model
from tensorflow.keras.utils import set_random_seed

In [None]:
# Read from the same `file_path` whose existence was checked earlier, so the
# guard and the actual read can never point at different files.
df = pd.read_csv(file_path)
df.head()

In [None]:
# Integer-encode the two categorical columns in place. Each fitted encoder is
# kept in its own variable; a column that is absent is simply skipped.
present_columns = df.columns

if 'category' in present_columns:
    le_category = LabelEncoder().fit(df['category'])
    df['category'] = le_category.transform(df['category'])

if 'gender' in present_columns:
    le_gender = LabelEncoder().fit(df['gender'])
    df['gender'] = le_gender.transform(df['gender'])

In [None]:
# Columns kept for modelling: eight input features followed by the
# `is_fraud` label.
relevant_columns = [
    'category', 'amt', 'lat', 'long', 'gender',
    'city_pop', 'merch_lat', 'merch_long',
    'is_fraud',
]
df_filtered = df[relevant_columns]

In [None]:
# Separate the inputs from the label, then min-max scale every input column.
# NOTE(review): the scaler is fitted on all rows (fraud and non-fraud, and
# before the train/validation split) — confirm this is intended.
features = df_filtered.drop(columns='is_fraud')
scaler = MinMaxScaler().fit(features)
normalized_features = scaler.transform(features)

In [None]:
# Wrap the scaled array back into a DataFrame with the original column names.
# The row index of `features` is carried over explicitly: the label column is
# attached to this frame by index alignment, so a fresh 0..n-1 index would
# silently mismatch (producing NaN labels) whenever `features` has a
# non-default index, e.g. after rows have been dropped.
df_normalized = pd.DataFrame(
    normalized_features,
    columns=features.columns,
    index=features.index,
)

In [None]:
# Re-attach the label next to the scaled features. Assigning a Series aligns
# it on the row index.
fraud_labels = df_filtered['is_fraud']
df_normalized['is_fraud'] = fraud_labels

In [None]:
# Split the scaled rows by label and drop the label column, leaving
# feature-only frames for is_fraud == 0 (normal) and is_fraud == 1 (fraud).
label_column = df_normalized['is_fraud']
normal_data = df_normalized.loc[label_column == 0].drop(columns='is_fraud')
fraud_data = df_normalized.loc[label_column == 1].drop(columns='is_fraud')

In [None]:
# Hold out 20% of the normal rows for validation; the fixed random_state keeps
# the split the same on every run.
train_data, val_data = train_test_split(
    normal_data,
    random_state=42,
    test_size=0.2,
)

In [None]:
# Layer sizes for the autoencoder: the input width is the number of feature
# columns in the training frame.
input_dim = len(train_data.columns)
# NOTE(review): with the 8 feature columns selected earlier in this notebook, a
# 10-unit code layer is wider than the input, so it is not a compressing
# bottleneck — confirm this is intended.
encoding_dim = 10

In [None]:
# Seed Python, NumPy and the TensorFlow backend before any layer is created so
# that weight initialisation and training-time shuffling are as repeatable as
# the backend allows across "Restart & Run All". The value matches the
# random_state used for the train/validation split.
set_random_seed(42)

# Single-hidden-layer autoencoder: input -> ReLU code layer -> sigmoid output.
# The sigmoid keeps reconstructions in (0, 1), the range of the min-max-scaled
# inputs.
input_layer = Input(shape=(input_dim,))
encoded = Dense(encoding_dim, activation='relu')(input_layer)
decoded = Dense(input_dim, activation='sigmoid')(encoded)

In [None]:
# Assemble the layers into a model that maps each input row to its
# reconstruction, trained with Adam on mean squared error.
autoencoder = Model(
    inputs=input_layer,
    outputs=decoded,
)
autoencoder.compile(loss='mse', optimizer='adam')

In [None]:
# Train on normal rows only. The input doubles as the target, so the network
# learns to reconstruct normal transactions; validation uses the held-out
# normal rows in the same way.
training_options = {
    'epochs': 50,
    'batch_size': 32,
    'shuffle': True,
    'validation_data': (val_data, val_data),
}
autoencoder.fit(train_data, train_data, **training_options)

In [None]:
# Score every row (fraud and non-fraud) by how badly the autoencoder
# reconstructs it.
# The feature frame is selected by the training columns rather than by dropping
# `is_fraud`: a later cell adds an `anomaly` column to `df_normalized`, and
# dropping only the label would then feed an extra column to the model if this
# cell is re-run. Selecting once also avoids building the same frame twice.
eval_features = df_normalized[train_data.columns]
reconstructions = autoencoder.predict(eval_features)
# Per-row mean squared error across the feature columns.
reconstruction_error = np.mean(np.square(reconstructions - eval_features), axis=1)

In [None]:
# Cut-off for flagging: the 95th percentile of the reconstruction errors over
# all scored rows, so roughly the 5% of rows with the largest error exceed it.
anomaly_percentile = 95
threshold = np.percentile(reconstruction_error, anomaly_percentile)

In [None]:
# Mark rows whose reconstruction error is strictly above the cut-off.
exceeds_threshold = reconstruction_error > threshold
df_normalized['anomaly'] = exceeds_threshold

In [None]:
# Compare the anomaly flags against the true labels and print the
# per-class report.
true_labels = df_normalized['is_fraud']
predicted_flags = df_normalized['anomaly']
print(classification_report(true_labels, predicted_flags))

In [None]:
# Count the rows for each (is_fraud, anomaly) combination.
outcome_pairs = df_normalized[['is_fraud', 'anomaly']]
print(outcome_pairs.value_counts())