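# Transaction Fraud Detection

This notebook flags potentially fraudulent transactions as outliers: amounts and frequencies are standardized, each transaction's Euclidean distance from the mean of its purpose group is computed, and the five most distant transactions are highlighted.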

In [19]:
import warnings
import typing
import numpy as np
import pandas as pd
import sklearn.preprocessing as pre
import matplotlib.pyplot as plt
from matplotlib import pyplot as plt
from mpl_toolkits.mplot3d import Axes3D
import seaborn as sns

Array = typing.TypeVar('np.ndarray')
sns.set()

# Load transactions dataset
df = pd.read_csv('Transactions.csv')

%matplotlib widget



In [10]:
# Dataset size as a (rows, columns) tuple
df.shape

(10000, 5)

In [11]:
# Column names of the dataset, as a plain Python list
df.columns.tolist()

['ID', 'Purpose', 'Purpose Code', 'Amount', 'Frequency']

## Plot transaction amounts, frequencies, and purposes.

In [12]:
# Draw on a dedicated figure/axes pair: pyplot-level calls such as
# plt.scatter target whichever figure is current, so with a backend that
# keeps figures open they can end up drawn into an earlier plot.
fig, ax = plt.subplots()
ax.scatter(df['Amount'], df['Frequency'])
ax.set_xlabel('Amount')
ax.set_ylabel('Frequency')
ax.set_title('Transaction amount vs. frequency')
plt.show()

Canvas(toolbar=Toolbar(toolitems=[('Home', 'Reset original view', 'home', 'home'), ('Back', 'Back to previous …

In [25]:
# 3-D scatter of every transaction:
# purpose code on x, amount on y, frequency on z
fig = plt.figure()
ax = fig.add_subplot(1, 1, 1, projection='3d')
ax.scatter(df['Purpose Code'], df['Amount'], df['Frequency'])
ax.set(xlabel='Purpose Code', ylabel='Amount', zlabel='Frequency')
plt.show()

Canvas(toolbar=Toolbar(toolitems=[('Home', 'Reset original view', 'home', 'home'), ('Back', 'Back to previous …

## Calculate transaction distances within groups.

In [22]:
# Standardization helper
def normalize(x: Array) -> Array:
    """Center `x` on its mean and scale it by its standard deviation.

    The mean and standard deviation are whatever `x.mean()` and `x.std()`
    return for the object passed in.
    NOTE(review): pandas and NumPy use different default `ddof` values for
    `std()` - confirm which one is intended if exact scaling matters.
    """
    centered = x - x.mean()
    return centered / x.std()

# Standardize the variables and store them under short column names
df['x'] = normalize(df['Amount'])
df['y'] = normalize(df['Frequency'])
df['g'] = normalize(df['Purpose Code'])

# Per-purpose mean of the standardized amount and frequency, broadcast back
# to every row. `transform` assigns the columns directly, so re-running this
# cell overwrites 'xmean'/'ymean' rather than joining a second copy of them
# onto the frame. Grouping uses the raw purpose code: rows that share a code
# also share 'g', and the raw code remains a usable key even when
# normalizing it produces NaN (e.g. a single distinct code).
df['xmean'] = df.groupby('Purpose Code')['x'].transform('mean')
df['ymean'] = df.groupby('Purpose Code')['y'].transform('mean')

# Euclidean distance of each transaction from its group's mean point
df['Distance'] = np.sqrt((df['x'] - df['xmean']) ** 2
                         + (df['y'] - df['ymean']) ** 2)

In [23]:
# Number of most-distant transactions to flag
N_FLAGGED = 5

# Sort by distance, largest first (rebinds `df` instead of sorting in place)
df = df.sort_values(by='Distance', ascending=False)

# Flag transactions with largest distances. Slicing the index (rather than
# indexing it with range(0, 5)) also works when the frame has fewer than
# N_FLAGGED rows.
df['Flag'] = 'blue'
df.loc[df.index[:N_FLAGGED], 'Flag'] = 'red'

## Plot transactions, highlighting distant points.

In [24]:
# 3-D scatter of all transactions, each point drawn in the colour stored in
# its 'Flag' column so the flagged ones stand out
fig = plt.figure()
ax = fig.add_subplot(1, 1, 1, projection='3d')
ax.scatter(df['Purpose Code'], df['Amount'], df['Frequency'], c=df['Flag'])
ax.set(xlabel='Purpose Code', ylabel='Amount', zlabel='Frequency')
plt.show()

Canvas(toolbar=Toolbar(toolitems=[('Home', 'Reset original view', 'home', 'home'), ('Back', 'Back to previous …