# Fraud Detection & Transaction Monitoring

In [1]:
import pandas as pd

df = pd.read_csv('/Users/gabrielstalley/Desktop/banking_analyst_portfolio/synthetic_fraud_transactions.csv')

df.head()
df['is_fraud'].value_counts(normalize=True)

0    0.840875
1    0.159125
Name: is_fraud, dtype: float64

#### Overall Fraud Rate: 15.9%

## Fraud Behavior Analysis

In [2]:
df.groupby('channel')['is_fraud'].mean()

channel
card_not_present    0.243034
card_present        0.113069
Name: is_fraud, dtype: float64

#### A card not being present is riskier when it comes to fraudulent transactions.

In [3]:
df.groupby('foreign_transaction')['is_fraud'].mean()


foreign_transaction
0    0.147768
1    0.239636
Name: is_fraud, dtype: float64

#### 0 = non-foreign transaction, 1 = foreign transaction. This means that a foreign transaction is almost 10% more likely to be fraud compared to non-foreign transactions.

In [4]:
df.groupby('merchant_category')['is_fraud'].mean().sort_values(ascending=False)

merchant_category
electronics      0.242158
online_retail    0.223620
travel           0.150155
other            0.136816
restaurant       0.129590
gas              0.127383
grocery          0.127038
Name: is_fraud, dtype: float64

#### Electronics and online-retail are higher risk categories when it comes to fraud being committed. 

In [5]:
df.groupby('transaction_hour')['is_fraud'].mean()

transaction_hour
0     0.203704
1     0.220820
2     0.185294
3     0.211310
4     0.163793
5     0.139241
6     0.115152
7     0.153153
8     0.133333
9     0.147335
10    0.165663
11    0.164179
12    0.152493
13    0.167647
14    0.157407
15    0.175325
16    0.157051
17    0.167155
18    0.129794
19    0.148485
20    0.169054
21    0.123123
22    0.129870
23    0.144970
Name: is_fraud, dtype: float64

#### Transaction Fraud by hour, with a line chart we will be able to see fraud trends through out the day

## Fraud Risk Flag

In [6]:
df['high_risk_flag'] = (
    (df['transaction_amount'] > 200) |
    (df['foreign_transaction'] == 1) |
    (df['channel'] == 'card_not_presented') &
    (df['transaction_hour'].isin([0,1,2,3,4]))
).astype(int)

df.groupby('high_risk_flag')['is_fraud'].mean()

high_risk_flag
0    0.113678
1    0.362022
Name: is_fraud, dtype: float64

#### This high risk flag shows that 40% of the transactions that have these identifiers are fraudulent transactions

In [7]:
df.to_csv('/Users/gabrielstalley/Desktop/banking_analyst_portfolio/synthetic_fraud_transactions_tableau.csv', index=False)