In [1]:
"""reading the csv file"""

import pandas as pd

df = pd.read_csv("Fraud.csv")
rawdf = pd.read_csv("raw.csv")

df.head()

Unnamed: 0,step,type,amount,nameOrig,oldbalanceOrg,newbalanceOrig,nameDest,oldbalanceDest,newbalanceDest,isFraud,isFlaggedFraud
0,1,PAYMENT,9839.64,C1231006815,170136.0,160296.36,M1979787155,0.0,0.0,0,0
1,1,PAYMENT,1864.28,C1666544295,21249.0,19384.72,M2044282225,0.0,0.0,0,0
2,1,TRANSFER,181.0,C1305486145,181.0,0.0,C553264065,0.0,0.0,1,0
3,1,CASH_OUT,181.0,C840083671,181.0,0.0,C38997010,21182.0,0.0,1,0
4,1,PAYMENT,11668.14,C2048537720,41554.0,29885.86,M1230701703,0.0,0.0,0,0


In [2]:
"""Flagging Transactions where the amount is greater than 200k"""
condition = df["amount"] > 200000
df.loc[condition, 'isFlaggedFraud'] == 1

15         False
19         False
24         False
82         False
84         False
           ...  
6362615    False
6362616    False
6362617    False
6362618    False
6362619    False
Name: isFlaggedFraud, Length: 1673570, dtype: bool

In [3]:
"""storing the rows marked fraud for my referance"""
fraudtrans =  df.loc[df['isFraud'] == 1]
fraudtrans.to_csv('isfraud.csv', index = False)

In [4]:
"""now we analyze the data that we previously flagged"""
flaggedtrans = df.loc[df['isFlaggedFraud'] == 1]
flaggedtrans.to_csv('flagged.csv', index = False)

In [5]:
"""Here are a few conditions that would indicate fraudulant activity:
1. if the transaction type is cash-out/transfer/debit/payment and the amount is greater than oldbalanceOrig 
2. if the value of newbalanceDest does not increase when the type is transfer"""
transaction_types_tocheck = ['CASH-OUT','TRANSFER', 'DEBIT', 'PAYMENT']
check_type = df['type'].isin(transaction_types_tocheck)
sus_bal = df['oldbalanceOrg'] < df['amount']
sus_rows = check_type & sus_bal
if not df[sus_rows].empty:
    print("found suspicious transactions")
    print("-"*20)
    print(df[sus_rows])
    print("-"*20)
    df.loc[sus_rows, 'isFraud'] = 1

found suspicious transactions
--------------------
         step      type     amount     nameOrig  oldbalanceOrg  \
8           1   PAYMENT    4024.36  C1265012928         2671.0   
10          1     DEBIT    9644.94  C1900366749         4465.0   
13          1   PAYMENT   11633.76  C1716932897        10127.0   
16          1   PAYMENT    1563.82   C761750706          450.0   
19          1  TRANSFER  215310.30  C1670993182          705.0   
...       ...       ...        ...          ...            ...   
6362310   718   PAYMENT    6416.50  C1532874529          360.0   
6362311   718   PAYMENT    4276.61   C590780626          155.0   
6362314   718   PAYMENT   17841.23  C1045048098        10182.0   
6362316   718   PAYMENT    1022.91  C1203084509           12.0   
6362322   718  TRANSFER   82096.45   C614459560        13492.0   

         newbalanceOrig     nameDest  oldbalanceDest  newbalanceDest  isFraud  \
8                   0.0  M1176932104             0.0            0.00       

In [6]:
"""Now 2. if the value of newbalanceDest does not increase when the type is transfer"""

is_transfer = df['type'] == 'TRANSFER'
transfer_condition = df['oldbalanceDest'] >= df['newbalanceDest']
exclude_merchants = ~df['nameDest'].str.startswith('M')

sus_transfer_rows = is_transfer & transfer_condition & exclude_merchants
if not df[sus_transfer_rows].empty:
    print("suspicious transfers found!")
    print("-"*80)
    print(df[sus_transfer_rows])
    print("-"*80)
    df.loc[sus_transfer_rows, 'isFraud'] = 1

suspicious transfers found!
--------------------------------------------------------------------------------
         step      type      amount     nameOrig  oldbalanceOrg  \
2           1  TRANSFER      181.00  C1305486145         181.00   
19          1  TRANSFER   215310.30  C1670993182         705.00   
78          1  TRANSFER    42712.39   C283039401       10363.39   
79          1  TRANSFER    77957.68   C207471778           0.00   
80          1  TRANSFER    17231.46  C1243171897           0.00   
...       ...       ...         ...          ...            ...   
6362610   742  TRANSFER    63416.99   C778071008       63416.99   
6362612   743  TRANSFER  1258818.82  C1531301470     1258818.82   
6362614   743  TRANSFER   339682.13  C2013999242      339682.13   
6362616   743  TRANSFER  6311409.28  C1529008245     6311409.28   
6362618   743  TRANSFER   850002.52  C1685995037      850002.52   

         newbalanceOrig     nameDest  oldbalanceDest  newbalanceDest  isFraud  \
2    

In [7]:
"""Calculating the total number of fraudulant transactions caught by my code"""

fraud_count = len(df[sus_rows]) + len(df[sus_transfer_rows])
print("The number of fraudulant activities identified are", fraud_count)

The number of fraudulant activities identified are 1628527
