The goal of this project is to build a fraud detection model from an unlabeled Palpay dataset, using popular machine-learning techniques for anomaly detection.

In [1]:
import pandas as pd
import numpy as np
import matplotlib
import seaborn
import matplotlib.dates as md
from matplotlib import pyplot as plt
from sklearn import preprocessing
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split

#### 1. Data Preprocessing

#### 1.1 Loading Data

In [2]:
# Load the raw card-transaction sample into a DataFrame. The path is relative,
# so the CSV must sit in the notebook's working directory.
data = pd.read_csv("palpay_card_transactions_sample.csv")

In [3]:
# Display the raw frame (pandas truncates the rendering to the first and last rows).
data

Unnamed: 0,Transaction Date,Card Category,Card Type,Card Type AR,Channel,Channel AR,Transaction Type,Transaction Type Group,Entry Mode,Transaction Status,Outlet ID,Merchant Acronym,Merchant Desc,Merchant Country,Merchant Activity,Merchant Activity sub,Client Code,Amount,Amount USD,Transaction Currency
0,1-Jan-23,Palpay Cards,Visa Classic Debit,???? ?????? ??? ????????,ATM,???????? ??????,Withdrawal,ATM Transactions,Chip & Pin,PROCESSED,110002011,ALNASER NEW,BOP EL Naser St GAZA STRIP PS,Palestine,Other,AUTOMATED CASH,P00589393,-300.0,-86.460,ILS
1,1-Jan-23,Palpay Cards,Visa Classic Debit,???? ?????? ??? ????????,ATM,???????? ??????,Withdrawal,ATM Transactions,Chip & Pin,PROCESSED,110002011,ALNASER NEW,BOP EL Naser St GAZA STRIP PS,Palestine,Other,AUTOMATED CASH,P00589393,-100.0,-28.820,ILS
2,1-Jan-23,Palpay Cards,Visa Classic Debit,???? ?????? ??? ????????,POS,???? ?????,Purchase,Purchases,Chip & Pin,PROCESSED,110002413,YOUSEF MARKET,YOUSEF MARKET >YOUSEF MARKETPS,Palestine,,"Grocery stores,supermarkets",P00589801,-50.0,-14.409,ILS
3,1-Jan-23,Palpay Cards,Visa Classic Debit,???? ?????? ??? ????????,ATM,???????? ??????,Withdrawal,ATM Transactions,Chip & Pin,PROCESSED,110002011,ALNASER NEW,BOP EL Naser St GAZA STRIP PS,Palestine,Other,AUTOMATED CASH,P00565050,-1100.0,-317.000,ILS
4,1-Jan-23,Palpay Cards,Visa Classic Debit,???? ?????? ??? ????????,ATM,???????? ??????,Withdrawal,ATM Transactions,Other,PROCESSED,800000010,DEIR BALAH OLD,BOP Salah ElDain St GAZA STRIP PS,Palestine,Other,AUTOMATED CASH,P00575741,-150.0,-43.230,ILS
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
88089,30-Jun-23,Palpay Cards,Visa Classic Debit,???? ?????? ??? ????????,POS,???? ?????,Purchase,Purchases,Paywave,PROCESSED,310103241,AL BADER MARKET,AL BADER MARKET >BETHLEHEM PS,Palestine,,"Grocery stores,supermarkets",P00357371,-19.0,-5.205,ILS
88090,30-Jun-23,Palpay Cards,Visa Classic Debit,???? ?????? ??? ????????,ATM,???????? ??????,Withdrawal,ATM Transactions,Other,PROCESSED,10000401,OMAR ELMOKHTAR,BOP OMAR ELMOKHTAR ST - GAZA STRIP PS,Palestine,Other,AUTOMATED CASH,P00494710,-600.0,-600.000,USD
88091,30-Jun-23,Palpay Cards,Visa Classic Debit,???? ?????? ??? ????????,ATM,???????? ??????,Withdrawal,ATM Transactions,Other,PROCESSED,10000401,OMAR ELMOKHTAR,BOP OMAR ELMOKHTAR ST - GAZA STRIP PS,Palestine,Other,AUTOMATED CASH,P00352736,-1000.0,-1000.000,USD
88092,30-Jun-23,Palpay Cards,Visa Classic Debit,???? ?????? ??? ????????,POS,???? ?????,Purchase,Purchases,Paywave,PROCESSED,400000936,FAHED REST,FAHED REST >ALREMAL PS,Palestine,Entertainment & Restaurants,"Eating places,Restaurants",P00635693,-9.0,-2.466,ILS


#### 1.2 Understand data

In [4]:
# Column dtypes and non-null counts for the raw frame.
# DataFrame.info() writes its report to stdout itself and returns None, so it
# is called bare: wrapping it in print() would append a stray "None" line.
data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 88094 entries, 0 to 88093
Data columns (total 20 columns):
 #   Column                  Non-Null Count  Dtype  
---  ------                  --------------  -----  
 0   Transaction Date        88094 non-null  object 
 1   Card Category           88094 non-null  object 
 2   Card Type               88094 non-null  object 
 3   Card Type AR            88094 non-null  object 
 4   Channel                 88094 non-null  object 
 5   Channel AR              88094 non-null  object 
 6   Transaction Type        88094 non-null  object 
 7   Transaction Type Group  88094 non-null  object 
 8   Entry Mode              88094 non-null  object 
 9   Transaction Status      88094 non-null  object 
 10  Outlet ID               88094 non-null  int64  
 11   Merchant Acronym       87524 non-null  object 
 12  Merchant Desc           87514 non-null  object 
 13  Merchant Country        88094 non-null  object 
 14  Merchant Activity       49577 non-null

In [5]:
# List the exact column labels. Several carry leading/trailing whitespace
# (e.g. 'Transaction Date ', ' Merchant Acronym', 'Amount '); later cells
# reference these labels verbatim, including the padding.
print(data.columns)

Index(['Transaction Date ', 'Card Category ', 'Card Type', 'Card Type AR ',
       'Channel ', 'Channel AR ', 'Transaction Type', 'Transaction Type Group',
       'Entry Mode', 'Transaction Status ', 'Outlet ID', ' Merchant Acronym',
       'Merchant Desc', 'Merchant Country', 'Merchant Activity ',
       'Merchant Activity sub ', 'Client Code', 'Amount ', 'Amount USD',
       'Transaction Currency '],
      dtype='object')


In [6]:
# (rows, columns) of the raw frame, as a baseline before any cleaning.
data.shape

(88094, 20)

In [7]:
# Inspect the first ten date values to see how they are formatted before parsing.
print(data['Transaction Date '].iloc[:10])

0    1-Jan-23
1    1-Jan-23
2    1-Jan-23
3    1-Jan-23
4    1-Jan-23
5    1-Jan-23
6    1-Jan-23
7    1-Jan-23
8    1-Jan-23
9    1-Jan-23
Name: Transaction Date , dtype: object


In [8]:
# Convert the date strings to datetime64. The values look like "1-Jan-23"
# (day, abbreviated month name, two-digit year), so the format is given
# explicitly instead of being inferred: parsing is deterministic and any
# value that does not match the expected layout fails loudly.
data['Transaction Date '] = pd.to_datetime(
    data['Transaction Date '], format='%d-%b-%y'
)

#### 1.3 Drop redundant columns

In [9]:
# Remove columns that add no information for modelling: the '... AR' columns
# (presumably Arabic renderings of 'Card Type' / 'Channel' — confirm) and the
# original-currency amount and currency code, which 'Amount USD' replaces.
# The result is reassigned (no inplace=True) and missing labels are ignored,
# so re-running this cell is a no-op instead of raising KeyError.
data = data.drop(
    columns=['Card Type AR ', 'Channel AR ', 'Amount ', 'Transaction Currency '],
    errors='ignore',
)

In [10]:
# Re-list the column labels to verify which columns remain after the drop.
print(data.columns)

Index(['Transaction Date ', 'Card Category ', 'Card Type', 'Channel ',
       'Transaction Type', 'Transaction Type Group', 'Entry Mode',
       'Transaction Status ', 'Outlet ID', ' Merchant Acronym',
       'Merchant Desc', 'Merchant Country', 'Merchant Activity ',
       'Merchant Activity sub ', 'Client Code', 'Amount USD'],
      dtype='object')


In [11]:
# Display the frame again to check the parsed dates and the reduced column set.
data

Unnamed: 0,Transaction Date,Card Category,Card Type,Channel,Transaction Type,Transaction Type Group,Entry Mode,Transaction Status,Outlet ID,Merchant Acronym,Merchant Desc,Merchant Country,Merchant Activity,Merchant Activity sub,Client Code,Amount USD
0,2023-01-01,Palpay Cards,Visa Classic Debit,ATM,Withdrawal,ATM Transactions,Chip & Pin,PROCESSED,110002011,ALNASER NEW,BOP EL Naser St GAZA STRIP PS,Palestine,Other,AUTOMATED CASH,P00589393,-86.460
1,2023-01-01,Palpay Cards,Visa Classic Debit,ATM,Withdrawal,ATM Transactions,Chip & Pin,PROCESSED,110002011,ALNASER NEW,BOP EL Naser St GAZA STRIP PS,Palestine,Other,AUTOMATED CASH,P00589393,-28.820
2,2023-01-01,Palpay Cards,Visa Classic Debit,POS,Purchase,Purchases,Chip & Pin,PROCESSED,110002413,YOUSEF MARKET,YOUSEF MARKET >YOUSEF MARKETPS,Palestine,,"Grocery stores,supermarkets",P00589801,-14.409
3,2023-01-01,Palpay Cards,Visa Classic Debit,ATM,Withdrawal,ATM Transactions,Chip & Pin,PROCESSED,110002011,ALNASER NEW,BOP EL Naser St GAZA STRIP PS,Palestine,Other,AUTOMATED CASH,P00565050,-317.000
4,2023-01-01,Palpay Cards,Visa Classic Debit,ATM,Withdrawal,ATM Transactions,Other,PROCESSED,800000010,DEIR BALAH OLD,BOP Salah ElDain St GAZA STRIP PS,Palestine,Other,AUTOMATED CASH,P00575741,-43.230
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
88089,2023-06-30,Palpay Cards,Visa Classic Debit,POS,Purchase,Purchases,Paywave,PROCESSED,310103241,AL BADER MARKET,AL BADER MARKET >BETHLEHEM PS,Palestine,,"Grocery stores,supermarkets",P00357371,-5.205
88090,2023-06-30,Palpay Cards,Visa Classic Debit,ATM,Withdrawal,ATM Transactions,Other,PROCESSED,10000401,OMAR ELMOKHTAR,BOP OMAR ELMOKHTAR ST - GAZA STRIP PS,Palestine,Other,AUTOMATED CASH,P00494710,-600.000
88091,2023-06-30,Palpay Cards,Visa Classic Debit,ATM,Withdrawal,ATM Transactions,Other,PROCESSED,10000401,OMAR ELMOKHTAR,BOP OMAR ELMOKHTAR ST - GAZA STRIP PS,Palestine,Other,AUTOMATED CASH,P00352736,-1000.000
88092,2023-06-30,Palpay Cards,Visa Classic Debit,POS,Purchase,Purchases,Paywave,PROCESSED,400000936,FAHED REST,FAHED REST >ALREMAL PS,Palestine,Entertainment & Restaurants,"Eating places,Restaurants",P00635693,-2.466


In [12]:
# Re-check dtypes and non-null counts after the date conversion and column drop.
# DataFrame.info() prints its own report and returns None, so it is called
# bare: wrapping it in print() would append a stray "None" line.
data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 88094 entries, 0 to 88093
Data columns (total 16 columns):
 #   Column                  Non-Null Count  Dtype         
---  ------                  --------------  -----         
 0   Transaction Date        88094 non-null  datetime64[ns]
 1   Card Category           88094 non-null  object        
 2   Card Type               88094 non-null  object        
 3   Channel                 88094 non-null  object        
 4   Transaction Type        88094 non-null  object        
 5   Transaction Type Group  88094 non-null  object        
 6   Entry Mode              88094 non-null  object        
 7   Transaction Status      88094 non-null  object        
 8   Outlet ID               88094 non-null  int64         
 9    Merchant Acronym       87524 non-null  object        
 10  Merchant Desc           87514 non-null  object        
 11  Merchant Country        88094 non-null  object        
 12  Merchant Activity       49577 non-null  object

In [13]:
# (rows, columns) after dropping columns; the row count should be unchanged.
data.shape

(88094, 16)

#### 1.4 Handling duplicate rows

In [14]:
# Number of rows flagged by duplicated(): with the default keep='first', a row
# is flagged when it matches an earlier row on every column. Despite the name,
# this is a count, not the rows themselves.
# NOTE(review): the frame has a date but no timestamp or transaction-id column,
# so identical rows may be genuine repeat transactions — confirm before dropping.
duplicate_rows = data.duplicated().sum()

In [15]:
# Show the duplicate-row count computed in the previous cell.
duplicate_rows

5460

In [16]:
# Boolean mask with keep=False: every member of a duplicate group is marked,
# including the first occurrence, so whole groups can be inspected together.
duplicate_mask = data.duplicated(keep=False)

In [17]:
# Select every row that belongs to a duplicate group, using the boolean mask.
duplicate_rows1 = data.loc[duplicate_mask]

In [18]:
# Display the rows that take part in a duplicate group.
duplicate_rows1

Unnamed: 0,Transaction Date,Card Category,Card Type,Channel,Transaction Type,Transaction Type Group,Entry Mode,Transaction Status,Outlet ID,Merchant Acronym,Merchant Desc,Merchant Country,Merchant Activity,Merchant Activity sub,Client Code,Amount USD
8,2023-01-01,Palpay Cards,Visa Classic Debit,E-Commerce,Purchase,Purchases,E-commerce,PROCESSED,9999999999,APPLE.COM/BILL,APPLE.COM/BILL ITUNES.COM,VISA_IE,,"Digital Books Media: Books, movi",P00285871,-1.020
50,2023-01-01,Palpay Cards,Visa Classic Debit,E-Commerce,Purchase,Purchases,E-commerce,PROCESSED,9999999999,APPLE.COM/BILL,APPLE.COM/BILL ITUNES.COM,VISA_IE,,"Digital Books Media: Books, movi",P00285871,-1.020
62,2023-01-01,Palpay Cards,Visa Classic Debit,ATM,Withdrawal,ATM Transactions,Other,PROCESSED,10000401,OMAR ELMOKHTAR,BOP OMAR ELMOKHTAR ST - GAZA STRIP PS,Palestine,Other,AUTOMATED CASH,P00589862,-86.460
93,2023-01-01,Palpay Cards,Visa Classic Debit,POS,Purchase,Purchases,Paywave,PROCESSED,301001036,EYAD HALAWAH SUPERMARKET,EYAD HALAWAH SUPERMARKET>EYAD HALAWAH PS,Palestine,Supermarkets & Confectionary,"Grocery stores,supermarkets",P00557265,-2.882
124,2023-01-01,Palpay Cards,Visa Classic Debit,POS,Purchase,Purchases,Paywave,PROCESSED,301001036,EYAD HALAWAH SUPERMARKET,EYAD HALAWAH SUPERMARKET>EYAD HALAWAH PS,Palestine,Supermarkets & Confectionary,"Grocery stores,supermarkets",P00557265,-2.882
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
88016,2023-06-29,Palpay Cards,Visa Classic Debit,ATM,Withdrawal,ATM Transactions,Other,PROCESSED,10000401,OMAR ELMOKHTAR,BOP OMAR ELMOKHTAR ST - GAZA STRIP PS,Palestine,Other,AUTOMATED CASH,P00569588,-41.100
88017,2023-06-29,Palpay Cards,Visa Classic Debit,ATM,Withdrawal,ATM Transactions,Other,PROCESSED,10000401,OMAR ELMOKHTAR,BOP OMAR ELMOKHTAR ST - GAZA STRIP PS,Palestine,Other,AUTOMATED CASH,P00569588,-41.100
88020,2023-06-29,Palpay Cards,Visa Classic Debit,ATM,Withdrawal,ATM Transactions,Other,PROCESSED,10000401,OMAR ELMOKHTAR,BOP OMAR ELMOKHTAR ST - GAZA STRIP PS,Palestine,Other,AUTOMATED CASH,P00569588,-13.700
88025,2023-06-29,Palpay Cards,Visa Classic Debit,ATM,Withdrawal,ATM Transactions,Other,PROCESSED,10000401,OMAR ELMOKHTAR,BOP OMAR ELMOKHTAR ST - GAZA STRIP PS,Palestine,Other,AUTOMATED CASH,P00569588,-41.100


#### 1.5 Handling Missing Values

In [19]:
# Count missing values per column (isna is the canonical name; isnull is its alias).
data.isna().sum()

Transaction Date              0
Card Category                 0
Card Type                     0
Channel                       0
Transaction Type              0
Transaction Type Group        0
Entry Mode                    0
Transaction Status            0
Outlet ID                     0
 Merchant Acronym           570
Merchant Desc               580
Merchant Country              0
Merchant Activity         38517
Merchant Activity sub         0
Client Code                   0
Amount USD                    0
dtype: int64