# Track A: SWIFT-only example baseline model

This notebook, provided by SWIFT, contains two example baseline models for Track A: Financial Crime Prevention for the [PETs Prize Challenge](https://petsprizechallenges.com/).

These two models are intended as an example to help you get started on developing a model for this use case. Note that these example models only incorporate features from the SWIFT transactions dataset. These models are not examples of full centralized solutions in the context of the challenge, as they do not incorporate the account data from the banks. 

## Imports

In [1]:
### Libraries for Data Handling

from pathlib import Path

import numpy as np
import pandas as pd

pd.set_option("display.max_columns", None)

In [2]:
### Libraries for Algorithms

from sklearn.model_selection import train_test_split
from sklearn.model_selection import KFold
from sklearn.model_selection import ShuffleSplit, StratifiedKFold, StratifiedShuffleSplit
from sklearn.model_selection import cross_val_score
from sklearn import metrics
from sklearn.metrics import classification_report
from sklearn.metrics import confusion_matrix
from sklearn.metrics import accuracy_score
from sklearn.preprocessing import StandardScaler, MinMaxScaler
import sklearn.utils
from sklearn.impute import SimpleImputer
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression

from xgboost import XGBClassifier

# Fetch Data

In [95]:
train = pd.read_csv("swift_transaction_train_dataset.csv", index_col="MessageId"
)
train["Timestamp"] = train["Timestamp"].astype("datetime64[ns]")
test = pd.read_csv("swift_transaction_test_dataset.csv", index_col="MessageId")
test["Timestamp"] = test["Timestamp"].astype("datetime64[ns]")

In [96]:
swift = pd.concat([train, test])

In [117]:
swift.to_csv('./swift.csv', index=False)

In [9]:
swift = pd.read_csv('./swift.csv')
bank = pd.read_csv('./bank_dataset.csv')
swift["Timestamp"] = swift["Timestamp"].astype("datetime64[ns]")

In [76]:
swift

Unnamed: 0_level_0,Timestamp,UETR,Sender,Receiver,TransactionReference,OrderingAccount,OrderingName,OrderingStreet,OrderingCountryCityZip,BeneficiaryAccount,BeneficiaryName,BeneficiaryStreet,BeneficiaryCountryCityZip,SettlementDate,SettlementCurrency,SettlementAmount,InstructedCurrency,InstructedAmount,Label,sender_flag,order_flag,bene_flag
MessageId,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1
TRA7CGN3FF,2022-01-01 00:00:00,f474fdb3-4675-4fff-ab7e-3469f82bd6a7,DPSUFRPP,ABVVUS6S,PETX22-FXIDA-7054,FR90714755422956984353,PHACELIA HETEROPHYLLA,3| RUE HAMON,FR/42859 SAINTE AURÉLIE,611024064274704358,PAPAVER CALIFORNICUM,2584 CHARLES PLACE,US/ROJASLAND| DC 58442,220101,USD,1.746319e+09,EUR,1.560189e+09,0,0.0,0.0,0.0
TRPNEMZIR7,2022-01-01 00:00:00,c9158def-dab1-4bfb-a31f-7f51c6679d60,BRRGPTPL,CBLHESMM,PETX22-NO-FX-1736,PT8895792452733129969,GONOLOBUS STEPHANOTRICHUS,AV RITA ALVES| 60,PT/5863-752 CANTANHEDE,ES61897100852916932423,MINUARTIA NUTTALLII-GREGARIA,ACCESO DE CARMINA ARAGÓN 83 PUERTA 4,ES/ÁVILA| 02281,220101,EUR,4.711420e+06,EUR,4.711420e+06,0,0.0,0.0,0.0
TR6S6A5JYL,2022-01-01 00:00:00,d371ba0a-823f-4243-98ba-94ff18523420,BRRGPTPL,CBLHESMM,PETX22-NO-FX-1687,PT92895792452733126420,LECHEA INTERMEDIA-INTERMEDIA,PRAÇA VALENTE| 85,PT/1100-087 BARCELOS,ES31897100852916935097,ASTRAGALUS MAGDALENAE,PASADIZO ANÍBAL LUJÁN 57,ES/SEGOVIA| 40727,220101,EUR,7.528216e+05,EUR,7.528216e+05,0,0.0,0.0,0.0
TRT4VPEHAK,2022-01-01 00:00:00,5a53a257-4dc9-4800-abb2-4cd1d55c8345,DPSUFRPP,ABVVUS6S,DPSU22-FXIYA-517,358727697099645998,SCLERANTHUS,341 4 CHOME 4 BAN 2 GO,JP/FUKUOKA PREFECTURE|ŌKAWA,611024064274698543,SYNGONIUM PODOPHYLLUM,7864 MORRIS MEWS APT. 464,US/DPO AE 78549,220101,USD,6.371209e+06,JPY,6.490487e+08,0,0.0,0.0,0.0
TRTADQPGAV,2022-01-01 00:00:00,f27867ac-35e2-46af-8248-0a2d0d9bf00d,DPSUFRPP,ABVVUS6S,PETX22-FXIDA-11878,FR71714755422956985471,SELAGINELLA ASPRELLA,28| BOULEVARD LÉVÊQUE,FR/36357 TURPIN,611024064274707099,GALACTIA PARVIFOLIA,363 ROBERT GARDENS,US/NEW KAREN| MS 49461,220101,USD,5.179423e+06,EUR,4.627377e+06,0,0.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
TEQTQRLGGX,2022-01-30 11:32:00,384a1331-b107-4037-b934-d4cb572c4db7,DPSUFRPP,ABVVUS6S,PETX22-FXIDA-14755,FR47714755422957005109,HIPPOBROMA,92| BOULEVARD GERMAIN,FR/91908 VASSEURDAN,611024064274705999,LUPINUS EXCUBITUS-EXCUBITUS,5413 JEFFREY TRAIL,US/NEW KATRINAFURT| OR 55598,220130,USD,9.858599e+05,EUR,8.807826e+05,0,0.0,0.0,0.0
TE03856K6O,2022-01-25 09:12:00,5fe8d78f-91fc-4f6e-824e-2fa6665e3d35,FAMAPEPL,KNPVECEQ,PETX22-NO-FX-1419,PE17811955907108603437,ILEX CASSINE,STEVEN PARRAGUEZ 329 PISO 85,PE/GENERAL LAGOS| REGIÓN DE ARICA Y PARINACOTA,126792733154041024,ELEOCHARIS SINTENISII,CALLE MANUEL RODRÍGUEZ 9651,EC/MULCHÉN| REGIÓN DEL BIOBÍO,220125,EUR,1.569079e+05,EUR,1.568879e+05,0,0.0,0.0,0.0
TEFVA9PMX6,2022-01-28 08:30:00,2af913c9-c9a0-4c84-a4ec-432081ace9f0,DPSUFRPP,ABVVUS6S,PETX22-FXIDA-9676,FR32714755422956981614,LUPINUS DENSIFLORUS-DENSIFLORUS,5| CHEMIN BOURGEOIS,FR/09981 SAINTE MARCELLE-LA-FORÊT,611024064274691278,SCHKUHRIA PINNATA-PINNATA,685 THOMAS LAKE SUITE 847,US/ADRIANBURY| ID 81927,220128,USD,1.963640e+06,EUR,1.754347e+06,0,0.0,0.0,0.0
TEZKONOZUE,2022-01-30 08:35:00,32444207-b95d-4f5b-b92f-13a25f1691cf,DPSUFRPP,WVOLDEMM,DPSU22-FXIYA-1179,611024064274713201,HARPEROCALLIS,7132 DENNIS VIA,US/RAYMONDBERG| ND 51398,358727697099649494,MONTRICHARDIA,487 5 CHOME 5 BAN 8 GO,JP/GIFU PREFECTURE|TAKAYAMA,220130,EUR,6.542899e+07,USD,7.323591e+07,0,0.0,0.0,0.0


In [41]:
swift[swift['Label']==0].UETR.nunique()/len(swift)

0.8709281906629314

In [28]:
swift['UETR'].value_counts()

fd160df8-3034-496e-872f-261bfeb55920    3
34847a25-a966-4104-8bb3-c065ff3ba773    3
12006634-f3d9-4cb0-a67e-a1783bd6a61c    3
f8a710b5-322c-4fa1-8deb-51d39ca9f948    3
0e1c0f3f-ac08-42ec-a8f2-f4f06e214b2f    3
                                       ..
970316b8-ac2b-4724-8168-f30bc439082b    1
29e2ae47-c8db-4543-b658-5104a163d29c    1
a8b425f4-c72e-422b-a3ba-eca8f5891607    1
eaadb4c7-41f2-4317-ada7-b27a4284b97f    1
f91d9d03-4a83-47ba-abf5-d253a8d39bc4    1
Name: UETR, Length: 4705862, dtype: int64

In [44]:
swift[swift['UETR']== 'fd160df8-3034-496e-872f-261bfeb55920']

Unnamed: 0,Timestamp,UETR,Sender,Receiver,TransactionReference,OrderingAccount,OrderingName,OrderingStreet,OrderingCountryCityZip,BeneficiaryAccount,BeneficiaryName,BeneficiaryStreet,BeneficiaryCountryCityZip,SettlementDate,SettlementCurrency,SettlementAmount,InstructedCurrency,InstructedAmount,Label,order_flag,bene_flag,hour,sender_hour,sender_hour_freq,sender_currency,sender_currency_freq,sender_currency_amount_average,sender_receiver,sender_receiver_freq
1286414,2022-01-07 00:39:00,fd160df8-3034-496e-872f-261bfeb55920,DPSUFRPP,ABVVUS6S,DPSU22-FXIYA-477,358727697099655461,ORCUTTIA PILOSA,403 5 CHOME 5 BAN 4 GO,JP/HYōGO PREFECTURE|ITAMI,611024064274709314,POTENTILLA STIPULARIS-GROENLANDICA,185 TAYLOR VILLE APT. 562,US/SOUTH KRISTINASIDE| GA 19688,220107,USD,8893576.52,JPY,906000414.0,0,0.0,0.0,0,DPSUFRPP0,19046,DPSUFRPPJPY,24711,499388700.0,DPSUFRPPABVVUS6S,1966056
1462514,2022-01-07 22:20:00,fd160df8-3034-496e-872f-261bfeb55920,DECKJPJJ,WVOLDEMM,PETX22-FXIYA-477,358727697099655461,ORCUTTIA PILOSA,403 5 CHOME 5 BAN 4 GO,JP/HYōGO PREFECTURE|ITAMI,611024064274709314,POTENTILLA STIPULARIS-GROENLANDICA,185 TAYLOR VILLE APT. 562,US/SOUTH KRISTINASIDE| GA 19688,220107,EUR,7945679.36,JPY,906000414.0,0,0.0,0.0,22,DECKJPJJ22,12970,DECKJPJJJPY,34558,724210300000.0,DECKJPJJWVOLDEMM,27038
1465444,2022-01-07 23:07:00,fd160df8-3034-496e-872f-261bfeb55920,WVOLDEMM,DPSUFRPP,WVOL22-FXIYA-477,358727697099655461,ORCUTTIA PILOSA,403 5 CHOME 5 BAN 4 GO,JP/HYōGO PREFECTURE|ITAMI,611024064274709314,POTENTILLA STIPULARIS-GROENLANDICA,185 TAYLOR VILLE APT. 562,US/SOUTH KRISTINASIDE| GA 19688,220107,EUR,7945669.36,JPY,906000414.0,0,0.0,0.0,23,WVOLDEMM23,15318,WVOLDEMMJPY,27014,924740600000.0,WVOLDEMMDPSUFRPP,504048


In [45]:
swift[swift['UETR']== '34847a25-a966-4104-8bb3-c065ff3ba773']

Unnamed: 0,Timestamp,UETR,Sender,Receiver,TransactionReference,OrderingAccount,OrderingName,OrderingStreet,OrderingCountryCityZip,BeneficiaryAccount,BeneficiaryName,BeneficiaryStreet,BeneficiaryCountryCityZip,SettlementDate,SettlementCurrency,SettlementAmount,InstructedCurrency,InstructedAmount,Label,order_flag,bene_flag,hour,sender_hour,sender_hour_freq,sender_currency,sender_currency_freq,sender_currency_amount_average,sender_receiver,sender_receiver_freq
1710724,2022-01-09 07:23:00,34847a25-a966-4104-8bb3-c065ff3ba773,ABVVUS6S,DPSUFRPP,PETX22-FXIYA-702,611024064274707123,ELAEAGNUS COMMUTATA,12916 MILLER RAPID,US/LAKE DAVID| WY 56808,358727697099657132,CAREX JAMESII,647 7 CHOME 7 BAN 8 GO,JP/SHIZUOKA PREFECTURE|FUJINOMIYA,220109,EUR,100189200.0,USD,112143700.0,0,0.0,0.0,7,ABVVUS6S7,59558,ABVVUS6SUSD,422089,110353900.0,ABVVUS6SDPSUFRPP,145498
1727129,2022-01-09 08:16:00,34847a25-a966-4104-8bb3-c065ff3ba773,DPSUFRPP,WVOLDEMM,DPSU22-FXIYA-702,611024064274707123,ELAEAGNUS COMMUTATA,12916 MILLER RAPID,US/LAKE DAVID| WY 56808,358727697099657132,CAREX JAMESII,647 7 CHOME 7 BAN 8 GO,JP/SHIZUOKA PREFECTURE|FUJINOMIYA,220109,EUR,100189200.0,USD,112143700.0,0,0.0,0.0,8,DPSUFRPP8,195031,DPSUFRPPUSD,145608,76516370.0,DPSUFRPPWVOLDEMM,145528
1758302,2022-01-09 09:52:00,34847a25-a966-4104-8bb3-c065ff3ba773,WVOLDEMM,DECKJPJJ,WVOL22-FXIYA-702,611024064274707123,ELAEAGNUS COMMUTATA,12916 MILLER RAPID,US/LAKE DAVID| WY 56808,358727697099657132,CAREX JAMESII,647 7 CHOME 7 BAN 8 GO,JP/SHIZUOKA PREFECTURE|FUJINOMIYA,220109,JPY,11417560000.0,USD,112143700.0,0,0.0,0.0,9,WVOLDEMM9,120171,WVOLDEMMUSD,270804,100286800.0,WVOLDEMMDECKJPJJ,196086


In [35]:
swift[swift['Label']==1].UETR.nunique()

5608

In [37]:
len(swift[swift['Label']==1])

5661

In [None]:
4900 + 761 

In [42]:
swift.columns

Index(['Timestamp', 'UETR', 'Sender', 'Receiver', 'TransactionReference',
       'OrderingAccount', 'OrderingName', 'OrderingStreet',
       'OrderingCountryCityZip', 'BeneficiaryAccount', 'BeneficiaryName',
       'BeneficiaryStreet', 'BeneficiaryCountryCityZip', 'SettlementDate',
       'SettlementCurrency', 'SettlementAmount', 'InstructedCurrency',
       'InstructedAmount', 'Label', 'order_flag', 'bene_flag', 'hour',
       'sender_hour', 'sender_hour_freq', 'sender_currency',
       'sender_currency_freq', 'sender_currency_amount_average',
       'sender_receiver', 'sender_receiver_freq'],
      dtype='object')

## Add Features for Model Training

In [10]:
%%time
# Hour
swift["hour"] = swift["Timestamp"].dt.hour

# Hour frequency for each sender
senders = swift["Sender"].unique()
swift["sender_hour"] = swift["Sender"] + swift["hour"].astype(str)
sender_hour_frequency = {}
for s in senders:
    sender_rows = swift[swift["Sender"] == s]
    for h in range(24):
        sender_hour_frequency[s + str(h)] = len(sender_rows[sender_rows["hour"] == h])

swift["sender_hour_freq"] = swift["sender_hour"].map(sender_hour_frequency)


CPU times: user 6.93 s, sys: 700 ms, total: 7.63 s
Wall time: 7.64 s


In [11]:
%%time
# Sender-Currency Frequency and Average Amount per Sender-Currency
swift["sender_currency"] = swift["Sender"] + swift["InstructedCurrency"]

sender_currency_freq = {}
sender_currency_avg = {}

for sc in set(
    list(swift["sender_currency"].unique())):
    sender_currency_freq[sc] = len(swift[swift["sender_currency"] == sc])
    sender_currency_avg[sc] = swift[swift["sender_currency"] == sc][
        "InstructedAmount"
    ].mean()

swift["sender_currency_freq"] = swift["sender_currency"].map(sender_currency_freq)
swift["sender_currency_amount_average"] = swift["sender_currency"].map(
    sender_currency_avg
)


CPU times: user 19.2 s, sys: 676 ms, total: 19.9 s
Wall time: 20 s


In [12]:
%%time
# Sender-Receiver Frequency
swift["sender_receiver"] = swift["Sender"] + swift["Receiver"]

sender_receiver_freq = {}

for sr in set(
    list(swift["sender_receiver"].unique())
):
    sender_receiver_freq[sr] = len(swift[swift["sender_receiver"] == sr])

swift["sender_receiver_freq"] = swift["sender_receiver"].map(sender_receiver_freq)


CPU times: user 21.6 s, sys: 709 ms, total: 22.3 s
Wall time: 22.3 s


In [97]:
len(swift['BeneficiaryAccount'].unique())

228006

In [98]:
len(swift['OrderingAccount'].unique())

329823

In [99]:
len(bank['Account'].unique())

527503

In [112]:
order_miss = list(swift[swift['order_flag'].isnull()].OrderingAccount.unique())
bene_miss = list(swift[swift['bene_flag'].isnull()].BeneficiaryAccount.unique())

In [116]:
set(order_miss).intersection(bene_miss)

set()

In [14]:
acc_flag = pd.Series(bank.Flags.values,index=bank.Account).to_dict()
swift['order_flag'] = swift['OrderingAccount'].map(acc_flag)
swift['bene_flag'] = swift['BeneficiaryAccount'].map(acc_flag)

In [12]:
# Account previously used

In [13]:
train.head(3)

Unnamed: 0_level_0,Timestamp,UETR,Sender,Receiver,TransactionReference,OrderingAccount,OrderingName,OrderingStreet,OrderingCountryCityZip,BeneficiaryAccount,BeneficiaryName,BeneficiaryStreet,BeneficiaryCountryCityZip,SettlementDate,SettlementCurrency,SettlementAmount,InstructedCurrency,InstructedAmount,Label,hour,sender_hour,sender_hour_freq,sender_currency,sender_currency_freq,sender_currency_amount_average,sender_receiver,sender_receiver_freq
MessageId,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1,Unnamed: 24_level_1,Unnamed: 25_level_1,Unnamed: 26_level_1,Unnamed: 27_level_1
TRA7CGN3FF,2022-01-01,f474fdb3-4675-4fff-ab7e-3469f82bd6a7,DPSUFRPP,ABVVUS6S,PETX22-FXIDA-7054,FR90714755422956984353,PHACELIA HETEROPHYLLA,3| RUE HAMON,FR/42859 SAINTE AURÉLIE,611024064274704358,PAPAVER CALIFORNICUM,2584 CHARLES PLACE,US/ROJASLAND| DC 58442,220101,USD,1746319000.0,EUR,1560189000.0,0,0,DPSUFRPP0,16519,DPSUFRPPEUR,1598751,169246500.0,DPSUFRPPABVVUS6S,1708051
TRPNEMZIR7,2022-01-01,c9158def-dab1-4bfb-a31f-7f51c6679d60,BRRGPTPL,CBLHESMM,PETX22-NO-FX-1736,PT8895792452733129969,GONOLOBUS STEPHANOTRICHUS,AV RITA ALVES| 60,PT/5863-752 CANTANHEDE,ES61897100852916932423,MINUARTIA NUTTALLII-GREGARIA,ACCESO DE CARMINA ARAGÓN 83 PUERTA 4,ES/ÁVILA| 02281,220101,EUR,4711420.0,EUR,4711420.0,0,0,BRRGPTPL0,4214,BRRGPTPLEUR,36690,1667354.0,BRRGPTPLCBLHESMM,36690
TR6S6A5JYL,2022-01-01,d371ba0a-823f-4243-98ba-94ff18523420,BRRGPTPL,CBLHESMM,PETX22-NO-FX-1687,PT92895792452733126420,LECHEA INTERMEDIA-INTERMEDIA,PRAÇA VALENTE| 85,PT/1100-087 BARCELOS,ES31897100852916935097,ASTRAGALUS MAGDALENAE,PASADIZO ANÍBAL LUJÁN 57,ES/SEGOVIA| 40727,220101,EUR,752821.6,EUR,752821.6,0,0,BRRGPTPL0,4214,BRRGPTPLEUR,36690,1667354.0,BRRGPTPLCBLHESMM,36690


In [124]:
swift.head()

Unnamed: 0_level_0,Timestamp,UETR,Sender,Receiver,TransactionReference,OrderingAccount,OrderingName,OrderingStreet,OrderingCountryCityZip,BeneficiaryAccount,BeneficiaryName,BeneficiaryStreet,BeneficiaryCountryCityZip,SettlementDate,SettlementCurrency,SettlementAmount,InstructedCurrency,InstructedAmount,Label,order_flag,bene_flag,hour,sender_hour,sender_hour_freq,sender_currency,sender_receiver,sender_receiver_freq,sender_currency_freq,sender_currency_amount_average
MessageId,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1,Unnamed: 24_level_1,Unnamed: 25_level_1,Unnamed: 26_level_1,Unnamed: 27_level_1,Unnamed: 28_level_1,Unnamed: 29_level_1
TRA7CGN3FF,2022-01-01,f474fdb3-4675-4fff-ab7e-3469f82bd6a7,DPSUFRPP,ABVVUS6S,PETX22-FXIDA-7054,FR90714755422956984353,PHACELIA HETEROPHYLLA,3| RUE HAMON,FR/42859 SAINTE AURÉLIE,611024064274704358,PAPAVER CALIFORNICUM,2584 CHARLES PLACE,US/ROJASLAND| DC 58442,220101,USD,1746319000.0,EUR,1560189000.0,0,0.0,0.0,0,DPSUFRPP0,19046,DPSUFRPPEUR,DPSUFRPPABVVUS6S,1966056,1840201,169010400.0
TRPNEMZIR7,2022-01-01,c9158def-dab1-4bfb-a31f-7f51c6679d60,BRRGPTPL,CBLHESMM,PETX22-NO-FX-1736,PT8895792452733129969,GONOLOBUS STEPHANOTRICHUS,AV RITA ALVES| 60,PT/5863-752 CANTANHEDE,ES61897100852916932423,MINUARTIA NUTTALLII-GREGARIA,ACCESO DE CARMINA ARAGÓN 83 PUERTA 4,ES/ÁVILA| 02281,220101,EUR,4711420.0,EUR,4711420.0,0,0.0,0.0,0,BRRGPTPL0,4868,BRRGPTPLEUR,BRRGPTPLCBLHESMM,42551,42551,1673719.0
TR6S6A5JYL,2022-01-01,d371ba0a-823f-4243-98ba-94ff18523420,BRRGPTPL,CBLHESMM,PETX22-NO-FX-1687,PT92895792452733126420,LECHEA INTERMEDIA-INTERMEDIA,PRAÇA VALENTE| 85,PT/1100-087 BARCELOS,ES31897100852916935097,ASTRAGALUS MAGDALENAE,PASADIZO ANÍBAL LUJÁN 57,ES/SEGOVIA| 40727,220101,EUR,752821.6,EUR,752821.6,0,0.0,0.0,0,BRRGPTPL0,4868,BRRGPTPLEUR,BRRGPTPLCBLHESMM,42551,42551,1673719.0
TRT4VPEHAK,2022-01-01,5a53a257-4dc9-4800-abb2-4cd1d55c8345,DPSUFRPP,ABVVUS6S,DPSU22-FXIYA-517,358727697099645998,SCLERANTHUS,341 4 CHOME 4 BAN 2 GO,JP/FUKUOKA PREFECTURE|ŌKAWA,611024064274698543,SYNGONIUM PODOPHYLLUM,7864 MORRIS MEWS APT. 464,US/DPO AE 78549,220101,USD,6371209.0,JPY,649048700.0,0,0.0,0.0,0,DPSUFRPP0,19046,DPSUFRPPJPY,DPSUFRPPABVVUS6S,1966056,24711,499388700.0
TRTADQPGAV,2022-01-01,f27867ac-35e2-46af-8248-0a2d0d9bf00d,DPSUFRPP,ABVVUS6S,PETX22-FXIDA-11878,FR71714755422956985471,SELAGINELLA ASPRELLA,28| BOULEVARD LÉVÊQUE,FR/36357 TURPIN,611024064274707099,GALACTIA PARVIFOLIA,363 ROBERT GARDENS,US/NEW KAREN| MS 49461,220101,USD,5179423.0,EUR,4627377.0,0,0.0,0.0,0,DPSUFRPP0,19046,DPSUFRPPEUR,DPSUFRPPABVVUS6S,1966056,1840201,169010400.0


In [15]:
# Exclude below categorical columns for training and testing

columns_to_drop = [
    "UETR",
    "Sender",
    "Receiver",
    "TransactionReference",
    "OrderingAccount",
    "OrderingName",
    "OrderingStreet",
    "OrderingCountryCityZip",
    "BeneficiaryAccount",
    "BeneficiaryName",
    "BeneficiaryStreet",
    "BeneficiaryCountryCityZip",
    "SettlementDate",
    "SettlementCurrency",
    "InstructedCurrency",
    "Timestamp",
    "sender_hour",
    "sender_currency",
    "sender_receiver",
]

data = swift.copy().drop(columns_to_drop, axis=1)
data.head(3)

Unnamed: 0,SettlementAmount,InstructedAmount,Label,order_flag,bene_flag,hour,sender_hour_freq,sender_currency_freq,sender_currency_amount_average,sender_receiver_freq
0,1746319000.0,1560189000.0,0,0.0,0.0,0,19046,1840201,169010400.0,1966056
1,4711420.0,4711420.0,0,0.0,0.0,0,4868,42551,1673719.0,42551
2,752821.6,752821.6,0,0.0,0.0,0,4868,42551,1673719.0,42551


In [126]:
swift[swift["Label"] == 1]

Unnamed: 0_level_0,Timestamp,UETR,Sender,Receiver,TransactionReference,OrderingAccount,OrderingName,OrderingStreet,OrderingCountryCityZip,BeneficiaryAccount,BeneficiaryName,BeneficiaryStreet,BeneficiaryCountryCityZip,SettlementDate,SettlementCurrency,SettlementAmount,InstructedCurrency,InstructedAmount,Label,order_flag,bene_flag,hour,sender_hour,sender_hour_freq,sender_currency,sender_receiver,sender_receiver_freq,sender_currency_freq,sender_currency_amount_average
MessageId,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1,Unnamed: 24_level_1,Unnamed: 25_level_1,Unnamed: 26_level_1,Unnamed: 27_level_1,Unnamed: 28_level_1,Unnamed: 29_level_1
TRHBNUNW24,2022-01-01 00:21:00,21109ecb-b82e-428c-909c-7d196b7e0833,ABVVUS6S,WVOLDEMM,PETX22-FXIDA-4472,611024064274693544,PACKERA CONTERMINA,658 SHERRY FERRY APT. 030,US/PETERFURT| AL 25220,DE31020224692198589228,RUMEX SANGUINEUS,THERES-PUTZ-GASSE 17,DE/52093 KULMBACH,220101,EUR,1.516912e+08,USD,1.697910e+08,1,0.0,0.0,0,ABVVUS6S0,51,ABVVUS6SUSD,ABVVUS6SWVOLDEMM,276604,422089,1.103539e+08
TR5PG7D9PB,2022-01-01 01:01:00,1864003d-418e-46e1-bd62-972099e8515f,ZOUOGB22,WMVZGB2L,PETX22-FXIDA-5143,GB94679059808029746046,GILIA BRECCIARUM-NEGLECTA,FLAT 78 O'BRIEN SKYWAY RYANPORT,GB/L4 8DU,199377071818559934,LYSIMACHIA ◊RADFORDII,566 HENRY MOUNTAINS BAKERBOROUGH,GB/S6K 3WH,220101,GBP,2.433016e+06,GBP,2.433016e+06,1,0.0,0.0,1,ZOUOGB221,72,ZOUOGB22GBP,ZOUOGB22WMVZGB2L,307700,608771,1.674291e+06
TRO5CCVOGF,2022-01-01 01:04:00,b60d7297-c127-4fcb-8cd7-bac07f49c148,DPSUFRPP,ABVVUS6S,PETX22-FXIDA-34626,FR37714755422957005677,AVICENNIA MARINA-RESINIFERA,99| AVENUE NICOLAS,FR/21079 REGNIER,61102406474705654,EUPHORBIA TRICHOTOMA,1510 JONES WAY APT. 019,US/HARRISSTAD| MD 35600,220101,USD,3.768829e+06,EUR,3.367130e+06,1,0.0,,1,DPSUFRPP1,33584,DPSUFRPPEUR,DPSUFRPPABVVUS6S,1966056,1840201,1.690104e+08
TRKMGKXV70,2022-01-01 01:33:00,509b76ef-e17f-4866-9d04-10e122c4648b,ZOUOGB22,WMVZGB2L,PETX22-FXIDA-8515,GB76679059808029766934,TRITELEIA IXIOIDES-COOKII,FLAT 1 FRANCIS BRIDGE WHITETOWN,GB/CT11 9GU,199377071818556353,GILIA BRECCIARUM-JACENS,9 ANNETTE COVE SOUTH TINABURY,GB/S46 8WY,220101,GBP,9.325149e+05,GBP,9.325149e+05,1,0.0,0.0,1,ZOUOGB221,72,ZOUOGB22GBP,ZOUOGB22WMVZGB2L,307700,608771,1.674291e+06
TR93KY2K2Z,2022-01-01 01:38:00,b81293d2-3d02-4eb1-b151-2b34f31623f6,ZOUOGB22,WMVZGB2L,PETX22-FXIDA-9651,GB54679059808029759182,HEUCHERA PARVIFOLIA-PARVIFOLIA,FLAT 8 BURROWS MALL LEONARDSHIRE,GB/SO78 9ER,199377071818547405,PASPALUM BIFIDUM,87 WEBB DAM PAMELAMOUTH,GB/SR06 3RT,220101,GBP,1.959181e+06,GBP,1.959181e+06,1,0.0,0.0,1,ZOUOGB221,72,ZOUOGB22GBP,ZOUOGB22WMVZGB2L,307700,608771,1.674291e+06
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
TEIFN9H1VG,2022-01-28 04:53:00,c87a8c77-63e3-43fb-9586-52b9a9a514be,DPSUFRPP,ABVVUS6S,PETX22-FXIDA-260,FR71714755422956974025,CYNOGLOSSUM FURCATUM,43| AVENUE VINCENT,FR/88504 SAINT NOÉMI,6110240642746969XX,SANICULA MARIVERSA,47035 THOMPSON CLUB,"US/NEW LINDSEY, MA 70633",220128,USD,3.658144e+06,GBP,2.939922e+06,1,0.0,4.0,4,DPSUFRPP4,130291,DPSUFRPPGBP,DPSUFRPPABVVUS6S,1966056,251926,3.699564e+06
TEDKORH5M2,2022-01-29 11:27:00,a1a6a27e-6b58-47c6-b7a8-80972e3bb9b7,DPSUFRPP,ABVVUS6S,PETX22-FXIDA-8525,FR2114755422956996847,LESQUERELLA UTAHENSIS,24| RUE DE RAYMOND,FR/62344 SAINT JULES,611024064274710012,MONARDELLA PALMERI,85893 MONTGOMERY PATH,US/HANNAHFURT| WY 32536,220129,USD,2.723516e+06,GBP,2.188794e+06,1,,0.0,11,DPSUFRPP11,635743,DPSUFRPPGBP,DPSUFRPPABVVUS6S,1966056,251926,3.699564e+06
TEFKDNVAKL,2022-01-27 22:51:00,1039e175-019f-465c-9cda-00022d6b11de,DPSUFRPP,ABVVUS6S,PETX22-FXIDA-5439,FR11714755422956976851,SWALLENIA ALEXANDRAE,57| CHEMIN BENOÎT BRUNET,FR/20424 VOISIN,611024064274702714,EPIPACTIS GIGANTEA,186 BRADLEY HOLLOW SUITE 807,US/PORT SARAH| HI 27543,220127,USD,3.186411e+06,GBP,2.560806e+06,1,0.0,0.0,22,DPSUFRPP22,4810,DPSUFRPPGBP,DPSUFRPPABVVUS6S,1966056,251926,3.699564e+06
TE3DIV307H,2022-01-24 06:23:00,45e49da2-72d5-4fb0-9394-2143ad03703d,DPSUFRPP,ABVVUS6S,PETX22-FXIDA-17879,FR37714755422957001118,PHASEOLUS FILIFORMIS,4| BOULEVARD DE BODIN,FR/58826 ALEXANDRE-SUR-LAPORE,611024064274695205,ASTRAGALUS AMBLYTROPIS,UNIT 7596 BOX 6095,US/NEW COURTNEYSHIRE| ID 53170,220124,USD,4.485390e+06,EUR,4.007317e+06,1,0.0,0.0,6,DPSUFRPP6,160416,DPSUFRPPEUR,DPSUFRPPABVVUS6S,1966056,1840201,1.690104e+08


In [16]:
test[test["Label"] == 1]

Unnamed: 0_level_0,SettlementAmount,InstructedAmount,Label,hour,sender_hour_freq,sender_currency_freq,sender_currency_amount_average,sender_receiver_freq
MessageId,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
TEKE9CP94L,2.544254e+06,2.273076e+06,1,22,4174,1598751,1.692465e+08,1708051
TE25TS2GOM,1.454529e+08,1.628083e+08,1,4,43,366914,1.102004e+08,240731
TE43OHEG32,2.768278e+06,2.224767e+06,1,11,552300,58,2.861585e+08,1708051
TEHRG80QLN,4.762671e+06,4.255044e+06,1,15,63231,1598751,1.692465e+08,126189
TEGAOGU1QK,5.506012e+06,4.919156e+06,1,8,169510,1598751,1.692465e+08,1708051
...,...,...,...,...,...,...,...,...
TEIFN9H1VG,3.658144e+06,2.939922e+06,1,4,113181,218987,3.691763e+06,1708051
TEDKORH5M2,2.723516e+06,2.188794e+06,1,11,552300,218987,3.691763e+06,1708051
TEFKDNVAKL,3.186411e+06,2.560806e+06,1,22,4174,218987,3.691763e+06,1708051
TE3DIV307H,4.485390e+06,4.007317e+06,1,6,139377,1598751,1.692465e+08,1708051


# Separate Label and Normalize

In [60]:
imp = SimpleImputer(missing_values=np.nan, strategy='constant', fill_value = 8)
data_imp = imp.fit_transform(data)

In [62]:
imp_csv = pd.DataFrame(data_imp, columns= data.columns)

In [64]:
imp_csv.to_csv('swift_w_flags.csv', index=False)

In [17]:
X_data = np.delete(data_imp, 2, 1)

In [18]:
X_train, X_test, Y_train, Y_test = train_test_split(X_data,data_imp[:,2], test_size=0.3, stratify = data_imp[:,2])

In [19]:
# Normalize
scaler = StandardScaler()
scaler.fit(X_train)

X_train = scaler.transform(X_train)
X_test = scaler.transform(X_test)

In [160]:
X_data[0]

array([1.74631905e+09, 1.56018856e+09, 0.00000000e+00, 0.00000000e+00,
       0.00000000e+00, 1.90460000e+04, 1.96605600e+06, 1.84020100e+06,
       1.69010396e+08])

In [20]:
X_train_o = np.delete(X_train, [2,3], 1)
X_test_o = np.delete(X_test, [2,3], 1)

# Random Forest

After adding only Risk Flag from the Bank data<br>
>AUPRC increased from 0.50 to 0.65<br>
>TP(class anomalies) increased 577 from to 893<br>
>Recall increased from 0.34 to 0.53<br>
 

In [156]:
%%time
from sklearn.ensemble import RandomForestClassifier

rf = RandomForestClassifier(max_depth=7, random_state=0, n_estimators=10)
kfold = StratifiedKFold(n_splits=5, random_state=0, shuffle=True)
cv_results = cross_val_score(rf, X_train, Y_train, cv=kfold, scoring="f1")

rf.fit(X_train, Y_train)
print("Minimum:", cv_results.min())
print("Maximum:", cv_results.max())
print("StanDev:", cv_results.std())

Minimum: 0.6490630323679727
Maximum: 0.6997558991049634
StanDev: 0.02116375333525937
CPU times: user 1min 47s, sys: 2 s, total: 1min 49s
Wall time: 1min 50s


In [157]:
pred_rf = rf.predict(X_test)
print("Random Forest Classification Report=\n\n", classification_report(Y_test, pred_rf))
print("Random Forest Confusion Matrix=\n\n", confusion_matrix(Y_test, pred_rf))
pred_proba_rf = rf.predict_proba(X_test)[:, 1]

print("AUPRC:", metrics.average_precision_score(y_true=Y_test, y_score=pred_proba_rf))

Random Forest Classification Report=

               precision    recall  f1-score   support

         0.0       1.00      1.00      1.00   1617352
         1.0       1.00      0.53      0.69      1698

    accuracy                           1.00   1619050
   macro avg       1.00      0.76      0.84   1619050
weighted avg       1.00      1.00      1.00   1619050

Random Forest Confusion Matrix=

 [[1617351       1]
 [    805     893]]
AUPRC: 0.6494496668640578


In [165]:
%%time
from sklearn.ensemble import RandomForestClassifier

rf = RandomForestClassifier(max_depth=7, random_state=0, n_estimators=10)
kfold = StratifiedKFold(n_splits=5, random_state=0, shuffle=True)
cv_results = cross_val_score(rf, X_train_o, Y_train, cv=kfold, scoring="f1")

rf.fit(X_train_o, Y_train)
print("Minimum:", cv_results.min())
print("Maximum:", cv_results.max())
print("StanDev:", cv_results.std())

Minimum: 0.48863636363636365
Maximum: 0.5157699443413729
StanDev: 0.011650524648735628
CPU times: user 1min 29s, sys: 1.85 s, total: 1min 31s
Wall time: 1min 31s


In [166]:
pred_rf = rf.predict(X_test_o)
print("Random Forest Classification Report=\n\n", classification_report(Y_test, pred_rf))

Random Forest Classification Report=

               precision    recall  f1-score   support

         0.0       1.00      1.00      1.00   1617352
         1.0       0.99      0.34      0.51      1698

    accuracy                           1.00   1619050
   macro avg       1.00      0.67      0.75   1619050
weighted avg       1.00      1.00      1.00   1619050



In [167]:
print("Random Forest Confusion Matrix=\n\n", confusion_matrix(Y_test, pred_rf))

Random Forest Confusion Matrix=

 [[1617349       3]
 [   1121     577]]


In [169]:
pred_proba_rf = rf.predict_proba(X_test_o)[:, 1]

print("AUPRC:", metrics.average_precision_score(y_true=Y_test, y_score=pred_proba_rf))

AUPRC: 0.49064999737588916


# XGBoost

After adding only the Risk Flag from the Bank data<br>
>AUPRC increased from 0.50 to 0.80<br>
>TP(class anomalies) increased 808 from to 1149<br>
>Recall increased from 0.60 to 0.68<br>

In [171]:
%%time

xgb = XGBClassifier(n_estimators=100)
kfold = StratifiedKFold(n_splits=5, random_state=0, shuffle=True)
cv_results = cross_val_score(xgb, X_train, Y_train, cv=kfold, scoring="f1")

xgb.fit(X_train, Y_train)
print("Minimum:", cv_results.min())
print("Maximum:", cv_results.max())
print("StanDev:", cv_results.std())

pred_xgb = xgb.predict(X_test)
print("XGBoost Classification Report=\n\n", classification_report(Y_test, pred_xgb))

print("XGBoost Confusion Matrix=\n\n", confusion_matrix(Y_test, pred_xgb))

pred_proba_xgb = xgb.predict_proba(X_test)[:, 1]

print("AUPRC:", metrics.average_precision_score(y_true=Y_test, y_score=pred_proba_xgb))

Minimum: 0.7468448403860432
Maximum: 0.7906976744186047
StanDev: 0.016093153120466653
XGBoost Classification Report=

               precision    recall  f1-score   support

         0.0       1.00      1.00      1.00   1617352
         1.0       0.92      0.68      0.78      1698

    accuracy                           1.00   1619050
   macro avg       0.96      0.84      0.89   1619050
weighted avg       1.00      1.00      1.00   1619050

XGBoost Confusion Matrix=

 [[1617252     100]
 [    549    1149]]
AUPRC: 0.7938137502285596
CPU times: user 36min 19s, sys: 1min 31s, total: 37min 50s
Wall time: 5min 28s


In [172]:
%%time

xgb = XGBClassifier(n_estimators=100)
kfold = StratifiedKFold(n_splits=5, random_state=0, shuffle=True)
cv_results = cross_val_score(xgb, X_train_o, Y_train, cv=kfold, scoring="f1")

xgb.fit(X_train_o, Y_train)
print("Minimum:", cv_results.min())
print("Maximum:", cv_results.max())
print("StanDev:", cv_results.std())

pred_xgb = xgb.predict(X_test_o)
print("XGBoost Classification Report=\n\n", classification_report(Y_test, pred_xgb))

print("XGBoost Confusion Matrix=\n\n", confusion_matrix(Y_test, pred_xgb))

pred_proba_xgb = xgb.predict_proba(X_test_o)[:, 1]

print("AUPRC:", metrics.average_precision_score(y_true=Y_test, y_score=pred_proba_xgb))

Minimum: 0.5986622073578595
Maximum: 0.642681929681112
StanDev: 0.015974436781914864
XGBoost Classification Report=

               precision    recall  f1-score   support

         0.0       1.00      1.00      1.00   1617352
         1.0       0.90      0.48      0.62      1698

    accuracy                           1.00   1619050
   macro avg       0.95      0.74      0.81   1619050
weighted avg       1.00      1.00      1.00   1619050

XGBoost Confusion Matrix=

 [[1617261      91]
 [    890     808]]
AUPRC: 0.5959007065416678
CPU times: user 30min 37s, sys: 1min 25s, total: 32min 2s
Wall time: 5min 15s


# Logistic Regression

After adding only the Risk Flag from the Bank data<br>
>AUPRC increased from 0 to 0.2<br>
>TP (class anomalies) increased 0 from to 341<br>
>Recall increased from 0.003 to 0.21<br>

In [176]:
LR = LogisticRegression()
kfold = StratifiedKFold(n_splits=5, random_state=0, shuffle=True)
cv_results = cross_val_score(LR, X_train, Y_train, cv=kfold, scoring="f1")

LR.fit(X_train, Y_train)
print("Minimum:", cv_results.min())
print("Maximum:", cv_results.max())
print("StanDev:", cv_results.std())

pred_lr = LR.predict(X_test)
print("XGBoost Classification Report=\n\n", classification_report(Y_test, pred_lr))

print("XGBoost Confusion Matrix=\n\n", confusion_matrix(Y_test, pred_lr))

pred_proba_lr = LR.predict_proba(X_test)[:, 1]

print("AUPRC:", metrics.average_precision_score(y_true=Y_test, y_score=pred_proba_lr))

Minimum: 0.3127659574468085
Maximum: 0.36024844720496896
StanDev: 0.019006278822346245
XGBoost Classification Report=

               precision    recall  f1-score   support

         0.0       1.00      1.00      1.00   1617352
         1.0       1.00      0.20      0.33      1698

    accuracy                           1.00   1619050
   macro avg       1.00      0.60      0.67   1619050
weighted avg       1.00      1.00      1.00   1619050

XGBoost Confusion Matrix=

 [[1617352       0]
 [   1357     341]]
AUPRC: 0.2115593522018645


In [177]:
LR = LogisticRegression()
kfold = StratifiedKFold(n_splits=5, random_state=0, shuffle=True)
cv_results = cross_val_score(LR, X_train_o, Y_train, cv=kfold, scoring="f1")

LR.fit(X_train_o, Y_train)
print("Minimum:", cv_results.min())
print("Maximum:", cv_results.max())
print("StanDev:", cv_results.std())

pred_lr = LR.predict(X_test_o)
print("XGBoost Classification Report=\n\n", classification_report(Y_test, pred_lr))

print("XGBoost Confusion Matrix=\n\n", confusion_matrix(Y_test, pred_lr))

pred_proba_lr = LR.predict_proba(X_test_o)[:, 1]

print("AUPRC:", metrics.average_precision_score(y_true=Y_test, y_score=pred_proba_lr))

Minimum: 0.0
Maximum: 0.0
StanDev: 0.0


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


XGBoost Classification Report=

               precision    recall  f1-score   support

         0.0       1.00      1.00      1.00   1617352
         1.0       0.00      0.00      0.00      1698

    accuracy                           1.00   1619050
   macro avg       0.50      0.50      0.50   1619050
weighted avg       1.00      1.00      1.00   1619050

XGBoost Confusion Matrix=

 [[1617352       0]
 [   1698       0]]
AUPRC: 0.003137289660485082


# Ensemble

In [59]:
xgb = XGBClassifier(n_estimators=100)
xgb.fit(X_train, Y_train)
y_pred_nb = xgb.predict(X_test)

In [50]:
from sklearn.ensemble import RandomForestClassifier

rf = RandomForestClassifier(max_depth=7, random_state=0, n_estimators=10)

rf.fit(X_train, Y_train)
y_pred_rf = rf.predict(X_test)


In [52]:
y_pred_11 = [1 if (y_pred_nb[i] == 1 and y_pred_rf[i]== 1) else 0 for i in range(len(y_pred_nb))]

In [53]:
y_pred_01 = [1 if (y_pred_nb[i] == 1 or y_pred_rf[i]== 1) else 0 for i in range(len(y_pred_nb))]

In [54]:
y_pred_01

[0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,


In [57]:
print("Both Confusion Matrix=\n\n", confusion_matrix(Y_test, y_pred_01))
print("Eitheror Confusion Matrix=\n\n", confusion_matrix(Y_test, y_pred_11))

Both Confusion Matrix=

 [[1617352       0]
 [   1373     325]]
Eitheror Confusion Matrix=

 [[1617349       3]
 [    838     860]]
