In [2]:
import pandas as pd
import datetime
from datetime import timedelta
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
# Apply seaborn's default styling and make every figure 11.7 x 8.27 inches by default.
sns.set(rc={'figure.figsize':(11.7,8.27)})

In [5]:
# Load the raw transaction log. `reqDate` is read as plain text and converted to
# dates in a later cell. `parse_dates=True` is intentionally not passed: that flag
# only asks pandas to parse the index, so it never affected `reqDate`.
df = pd.read_csv("vtf2020_module2_data.csv")

In [6]:
# Non-null count per column; a lower count than the others marks a column with missing values.
df.count()

userID         2103296
reqDate        2103296
transID        2103296
bankCode       1051478
amount         2103296
transStatus    2103296
type           2103296
dtype: int64

In [7]:
# Inspect the raw request timestamps before they are converted.
df['reqDate']

0          2018-12-28 00:07:36.302
1          2018-12-28 00:08:35.218
2          2018-12-28 00:27:33.530
3          2018-12-28 01:00:55.220
4          2018-12-28 01:48:51.443
                    ...           
2103291    2018-12-14 13:17:52.888
2103292    2018-12-14 15:41:49.949
2103293    2018-12-30 08:57:41.758
2103294    2018-12-07 12:09:06.035
2103295    2018-12-06 19:37:38.320
Name: reqDate, Length: 2103296, dtype: object

In [8]:
# Reduce each request timestamp to its calendar date (datetime.date objects).
# pd.to_datetime parses the whole column in one vectorised pass instead of
# calling datetime.strptime once per row through Series.apply.
df['reqDate'] = pd.to_datetime(df['reqDate'], format='%Y-%m-%d %H:%M:%S.%f').dt.date

# Bank Success Rate

In [9]:
# Show every transaction whose bankCode is 'BIDV'.
is_bidv = df['bankCode'].eq('BIDV')
df[is_bidv]

Unnamed: 0,userID,reqDate,transID,bankCode,amount,transStatus,type
899399,b65832ffed27be9673e83962da3da6c8,2018-12-20,181220000142753,BIDV,790000,0,138
1466314,15e59ee761c854df56df4ac4c896ff40,2018-12-24,181224000019983,BIDV,117000,0,24
1520545,30884aff88caa1497b5e248dbc99226e,2018-12-26,181226000126720,BIDV,239000,0,237


In [10]:
# Distinct request dates in ascending order, as a plain Python list.
sorted_date = df['reqDate'].drop_duplicates().sort_values().tolist()

In [11]:
# Daily statistics per bank. transStatus is summed to get the success count
# (this treats it as a 0/1 success flag — TODO confirm) and transID is counted
# to get the total. Rows with a missing bankCode are dropped by groupby.
bank_success = (
    df.groupby(['bankCode', 'reqDate'])
    .agg({'transStatus': 'sum', 'transID': 'count'})
    .rename(columns={
        'transStatus': 'successTransactions',
        'transID': 'totalTransactions',
    })
    .assign(
        SuccessRate=lambda daily: daily['successTransactions'] / daily['totalTransactions']
    )
)
bank_success

Unnamed: 0_level_0,Unnamed: 1_level_0,successTransactions,totalTransactions,SuccessRate
bankCode,reqDate,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
BIDV,2018-12-20,0,1,0.000000
BIDV,2018-12-24,0,1,0.000000
BIDV,2018-12-26,0,1,0.000000
CC,2018-12-26,0,1,0.000000
CC,2018-12-30,0,1,0.000000
...,...,...,...,...
ZPVCCB,2018-12-27,52,91,0.571429
ZPVCCB,2018-12-28,63,75,0.840000
ZPVCCB,2018-12-29,28,29,0.965517
ZPVCCB,2018-12-30,24,28,0.857143


In [12]:
# Flatten the (bankCode, reqDate) MultiIndex into ordinary columns.
bank_success_reset_index = bank_success.reset_index()
bank_success_reset_index

Unnamed: 0,bankCode,reqDate,successTransactions,totalTransactions,SuccessRate
0,BIDV,2018-12-20,0,1,0.000000
1,BIDV,2018-12-24,0,1,0.000000
2,BIDV,2018-12-26,0,1,0.000000
3,CC,2018-12-26,0,1,0.000000
4,CC,2018-12-30,0,1,0.000000
...,...,...,...,...,...
286,ZPVCCB,2018-12-27,52,91,0.571429
287,ZPVCCB,2018-12-28,63,75,0.840000
288,ZPVCCB,2018-12-29,28,29,0.965517
289,ZPVCCB,2018-12-30,24,28,0.857143


# RFM (Recency, Frequency, Monetary)

In [14]:
# Most recent request date in the data set.
max_date = df['reqDate'].max()
max_date

datetime.date(2018, 12, 31)

In [15]:
# Per-user RFM inputs. The result keeps the source column names
# (reqDate, transID, amount), in that order.
# NOTE(review): every row is included regardless of transStatus — confirm intended.
snapshot_date = max_date + timedelta(days=1)  # one day past the last request, so the gap is >= 1


def days_since_last_request(request_dates):
    """Whole days between the snapshot date and the latest date in `request_dates`."""
    return (snapshot_date - request_dates.max()).days


rfm = df.groupby('userID').agg({
    'reqDate': days_since_last_request,
    'transID': 'count',
    'amount': 'sum',
})

In [16]:
# Rename positionally: reqDate -> recency, transID -> frequency, amount -> monetary.
rfm.columns = ['recency', 'frequency', 'monetary']

In [17]:
# Quartile scores for each RFM metric. Recency labels run 4 -> 1 so that the
# smallest recency values get the highest score; frequency and monetary labels
# run 1 -> 4 so that larger values score higher.
score_specs = (
    ('r-score', 'recency', range(4, 0, -1)),
    ('f-score', 'frequency', range(1, 5)),
    ('m-score', 'monetary', range(1, 5)),
)
for score_col, metric_col, quartile_labels in score_specs:
    rfm[score_col] = pd.qcut(rfm[metric_col], q=4, labels=quartile_labels)
rfm

Unnamed: 0_level_0,recency,frequency,monetary,r-score,f-score,m-score
userID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
000084454cb2ae6275bb479b957d6b7b,18,7,591530,1,3,3
0000c78ffe9aaa92a4694f65e5465b24,11,2,350000,2,1,2
0000cce93f5aa9df5c0f5c9c002e2e75,20,2,1392600,1,1,4
0001081adfa0c6443ef076f85eeeee5c,11,3,383100,2,2,2
000169c037c452a0d0386f9d06933c5a,2,85,28827200,4,4,4
...,...,...,...,...,...,...
ffffa20c3d0c58f41dec3c5e99f848b2,9,2,211000,2,1,2
ffffa3034495807b1f393ef0a090d6e5,7,1,536000,3,1,3
ffffb1a8994d83f712c2819ab5e97173,11,2,12000,2,1,1
ffffe96889018bde56c719ae1b88711d,25,1,485000,1,1,3


In [18]:
# Join the three quartile digits into one code, e.g. scores 4, 4, 4 -> '444'.
score_digits = [rfm[col].astype(str) for col in ('r-score', 'f-score', 'm-score')]
rfm['RFM-score'] = score_digits[0].str.cat(score_digits[1:])

In [20]:
# Move userID out of the index into an ordinary column.
rfm_reset_index = rfm.reset_index()

In [21]:
# Display the flattened RFM table.
rfm_reset_index

Unnamed: 0,userID,recency,frequency,monetary,r-score,f-score,m-score,RFM-score
0,000084454cb2ae6275bb479b957d6b7b,18,7,591530,1,3,3,133
1,0000c78ffe9aaa92a4694f65e5465b24,11,2,350000,2,1,2,212
2,0000cce93f5aa9df5c0f5c9c002e2e75,20,2,1392600,1,1,4,114
3,0001081adfa0c6443ef076f85eeeee5c,11,3,383100,2,2,2,222
4,000169c037c452a0d0386f9d06933c5a,2,85,28827200,4,4,4,444
...,...,...,...,...,...,...,...,...
311199,ffffa20c3d0c58f41dec3c5e99f848b2,9,2,211000,2,1,2,212
311200,ffffa3034495807b1f393ef0a090d6e5,7,1,536000,3,1,3,313
311201,ffffb1a8994d83f712c2819ab5e97173,11,2,12000,2,1,1,211
311202,ffffe96889018bde56c719ae1b88711d,25,1,485000,1,1,3,113


# AppID

In [33]:
# Total amount per (userID, type), leaving out rows whose type is 1, 2, 3 or 4.
# NOTE(review): what types 1-4 stand for is not visible in this notebook — confirm.
excluded_types = [1, 2, 3, 4]
keep_row = ~df['type'].isin(excluded_types)
appid = (
    df[keep_row]
    .groupby(['userID', 'type'])[['amount']]
    .sum()
)
appid_reset_index = appid.reset_index()
# Preview rows 10-29 by position.
appid.iloc[10:30]

Unnamed: 0_level_0,Unnamed: 1_level_0,amount
userID,type,Unnamed: 2_level_1
0001e384498697abaf4e4006be5d7a66,24,583567
0002af59815e210f0d57af3b01c89867,61,95500
00030aea176d5f2e42ed1653636d16cf,328,270000
000316e2888097423207f37eb2875b4f,17,328524
00031adec38699bb6ce08bb08b0336d7,264,1918000
000373ad46ba53bb4578b51eea6b8eeb,15,46750
00049122f3a8eea8e60ecc0d21a4acb5,12,38800
00049122f3a8eea8e60ecc0d21a4acb5,328,500000
0004aeb79a693350a1112beecd20557a,15,205700
0004d164c8d1181e78095cd3b8958d3f,15,130900


In [30]:
# Display the per-(userID, type) totals with the index flattened into columns.
appid_reset_index

Unnamed: 0,userID,type,amount
0,000084454cb2ae6275bb479b957d6b7b,61,391530
1,0000c78ffe9aaa92a4694f65e5465b24,328,250000
2,0000cce93f5aa9df5c0f5c9c002e2e75,118,692600
3,0001081adfa0c6443ef076f85eeeee5c,12,97000
4,0001081adfa0c6443ef076f85eeeee5c,61,286100
...,...,...,...
431079,ffffa20c3d0c58f41dec3c5e99f848b2,24,111000
431080,ffffa3034495807b1f393ef0a090d6e5,264,536000
431081,ffffe96889018bde56c719ae1b88711d,12,485000
431082,ffffe9cff443b9f021f1945a9b66ad8c,19,100000


# User-to-User Transfers

In [26]:
# Load the transfer records (one row per sender -> receiver transaction).
# `parse_dates=True` is intentionally not passed: that flag only asks pandas to
# parse the index, so `reqDate` is a plain string column either way.
ut = pd.read_csv('transfer_network_201812.csv')
ut

Unnamed: 0,sender,receiver,transID,reqDate,amount,type
0,192614cd07e29f4f1f78da281ebeb7ef,b616ace146ca031bfd5b63f08be2ebb5,181227000016480,2018-12-27 04:46:59.009,100000,ZaloPay
1,e230a20a8a1bdfb8a4ef7376689a92ae,0cb3c56df5febd13d5123e98f3031a06,181227000036940,2018-12-27 08:50:45.500,11000,ZaloPay
2,8ccccf41e5d1db2a9c0c85580ba452a3,f1aea120ad4b1b901561c0cb69f5af78,181227000039884,2018-12-27 09:05:58.860,102595,ZaloPay
3,177ad70e864b250986cd8cf9b6b445c0,5c98bc292facf20ee231c0e1602c29a1,181227000044600,2018-12-27 09:15:16.834,1820000,ZaloPay
4,b4fe7056bd4a9ba2bac7dbea137037db,a87188af4cd2e9707dc755f022d3b83c,181227000054879,2018-12-27 09:44:47.729,8000,ZaloPay
...,...,...,...,...,...,...
111246,aaada843937d95068536f9e7156678a9,0aae280061a6034e7fc6650013331bc9,181206000092235,2018-12-06 15:59:50.347,315000,ZaloPay
111247,2bcdd979b7b22286ff31fe40b1d10159,89ad1e4da9219d6f7882f0afb963a0e1,181206000119710,2018-12-06 20:19:03.521,100000,ZaloPay
111248,9101c2058fa8cd4e3316fd3facb6eb3d,ca2c5b84065e6efb059dec713b2188f5,181206000126342,2018-12-06 21:15:18.659,500000,ZaloPay
111249,a398d9e85817730be11885d7137b1506,174dc3e94591b12d04e22ef73756e223,181206000128069,2018-12-06 21:32:43.962,200000,ZaloPay


In [28]:
# Number of transfers and total amount for each (sender, receiver) pair;
# only the first 20 pairs are shown.
transfers_by_pair = ut.groupby(['sender', 'receiver']).agg({'transID': 'count', 'amount': 'sum'})
transfers_by_pair.head(20)

Unnamed: 0_level_0,Unnamed: 1_level_0,transID,amount
sender,receiver,Unnamed: 2_level_1,Unnamed: 3_level_1
00055b984cdfbe6b799d5debf8323c64,43026332e53541a19627d074c1833200,1,1500
0006544b05ceb936b9ea7b2af41e8f22,060c23c14d40d3e85d6054783db38a02,1,20000
000708e22b25c950d014c17e0424cbfb,042c9ebffdff74530d48a92d3a6b9ea3,1,500000
00085550d7aa4e4f322a79effc6cc3fc,a6238e443bc03bfc46b3f6799c16ae2c,2,1033143
000c1d6349b283a51bc071e16a8ecb7a,9b3b61b0e9f6a17ad23750e4a6ad24a2,1,300000
000d69f7c9ef2652b631062b6376c606,c23a13b8204936d5a0e581ca7c5c9307,1,227000
000ea5580bdc773a6f7f3f77c8f11f17,c07123020748b6342ad5cafdc06662ab,1,25000
000ec49f2c8a4f9882db8167f6cc4542,b766a2b2775712111f73c354b258e1a2,1,50000
0010058c779f4507618e1a9d341e6929,1c54a9159553e1f321a9fa2b7f7e72b8,2,201000
0010058c779f4507618e1a9d341e6929,2606c7cba485fdbcd053fd9f01489ca6,2,163500
