# Computing Fraud Risk Score For New Accounts

In this script, we will import and compute the Fraud Risk Score (FRC) for any new accounts signup in last 91 days. 

In [1]:
import pandas as pd
import numpy as np
from datetime import date

from scipy import stats
get_ipython().magic(u'config IPCompleter.greedy=True')

import pickle

from sklearn import preprocessing
from sklearn.preprocessing import MinMaxScaler
from sklearn.preprocessing import StandardScaler

from sklearn.model_selection import train_test_split
from sklearn import mixture
from sklearn.neural_network import MLPClassifier
from sklearn.metrics import classification_report,confusion_matrix

## Import Data from AWS Redshift

In [2]:
################################## Import New Accounts Information ########################################

## Runing AWS SQL connector and scripts; and save the data in local disk
## Needs to run this for each WSD day model
%run ./wsm_x_days_test.ipynb   # Day 0X

## New user features data file path and details
path = "file-path"
file_name = "new_users_features_"
# today = str(date.today())

## User data file names and paths
data_day_X = path + file_name + "day_X_" + today + ".tsv"


## Import data from the local disk
df_day_X = pd.read_csv(data_day_X, sep="\t")

## Drop rows with NaN
df_day_X = df_day_X.dropna()

## Sorting columns
df_day_X_sort = df_day_X.reindex(sorted(df_day_X.columns), axis=1)

## Create a copy of the dataframe for analysis
df_day_X_ana = df_day_X_sort.copy()


## Data standarization: Mini-Max Scaler


In [8]:
##################################### Day 07: Data Standarizationå ##############################################

# Load standarization parameter from the disk
path_minimax_scaler_day_07 = '/Users/dwahid/Documents/GitHub/fraud_detection/src/saved_models/minmax_scaler_gmm_day_07.sav'
min_max_scaler_day_07 = pickle.load(open(path_minimax_scaler_day_07, 'rb'))

# List of columns to normalize
column_names_to_not_normalize = ['admin_email', 'days_on_platform', 'effective_date', 
                                 'is_sales_managed', 'signup_date', 'systemid']
column_names_to_normalize = [x for x in list(df_day_07_ana) if x not in column_names_to_not_normalize ]

# Normalized all features columns except the 'systemid'
x_day_07 = df_day_07_ana[column_names_to_normalize].values
x_scaled_day_07 = min_max_scaler_day_07.fit_transform(x_day_07)
df_day_07_scaled = pd.DataFrame(x_scaled_day_07, columns=column_names_to_normalize, index = df_day_07_ana.index)
df_day_07_ana[column_names_to_normalize] = df_day_07_scaled

In [9]:
# ## Get the column index
# col_names = list(df_day_07_scaled)
# L = len(col_names)

# for i in range(0, L):
#     print i, col_names[i]

In [10]:
##################################### Day 14: Data Standarizationå ##############################################

# Load standarization parameter from the disk
path_minimax_scaler_day_14 = '/Users/dwahid/Documents/GitHub/fraud_detection/src/saved_models/minmax_scaler_gmm_day_14.sav'
min_max_scaler_day_14 = pickle.load(open(path_minimax_scaler_day_14, 'rb'))

# List of columns to normalize
column_names_to_not_normalize = ['admin_email', 'days_on_platform', 'effective_date', 
                                 'is_sales_managed', 'signup_date', 'systemid']
column_names_to_normalize = [x for x in list(df_day_14_ana) if x not in column_names_to_not_normalize ]

# Normalized all features columns except the 'systemid'
x_day_14 = df_day_14_ana[column_names_to_normalize].values
x_scaled_day_14 = min_max_scaler_day_14.fit_transform(x_day_14)
df_day_14_scaled = pd.DataFrame(x_scaled_day_14, columns=column_names_to_normalize, index = df_day_14_ana.index)
df_day_14_ana[column_names_to_normalize] = df_day_14_scaled

In [11]:
# ## Get the column index
# col_names = list(df_day_14_scaled)
# L = len(col_names)

# for i in range(0, L):
#     print i, col_names[i]

In [12]:
##################################### Day 21: Data Standarizationå ##############################################

# Load standarization parameter from the disk
path_minimax_scaler_day_21 = '/Users/dwahid/Documents/GitHub/fraud_detection/src/saved_models/minmax_scaler_gmm_day_21.sav'
min_max_scaler_day_21 = pickle.load(open(path_minimax_scaler_day_21, 'rb'))

# List of columns to normalize
column_names_to_not_normalize = ['admin_email', 'days_on_platform', 'effective_date', 
                                 'is_sales_managed', 'signup_date', 'systemid']
column_names_to_normalize = [x for x in list(df_day_21_ana) if x not in column_names_to_not_normalize ]

# Normalized all features columns except the 'systemid'
x_day_21 = df_day_21_ana[column_names_to_normalize].values
x_scaled_day_21 = min_max_scaler_day_21.fit_transform(x_day_21)
df_day_21_scaled = pd.DataFrame(x_scaled_day_21, columns=column_names_to_normalize, index = df_day_21_ana.index)
df_day_21_ana[column_names_to_normalize] = df_day_21_scaled

In [13]:
# ## Get the column index
# col_names = list(df_day_21_scaled)
# L = len(col_names)

# for i in range(0, L):
#     print i, col_names[i]

In [14]:
##################################### Day 28: Data Standarizationå ##############################################

# Load standarization parameter from the disk
path_minimax_scaler_day_28 = '/Users/dwahid/Documents/GitHub/fraud_detection/src/saved_models/minmax_scaler_gmm_day_28.sav'
min_max_scaler_day_28 = pickle.load(open(path_minimax_scaler_day_28, 'rb'))

# List of columns to normalize
column_names_to_not_normalize = ['admin_email', 'days_on_platform', 'effective_date', 
                                 'is_sales_managed', 'signup_date', 'systemid']
column_names_to_normalize = [x for x in list(df_day_28_ana) if x not in column_names_to_not_normalize ]

# Normalized all features columns except the 'systemid'
x_day_28 = df_day_28_ana[column_names_to_normalize].values
x_scaled_day_28 = min_max_scaler_day_28.fit_transform(x_day_28)
df_day_28_scaled = pd.DataFrame(x_scaled_day_28, columns=column_names_to_normalize, index = df_day_28_ana.index)
df_day_28_ana[column_names_to_normalize] = df_day_28_scaled

In [15]:
## Get the column index
# col_names = list(df_day_28_scaled)
# L = len(col_names)

# for i in range(0, L):
#     print i, col_names[i]

In [16]:
##################################### Day 35: Data Standarizationå ##############################################

# Load standarization parameter from the disk
path_minimax_scaler_day_35 = '/Users/dwahid/Documents/GitHub/fraud_detection/src/saved_models/minmax_scaler_gmm_day_35.sav'
min_max_scaler_day_35 = pickle.load(open(path_minimax_scaler_day_35, 'rb'))

# List of columns to normalize
column_names_to_not_normalize = ['admin_email', 'days_on_platform', 'effective_date', 
                                 'is_sales_managed', 'signup_date', 'systemid']
column_names_to_normalize = [x for x in list(df_day_35_ana) if x not in column_names_to_not_normalize ]

# Normalized all features columns except the 'systemid'
x_day_35 = df_day_35_ana[column_names_to_normalize].values
x_scaled_day_35 = min_max_scaler_day_35.fit_transform(x_day_35)
df_day_35_scaled = pd.DataFrame(x_scaled_day_35, columns=column_names_to_normalize, index = df_day_35_ana.index)
df_day_35_ana[column_names_to_normalize] = df_day_35_scaled

In [17]:
## Get the column index
# col_names = list(df_day_35_scaled)
# L = len(col_names)

# for i in range(0, L):
#     print i, col_names[i]

In [18]:
##################################### Day 42: Data Standarizationå ##############################################

# Load standarization parameter from the disk
path_minimax_scaler_day_42 = '/Users/dwahid/Documents/GitHub/fraud_detection/src/saved_models/minmax_scaler_gmm_day_42.sav'
min_max_scaler_day_42 = pickle.load(open(path_minimax_scaler_day_42, 'rb'))

# List of columns to normalize
column_names_to_not_normalize = ['admin_email', 'days_on_platform', 'effective_date', 
                                 'is_sales_managed', 'signup_date', 'systemid']
column_names_to_normalize = [x for x in list(df_day_42_ana) if x not in column_names_to_not_normalize ]

# Normalized all features columns except the 'systemid'
x_day_42 = df_day_42_ana[column_names_to_normalize].values
x_scaled_day_42 = min_max_scaler_day_42.fit_transform(x_day_42)
df_day_42_scaled = pd.DataFrame(x_scaled_day_42, columns=column_names_to_normalize, index = df_day_42_ana.index)
df_day_42_ana[column_names_to_normalize] = df_day_42_scaled

In [19]:
## Get the column index
# col_names = list(df_day_42_scaled)
# L = len(col_names)

# for i in range(0, L):
#     print i, col_names[i]

In [20]:
##################################### Day 49: Data Standarizationå ##############################################

# Load standarization parameter from the disk
path_minimax_scaler_day_49 = '/Users/dwahid/Documents/GitHub/fraud_detection/src/saved_models/minmax_scaler_gmm_day_49.sav'
min_max_scaler_day_49 = pickle.load(open(path_minimax_scaler_day_49, 'rb'))

# List of columns to normalize
column_names_to_not_normalize = ['admin_email', 'days_on_platform', 'effective_date', 
                                 'is_sales_managed', 'signup_date', 'systemid']
column_names_to_normalize = [x for x in list(df_day_49_ana) if x not in column_names_to_not_normalize ]

# Normalized all features columns except the 'systemid'
x_day_49 = df_day_49_ana[column_names_to_normalize].values
x_scaled_day_49 = min_max_scaler_day_49.fit_transform(x_day_49)
df_day_49_scaled = pd.DataFrame(x_scaled_day_49, columns=column_names_to_normalize, index = df_day_49_ana.index)
df_day_49_ana[column_names_to_normalize] = df_day_49_scaled

In [21]:
## Get the column index
# col_names = list(df_day_49_scaled)
# L = len(col_names)

# for i in range(0, L):
#     print i, col_names[i]

In [22]:
##################################### Day 56: Data Standarizationå ##############################################

# Load standarization parameter from the disk
path_minimax_scaler_day_56 = '/Users/dwahid/Documents/GitHub/fraud_detection/src/saved_models/minmax_scaler_gmm_day_56.sav'
min_max_scaler_day_56 = pickle.load(open(path_minimax_scaler_day_56, 'rb'))

# List of columns to normalize
column_names_to_not_normalize = ['admin_email', 'days_on_platform', 'effective_date', 
                                 'is_sales_managed', 'signup_date', 'systemid']
column_names_to_normalize = [x for x in list(df_day_56_ana) if x not in column_names_to_not_normalize ]

# Normalized all features columns except the 'systemid'
x_day_56 = df_day_56_ana[column_names_to_normalize].values
x_scaled_day_56 = min_max_scaler_day_56.fit_transform(x_day_56)
df_day_56_scaled = pd.DataFrame(x_scaled_day_56, columns=column_names_to_normalize, index = df_day_56_ana.index)
df_day_56_ana[column_names_to_normalize] = df_day_56_scaled

In [23]:
# # Get the column index
# col_names = list(df_day_56_scaled)
# L = len(col_names)

# for i in range(0, L):
#     print i, col_names[i]

In [24]:
##################################### Day 63: Data Standarizationå ##############################################

# Load standarization parameter from the disk
path_minimax_scaler_day_63 = '/Users/dwahid/Documents/GitHub/fraud_detection/src/saved_models/minmax_scaler_gmm_day_63.sav'
min_max_scaler_day_63 = pickle.load(open(path_minimax_scaler_day_63, 'rb'))

# List of columns to normalize
column_names_to_not_normalize = ['admin_email', 'days_on_platform', 'effective_date', 
                                 'is_sales_managed', 'signup_date', 'systemid']
column_names_to_normalize = [x for x in list(df_day_63_ana) if x not in column_names_to_not_normalize ]

# Normalized all features columns except the 'systemid'
x_day_63 = df_day_63_ana[column_names_to_normalize].values
x_scaled_day_63 = min_max_scaler_day_63.fit_transform(x_day_63)
df_day_63_scaled = pd.DataFrame(x_scaled_day_63, columns=column_names_to_normalize, index = df_day_63_ana.index)
df_day_63_ana[column_names_to_normalize] = df_day_63_scaled

In [25]:
# ## Get the column index
# col_names = list(df_day_63_scaled)
# L = len(col_names)

# for i in range(0, L):
#     print i, col_names[i]

In [26]:
##################################### Day 70: Data Standarizationå ##############################################

# Load standarization parameter from the disk
path_minimax_scaler_day_70 = '/Users/dwahid/Documents/GitHub/fraud_detection/src/saved_models/minmax_scaler_gmm_day_70.sav'
min_max_scaler_day_70 = pickle.load(open(path_minimax_scaler_day_70, 'rb'))

# List of columns to normalize
column_names_to_not_normalize = ['admin_email', 'days_on_platform', 'effective_date', 
                                 'is_sales_managed', 'signup_date', 'systemid']
column_names_to_normalize = [x for x in list(df_day_70_ana) if x not in column_names_to_not_normalize ]

# Normalized all features columns except the 'systemid'
x_day_70 = df_day_70_ana[column_names_to_normalize].values
x_scaled_day_70 = min_max_scaler_day_70.fit_transform(x_day_70)
df_day_70_scaled = pd.DataFrame(x_scaled_day_70, columns=column_names_to_normalize, index = df_day_70_ana.index)
df_day_70_ana[column_names_to_normalize] = df_day_70_scaled

In [27]:
## Get the column index
# col_names = list(df_day_70_scaled)
# L = len(col_names)

# for i in range(0, L):
#     print i, col_names[i]

In [28]:
##################################### Day 77: Data Standarizationå ##############################################

# Load standarization parameter from the disk
path_minimax_scaler_day_77 = '/Users/dwahid/Documents/GitHub/fraud_detection/src/saved_models/minmax_scaler_gmm_day_77.sav'
min_max_scaler_day_77 = pickle.load(open(path_minimax_scaler_day_77, 'rb'))

# List of columns to normalize
column_names_to_not_normalize = ['admin_email', 'days_on_platform', 'effective_date', 
                                 'is_sales_managed', 'signup_date', 'systemid']
column_names_to_normalize = [x for x in list(df_day_77_ana) if x not in column_names_to_not_normalize ]

# Normalized all features columns except the 'systemid'
x_day_77 = df_day_77_ana[column_names_to_normalize].values
x_scaled_day_77 = min_max_scaler_day_77.fit_transform(x_day_77)
df_day_77_scaled = pd.DataFrame(x_scaled_day_77, columns=column_names_to_normalize, index = df_day_77_ana.index)
df_day_77_ana[column_names_to_normalize] = df_day_77_scaled

In [29]:
## Get the column index
# col_names = list(df_day_77_scaled)
# L = len(col_names)

# for i in range(0, L):
#     print i, col_names[i]

In [30]:
##################################### Day 84: Data Standarizationå ##############################################

# Load standarization parameter from the disk
path_minimax_scaler_day_84 = '/Users/dwahid/Documents/GitHub/fraud_detection/src/saved_models/minmax_scaler_gmm_day_84.sav'
min_max_scaler_day_84 = pickle.load(open(path_minimax_scaler_day_84, 'rb'))

# List of columns to normalize
column_names_to_not_normalize = ['admin_email', 'days_on_platform', 'effective_date', 
                                 'is_sales_managed', 'signup_date', 'systemid']
column_names_to_normalize = [x for x in list(df_day_84_ana) if x not in column_names_to_not_normalize ]

# Normalized all features columns except the 'systemid'
x_day_84 = df_day_84_ana[column_names_to_normalize].values
x_scaled_day_84 = min_max_scaler_day_84.fit_transform(x_day_84)
df_day_84_scaled = pd.DataFrame(x_scaled_day_84, columns=column_names_to_normalize, index = df_day_84_ana.index)
df_day_84_ana[column_names_to_normalize] = df_day_84_scaled

In [31]:
## Get the column index
# col_names = list(df_day_84_scaled)
# L = len(col_names)

# for i in range(0, L):
#     print i, col_names[i]

In [32]:
##################################### Day 91: Data Standarizationå ##############################################

# Load standarization parameter from the disk
path_minimax_scaler_day_91 = '/Users/dwahid/Documents/GitHub/fraud_detection/src/saved_models/minmax_scaler_gmm_day_91.sav'
min_max_scaler_day_91 = pickle.load(open(path_minimax_scaler_day_91, 'rb'))

# List of columns to normalize
column_names_to_not_normalize = ['admin_email', 'days_on_platform', 'effective_date', 
                                 'is_sales_managed', 'signup_date', 'systemid']
column_names_to_normalize = [x for x in list(df_day_91_ana) if x not in column_names_to_not_normalize ]

# Normalized all features columns except the 'systemid'
x_day_91 = df_day_91_ana[column_names_to_normalize].values
x_scaled_day_91 = min_max_scaler_day_91.fit_transform(x_day_91)
df_day_91_scaled = pd.DataFrame(x_scaled_day_91, columns=column_names_to_normalize, index = df_day_91_ana.index)
df_day_91_ana[column_names_to_normalize] = df_day_91_scaled

In [33]:
## Get the column index
# col_names = list(df_day_91_scaled)
# L = len(col_names)

# for i in range(0, L):
#     print i, col_names[i]

## Unsupervised (GMM) Clustering
### Predicting clusters
Now, we will predict clusters corresponding to each new users accounts.

In [34]:
##################################### Day 07: Predicting Clusters ##############################################

# Load the trained GMM models from disk
path_gmm_day_07 = '/Users/dwahid/Documents/GitHub/fraud_detection/src/saved_models/fraud_detection_clustering_day_07_k7_model.sav'
model_gmm_day_07 = pickle.load(open(path_gmm_day_07, 'rb'))

# Predicting clustering
gmm_cluster_id_day_07 = model_gmm_day_07.predict(df_day_07_scaled)

# Adding clusters id of each account to the dataframe
df_day_07['gmm_cluster_id_day_07'] = gmm_cluster_id_day_07

In [35]:
# # Get the column index
# col_names = list(df_day_07)
# L = len(col_names)

# for i in range(0, L):
#     print i, col_names[i]

In [36]:
##################################### Day 14: Predicting Clusters ##############################################

# Load the trained GMM models from disk
path_gmm_day_14 = '/Users/dwahid/Documents/GitHub/fraud_detection/src/saved_models/fraud_detection_clustering_day_14_k8_model.sav'
model_gmm_day_14 = pickle.load(open(path_gmm_day_14, 'rb'))

# Predicting clustering
gmm_cluster_id_day_14 = model_gmm_day_14.predict(df_day_14_scaled)

# Adding clusters id of each account to the dataframe
df_day_14['gmm_cluster_id_day_14'] = gmm_cluster_id_day_14

In [37]:
# # Get the column index
# col_names = list(df_day_14)
# L = len(col_names)

# for i in range(0, L):
#     print i, col_names[i]

In [38]:
##################################### Day 21: Predicting Clusters ##############################################

# Load the trained GMM models from disk
path_gmm_day_21 = '/Users/dwahid/Documents/GitHub/fraud_detection/src/saved_models/fraud_detection_clustering_day_21_k6_model.sav'
model_gmm_day_21 = pickle.load(open(path_gmm_day_21, 'rb'))

# Predicting clustering
gmm_cluster_id_day_21 = model_gmm_day_21.predict(df_day_21_scaled)

# Adding clusters id of each account to the dataframe
df_day_21['gmm_cluster_id_day_21'] = gmm_cluster_id_day_21

In [39]:
# # Get the column index
# col_names = list(df_day_21)
# L = len(col_names)

# for i in range(0, L):
#     print i, col_names[i]

In [40]:
##################################### Day 28: Predicting Clusters ##############################################

# Load the trained GMM models from disk
path_gmm_day_28 = '/Users/dwahid/Documents/GitHub/fraud_detection/src/saved_models/fraud_detection_clustering_day_28_k6_model.sav'
model_gmm_day_28 = pickle.load(open(path_gmm_day_28, 'rb'))

# Predicting clustering
gmm_cluster_id_day_28 = model_gmm_day_28.predict(df_day_28_scaled)

# Adding clusters id of each account to the dataframe
df_day_28['gmm_cluster_id_day_28'] = gmm_cluster_id_day_28

In [41]:
## Get the column index
# col_names = list(df_day_28)
# L = len(col_names)

# for i in range(0, L):
#     print i, col_names[i]

In [42]:
##################################### Day 35: Predicting Clusters ##############################################

# Load the trained GMM models from disk
path_gmm_day_35 = '/Users/dwahid/Documents/GitHub/fraud_detection/src/saved_models/fraud_detection_clustering_day_35_k8_model.sav'
model_gmm_day_35 = pickle.load(open(path_gmm_day_35, 'rb'))

# Predicting clustering
gmm_cluster_id_day_35 = model_gmm_day_35.predict(df_day_35_scaled)

# Adding clusters id of each account to the dataframe
df_day_35['gmm_cluster_id_day_35'] = gmm_cluster_id_day_35

In [43]:
##################################### Day 42: Predicting Clusters ##############################################

# Load the trained GMM models from disk
path_gmm_day_42 = '/Users/dwahid/Documents/GitHub/fraud_detection/src/saved_models/fraud_detection_clustering_day_42_k7_model.sav'
model_gmm_day_42 = pickle.load(open(path_gmm_day_42, 'rb'))

# Predicting clustering
gmm_cluster_id_day_42 = model_gmm_day_42.predict(df_day_42_scaled)

# Adding clusters id of each account to the dataframe
df_day_42['gmm_cluster_id_day_42'] = gmm_cluster_id_day_42

In [44]:
##################################### Day 49: Predicting Clusters ##############################################

# Load the trained GMM models from disk
path_gmm_day_49 = '/Users/dwahid/Documents/GitHub/fraud_detection/src/saved_models/fraud_detection_clustering_day_49_k6_model.sav'
model_gmm_day_49 = pickle.load(open(path_gmm_day_49, 'rb'))

# Predicting clustering
gmm_cluster_id_day_49 = model_gmm_day_49.predict(df_day_49_scaled)

# Adding clusters id of each account to the dataframe
df_day_49['gmm_cluster_id_day_49'] = gmm_cluster_id_day_49

In [45]:
##################################### Day 56: Predicting Clusters ##############################################

# Load the trained GMM models from disk
path_gmm_day_56 = '/Users/dwahid/Documents/GitHub/fraud_detection/src/saved_models/fraud_detection_clustering_day_56_k8_model.sav'
model_gmm_day_56 = pickle.load(open(path_gmm_day_56, 'rb'))

# Predicting clustering
gmm_cluster_id_day_56 = model_gmm_day_56.predict(df_day_56_scaled)

# Adding clusters id of each account to the dataframe
df_day_56['gmm_cluster_id_day_56'] = gmm_cluster_id_day_56

In [46]:
# # Get the column index
# col_names = list(df_day_56_sort)
# L = len(col_names)

# for i in range(0, L):
#     print i, col_names[i]

In [47]:
##################################### Day 63: Predicting Clusters ##############################################

# Load the trained GMM models from disk
path_gmm_day_63 = '/Users/dwahid/Documents/GitHub/fraud_detection/src/saved_models/fraud_detection_clustering_day_63_k6_model.sav'
model_gmm_day_63 = pickle.load(open(path_gmm_day_63, 'rb'))

# Predicting clustering
gmm_cluster_id_day_63 = model_gmm_day_63.predict(df_day_63_scaled)

# Adding clusters id of each account to the dataframe
df_day_63['gmm_cluster_id_day_63'] = gmm_cluster_id_day_63

In [48]:
# # Get the column index
# col_names = list(df_day_63)
# L = len(col_names)

# for i in range(0, L):
#     print i, col_names[i]

In [49]:
##################################### Day 70: Predicting Clusters ##############################################

# Load the trained GMM models from disk
path_gmm_day_70 = '/Users/dwahid/Documents/GitHub/fraud_detection/src/saved_models/fraud_detection_clustering_day_70_k8_model.sav'
model_gmm_day_70 = pickle.load(open(path_gmm_day_70, 'rb'))

# Predicting clustering
gmm_cluster_id_day_70 = model_gmm_day_70.predict(df_day_70_scaled)

# Adding clusters id of each account to the dataframe
df_day_70['gmm_cluster_id_day_70'] = gmm_cluster_id_day_70

In [50]:
##################################### Day 77: Predicting Clusters ##############################################

# Load the trained GMM models from disk
path_gmm_day_77 = '/Users/dwahid/Documents/GitHub/fraud_detection/src/saved_models/fraud_detection_clustering_day_77_k6_model.sav'
model_gmm_day_77 = pickle.load(open(path_gmm_day_77, 'rb'))

# Predicting clustering
gmm_cluster_id_day_77 = model_gmm_day_77.predict(df_day_77_scaled)

# Adding clusters id of each account to the dataframe
df_day_77['gmm_cluster_id_day_77'] = gmm_cluster_id_day_77

In [51]:
##################################### Day 84: Predicting Clusters ##############################################

# Load the trained GMM models from disk
path_gmm_day_84 = '/Users/dwahid/Documents/GitHub/fraud_detection/src/saved_models/fraud_detection_clustering_day_84_k8_model.sav'
model_gmm_day_84 = pickle.load(open(path_gmm_day_84, 'rb'))

# Predicting clustering
gmm_cluster_id_day_84 = model_gmm_day_84.predict(df_day_84_scaled)

# Adding clusters id of each account to the dataframe
df_day_84['gmm_cluster_id_day_84'] = gmm_cluster_id_day_84

In [52]:
##################################### Day 91: Predicting Clusters ##############################################

# Load the trained GMM models from disk
path_gmm_day_91 = '/Users/dwahid/Documents/GitHub/fraud_detection/src/saved_models/fraud_detection_clustering_day_91_k7_model.sav'
model_gmm_day_91 = pickle.load(open(path_gmm_day_91, 'rb'))

# Predicting clustering
gmm_cluster_id_day_91 = model_gmm_day_91.predict(df_day_91_scaled)

# Adding clusters id of each account to the dataframe
df_day_91['gmm_cluster_id_day_91'] = gmm_cluster_id_day_91

## Filtering users accounts associated with the fraud clusters

From the analysis on the training data, our observation on the fraud risk clusters are in the follwing:
- **Day 07 (week 01):** Number of clusters - 6; Fraud risk cluster - C01
- **Day 14 (week 02):** Number of clusters - 6; Fraud risk cluster - C01
- **Day 21 (week 03):** Number of clusters - 8; Fraud risk cluster - C05
- **Day 28 (week 04):** Number of clusters - 8; Fraud risk cluster - C05
- **Day 35 (week 05):** Number of clusters - 7; Fraud risk cluster - C05
- **Day 42 (week 06):** Number of clusters - 6; Fraud risk cluster - C01
- **Day 49 (week 07):** Number of clusters - 8; Fraud risk cluster - C02
- **Day 56 (week 08):** Number of clusters - 7; Fraud risk cluster - C05
- **Day 63 (week 09):** Number of clusters - 7; Fraud risk cluster - C05
- **Day 70 (week 10):** Number of clusters - 8; Fraud risk cluster - C07
- **Day 77 (week 11):** Number of clusters - 6; Fraud risk cluster - C01
- **Day 84 (week 12):** Number of clusters - 6; Fraud risk cluster - C01
- **Day 91 (week 13):** Number of clusters - 7; Fraud risk cluster - C05


In [53]:
####################### Day 07: Filtering Users Accounts Associated With Fraud Clusters #############

# The only the accounts labeled as cluster c01 (fraud cluster)
df_day_07_gmm_c01 = df_day_07[df_day_07.gmm_cluster_id_day_07 == 1]

df_day_07_gmm_c01 = df_day_07_gmm_c01.reset_index() # resetting index
df_day_07_gmm_c01 = df_day_07_gmm_c01.drop(columns=['index'], axis=1) # droping adding columns during the resetting

In [54]:
# df_day_07_gmm_c01.head()
# df_day_07_gmm_c01.shape

In [55]:
####################### Day 14: Filtering Users Accounts Associated With Fraud Clusters #############


# The only the accounts labeled as cluster c07 (fraud cluster)
df_day_14_gmm_c07 = df_day_14[df_day_14.gmm_cluster_id_day_14 == 7]

df_day_14_gmm_c07 = df_day_14_gmm_c07.reset_index() # resetting index
df_day_14_gmm_c07 = df_day_14_gmm_c07.drop(columns=['index'], axis=1) # droping adding columns during the resetting

In [56]:
# df_day_14_gmm_c07.head()
# df_day_14_gmm_c07.shape

In [57]:
####################### Day 21: Filtering Users Accounts Associated With Fraud Clusters #############

# The only the accounts labeled as cluster c05 (fraud cluster)
df_day_21_gmm_c05 = df_day_21[df_day_21.gmm_cluster_id_day_21 == 5]

df_day_21_gmm_c05 = df_day_21_gmm_c05.reset_index() # resetting index
df_day_21_gmm_c05 = df_day_21_gmm_c05.drop(columns=['index'], axis=1) # droping adding columns during the resetting

In [58]:
# df_day_21_gmm_c05.head()
# df_day_21_gmm_c05.shape

In [59]:
####################### Day 28: Filtering Users Accounts Associated With Fraud Clusters #############

# The only the accounts labeled as cluster c05 (fraud cluster)
df_day_28_gmm_c05 = df_day_28[df_day_28.gmm_cluster_id_day_28 == 5]

df_day_28_gmm_c05 = df_day_28_gmm_c05.reset_index() # resetting index
df_day_28_gmm_c05 = df_day_28_gmm_c05.drop(columns=['index'], axis=1) # droping adding columns during the resetting

In [60]:
####################### Day 35: Filtering Users Accounts Associated With Fraud Clusters #############

# The only the accounts labeled as cluster c05 (fraud cluster)
df_day_35_gmm_c05 = df_day_35[df_day_35.gmm_cluster_id_day_35 == 5]

df_day_35_gmm_c05 = df_day_35_gmm_c05.reset_index() # resetting index
df_day_35_gmm_c05 = df_day_35_gmm_c05.drop(columns=['index'], axis=1) # droping adding columns during the resetting

In [61]:
####################### Day 42: Filtering Users Accounts Associated With Fraud Clusters #############

# The only the accounts labeled as cluster c05 (fraud cluster)
df_day_42_gmm_c05 = df_day_42[df_day_42.gmm_cluster_id_day_42 == 5]

df_day_42_gmm_c05 = df_day_42_gmm_c05.reset_index() # resetting index
df_day_42_gmm_c05 = df_day_42_gmm_c05.drop(columns=['index'], axis=1) # droping adding columns during the resetting

In [62]:
####################### Day 49: Filtering Users Accounts Associated With Fraud Clusters #############

# The only the accounts labeled as cluster c01 (fraud cluster)
df_day_49_gmm_c01 = df_day_49[df_day_49.gmm_cluster_id_day_49 == 1]

df_day_49_gmm_c01 = df_day_49_gmm_c01.reset_index() # resetting index
df_day_49_gmm_c01 = df_day_49_gmm_c01.drop(columns=['index'], axis=1) # droping adding columns during the resetting

In [63]:
####################### Day 56: Filtering Users Accounts Associated With Fraud Clusters #############

# The only the accounts labeled as cluster c05 (fraud cluster)
df_day_56_gmm_c05 = df_day_56[df_day_56.gmm_cluster_id_day_56 == 5]

df_day_56_gmm_c05 = df_day_56_gmm_c05.reset_index() # resetting index
df_day_56_gmm_c05 = df_day_56_gmm_c05.drop(columns=['index'], axis=1) # droping adding columns during the resetting

In [64]:
####################### Day 63: Filtering Users Accounts Associated With Fraud Clusters #############

# The only the accounts labeled as cluster c04 (fraud cluster)
df_day_63_gmm_c04 = df_day_63[df_day_63.gmm_cluster_id_day_63 == 4]

df_day_63_gmm_c04 = df_day_63_gmm_c04.reset_index() # resetting index
df_day_63_gmm_c04 = df_day_63_gmm_c04.drop(columns=['index'], axis=1) # droping adding columns during the resetting

In [65]:
####################### Day 70: Filtering Users Accounts Associated With Fraud Clusters #############

# The only the accounts labeled as cluster c07 (fraud cluster)
df_day_70_gmm_c07 = df_day_70[df_day_70.gmm_cluster_id_day_70 == 7]

df_day_70_gmm_c07 = df_day_70_gmm_c07.reset_index() # resetting index
df_day_70_gmm_c07 = df_day_70_gmm_c07.drop(columns=['index'], axis=1) # droping adding columns during the resetting

In [66]:
####################### Day 77: Filtering Users Accounts Associated With Fraud Clusters #############

# The only the accounts labeled as cluster c01 (fraud cluster)
df_day_77_gmm_c01 = df_day_77[df_day_77.gmm_cluster_id_day_77 == 1]

df_day_77_gmm_c01 = df_day_77_gmm_c01.reset_index() # resetting index
df_day_77_gmm_c01 = df_day_77_gmm_c01.drop(columns=['index'], axis=1) # droping adding columns during the resetting

In [67]:
####################### Day 84: Filtering Users Accounts Associated With Fraud Clusters #############

# The only the accounts labeled as cluster c04 (fraud cluster)
df_day_84_gmm_c04 = df_day_84[df_day_84.gmm_cluster_id_day_84 == 4]

df_day_84_gmm_c04 = df_day_84_gmm_c04.reset_index() # resetting index
df_day_84_gmm_c04 = df_day_84_gmm_c04.drop(columns=['index'], axis=1) # droping adding columns during the resetting

In [68]:
####################### Day 91: Filtering Users Accounts Associated With Fraud Clusters #############

# The only the accounts labeled as cluster c01 (fraud cluster)
df_day_91_gmm_c01 = df_day_91[df_day_91.gmm_cluster_id_day_91 == 1]

df_day_91_gmm_c01 = df_day_91_gmm_c01.reset_index() # resetting index
df_day_91_gmm_c01 = df_day_91_gmm_c01.drop(columns=['index'], axis=1) # droping adding columns during the resetting

## Neural Network (NN) Classifier
### Predicting Class

In [69]:
##################################### Day 07: NN Classifier  ##################################################

# Load the trained GMM models from disk (pikled file)
path_nn_day_07 = '/Users/dwahid/Documents/GitHub/fraud_detection/src/saved_models/fraud_detection_nn_classifier_day_07_k7_model.sav'
model_nn_day_07 = pickle.load(open(path_nn_day_07, 'rb'))

# Predicting clustering
nn_classifier_id_day_07 = model_nn_day_07.predict(df_day_07_scaled)

# Adding clusters id of each account to the dataframe
df_day_07['nn_classifier_id_day_07'] = nn_classifier_id_day_07

In [70]:
# df_day_07.head()

In [71]:
##################################### Day 14: NN Classifier  ##################################################

# Load the trained GMM models from disk (pikled file)
path_nn_day_14 = '/Users/dwahid/Documents/GitHub/fraud_detection/src/saved_models/fraud_detection_nn_classifier_day_14_k8_model.sav'
model_nn_day_14 = pickle.load(open(path_nn_day_14, 'rb'))

# Predicting clustering
nn_classifier_id_day_14 = model_nn_day_14.predict(df_day_14_scaled)

# Adding clusters id of each account to the dataframe
df_day_14['nn_classifier_id_day_14'] = nn_classifier_id_day_14

In [72]:
# df_day_14.head()

In [73]:
##################################### Day 21: NN Classifier  ##################################################

# Load the trained GMM models from disk (pikled file)
path_nn_day_21 = '/Users/dwahid/Documents/GitHub/fraud_detection/src/saved_models/fraud_detection_nn_classifier_day_21_k6_model.sav'
model_nn_day_21 = pickle.load(open(path_nn_day_21, 'rb'))

# Predicting clustering
nn_classifier_id_day_21 = model_nn_day_21.predict(df_day_21_scaled)

# Adding clusters id of each account to the dataframe
df_day_21['nn_classifier_id_day_21'] = nn_classifier_id_day_21

In [74]:
# df_day_21.head()

In [75]:
##################################### Day 28: NN Classifier  ##################################################

# Load the trained GMM models from disk (pikled file)
path_nn_day_28 = '/Users/dwahid/Documents/GitHub/fraud_detection/src/saved_models/fraud_detection_nn_classifier_day_28_k6_model.sav'
model_nn_day_28 = pickle.load(open(path_nn_day_28, 'rb'))

# Predicting clustering
nn_classifier_id_day_28 = model_nn_day_28.predict(df_day_28_scaled)

# Adding clusters id of each account to the dataframe
df_day_28['nn_classifier_id_day_28'] = nn_classifier_id_day_28

In [76]:
##################################### Day 35: NN Classifier  ##################################################

# Load the trained GMM models from disk (pikled file)
path_nn_day_35 = '/Users/dwahid/Documents/GitHub/fraud_detection/src/saved_models/fraud_detection_nn_classifier_day_35_k8_model.sav'
model_nn_day_35 = pickle.load(open(path_nn_day_35, 'rb'))

# Predicting clustering
nn_classifier_id_day_35 = model_nn_day_35.predict(df_day_35_scaled)

# Adding clusters id of each account to the dataframe
df_day_35['nn_classifier_id_day_35'] = nn_classifier_id_day_35

In [77]:
##################################### Day 42: NN Classifier  ##################################################

# Load the trained GMM models from disk (pikled file)
path_nn_day_42 = '/Users/dwahid/Documents/GitHub/fraud_detection/src/saved_models/fraud_detection_nn_classifier_day_42_k7_model.sav'
model_nn_day_42 = pickle.load(open(path_nn_day_42, 'rb'))

# Predicting clustering
nn_classifier_id_day_42 = model_nn_day_42.predict(df_day_42_scaled)

# Adding clusters id of each account to the dataframe
df_day_42['nn_classifier_id_day_42'] = nn_classifier_id_day_42

In [78]:
##################################### Day 49: NN Classifier  ##################################################

# Load the trained GMM models from disk (pikled file)
path_nn_day_49 = '/Users/dwahid/Documents/GitHub/fraud_detection/src/saved_models/fraud_detection_nn_classifier_day_49_k6_model.sav'
model_nn_day_49 = pickle.load(open(path_nn_day_49, 'rb'))

# Predicting clustering
nn_classifier_id_day_49 = model_nn_day_49.predict(df_day_49_scaled)

# Adding clusters id of each account to the dataframe
df_day_49['nn_classifier_id_day_49'] = nn_classifier_id_day_49

In [79]:
##################################### Day 56: NN Classifier  ##################################################

# Load the trained GMM models from disk (pikled file)
path_nn_day_56 = '/Users/dwahid/Documents/GitHub/fraud_detection/src/saved_models/fraud_detection_nn_classifier_day_56_k8_model.sav'
model_nn_day_56 = pickle.load(open(path_nn_day_56, 'rb'))

# Predicting clustering
nn_classifier_id_day_56 = model_nn_day_56.predict(df_day_56_scaled)

# Adding clusters id of each account to the dataframe
df_day_56['nn_classifier_id_day_56'] = nn_classifier_id_day_56

In [80]:
##################################### Day 63: NN Classifier  ##################################################

# Load the trained GMM models from disk (pikled file)
path_nn_day_63 = '/Users/dwahid/Documents/GitHub/fraud_detection/src/saved_models/fraud_detection_nn_classifier_day_63_k6_model.sav'
model_nn_day_63 = pickle.load(open(path_nn_day_63, 'rb'))

# Predicting clustering
nn_classifier_id_day_63 = model_nn_day_63.predict(df_day_63_scaled)

# Adding clusters id of each account to the dataframe
df_day_63['nn_classifier_id_day_63'] = nn_classifier_id_day_63

In [81]:
##################################### Day 70: NN Classifier  ##################################################

# Load the trained GMM models from disk (pikled file)
path_nn_day_70 = '/Users/dwahid/Documents/GitHub/fraud_detection/src/saved_models/fraud_detection_nn_classifier_day_70_k8_model.sav'
model_nn_day_70 = pickle.load(open(path_nn_day_70, 'rb'))

# Predicting clustering
nn_classifier_id_day_70 = model_nn_day_70.predict(df_day_70_scaled)

# Adding clusters id of each account to the dataframe
df_day_70['nn_classifier_id_day_70'] = nn_classifier_id_day_70

In [82]:
##################################### Day 77: NN Classifier  ##################################################

# Load the trained GMM models from disk (pikled file)
path_nn_day_77 = '/Users/dwahid/Documents/GitHub/fraud_detection/src/saved_models/fraud_detection_nn_classifier_day_77_k6_model.sav'
model_nn_day_77 = pickle.load(open(path_nn_day_77, 'rb'))

# Predicting clustering
nn_classifier_id_day_77 = model_nn_day_77.predict(df_day_77_scaled)

# Adding clusters id of each account to the dataframe
df_day_77['nn_classifier_id_day_77'] = nn_classifier_id_day_77

In [83]:
##################################### Day 84: NN Classifier  ##################################################

# Load the trained GMM models from disk (pikled file)
path_nn_day_84 = '/Users/dwahid/Documents/GitHub/fraud_detection/src/saved_models/fraud_detection_nn_classifier_day_84_k8_model.sav'
model_nn_day_84 = pickle.load(open(path_nn_day_84, 'rb'))

# Predicting clustering
nn_classifier_id_day_84 = model_nn_day_84.predict(df_day_84_scaled)

# Adding clusters id of each account to the dataframe
df_day_84['nn_classifier_id_day_84'] = nn_classifier_id_day_84

In [84]:
##################################### Day 91: NN Classifier  ##################################################

# Load the trained GMM models from disk (pikled file)
path_nn_day_91 = '/Users/dwahid/Documents/GitHub/fraud_detection/src/saved_models/fraud_detection_nn_classifier_day_91_k7_model.sav'
model_nn_day_91 = pickle.load(open(path_nn_day_91, 'rb'))

# Predicting clustering
nn_classifier_id_day_91 = model_nn_day_91.predict(df_day_91_scaled)

# Adding clusters id of each account to the dataframe
df_day_91['nn_classifier_id_day_91'] = nn_classifier_id_day_91

## Filtering users accounts associated with the fraud clusters

In [85]:
# The only the accounts labeled as cluster c01 (fraud cluster)
df_day_07_nn_c01 = df_day_07[df_day_07.nn_classifier_id_day_07 == 1]

## Re-indexing
df_day_07_nn_c01 = df_day_07_nn_c01.reset_index() # resetting index
df_day_07_nn_c01 = df_day_07_nn_c01.drop(columns=['index'], axis=1) # droping adding columns during the resetting

In [86]:
# df_day_07_nn_c01.tail()
# df_day_07_nn_c01.shape

In [87]:
# The only the accounts labeled as cluster c07 (fraud cluster)
df_day_14_nn_c07 = df_day_14[df_day_14.nn_classifier_id_day_14 == 7]

## Re-indexing
df_day_14_nn_c07 = df_day_14_nn_c07.reset_index() # resetting index
df_day_14_nn_c07 = df_day_14_nn_c07.drop(columns=['index'], axis=1) # droping adding columns during the resetting

In [88]:
# df_day_14_nn_c07.tail()
# df_day_14_nn_c07.shape

In [89]:
# The only the accounts labeled as cluster c05 (fraud cluster)
df_day_21_nn_c05 = df_day_21[df_day_21.nn_classifier_id_day_21 == 5]

## Re-indexing
df_day_21_nn_c05 = df_day_21_nn_c05.reset_index() # resetting index
df_day_21_nn_c05 = df_day_21_nn_c05.drop(columns=['index'], axis=1) # droping adding columns during the resetting

In [90]:
# df_day_21_nn_c05.tail()
# df_day_21_nn_c05.shape

In [91]:
# The only the accounts labeled as cluster c05 (fraud cluster)
df_day_28_nn_c05 = df_day_28[df_day_28.nn_classifier_id_day_28 == 5]
 
## Re-indexing
df_day_28_nn_c05 = df_day_28_nn_c05.reset_index() # resetting index
df_day_28_nn_c05 = df_day_28_nn_c05.drop(columns=['index'], axis=1) # droping adding columns during the resetting

In [92]:
# The only the accounts labeled as cluster c01 (fraud cluster)
df_day_35_nn_c05 = df_day_35[df_day_35.nn_classifier_id_day_35 == 5]
 
## Re-indexing
df_day_35_nn_c05 = df_day_35_nn_c05.reset_index() # resetting index
df_day_35_nn_c05 = df_day_35_nn_c05.drop(columns=['index'], axis=1) # droping adding columns during the resetting

In [93]:
# The only the accounts labeled as cluster c05 (fraud cluster)
df_day_42_nn_c05 = df_day_42[df_day_42.nn_classifier_id_day_42 == 5]
 
## Re-indexing
df_day_42_nn_c05 = df_day_42_nn_c05.reset_index() # resetting index
df_day_42_nn_c05 = df_day_42_nn_c05.drop(columns=['index'], axis=1) # droping adding columns during the resetting

In [94]:
## The only the accounts labeled as cluster c01 (fraud cluster)
df_day_49_nn_c01 = df_day_49[df_day_49.nn_classifier_id_day_49 == 1]
 
## Re-indexing
df_day_49_nn_c01 = df_day_49_nn_c01.reset_index() # resetting index
df_day_49_nn_c01 = df_day_49_nn_c01.drop(columns=['index'], axis=1) # droping adding columns during the resetting

In [95]:
## The only the accounts labeled as cluster c05 (fraud cluster)
df_day_56_nn_c05 = df_day_56[df_day_56.nn_classifier_id_day_56 == 5]
 
## Re-indexing
df_day_56_nn_c05 = df_day_56_nn_c05.reset_index() # resetting index
df_day_56_nn_c05 = df_day_56_nn_c05.drop(columns=['index'], axis=1) # droping adding columns during the resetting

In [96]:
## The only the accounts labeled as cluster c04 (fraud cluster)
df_day_63_nn_c04 = df_day_63[df_day_63.nn_classifier_id_day_63 == 4]
 
## Re-indexing
df_day_63_nn_c04 = df_day_63_nn_c04.reset_index() # resetting index
df_day_63_nn_c04 = df_day_63_nn_c04.drop(columns=['index'], axis=1) # droping adding columns during the resetting

In [97]:
## The only the accounts labeled as cluster c07 (fraud cluster)
df_day_70_nn_c07 = df_day_70[df_day_70.nn_classifier_id_day_70 == 7]
 
## Re-indexing
df_day_70_nn_c07 = df_day_70_nn_c07.reset_index() # resetting index
df_day_70_nn_c07 = df_day_70_nn_c07.drop(columns=['index'], axis=1) # droping adding columns during the resetting

In [98]:
## The only the accounts labeled as cluster c01 (fraud cluster)
df_day_77_nn_c01 = df_day_77[df_day_77.nn_classifier_id_day_77 == 1]
 
## Re-indexing
df_day_77_nn_c01 = df_day_77_nn_c01.reset_index() # resetting index
df_day_77_nn_c01 = df_day_77_nn_c01.drop(columns=['index'], axis=1) # droping adding columns during the resetting

In [99]:
## The only the accounts labeled as cluster c04 (fraud cluster)
df_day_84_nn_c04 = df_day_84[df_day_84.nn_classifier_id_day_84 == 4]
 
## Re-indexing
df_day_84_nn_c04 = df_day_84_nn_c04.reset_index() # resetting index
df_day_84_nn_c04 = df_day_84_nn_c04.drop(columns=['index'], axis=1) # droping adding columns during the resetting

In [100]:
## The only the accounts labeled as cluster c01 (fraud cluster)
df_day_91_nn_c01 = df_day_91[df_day_91.nn_classifier_id_day_91 == 1]
 
## Re-indexing
df_day_91_nn_c01 = df_day_91_nn_c01.reset_index() # resetting index
df_day_91_nn_c01 = df_day_91_nn_c01.drop(columns=['index'], axis=1) # droping adding columns during the resetting

## Merging GMM Cluster and NN Class: For Each Week

In [101]:
#################### Day 07: Merging GMM and NN Class ##############################################

## Droping the cluster and class id lable columns
df_day_07_gmm_c01 = df_day_07_gmm_c01.drop(columns=['gmm_cluster_id_day_07'], axis=1)
df_day_07_nn_c01_id = pd.DataFrame(df_day_07_nn_c01['systemid'])

## Merge two dataframes based on the 'systemid'
df_day_07_c01 = pd.merge(df_day_07_gmm_c01, df_day_07_nn_c01_id, how='inner', on=['systemid'])

In [102]:
# df_day_07_c01.iloc[16]['client_count_day_7']
# list(df_day_07_c01)

In [103]:
#################### Day 14: Merging GMM and NN Class ##############################################

## Droping the cluster and class id lable columns
df_day_14_gmm_c07 = df_day_14_gmm_c07.drop(columns=['gmm_cluster_id_day_14'], axis=1)
df_day_14_nn_c07_id = pd.DataFrame(df_day_14_nn_c07['systemid'])

## Merge two dataframes based on the 'systemid'
df_day_14_c07 = pd.merge(df_day_14_gmm_c07, df_day_14_nn_c07_id, how='inner', on=['systemid'])

In [104]:
#################### Day 21: Merging GMM and NN Class ##############################################

## Droping the cluster and class id lable columns
df_day_21_gmm_c05 = df_day_21_gmm_c05.drop(columns=['gmm_cluster_id_day_21'], axis=1)
df_day_21_nn_c05_id = pd.DataFrame(df_day_21_nn_c05['systemid'])

## Merge two dataframes based on the 'systemid'
df_day_21_c05 = pd.merge(df_day_21_gmm_c05, df_day_21_nn_c05_id, how='inner', on=['systemid'])

In [105]:
#################### Day 28: Merging GMM and NN Class ##############################################

## Droping the cluster and class id lable columns
df_day_28_gmm_c05 = df_day_28_gmm_c05.drop(columns=['gmm_cluster_id_day_28'], axis=1)
df_day_28_nn_c05_id = pd.DataFrame(df_day_28_nn_c05['systemid'])

## Merge two dataframes based on the 'systemid'
df_day_28_c05 = pd.merge(df_day_28_gmm_c05, df_day_28_nn_c05_id, how='inner', on=['systemid'])

In [106]:
# list(df_day_28_c05)
# df_day_28_c05.head()

In [107]:
#################### Day 35: Merging GMM and NN Class ##############################################

## Droping the cluster and class id lable columns
df_day_35_gmm_c05 = df_day_35_gmm_c05.drop(columns=['gmm_cluster_id_day_35'], axis=1)
df_day_35_nn_c05_id = pd.DataFrame(df_day_35_nn_c05['systemid'])

## Merge two dataframes based on the 'systemid'
df_day_35_c05 = pd.merge(df_day_35_gmm_c05, df_day_35_nn_c05_id, how='inner', on=['systemid'])

In [108]:
#################### Day 42: Merging GMM and NN Class ##############################################

## Droping the cluster and class id lable columns
df_day_42_gmm_c05 = df_day_42_gmm_c05.drop(columns=['gmm_cluster_id_day_42'], axis=1)
df_day_42_nn_c05_id = pd.DataFrame(df_day_42_nn_c05['systemid'])

## Merge two dataframes based on the 'systemid'
df_day_42_c05 = pd.merge(df_day_42_gmm_c05, df_day_42_nn_c05_id, how='inner', on=['systemid'])

In [109]:
#################### Day 49: Merging GMM and NN Class ##############################################

## Droping the cluster and class id lable columns
df_day_49_gmm_c01 = df_day_49_gmm_c01.drop(columns=['gmm_cluster_id_day_49'], axis=1)
df_day_49_nn_c01_id = pd.DataFrame(df_day_49_nn_c01['systemid'])

## Merge two dataframes based on the 'systemid'
df_day_49_c01 = pd.merge(df_day_49_gmm_c01, df_day_49_nn_c01_id, how='inner', on=['systemid'])

In [110]:
#################### Day 56: Merging GMM and NN Class ##############################################

## Droping the cluster and class id lable columns
df_day_56_gmm_c05 = df_day_56_gmm_c05.drop(columns=['gmm_cluster_id_day_56'], axis=1)
df_day_56_nn_c05_id = pd.DataFrame(df_day_56_nn_c05['systemid'])

## Merge two dataframes based on the 'systemid'
df_day_56_c05 = pd.merge(df_day_56_gmm_c05, df_day_56_nn_c05_id, how='inner', on=['systemid'])

In [111]:
#################### Day 63: Merging GMM and NN Class ##############################################

## Droping the cluster and class id lable columns
df_day_63_gmm_c04 = df_day_63_gmm_c04.drop(columns=['gmm_cluster_id_day_63'], axis=1)
df_day_63_nn_c04_id = pd.DataFrame(df_day_63_nn_c04['systemid'])

## Merge two dataframes based on the 'systemid'
df_day_63_c04 = pd.merge(df_day_63_gmm_c04, df_day_63_nn_c04_id, how='inner', on=['systemid'])

In [112]:
#################### Day 70: Merging GMM and NN Class ##############################################

## Droping the cluster and class id lable columns
df_day_70_gmm_c07 = df_day_70_gmm_c07.drop(columns=['gmm_cluster_id_day_70'], axis=1)
df_day_70_nn_c07_id = pd.DataFrame(df_day_70_nn_c07['systemid'])

## Merge two dataframes based on the 'systemid'
df_day_70_c07 = pd.merge(df_day_70_gmm_c07, df_day_70_nn_c07_id, how='inner', on=['systemid'])

In [113]:
#################### Day 77: Merging GMM and NN Class ##############################################

## Droping the cluster and class id lable columns
df_day_77_gmm_c01 = df_day_77_gmm_c01.drop(columns=['gmm_cluster_id_day_77'], axis=1)
df_day_77_nn_c01_id = pd.DataFrame(df_day_77_nn_c01['systemid'])

## Merge two dataframes based on the 'systemid'
df_day_77_c01 = pd.merge(df_day_77_gmm_c01, df_day_77_nn_c01_id, how='inner', on=['systemid'])

In [114]:
#################### Day 84: Merging GMM and NN Class ##############################################

## Droping the cluster and class id lable columns
df_day_84_gmm_c04 = df_day_84_gmm_c04.drop(columns=['gmm_cluster_id_day_84'], axis=1)
df_day_84_nn_c04_id = pd.DataFrame(df_day_84_nn_c04['systemid'])

## Merge two dataframes based on the 'systemid'
df_day_84_c04 = pd.merge(df_day_84_gmm_c04, df_day_84_nn_c04_id, how='inner', on=['systemid'])

In [115]:
#################### Day 91: Merging GMM and NN Class ##############################################

## Droping the cluster and class id lable columns
df_day_91_gmm_c01 = df_day_91_gmm_c01.drop(columns=['gmm_cluster_id_day_91'], axis=1)
df_day_91_nn_c01_id = pd.DataFrame(df_day_91_nn_c01['systemid'])

## Merge two dataframes based on the 'systemid'
df_day_91_c01 = pd.merge(df_day_91_gmm_c01, df_day_91_nn_c01_id, how='inner', on=['systemid'])

## Renaming Columns in the Weekly FRC DataFrames

In [116]:
############### Day 07: Renaming Columns ########################################################

df_day_07_frc = df_day_07_c01.rename(columns={"avg_wc_address_day_7": "avg_wc_address", 
                                              "avg_wc_description_day_7": "avg_wc_description",
                                              "avg_wc_notes_day_7": "avg_wc_notes",
                                              "avg_wc_terms_day_7": "avg_wc_terms",
                                              "client_count_day_7": "client_count",
                                              "invoice_count_day_7": "invoice_count"})


In [117]:
############### Day 14: Renaming Columns ########################################################

df_day_14_frc = df_day_14_c07.rename(columns={"avg_wc_address_day_14": "avg_wc_address", 
                                              "avg_wc_description_day_14": "avg_wc_description",
                                              "avg_wc_notes_day_14": "avg_wc_notes",
                                              "avg_wc_terms_day_14": "avg_wc_terms",
                                              "client_count_day_14": "client_count",
                                              "invoice_count_day_14": "invoice_count"})

In [118]:
############### Day 21: Renaming Columns ########################################################

df_day_21_frc = df_day_21_c05.rename(columns={"avg_wc_address_day_21": "avg_wc_address", 
                                              "avg_wc_description_day_21": "avg_wc_description",
                                              "avg_wc_notes_day_21": "avg_wc_notes",
                                              "avg_wc_terms_day_21": "avg_wc_terms",
                                              "client_count_day_21": "client_count",
                                              "invoice_count_day_21": "invoice_count"})

In [119]:
############### Day 28: Renaming Columns ########################################################

df_day_28_frc = df_day_28_c05.rename(columns={"avg_wc_address_day_28": "avg_wc_address", 
                                              "avg_wc_description_day_28": "avg_wc_description",
                                              "avg_wc_notes_day_28": "avg_wc_notes",
                                              "avg_wc_terms_day_28": "avg_wc_terms",
                                              "client_count_day_28": "client_count",
                                              "invoice_count_day_28": "invoice_count"})

In [120]:
############### Day 35: Renaming Columns ########################################################

df_day_35_frc = df_day_35_c05.rename(columns={"avg_wc_address_day_35": "avg_wc_address", 
                                              "avg_wc_description_day_35": "avg_wc_description",
                                              "avg_wc_notes_day_35": "avg_wc_notes",
                                              "avg_wc_terms_day_35": "avg_wc_terms",
                                              "client_count_day_35": "client_count",
                                              "invoice_count_day_35": "invoice_count"})

In [121]:
############### Day 42: Renaming Columns ########################################################

df_day_42_frc = df_day_42_c05.rename(columns={"avg_wc_address_day_42": "avg_wc_address", 
                                              "avg_wc_description_day_42": "avg_wc_description",
                                              "avg_wc_notes_day_42": "avg_wc_notes",
                                              "avg_wc_terms_day_42": "avg_wc_terms",
                                              "client_count_day_42": "client_count",
                                              "invoice_count_day_42": "invoice_count"})

In [122]:
############### Day 49: Renaming Columns ########################################################

df_day_49_frc = df_day_49_c01.rename(columns={"avg_wc_address_day_49": "avg_wc_address", 
                                              "avg_wc_description_day_49": "avg_wc_description",
                                              "avg_wc_notes_day_49": "avg_wc_notes",
                                              "avg_wc_terms_day_49": "avg_wc_terms",
                                              "client_count_day_49": "client_count",
                                              "invoice_count_day_49": "invoice_count"})

In [123]:
############### Day 56: Renaming Columns ########################################################

df_day_56_frc = df_day_56_c05.rename(columns={"avg_wc_address_day_56": "avg_wc_address", 
                                              "avg_wc_description_day_56": "avg_wc_description",
                                              "avg_wc_notes_day_56": "avg_wc_notes",
                                              "avg_wc_terms_day_56": "avg_wc_terms",
                                              "client_count_day_56": "client_count",
                                              "invoice_count_day_56": "invoice_count"})

In [124]:
############### Day 63: Renaming Columns ########################################################

df_day_63_frc = df_day_63_c04.rename(columns={"avg_wc_address_day_63": "avg_wc_address", 
                                              "avg_wc_description_day_63": "avg_wc_description",
                                              "avg_wc_notes_day_63": "avg_wc_notes",
                                              "avg_wc_terms_day_63": "avg_wc_terms",
                                              "client_count_day_63": "client_count",
                                              "invoice_count_day_63": "invoice_count"})

In [125]:
############### Day 70: Renaming Columns ########################################################

df_day_70_frc = df_day_70_c07.rename(columns={"avg_wc_address_day_70": "avg_wc_address", 
                                              "avg_wc_description_day_70": "avg_wc_description",
                                              "avg_wc_notes_day_70": "avg_wc_notes",
                                              "avg_wc_terms_day_70": "avg_wc_terms",
                                              "client_count_day_70": "client_count",
                                              "invoice_count_day_70": "invoice_count"})

In [126]:
############### Day 77: Renaming Columns ########################################################

df_day_77_frc = df_day_77_c01.rename(columns={"avg_wc_address_day_77": "avg_wc_address", 
                                              "avg_wc_description_day_77": "avg_wc_description",
                                              "avg_wc_notes_day_77": "avg_wc_notes",
                                              "avg_wc_terms_day_77": "avg_wc_terms",
                                              "client_count_day_77": "client_count",
                                              "invoice_count_day_77": "invoice_count"})

In [127]:
############### Day 84: Renaming Columns ########################################################

df_day_84_frc = df_day_84_c04.rename(columns={"avg_wc_address_day_84": "avg_wc_address", 
                                              "avg_wc_description_day_84": "avg_wc_description",
                                              "avg_wc_notes_day_84": "avg_wc_notes",
                                              "avg_wc_terms_day_84": "avg_wc_terms",
                                              "client_count_day_84": "client_count",
                                              "invoice_count_day_84": "invoice_count"})

In [128]:
############### Day 91: Renaming Columns ########################################################

df_day_91_frc = df_day_91_c01.rename(columns={"avg_wc_address_day_91": "avg_wc_address", 
                                              "avg_wc_description_day_91": "avg_wc_description",
                                              "avg_wc_notes_day_91": "avg_wc_notes",
                                              "avg_wc_terms_day_91": "avg_wc_terms",
                                              "client_count_day_91": "client_count",
                                              "invoice_count_day_91": "invoice_count"})

## Adding Model Name Column

In [129]:
############### Adding Predicting Column Name ##################################################
df_day_07_frc['model_name'] = 'D07'
df_day_14_frc['model_name'] = 'D14'
df_day_21_frc['model_name'] = 'D21'
df_day_28_frc['model_name'] = 'D28'
df_day_35_frc['model_name'] = 'D35'
df_day_42_frc['model_name'] = 'D42'
df_day_49_frc['model_name'] = 'D49'
df_day_56_frc['model_name'] = 'D56'
df_day_63_frc['model_name'] = 'D63'
df_day_70_frc['model_name'] = 'D70'
df_day_77_frc['model_name'] = 'D77'
df_day_84_frc['model_name'] = 'D84'
df_day_91_frc['model_name'] = 'D91'


## Merging All Weekly Fraud Risk Clusters DataFrames 


In [130]:
## Merging all FRC
df_merge_frc = df_day_07_frc.copy()
df_merge_frc = df_merge_frc.append(df_day_14_frc)
df_merge_frc = df_merge_frc.append(df_day_21_frc)
df_merge_frc = df_merge_frc.append(df_day_28_frc)
df_merge_frc = df_merge_frc.append(df_day_35_frc)
df_merge_frc = df_merge_frc.append(df_day_42_frc)
df_merge_frc = df_merge_frc.append(df_day_49_frc)
df_merge_frc = df_merge_frc.append(df_day_56_frc)
df_merge_frc = df_merge_frc.append(df_day_63_frc)
df_merge_frc = df_merge_frc.append(df_day_70_frc)
df_merge_frc = df_merge_frc.append(df_day_77_frc)
df_merge_frc = df_merge_frc.append(df_day_84_frc)
df_merge_frc = df_merge_frc.append(df_day_91_frc)


In [131]:
## Reindexing the merged dataframe
df_merge_frc = df_merge_frc.reset_index(drop=True)

In [132]:
# list(df_merge_frc)
df_merge_frc.head()

Unnamed: 0,activateexpense,activateotherincome,activatepayment,admin_email,admindeactivation,adminonlinepaymentattempt,adminpayinvoiceonlineinvoice,adminpayinvoiceonlinelistview,archiveclient,archiveexpense,...,updatecategory,updateclient,updatecompanyprofile,updatecontractor,updatecreditnote,updateestimate,updateexpense,updateinvoicesample,updateitem,updateservice
0,0.0,0.0,0.0,pira_2@yahoo.co.uk,0.0,0,0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0,0.0,0.0,1.0,0.0,0.0
1,0.0,0.0,0.0,msy082212@gmail.com,0.0,0,0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0,0.0,0.0,0.0,0.0,0.0
2,0.0,0.0,0.0,ben@kitchendiscorecords.com,0.0,0,0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0,0.0,0.0,0.0,0.0,0.0
3,0.0,0.0,0.0,dipanjanbaidya1@gmail.com,0.0,0,0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0,0.0,0.0,0.0,0.0,0.0
4,0.0,0.0,0.0,ishtiakkhan@hotmail.com,0.0,0,0,0.0,0.0,0.0,...,0.0,5.0,0.0,0.0,0,0.0,0.0,0.0,0.0,0.0


In [133]:
df_merge_frc.shape

(40320, 94)

## Computing Fraud Risk Score (FRS)
Computing Fraud Risk Score (FRS) based on the follwining formula: 

- 'days_on_platform' = $d$
- 'declinedonlinepaymentnotification' = $p$
- 'emailinvoice' = $e$
- 'invoice_count' = $i$


$frs\_gmm = \frac{p + e + i}{d}$


In [134]:
def fraud_risk_score(row):
    
    # selected column values
    d = row['model_name']
    p = float(row['declinedonlinepaymentnotification'])
    e = float(row['emailinvoice'])
    i = float(row['invoice_count'])
    
    # fraud risk score
    if d == 'D07':
        #print("D7")
        frs = (p + e + i) / 7 
    elif d == 'D14':
        #print("D14")
        frs = (p + e + i) / 14
    elif d == 'D21':
        #print("D21")
        frs = (p + e + i) / 21
    elif d == 'D28':
        #print("D28")
        frs = (p + e + i) / 28
    elif d == 'D35':
        #print("D35")
        frs = (p + e + i) / 35
    elif d == 'D42':
        #print("D42")
        frs = (p + e + i) / 42
    elif d == 'D49':
        #print("D49")
        frs = (p + e + i) / 49
    else:
        #print("Exception")
        frs = 0
    return frs


In [135]:
## Calculating and adding Fraud Risk Score (FRS) column
col_list = ['days_on_platform', 'declinedonlinepaymentnotification', 'emailinvoice', 'invoice_count']
fraud_risk_score = df_merge_frc.apply(lambda x: fraud_risk_score(x), axis=1)
df_merge_frc['fraud_risk_score'] = fraud_risk_score     # adding this fraud risk score as a column in the dataframe


In [136]:
df_merge_frc.head()

Unnamed: 0,activateexpense,activateotherincome,activatepayment,admin_email,admindeactivation,adminonlinepaymentattempt,adminpayinvoiceonlineinvoice,adminpayinvoiceonlinelistview,archiveclient,archiveexpense,...,updateclient,updatecompanyprofile,updatecontractor,updatecreditnote,updateestimate,updateexpense,updateinvoicesample,updateitem,updateservice,fraud_risk_score
0,0.0,0.0,0.0,pira_2@yahoo.co.uk,0.0,0,0,0.0,0.0,0.0,...,0.0,0.0,0.0,0,0.0,0.0,1.0,0.0,0.0,0.0
1,0.0,0.0,0.0,msy082212@gmail.com,0.0,0,0,0.0,0.0,0.0,...,0.0,0.0,0.0,0,0.0,0.0,0.0,0.0,0.0,0.0
2,0.0,0.0,0.0,ben@kitchendiscorecords.com,0.0,0,0,0.0,0.0,0.0,...,0.0,0.0,0.0,0,0.0,0.0,0.0,0.0,0.0,0.142857
3,0.0,0.0,0.0,dipanjanbaidya1@gmail.com,0.0,0,0,0.0,0.0,0.0,...,0.0,0.0,0.0,0,0.0,0.0,0.0,0.0,0.0,0.0
4,0.0,0.0,0.0,ishtiakkhan@hotmail.com,0.0,0,0,0.0,0.0,0.0,...,5.0,0.0,0.0,0,0.0,0.0,0.0,0.0,0.0,0.142857


In [137]:
df_merge_frc['invoice_count'].max()

575

## Remove Already Labeld Fraud Risk Accounts

In [138]:
###################### Remove Already Labeled Fraud Risk Accounts #################################

## Improt labeled fraud risk accounts (labeled by support team)
df_labled_fraud = pd.read_csv('/Users/dwahid/Documents/GitHub/fraud_detection/data/fraud_risk_acc_labeled/all_fraud_status_labeled_by_support.csv', sep=",")
df_labled_fraud_systemid = pd.DataFrame(df_labled_fraud['systemid'])


In [139]:
df_labled_fraud.tail()

Unnamed: 0,systemid,admin_email,signup_date,effective_date,fraud_label,days_on_platform,support_note
460,5152105,enquiries@anisdincatering.com,2020-03-30,2020-05-12,0.0,43,catering services
461,5236163,raysonloo.sh@gmail.com,2020-05-01,2020-05-12,0.0,11,selling bags
462,5245075,shawndmix@gmail.com,2020-05-04,2020-05-12,0.0,8,trucking services
463,5209249,amandaphan1234@gmail.com,2020-04-21,2020-05-12,1.0,21,"cannot verify any information, has not logged ..."
464,5218019,katayres20@gmail.com,2020-04-24,2020-05-12,0.0,18,selling jewellery as an associate of Paparazzi


In [140]:
## Cross users accounts, if any account is already labeled then remove it 
df_merge_frc_and_labeled = pd.merge(df_merge_frc, df_labled_fraud_systemid, how='left', on=['systemid'], indicator=True)
df_merge_frc_nolabeled = df_merge_frc_and_labeled[df_merge_frc_and_labeled._merge != 'both']
df_merge_frc_nolabeled = df_merge_frc_nolabeled.drop(columns=['_merge'], axis=1)


In [141]:
df_merge_frc_nolabeled.tail()

Unnamed: 0,activateexpense,activateotherincome,activatepayment,admin_email,admindeactivation,adminonlinepaymentattempt,adminpayinvoiceonlineinvoice,adminpayinvoiceonlinelistview,archiveclient,archiveexpense,...,updateclient,updatecompanyprofile,updatecontractor,updatecreditnote,updateestimate,updateexpense,updateinvoicesample,updateitem,updateservice,fraud_risk_score
40315,0.0,0.0,0.0,michelle.lemoine@mediacom.com,0.0,0,0,0.0,0.0,0.0,...,0.0,0.0,0.0,0,0.0,0.0,0.0,0.0,0.0,0.0
40316,0.0,0.0,0.0,kyle@contou.com,0.0,0,0,0.0,0.0,0.0,...,0.0,0.0,0.0,0,0.0,0.0,0.0,0.0,0.0,0.0
40317,0.0,0.0,0.0,joerg.neumann@protonmail.ch,0.0,0,0,0.0,0.0,0.0,...,0.0,0.0,0.0,0,0.0,0.0,0.0,0.0,0.0,0.0
40318,0.0,0.0,0.0,laramkrenny456@outlook.com,0.0,0,0,0.0,0.0,0.0,...,0.0,0.0,0.0,0,0.0,0.0,0.0,0.0,0.0,0.0
40319,0.0,0.0,0.0,info@densonadmissions.com,0.0,0,0,0.0,0.0,0.0,...,0.0,0.0,0.0,0,0.0,0.0,0.0,0.0,0.0,0.0


## Sorting Based on FRS and Re-Indexing

In [142]:
############################ Sorting Based on FRS and Re-Indexing ####################################

## Sorting and reindexing reprot data frame based on the 'fraud_risk_score'
df_merge_frc_nolabeled.sort_values('fraud_risk_score', axis = 0, ascending = False, inplace = True, na_position ='last') 

## Reindexing the sorted dataframe
df_merge_frc_nolabeled = df_merge_frc_nolabeled.reset_index(drop=True)


In [143]:
df_merge_frc_nolabeled.tail()

Unnamed: 0,activateexpense,activateotherincome,activatepayment,admin_email,admindeactivation,adminonlinepaymentattempt,adminpayinvoiceonlineinvoice,adminpayinvoiceonlinelistview,archiveclient,archiveexpense,...,updateclient,updatecompanyprofile,updatecontractor,updatecreditnote,updateestimate,updateexpense,updateinvoicesample,updateitem,updateservice,fraud_risk_score
40306,0.0,0.0,0.0,info@theindylab.com,0.0,0,0,0.0,0.0,0.0,...,0.0,0.0,0.0,0,0.0,0.0,0.0,0.0,0.0,0.0
40307,0.0,0.0,0.0,bamriver@sbcglobal.net,0.0,0,0,0.0,0.0,0.0,...,0.0,0.0,0.0,0,0.0,0.0,0.0,0.0,0.0,0.0
40308,0.0,0.0,0.0,nancyvolinsky@gmail.com,0.0,0,0,0.0,0.0,0.0,...,0.0,0.0,0.0,0,0.0,0.0,0.0,0.0,0.0,0.0
40309,0.0,0.0,0.0,conradifenster@gmail.com,0.0,0,0,0.0,0.0,0.0,...,0.0,0.0,0.0,0,0.0,0.0,0.0,0.0,0.0,0.0
40310,0.0,0.0,0.0,info@densonadmissions.com,0.0,0,0,0.0,0.0,0.0,...,0.0,0.0,0.0,0,0.0,0.0,0.0,0.0,0.0,0.0


## Reporting: To Be Labeled (TBL) by Support Team

In [144]:
## Adding effective date and model name column
df_merge_frc_nolabeled['fraud_label'] = 'TBL'  # To Be Labeled (TBL) by support team
df_merge_frc_nolabeled['support_note'] = 'NaN'  # To Be Labeled (TBL) by support team

In [145]:
df_merge_frc_nolabeled.head()

Unnamed: 0,activateexpense,activateotherincome,activatepayment,admin_email,admindeactivation,adminonlinepaymentattempt,adminpayinvoiceonlineinvoice,adminpayinvoiceonlinelistview,archiveclient,archiveexpense,...,updatecontractor,updatecreditnote,updateestimate,updateexpense,updateinvoicesample,updateitem,updateservice,fraud_risk_score,fraud_label,support_note
0,0.0,0.0,0.0,christian@gruppdigital.com,0.0,0,0,0.0,61.0,0.0,...,0.0,0,0.0,0.0,0.0,0.0,2.0,79.285714,TBL,
1,0.0,0.0,0.0,angie@sconehengeberkeley.com,0.0,0,0,0.0,0.0,0.0,...,0.0,0,0.0,0.0,1.0,0.0,0.0,14.428571,TBL,
2,0.0,0.0,0.0,purplestarvaping@gmail.com,0.0,0,0,0.0,0.0,0.0,...,0.0,0,0.0,2.0,1.0,0.0,0.0,9.428571,TBL,
3,0.0,0.0,0.0,rippdogco@gmail.com,0.0,0,0,0.0,0.0,0.0,...,0.0,0,0.0,35.0,1.0,1.0,0.0,8.857143,TBL,
4,0.0,0.0,0.0,sharon_dickinson@live.com,0.0,0,0,0.0,0.0,0.0,...,0.0,0,0.0,17.0,0.0,0.0,0.0,8.0,TBL,


In [146]:
## Path and file name for user accounts for need to be labeled by support team
path = "/Users/dwahid/Documents/GitHub/fraud_detection/data/fraud_risk_acc_to_be_labeled_all_features/"

# file_name = "new_fraud_risk_acc_tbl_all_features"
# frc_tbl = path + file_name + ".tsv"

file_name = "new_fraud_risk_acc_tbl_all_features_"
today = str(date.today())

frc_tbl = path + file_name + today + ".tsv"


## Save the user accounts for need to be labeled by support team
df_merge_frc_nolabeled.to_csv(frc_tbl, sep="\t", index=False)