In [1]:
import numpy as np                  # Mathematical operations
import pandas as pd                 # Data manipulation

# Visualization
import seaborn as sns
import matplotlib.pyplot as plt     
%matplotlib inline

# Sklearn
from sklearn.model_selection import train_test_split, RandomizedSearchCV, StratifiedKFold
from sklearn.preprocessing import LabelEncoder
from sklearn.preprocessing import OneHotEncoder
from sklearn.preprocessing import StandardScaler
from sklearn.cluster import KMeans
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import accuracy_score, auc, roc_curve, roc_auc_score, classification_report, mean_squared_error, confusion_matrix, f1_score, precision_recall_curve, r2_score 
from sklearn.ensemble import AdaBoostClassifier, BaggingClassifier, GradientBoostingClassifier, RandomForestClassifier
from sklearn.ensemble import AdaBoostRegressor, BaggingRegressor, GradientBoostingRegressor, RandomForestRegressor

# Scipy
from scipy.stats import stats
from scipy.stats import ttest_ind, ttest_ind_from_stats

# XGBoost
from xgboost import XGBClassifier
from xgboost import XGBRegressor
import xgboost as xgb

# Datetime
# NOTE: the `from datetime import datetime` line below rebinds the name
# `datetime` to the class, so the module bound by `import datetime` is
# shadowed from that point on.
import datetime 
import time
from datetime import datetime

# Folium
import folium 
from folium import plugins
from folium.plugins import HeatMap

# Image
from IPython.display import Image

# Bayesian Optimizer
from skopt import BayesSearchCV

# Itertools
import itertools

# Silence warnings from this point on. With no category or module argument
# the 'ignore' filter applies to every warning category, deprecation notices
# raised by the imported libraries included.
import warnings
warnings.filterwarnings('ignore')

In [2]:
# Read the two input tables from the working directory: the account info
# table first, then the account labels table.
df_info, df_labels = map(pd.read_csv, ('account_info.csv', 'account_labels.csv'))

In [3]:
# Peek at the first five rows of the account info table.
df_info.head(n=5)

Unnamed: 0,account_id,category,balance,debits,credits
0,a21sanZjcHRlbGl8f2ZoZXJ2ZWx4a3pmdmh9aHp2emttbmVi,ATM/Cash Withdrawals,-100.0,1,0
1,a21sanZjcHRlbGl8f2ZoZXJ2ZWx4a3pmdmh9aHp2emttbmVi,Checks,-39182.84,39,0
2,a21sanZjcHRlbGl8f2ZoZXJ2ZWx4a3pmdmh9aHp2emttbmVi,Gasoline/Fuel,0.0,0,0
3,a21sanZjcHRlbGl8f2ZoZXJ2ZWx4a3pmdmh9aHp2emttbmVi,Groceries,0.0,0,0
4,a21sanZjcHRlbGl8f2ZoZXJ2ZWx4a3pmdmh9aHp2emttbmVi,Mortgages,0.0,0,0


In [4]:
# Peek at the first five rows of the account labels table.
df_labels.head(n=5)

Unnamed: 0,account_id,label
0,anl7fnp2cHVlentxfWZoYW93ZWxncW5mZnZtZXpobmpxbnV1,1
1,aHxwaW10fmhldn1xamZoam5zZWxwcXtmZHV-anx2bnBpbXl2,1
2,em1ycHZyfWZlaHtpfGZoaX5mZW18a2tmZ3ZpZXFlbnF7cGdm,0
3,en5pb2d2cXVlZ3t9e2ZoYXt0ZXV9bHFmaGZ8YmltbGt8b2Vq,0
4,cHp8amh0amJldmloe2ZocntkZWx9cH5maXJsYmhlenFqb2xy,1


In [5]:
# Attach each account's label to its info rows.
# The join key and join type are spelled out instead of relying on the
# default (inner join on every column name the two frames share), so a
# column added to both inputs later cannot silently become part of the key.
# Rows whose account_id is missing from either table are dropped by the
# inner join.
df = df_info.merge(df_labels, on='account_id', how='inner')

In [6]:
# Show the first 20 rows of the merged frame.
df.head(n=20)

Unnamed: 0,account_id,category,balance,debits,credits,label
0,a21sanZjcHRlbGl8f2ZoZXJ2ZWx4a3pmdmh9aHp2emttbmVi,ATM/Cash Withdrawals,-100.0,1,0,0
1,a21sanZjcHRlbGl8f2ZoZXJ2ZWx4a3pmdmh9aHp2emttbmVi,Checks,-39182.84,39,0,0
2,a21sanZjcHRlbGl8f2ZoZXJ2ZWx4a3pmdmh9aHp2emttbmVi,Gasoline/Fuel,0.0,0,0,0
3,a21sanZjcHRlbGl8f2ZoZXJ2ZWx4a3pmdmh9aHp2emttbmVi,Groceries,0.0,0,0,0
4,a21sanZjcHRlbGl8f2ZoZXJ2ZWx4a3pmdmh9aHp2emttbmVi,Mortgages,0.0,0,0,0
5,a21sanZjcHRlbGl8f2ZoZXJ2ZWx4a3pmdmh9aHp2emttbmVi,Restaurants/Dining,-26.36,1,0,0
6,a21sanZjcHRlbGl8f2ZoZXJ2ZWx4a3pmdmh9aHp2emttbmVi,Sales,0.0,0,0,0
7,a21sanZjcHRlbGl8f2ZoZXJ2ZWx4a3pmdmh9aHp2emttbmVi,Taxes,-6907.52,1,0,0
8,a21sanZjcHRlbGl8f2ZoZXJ2ZWx4a3pmdmh9aHp2emttbmVi,Wages Paid,0.0,0,0,0
9,a21sanZjcHRlbGl8f2ZoZXJ2ZWx4a3pmdmh9aHp2emttbmVi,Overall,11574.41,240,192,0


In [7]:
# Class balance of the label column.
# NOTE(review): this counts merged rows; the preview shows several category
# rows per account_id, so these are not counts of unique accounts.
df.loc[:, 'label'].value_counts()

0    8520
1    1480
Name: label, dtype: int64

In [8]:
# First 20 merged rows whose label equals 1.
is_positive = df['label'].eq(1)
df.loc[is_positive].head(n=20)

Unnamed: 0,account_id,category,balance,debits,credits,label
20,a21xbnpzfWdlbHl7fWZod3BlZW1tbXBmenNqYXtqfGxtaXhq,ATM/Cash Withdrawals,-1004.95,3,0,1
21,a21xbnpzfWdlbHl7fWZod3BlZW1tbXBmenNqYXtqfGxtaXhq,Checks,-27028.16,70,0,1
22,a21xbnpzfWdlbHl7fWZod3BlZW1tbXBmenNqYXtqfGxtaXhq,Gasoline/Fuel,-852.7,19,0,1
23,a21xbnpzfWdlbHl7fWZod3BlZW1tbXBmenNqYXtqfGxtaXhq,Groceries,-145.09,3,0,1
24,a21xbnpzfWdlbHl7fWZod3BlZW1tbXBmenNqYXtqfGxtaXhq,Mortgages,0.0,0,0,1
25,a21xbnpzfWdlbHl7fWZod3BlZW1tbXBmenNqYXtqfGxtaXhq,Restaurants/Dining,-246.43,9,0,1
26,a21xbnpzfWdlbHl7fWZod3BlZW1tbXBmenNqYXtqfGxtaXhq,Sales,0.0,0,0,1
27,a21xbnpzfWdlbHl7fWZod3BlZW1tbXBmenNqYXtqfGxtaXhq,Taxes,-249.93,2,0,1
28,a21xbnpzfWdlbHl7fWZod3BlZW1tbXBmenNqYXtqfGxtaXhq,Wages Paid,0.0,0,0,1
29,a21xbnpzfWdlbHl7fWZod3BlZW1tbXBmenNqYXtqfGxtaXhq,Overall,7225.57,374,36,1
