# ***Team Modelling***

In [76]:
# Import modules

import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt

from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import precision_score, recall_score, roc_auc_score, roc_curve, confusion_matrix, make_scorer, matthews_corrcoef
from sklearn.model_selection import train_test_split, RandomizedSearchCV, GridSearchCV
from sklearn.preprocessing import StandardScaler, MinMaxScaler
from Error_analysis import calculate_cost

# Single random seed shared by every stochastic step in this notebook.
RSEED = 42



In [77]:
# Load the preprocessed training data into a DataFrame.

df = pd.read_csv(filepath_or_buffer='data/training_preprocessed.csv')

In [78]:
# Split the frame into target (`y`) and features (`X`).
# `drop(columns=...)` returns a new frame, so `df` is left untouched and this
# cell can be re-run without failing on already-removed columns. `X` is also
# no longer an alias of `df`.
y = df["FraudResult"]
X = df.drop(columns=["FraudResult", "TransactionId"])

In [79]:
# Hold out a test split; stratifying on `y` keeps the class proportions the
# same in both splits, and the seed makes the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    stratify=y,
    random_state=RSEED,
)

In [80]:
# Baseline random forest with default hyper-parameters.
# The forest is seeded with the notebook-wide RSEED so that the baseline
# metrics below are reproducible from one run to the next.
rf = RandomForestClassifier(random_state=RSEED)

rf.fit(X_train, y_train)

In [81]:
# Hard class-label predictions of the baseline forest on the hold-out split.
y_pred = rf.predict(X=X_test)

In [82]:
# Confusion matrix of the baseline forest (rows: true class, columns: predicted class).
confusion_matrix(y_true=y_test, y_pred=y_pred)

array([[23861,     7],
       [   18,    30]])

In [83]:
# Print recall and ROC AUC of the baseline forest, one value per line.
# `y_pred` holds hard class labels (it comes from `rf.predict`), so the ROC AUC
# printed here is computed from labels rather than from predicted probabilities.
for metric in (recall_score, roc_auc_score):
    print(metric(y_test, y_pred))

0.625
0.8123533601474778


In [84]:
# Instantiate the feature scalers. Only `stdsc` is used in the cells visible
# below; `minmax` is created but not referenced in this part of the notebook.
minmax, stdsc = MinMaxScaler(), StandardScaler()

In [85]:
# Standardise `ModAmount`: the scaler is fitted on the training split only and
# then applied unchanged to the test split, so no test statistics leak into
# the transformation.
# `.assign` builds new frames and rebinds the names instead of writing a column
# into the frames returned by train_test_split, which avoids pandas'
# SettingWithCopyWarning. `.ravel()` flattens the (n, 1) scaler output to 1-D.
# NOTE: `rf` above was fitted on the unscaled values; from this cell onwards
# X_train / X_test hold the standardised `ModAmount`.
X_train = X_train.assign(ModAmount=stdsc.fit_transform(X_train[['ModAmount']]).ravel())
X_test = X_test.assign(ModAmount=stdsc.transform(X_test[['ModAmount']]).ravel())

In [86]:
""" rand_param_grid = {'n_estimators': np.linspace(10, 100).astype(int),
                    'max_depth': [None] + list(np.linspace(3, 100).astype(int)),
                    'max_features': ['sqrt', None] + list(np.arange(0.5, 1, 0.1)),
                    'max_leaf_nodes': [None] + list(np.linspace(10, 50).astype(int)),
                    'min_samples_split': [2, 5, 10, 50]
                    }

scorer = make_scorer(matthews_corrcoef)

rand_grid = RandomizedSearchCV(rf, param_distributions=rand_param_grid, n_jobs=-1, scoring=scorer, n_iter=100, verbose=0, random_state=RSEED) """;

In [87]:
# Run the randomised search on the training split (long-running cell).
rand_grid.fit(X=X_train, y=y_train)

KeyboardInterrupt: 

In [None]:
# Display the hyper-parameter combination selected by the search.
rand_grid.best_params_

In [None]:
# Display the estimator the search selected as best.
rand_grid.best_estimator_

In [None]:
# Hard class-label predictions of the tuned forest on the hold-out split.
y_pred_grid = rand_grid.best_estimator_.predict(X=X_test)

In [None]:
# Print recall, ROC AUC and Matthews correlation of the tuned forest, one per line.
# `y_pred_grid` holds hard class labels, so the ROC AUC here is computed from
# labels rather than from predicted probabilities.
for metric in (recall_score, roc_auc_score, matthews_corrcoef):
    print(metric(y_test, y_pred_grid))

In [None]:
# Confusion matrix of the tuned forest (rows: true class, columns: predicted class).
confusion_matrix(y_true=y_test, y_pred=y_pred_grid)

In [None]:
# Evaluate the tuned model's predictions with the project-local cost helper
# imported from `Error_analysis`.
# NOTE(review): `X_test['ModAmount']` was standardised earlier in this notebook;
# if `calculate_cost` reads that column as a monetary amount it will see scaled
# values. Confirm against the helper's implementation.
calculate_cost(X_test, y_test, y_pred_grid)