# Loading models, lists and imputers

In [1]:
import os
import pandas as pd
import numpy as np
import pickle
from sklearn.metrics import f1_score

In [2]:
# Colab-only step: mount Google Drive at /content/drive so that files stored
# there can be opened from this notebook.
from google.colab import drive

drive.mount('/content/drive')    

Mounted at /content/drive


In [3]:
# Make the Drive root the working directory: every pickle and CSV opened
# further down is referenced by a bare relative file name.
%cd /content/drive/MyDrive/              

/content/drive/MyDrive


In [6]:
def loading_assets():
    """Load every pickled artifact into module-level globals.

    All files are read from the current working directory. After the call the
    following globals are defined:

    * ``cat_imputer``, ``num_imputer`` -- objects whose ``transform`` method
      is used by ``pre_process``.
    * ``mice_list``, ``cat``, ``new_var``, ``cols``, ``low_correlation``,
      ``top_10``, ``final_cols`` -- collections of column names consumed by
      ``pre_process``.
    * ``model_1`` .. ``model_4`` -- base models used by ``prediction_dataset``.
    * ``metamodel`` -- the model used by ``meta_predict``.

    Raises:
        FileNotFoundError: if any of the expected files is missing.
    """
    global cat_imputer, mice_list, cat, new_var, num_imputer, cols, low_correlation, top_10, final_cols, model_1, model_2, model_3, model_4, metamodel

    def _load(filename):
        # The context manager closes the handle even when unpickling raises;
        # a bare ``pickle.load(open(...))`` leaves that to the garbage collector.
        with open(filename, 'rb') as handle:
            return pickle.load(handle)

    # SECURITY: unpickling can execute arbitrary code. Only load files that
    # were produced by a trusted training run.
    cat_imputer = _load('cat_imputer.dat')
    mice_list = _load('mice_list.pkl')
    cat = _load('cat_cols.pkl')
    new_var = _load('new_var.pkl')
    num_imputer = _load('num_imputer.dat')
    cols = _load('all_cols.pkl')
    low_correlation = _load('low_correlation.pkl')
    top_10 = _load('top_10.pkl')
    final_cols = _load('final_cols.pkl')
    model_1 = _load('base_xg0.dat')
    model_2 = _load('base_gnb1.dat')
    model_3 = _load('base_xg2.dat')
    model_4 = _load('base_gnb3.dat')
    metamodel = _load('metamodel.dat')

# Populate the module-level globals (imputers, column lists, models) that the
# functions defined below read.
loading_assets()

# Pre-processing and other functions

In [7]:
def new_features(train, feature):
    """Add per-``uid`` aggregates of ``feature`` to ``train`` in place.

    Two columns are written: ``feature + 'mean'`` and ``feature + 'sum'``.
    Each row receives the statistic of ``feature`` computed over all rows
    that share its ``uid`` value.

    Args:
        train: DataFrame containing a ``uid`` column and the ``feature`` column.
        feature: name of the column to aggregate.

    Returns:
        None; ``train`` is modified.
    """
    for statistic in ('mean', 'sum'):
        per_uid = train.groupby('uid', as_index=False)[feature]
        train[feature + statistic] = per_uid.transform(statistic)

In [8]:
def pre_process(test):
    """Turn a merged transaction/identity frame into the model feature matrix.

    The steps run in this order:

    1. Replace ``-`` with ``_`` in every column name.
    2. Integer-encode the columns listed in ``cat`` with ``pd.factorize``.
    3. Run ``cat_imputer.transform`` over the ``mice_list`` columns.
    4. Fill missing values with 2 in each ``new_var`` column and with 0 in
       ``P_emaildomain``.
    5. Run ``num_imputer.transform`` over the ``cols`` columns.
    6. Add a ``<name>sincos`` column (``cos(x) * sin(x)``) for every column in
       ``low_correlation``.
    7. Build ``uid`` from ``P_emaildomain``, ``M5`` and ``ProductCD``, then add
       the per-``uid`` mean/sum columns for every entry of ``top_10``.
    8. Add ``tday`` (``TransactionDT`` divided by 86400) and
       ``TransactionAmtcount`` (per-``uid`` count of ``TransactionAmt``).
    9. Select ``final_cols`` and cast to float.

    The frame passed in is modified in place by steps 1-8; only the final
    selection is a new object.

    NOTE(review): ``pd.factorize`` numbers values by order of first appearance
    in *this* frame. Whether those codes match the encoding the models were
    trained on cannot be determined from this function -- confirm.

    Args:
        test: DataFrame holding the merged transaction and identity columns.

    Returns:
        DataFrame restricted to ``final_cols`` with float dtype.
    """
    missing_marker = 2
    seconds_per_day = 24 * 60 * 60

    test.columns = [name.replace('-', '_') for name in test.columns]

    # Categorical columns: integer codes first, then the fitted imputer.
    category_codes = test[cat].apply(lambda column: pd.factorize(column)[0])
    test.loc[:, cat] = category_codes
    imputed_categories = cat_imputer.transform(test[mice_list])
    test.loc[:, mice_list] = imputed_categories

    for column in new_var:
        test.loc[:, column] = test[column].fillna(missing_marker)
    test.loc[:, 'P_emaildomain'] = test['P_emaildomain'].fillna(0)

    imputed_numeric = num_imputer.transform(test[cols])
    test.loc[:, cols] = imputed_numeric

    for column in low_correlation:
        derived_name = column + 'sincos'
        values = test[column]
        test[derived_name] = np.cos(values) * np.sin(values)

    email_part = test['P_emaildomain'].astype(str)
    m5_part = test['M5'].astype(str)
    product_part = test['ProductCD'].astype(str)
    test['uid'] = email_part + '_' + m5_part + '_' + product_part

    for entry in top_10:
        new_features(test, ''.join(entry))

    test['tday'] = test['TransactionDT'] / seconds_per_day
    amount_by_uid = test.groupby('uid', as_index=False)['TransactionAmt']
    test['TransactionAmtcount'] = amount_by_uid.transform('count')

    return test[final_cols].astype(float)

In [9]:
def prediction_dataset(sec_test_x):
    """Collect the four base models' predictions as columns of one array.

    Args:
        sec_test_x: feature matrix passed unchanged to each model's
            ``predict`` method.

    Returns:
        Array built with ``np.stack(..., axis=1)``: column ``j`` holds the
        output of ``model_{j+1}.predict(sec_test_x)``.
    """
    base_models = (model_1, model_2, model_3, model_4)
    per_model = [model.predict(sec_test_x) for model in base_models]
    return np.stack(per_model, axis=1)

In [19]:
def meta_predict(sec_test_x):
    """Run the stacked ensemble and describe each row's prediction in words.

    The base-model outputs from ``prediction_dataset`` are fed to
    ``metamodel.predict``.

    Args:
        sec_test_x: feature matrix for the base models.

    Returns:
        Tuple ``(result, meta_prediction)``. ``result`` maps
        ``'row number-<k>'`` to a message for every row predicted as 0 or 1;
        a row with any other prediction value gets no entry.
        ``meta_prediction`` is the raw output of ``metamodel.predict``.
    """
    stacked = prediction_dataset(sec_test_x)
    meta_prediction = metamodel.predict(stacked)

    result = {}
    for row, label in enumerate(meta_prediction):
        if label == 0:
            message = 'Transaction is not fraud'
        elif label == 1:
            message = 'Transaction is fraud'
        else:
            continue
        result['row number-' + str(row)] = message

    return result, meta_prediction

In [11]:
def metric_eval(test_y, meta_prediction):
    """Print the per-class F1 score for the non-fraud and fraud classes.

    ``f1_score(..., average=None)`` yields one score per class it sees. When
    it yields a single score, that score is attributed to the fraud class if
    ``test_y`` contains any positive label and to the non-fraud class
    otherwise; the other class is reported as 0.

    Args:
        test_y: true labels.
        meta_prediction: predicted labels.

    Returns:
        None; the two scores are printed.
    """
    scores = f1_score(test_y, meta_prediction, average=None)

    if len(scores) > 1:
        not_fraud_f1, fraud_f1 = scores[0], scores[1]
    elif sum(test_y) > 0:
        # Only the fraud class is present.
        not_fraud_f1, fraud_f1 = 0, scores[0]
    else:
        # Only the non-fraud class is present.
        not_fraud_f1, fraud_f1 = scores[0], 0

    print('Transaction not being fraud f1 score:', not_fraud_f1)
    print('Transaction being fraud f1 score:', fraud_f1)

# Final functions

In [20]:
def final_fun_1(test_transaction,test_identity):
   """Predict fraud for raw transaction and identity tables.

   The identity table is left-joined onto the transaction table by
   ``TransactionID``, the merged frame goes through ``pre_process``, and the
   resulting values are passed to ``meta_predict``.

   Args:
       test_transaction: DataFrame of transactions.
       test_identity: DataFrame of identity records.

   Returns:
       The ``(result, meta_prediction)`` tuple produced by ``meta_predict``.
   """
   merged = pd.merge(test_transaction, test_identity, how='left', on='TransactionID')
   features = pre_process(merged)
   return meta_predict(features.values)

In [21]:
def final_fun_2(test_transaction,test_identity,test_y):
   """Predict fraud for the given tables and print per-class F1 scores.

   Prediction is delegated to ``final_fun_1`` so the merge / pre-process /
   predict pipeline is defined in one place only; the predictions are then
   scored against ``test_y`` with ``metric_eval``.

   Args:
       test_transaction: DataFrame of transactions.
       test_identity: DataFrame of identity records.
       test_y: true labels, one per row of the predictions.

   Returns:
       None; the scores are printed by ``metric_eval``.
   """
   # The textual result dict is not needed for scoring.
   _, meta_prediction = final_fun_1(test_transaction, test_identity)
   metric_eval(test_y, meta_prediction)

In [14]:
# Despite the test_* names, these are the *training* CSVs; the transaction
# file carries the isFraud label column that the scoring demo below relies on.
test_tran = pd.read_csv('train_transaction.csv')
test_id = pd.read_csv('train_identity.csv')

In [22]:
# Smoke test: predict for the first transaction row only.
final_fun_1(test_tran[:1], test_id[:1])

({'row number-0': 'Transaction is not fraud'}, array([0]))

In [24]:
# Score the same single row against its isFraud label.
final_fun_2(test_tran[:1], test_id[:1],test_tran['isFraud'][:1])

Transaction not being fraud f1 score: 1.0
Transaction being fraud f1 score: 0


In [23]:
# Score the first 100 transactions. test_id[:100] is simply the first 100
# identity rows; the left join inside final_fun_2 matches them by TransactionID.
final_fun_2(test_tran[:100], test_id[:100],test_tran['isFraud'][:100])

Transaction not being fraud f1 score: 1.0
Transaction being fraud f1 score: 0
