In [1]:
import os

import cudf
import numpy as np
import gc

import time
import tritonclient.grpc as triton_grpc
import tritonclient.http as httpclient
from tritonclient.utils import triton_to_np_dtype
from tritonclient import utils as triton_utils
# Host and port of the Triton inference server; combined into the gRPC client URL below.
HOST = 'localhost'
PORT = 8001
# NOTE(review): presumably a per-request timeout in seconds — confirm where it is meant to be applied.
TIMEOUT = 60

In [2]:
# Root directory of the AMEX data; the notebook reads train.parquet and
# train_labels.csv from it. Set the AMEX_DATA_DIR environment variable to point
# at another location; when it is unset (or empty) the original path is used.
PATH = os.environ.get('AMEX_DATA_DIR') or '/raid/data/ml/kaggle/amex'

In [3]:
%%time

# Load the full training table onto the GPU.
train = cudf.read_parquet(f'{PATH}/train.parquet')
# Replace the customer_ID strings by integer codes, stored as `cid`.
train['cid'], _ = train['customer_ID'].factorize()

# Hold out every customer whose code is a multiple of 4, ordered by `cid`
# and re-indexed from zero.
mask = (train['cid'] % 4) == 0
test = (
    train.loc[mask]
    .sort_values('cid')
    .reset_index(drop=True)
)

# The full table is no longer needed; release it and trigger a collection.
del train
gc.collect()


CPU times: user 2.06 s, sys: 1.42 s, total: 3.48 s
Wall time: 3.48 s


1197

In [4]:
from rnn import TestRnnDataset,load_yaml
from torch.utils.data import DataLoader

In [5]:
# Read the RNN settings from rnn.yaml; later cells take config.batch_size from it
# and hand the whole object to the dataset.
config = load_yaml('rnn.yaml')

Config(model='rnn', epochs=5, batch_size=512, seq=5, H1=512, H2=128, layers=1, E=192, dropout=0, lr=0.001, wd=0.0, tcols='all')


In [6]:
# Wrap the held-out frame in the project's dataset class (imported from rnn)
# so it can be served through a DataLoader.
test_ds = TestRnnDataset(test,config)

RnnDataset not used columns:
['customer_ID', 'S_2', 'cid']


In [7]:
# Batch size comes from the YAML config; worker count is fixed for this notebook.
batch_size = config.batch_size
cpu_workers = 4

# Sequential, complete pass over the test set: no shuffling, and the final
# (possibly smaller) batch is kept.
test_dl = DataLoader(
    test_ds,
    batch_size=batch_size,
    num_workers=cpu_workers,
    shuffle=False,
    drop_last=False,
)

In [8]:
# Pull a single batch from the loader; the next cells use it to try out the Triton models.
batch = next(iter(test_dl))

In [9]:
# Display the sample batch's shape and dtype.
batch.shape,batch.dtype

(torch.Size([512, 5, 177]), torch.float32)

In [10]:
# gRPC client for the Triton server at HOST:PORT; triton_predict reads this module-level object.
client = triton_grpc.InferenceServerClient(url=f'{HOST}:{PORT}')

In [11]:
def triton_predict(model_name, arr, model_version='1', timeout=None):
    """Run one synchronous inference request against the Triton server.

    Sends `arr` as the FP32 tensor 'input__0' through the module-level gRPC
    `client` and returns the tensor 'output__0' from the response.

    Parameters
    ----------
    model_name : str
        Name of the model to query on the server.
    arr : array-like
        Input data. It is converted to a float32 NumPy array because the
        request declares the tensor as 'FP32'; a float32 ndarray is passed
        through unchanged.
    model_version : str, optional
        Model version to query. Defaults to '1'.
    timeout : float or None, optional
        Client-side deadline in seconds for the request. ``None`` falls back
        to the module-level ``TIMEOUT``.

    Returns
    -------
    numpy.ndarray
        Whatever ``response.as_numpy('output__0')`` yields for the response.

    Raises
    ------
    Whatever ``client.infer`` raises (per the Triton client docs, an
    ``InferenceServerException`` on failure or when the deadline elapses).
    """
    # The wire dtype is fixed to FP32, so make the payload match it; otherwise
    # float64 arrays or plain lists are rejected by set_data_from_numpy.
    arr = np.asarray(arr, dtype=np.float32)

    triton_input = triton_grpc.InferInput('input__0', arr.shape, 'FP32')
    triton_input.set_data_from_numpy(arr)
    triton_output = triton_grpc.InferRequestedOutput('output__0')

    # Bound the call so an unresponsive server cannot block the notebook forever.
    deadline = TIMEOUT if timeout is None else timeout
    response = client.infer(
        model_name,
        model_version=model_version,
        inputs=[triton_input],
        outputs=[triton_output],
        client_timeout=deadline,
    )
    return response.as_numpy('output__0')

In [12]:
# Try the 'AutoRegressiveRNN' model on the sample batch and look at the shape it returns.
rnn_fea = triton_predict('AutoRegressiveRNN',batch.numpy())
rnn_fea.shape

(512, 13, 177)

In [13]:
# Build the tree-model input: the last time step of the raw batch next to the
# last time step of the RNN output, joined along the feature axis.
x = np.concatenate([batch[:, -1, :], rnn_fea[:, -1, :]], axis=1)
x.shape

(512, 354)

In [14]:
# Try the 'amex_xgb' model on the combined features and look at the shape it returns.
pred = triton_predict('amex_xgb',x)
pred.shape

(512, 2)

In [15]:
%%time

# Two-stage inference over the whole test loader: the RNN output feeds the
# XGBoost model, and the per-batch predictions are stacked at the end.
yps = []
for batch in test_dl:
    # Stage 1: RNN features for this batch.
    rnn_fea = triton_predict('AutoRegressiveRNN', batch.numpy())
    # Stage 2: last raw step + last RNN step, side by side, scored by XGBoost.
    x = np.concatenate([batch[:, -1, :], rnn_fea[:, -1, :]], axis=1)
    pred = triton_predict('amex_xgb', x)
    yps.append(pred)

# One row per sample, in loader order.
yp = np.vstack(yps)

CPU times: user 1.34 s, sys: 741 ms, total: 2.08 s
Wall time: 5.39 s


In [16]:
# Display the shape of the stacked predictions.
yp.shape

(114729, 2)

In [17]:
%%time
# Reduce the frame to a single row per customer before attaching the labels.
test = test.drop_duplicates('cid')
trainl = cudf.read_csv(f'{PATH}/train_labels.csv')
# Make the cell safe to re-run: if an earlier run already attached `target`,
# a second merge would produce target_x/target_y and `test['target']` would
# disappear. Drop any stale label column first.
test = test[[col for col in test.columns if col != 'target']]
test = test.merge(trainl, on='customer_ID', how='left')
# Re-sort by cid after the merge.
# NOTE(review): presumably this matches the order in which predictions were produced — confirm against TestRnnDataset.
test = test.sort_values('cid')
test.head()

CPU times: user 80 ms, sys: 112 ms, total: 192 ms
Wall time: 191 ms


Unnamed: 0,customer_ID,S_2,P_2,D_39,B_1,B_2,R_1,S_3,D_41,B_3,...,D_138,D_139,D_140,D_141,D_142,D_143,D_144,D_145,cid,target
24992,0000099d6bd597052cdcda90ffabf56573fe9d7c79be5f...,2017-03-09,0.938469,0,0.008724,1.006838,0.009228,0.124035157,0.0,0.004709,...,-1,0,0,0.0,,0,0.00061,0,0,0
24993,00007889e4fcd2614b6cbe7f8f3d2e5c728eca32d9eb8a...,2017-03-30,0.936842,0,0.003433,0.818691,0.007243,0.166190118,0.0,0.005927,...,-1,0,0,0.0,,0,0.003867,0,4,0
24994,0000f99513770170a1aba690daeeb8a96da4a39f11fc27...,2017-03-15,0.400025,0,0.954861,0.02389,0.00314,,0.0,1.175081,...,-1,1,0,0.870115,0.141213953,1,0.008945,8,8,1
24995,0001812036f1558332e5c0880ecbad70b13a6f28ab04a8...,2017-03-27,0.410251,0,0.525142,0.018226,0.006648,1.607070804,0.0,0.266503,...,-1,0,0,0.0,,0,0.005431,0,12,1
24996,0002d381bdd8048d76719042cf1eb63caf53b636f8aacd...,2017-03-19,1.007809,0,0.017698,0.816354,0.000443,0.345746458,0.0,0.007117,...,-1,0,0,0.0,,0,0.003225,0,16,0


In [18]:
# Extract the labels as an array; `.values` on the cuDF column gives a device
# array and `.get()` copies it to host memory for the NumPy-based metric.
y_test = test['target'].values.get()
y_test.shape

(114729,)

In [19]:
from utils import amex_metric_np

In [20]:
# Score the predictions with the project's metric, using column 1 of the model output.
# NOTE(review): column 1 is presumably the positive-class score — confirm against the amex_xgb model config.
amex_metric_np(y_test,yp[:,1])

0.7574333868122294