In [1]:
import os

import cudf
import numpy as np
import gc

import time
import tritonclient.grpc as triton_grpc
import tritonclient.http as httpclient
from tritonclient.utils import triton_to_np_dtype
from tritonclient import utils as triton_utils
# Connection settings for the Triton inference server.
HOST = 'localhost'
PORT = 8001  # port handed to the gRPC client together with HOST
TIMEOUT = 60  # NOTE(review): presumably a request timeout in seconds — confirm

In [2]:
# Root directory holding train.parquet and train_labels.csv.
# Set the AMEX_DATA_DIR environment variable to point at another location;
# when it is unset the original default path is used unchanged.
PATH = os.environ.get('AMEX_DATA_DIR', '/raid/data/ml/kaggle/amex')

In [3]:
%%time

train = cudf.read_parquet(f'{PATH}/train.parquet')
train['cid'], _ = train.customer_ID.factorize()
train['S_2'] = cudf.to_datetime(train['S_2'])

mask = train['cid']%4 == 0
test = train.loc[mask]
test = test.sort_values(['cid','S_2'])
test = test.reset_index(drop=True)
del train
gc.collect()


CPU times: user 2.28 s, sys: 1.32 s, total: 3.6 s
Wall time: 3.61 s


1189

In [4]:
from rnn import TestRnnDataset,load_yaml
from torch.utils.data import DataLoader

In [5]:
# Load the run configuration from rnn.yaml (load_yaml comes from the local
# rnn module); config.batch_size is reused when building the DataLoader.
config = load_yaml('rnn.yaml')

Config(model='rnn', epochs=5, batch_size=512, seq=5, H1=512, H2=128, layers=1, E=192, dropout=0, lr=0.001, wd=0.0, tcols='all')


In [6]:
# Wrap the held-out frame in the dataset class from the local rnn module.
# The recorded output reports customer_ID, cid and S_2 as unused columns.
test_ds = TestRnnDataset(test,config)

RnnDataset not used columns:
['customer_ID', 'cid', 'S_2']


In [7]:
# Loader settings: batch size comes from the YAML config, worker count is fixed.
batch_size = config.batch_size
cpu_workers = 4

# Iterate the dataset in its stored order and keep the final partial batch,
# so every dataset item is yielded exactly once.
test_dl = DataLoader(
    dataset=test_ds,
    batch_size=batch_size,
    num_workers=cpu_workers,
    shuffle=False,
    drop_last=False,
)

In [8]:
# Pull the first batch from the loader for the single-batch walkthrough.
batch = next(iter(test_dl))

In [9]:
# Show the shape and dtype of the sample batch.
batch.shape,batch.dtype

(torch.Size([512, 5, 177]), torch.float32)

In [10]:
# gRPC client for the Triton server at HOST:PORT; triton_predict sends its
# requests through this module-level object.
client = triton_grpc.InferenceServerClient(url=f'{HOST}:{PORT}')

In [11]:
def triton_predict(model_name, arr):
    """Run one synchronous Triton inference request and return its output.

    Parameters
    ----------
    model_name : str
        Name of the deployed model to call (version '1' is requested).
    arr : array-like
        Input batch. It is coerced to a C-contiguous float32 NumPy array,
        because the request declares the input tensor as 'FP32' and the
        client rejects arrays whose dtype does not match that declaration.

    Returns
    -------
    numpy.ndarray
        Contents of the 'output__0' tensor of the response.

    Raises
    ------
    tritonclient.utils.InferenceServerException
        If the server reports an error or the request exceeds TIMEOUT
        seconds (client-side deadline).
    """
    # No copy is made when arr is already a contiguous float32 ndarray.
    arr = np.ascontiguousarray(arr, dtype=np.float32)

    triton_input = triton_grpc.InferInput('input__0', list(arr.shape), 'FP32')
    triton_input.set_data_from_numpy(arr)
    triton_output = triton_grpc.InferRequestedOutput('output__0')

    # Bound the call with the module-level TIMEOUT so a stalled server
    # surfaces as an error instead of blocking the notebook indefinitely.
    response = client.infer(
        model_name,
        model_version='1',
        inputs=[triton_input],
        outputs=[triton_output],
        client_timeout=TIMEOUT,
    )
    return response.as_numpy('output__0')

In [12]:
# Send the sample batch (converted to a NumPy array) to the
# 'AutoRegressiveRNN' model and show the shape of what comes back.
rnn_fea = triton_predict('AutoRegressiveRNN',batch.numpy())
rnn_fea.shape

(512, 13, 177)

In [13]:
# Feature matrix for the second model: the last step along axis 1 of the
# raw batch, followed column-wise by the last step of the RNN output.
x = np.concatenate([batch[:, -1, :], rnn_fea[:, -1, :]], axis=1)
x.shape

(512, 354)

In [14]:
# Score the combined feature matrix with the 'amex_xgb' model and show the
# shape of the returned array.
pred = triton_predict('amex_xgb',x)
pred.shape

(512, 2)

In [15]:
%%time

yps = []
for batch in test_dl:
    rnn_fea = triton_predict('AutoRegressiveRNN',batch.numpy())
    x = np.hstack([batch[:,-1,:],rnn_fea[:,-1,:]])
    pred = triton_predict('amex_xgb',x)
    yps.append(pred)
yp = np.vstack(yps)

CPU times: user 1.41 s, sys: 678 ms, total: 2.09 s
Wall time: 5.82 s


In [16]:
# Shape of the stacked predictions.
yp.shape

(114729, 2)

In [None]:
%%time
test = test.drop_duplicates('cid')
trainl = cudf.read_csv(f'{PATH}/train_labels.csv')
test = test.merge(trainl, on='customer_ID', how='left')
test = test.sort_values('cid')
test.head()

CPU times: user 85.8 ms, sys: 104 ms, total: 190 ms
Wall time: 188 ms


In [None]:
# Pull the labels out of the frame as a host array
# (.get() copies the device array returned by .values to host memory).
y_test = test['target'].values.get()
# Labels and predictions are passed to the metric as two parallel arrays,
# so a row-count mismatch must fail loudly here instead of skewing the score.
assert y_test.shape[0] == yp.shape[0], (
    f'label/prediction row mismatch: {y_test.shape[0]} vs {yp.shape[0]}'
)
y_test.shape

In [None]:
from utils import amex_metric_np

In [None]:
# Evaluate the labels against column 1 of the prediction array with the
# metric from the local utils module.
# NOTE(review): column 1 is presumably the positive-class score — confirm.
amex_metric_np(y_test,yp[:,1])