<a href="https://www.kaggle.com/code/anirudhg15/tps-oct-21-part-2-blending-w-nvidia-rapids?scriptVersionId=135652776" target="_blank"><img align="left" alt="Kaggle" title="Open in Kaggle" src="https://kaggle.com/static/images/open-in-kaggle.svg"></a>

![Nvidia Rapids](https://developer.nvidia.com/sites/default/files/pictures/2018/rapids/rapids-logo.png)

# PART 2 - Blending with NVIDIA RAPIDS

**This notebook is built on top of my earlier work on the competition with Nvidia Rapids - https://www.kaggle.com/anirudhg15/tps-oct-21-baseline-w-nvidia-rapids**

For an explanation of NVIDIA RAPIDS and a much more beginner-friendly introduction, please refer to that notebook.

In [None]:
import cudf
import xgboost as xgb
from cuml import train_test_split
from cuml import metrics
from cuml.ensemble import RandomForestClassifier as cuRF
from cuml.linear_model import LogisticRegression
import gc

In [None]:
# Show the directory tree under ../input (shell command run through IPython's `!`).
!tree ../input

In [None]:
# Read the three competition CSV files with cuDF.
# Every column (including 'id') is parsed as float32.
read_opts = {'dtype': 'float32'}

train = cudf.read_csv('../input/tabular-playground-series-oct-2021/train.csv', **read_opts)
test = cudf.read_csv('../input/tabular-playground-series-oct-2021/test.csv', **read_opts)
submission = cudf.read_csv('../input/tabular-playground-series-oct-2021/sample_submission.csv', **read_opts)

In [None]:
# Row-wise summary statistics ('std', 'min', 'max') added as extra features.
#
# The statistics are computed from the raw feature columns only:
#   * 'id' and 'target' are excluded, so the label cannot leak into the
#     features and train/test statistics are taken over the same columns;
#   * all three statistics come from one snapshot of the raw features, so
#     'min' and 'max' never include the freshly added 'std' / 'min' columns;
#   * previously added statistic columns are excluded as well, so re-running
#     this cell yields the same values.
STAT_COLS = ['std', 'min', 'max']
NON_FEATURE_COLS = ['id', 'target'] + STAT_COLS
feature_cols = [col for col in train.columns if col not in NON_FEATURE_COLS]

for frame in (train, test):
    # Snapshot taken before any statistic column is (re)assigned on `frame`.
    raw_features = frame[feature_cols]
    frame['std'] = raw_features.std(axis=1)
    frame['min'] = raw_features.min(axis=1)
    frame['max'] = raw_features.max(axis=1)

# Drop the last snapshot so it does not linger in the kernel namespace.
del raw_features

In [None]:
# Separate the feature matrix (everything except 'target') from the label
# column, then drop the combined frame and trigger a garbage collection.
X = train.drop(columns=['target'])
y = train['target']
del train
gc.collect()

In [None]:
# Hold out 10% of the rows for validation (fixed random_state), then release
# the unsplit feature matrix and labels.
X_train, X_val, y_train, y_val = train_test_split(
    X,
    y,
    test_size=0.1,
    random_state=42,
)
del X, y
gc.collect()

The XGBoost parameter values below are taken from https://www.kaggle.com/mohammadkashifunique/tsp-single-xgboost-model

In [None]:
# Keyword arguments for xgb.XGBClassifier, kept in the original key order.
params = dict(
    # Tree shape and boosting schedule
    max_depth=6,
    n_estimators=9500,
    learning_rate=0.007279718158350149,
    # Row / column subsampling
    subsample=0.7,
    colsample_bytree=0.2,
    colsample_bylevel=0.6000000000000001,
    # Regularisation
    min_child_weight=56.41980735551558,
    reg_lambda=75.56651890088857,
    reg_alpha=0.11766857055687065,
    gamma=0.6407823221122686,
    # Booster, metric and GPU settings
    booster='gbtree',
    eval_metric='auc',
    tree_method='gpu_hist',
    predictor='gpu_predictor',
    use_label_encoder=False,
)

In [None]:
# Fit the XGBoost classifier on the training split. The held-out split is
# passed as the evaluation set, with early stopping after 100 rounds and
# verbose logging enabled.
validation_pairs = [(X_val, y_val)]

xgb_classifier = xgb.XGBClassifier(**params)
xgb_classifier.fit(
    X_train,
    y_train,
    eval_set=validation_pairs,
    early_stopping_rounds=100,
    verbose=True,
)

In [None]:
# Start the submission frame with the ids from the sample submission and
# store the last column of the XGBoost class probabilities as 'target_1'.
xgb_test_proba = xgb_classifier.predict_proba(test)

sub = cudf.DataFrame()
sub['id'] = submission['id']
sub['target_1'] = xgb_test_proba[:, -1]
sub.head(n=5)

In [None]:
# Keyword arguments for the cuML random forest classifier.
curf_params = dict(
    max_depth=16,
    n_estimators=2000,
    random_state=42,
    n_streams=1,
    min_samples_leaf=10,
    min_samples_split=50,
)

In [None]:
# Fit the random forest on the training split and report ROC AUC on the
# validation split, scoring with the probability column labelled 1.
rf = cuRF(**curf_params)
rf.fit(X_train, y_train)

predict = rf.predict_proba(X_val)
val_scores = predict[1]
metrics.roc_auc_score(y_val, val_scores)

In [None]:
# Random-forest class probabilities for the test set; the column labelled 1
# is stored as 'target_2'.
predict_2 = rf.predict_proba(test)
rf_test_scores = predict_2[1]
sub['target_2'] = rf_test_scores
sub.head(n=5)

In [None]:
# Weighted blend of the two models: XGBoost ('target_1') carries weight 10,
# the random forest ('target_2') weight 2, normalised by the total of 12.
xgb_part = sub['target_1'] * 10
rf_part = sub['target_2'] * 2
sub['target'] = (xgb_part + rf_part) / 12

In [None]:
# Remove the per-model columns now that the blended 'target' exists.
per_model_cols = ['target_1', 'target_2']
sub = sub.drop(columns=per_model_cols)
sub.head(n=5)

In [None]:
# 'id' comes from the sample submission, which was read with dtype='float32';
# cast it to int32.
sub['id'] = sub['id'].astype('int32')

In [None]:
# Display the column dtypes of the submission frame.
sub.dtypes

In [None]:
# Write the submission frame to submission.csv without the index column.
sub.to_csv('submission.csv', index=False)

# Kindly upvote 👍🏻 if you found this kernel helpful

**Also, kindly upvote if you are forking the kernel** 😊