<a href="https://colab.research.google.com/github/gulabpatel/Rapids/blob/main/02%3A%20ML(Random_Forest)_with_RAPIDS.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

Video walkthrough of the code: https://www.youtube.com/watch?v=m1i8yomD3nc

In [None]:
# Show the NVIDIA GPU attached to this runtime (model, driver, memory in use).
!nvidia-smi

In [None]:
# Fetch the RAPIDS-Colab install scripts and check the attached GPU.  Run this and the next cell only.
# Please read the output of this cell.  If your Colab instance is not RAPIDS compatible, it will warn you and give you remediation steps.
!git clone https://github.com/rapidsai/rapidsai-csp-utils.git
!python rapidsai-csp-utils/colab/env-check.py

In [None]:
# This will update the Colab environment and restart the kernel.  Don't run the next cell until you see the session crash.
!bash rapidsai-csp-utils/colab/update_gcc.sh
import os
os._exit(00)

In [None]:
# Install CondaColab.  This restarts the kernel one last time: run this cell by itself and
# only run the next cell once you see the session crash.
import condacolab
condacolab.install()

In [None]:
# Installing RAPIDS is now 'python rapidsai-csp-utils/colab/install_rapids.py <release> <packages>'.
# The <release> options are 'stable' and 'nightly'.  Leaving it blank or adding any other word defaults to stable.
# The <packages> option is blank (default) or 'core'.  By default, RAPIDSAI and BlazingSQL are installed; 'core' installs only RAPIDSAI, without BlazingSQL.
!python rapidsai-csp-utils/colab/install_rapids.py stable

In [None]:
# Only needed for 0.10 (presumably the RAPIDS 0.10 release -- confirm): install dask-cudf and
# xgboost into /usr/local from the rapidsai, nvidia and conda-forge channels.
!conda install -y --prefix /usr/local -c rapidsai/label/xgboost -c rapidsai -c nvidia -c conda-forge dask-cudf xgboost

In [None]:
# Check whether libxgboost.so is present under /usr/lib (ls reports an error if it is missing).
!ls /usr/lib/libxgboost.so

In [None]:
# Disabled workaround: copy libxgboost.so from /usr/local/lib into /usr/lib.
# Presumably only needed when the library is missing from /usr/lib -- uncomment to apply.
#!cp /usr/local/lib/libxgboost.so /usr/lib

In [None]:
import cudf

In [None]:
from __future__ import print_function
import sys,tempfile, urllib, os
import pandas as pd
from sklearn.model_selection import train_test_split
import numpy as np

In [None]:
from sklearn.datasets import fetch_openml
# Fetch version 4 of the 'covertype' dataset from OpenML.
covtyp = fetch_openml(name='covertype', version=4)

In [None]:
# Dimensions of the feature data.
covtyp.data.shape

In [None]:
# Distinct values that occur in the target.
np.unique(covtyp.target)

In [None]:
# Show the GPU status again.
!nvidia-smi

In [None]:
# Combine the features and the target into one pandas DataFrame; the target becomes
# the last column and is named 'target'.
cov_df = pd.DataFrame(
    np.c_[covtyp.data, covtyp.target],
    columns=[*covtyp.feature_names, 'target'],
)

In [None]:
# Total memory footprint of the DataFrame in bytes, index included.
cov_df.memory_usage(index=True).sum()

In [None]:
# Preview the first rows.
cov_df.head()

In [None]:
# Report the DataFrame dimensions.
n_rows, n_cols = cov_df.shape
print("Rows     : ", n_rows)
print("Columns  : ", n_cols)

In [None]:
# Number of rows per target value.
cov_df.target.value_counts()

In [None]:
# Column dtypes as they stand before the numeric conversion below.
cov_df.dtypes

In [None]:
# Convert every column to a numeric dtype, downcasting to the smallest float type that fits.
for column_name in cov_df.columns:
    cov_df[column_name] = pd.to_numeric(cov_df[column_name], downcast='float')

In [None]:
# Store the class labels as 32-bit integers.
cov_df['target'] = cov_df['target'].astype('int32')

In [None]:
# Shift the class labels so they start at 0.  Subtracting the column minimum (rather than a
# hard-coded 1) keeps this cell idempotent: re-running it leaves already zero-based labels unchanged.
cov_df['target'] = cov_df['target'] - cov_df['target'].min()

In [None]:
# Separate the labels from the features without modifying cov_df itself.
cov_df_y = cov_df['target'].copy()
cov_df_X = cov_df.drop(columns=['target'])

In [None]:
# Hold out 5% of the rows for testing.  A fixed random_state makes the split -- and every
# metric computed from it -- reproducible across kernel restarts.
X_train, X_test, y_train, y_test = train_test_split(
    cov_df_X, cov_df_y, train_size=0.95, test_size=0.05, random_state=42
)

In [None]:
# Convert the pandas splits to cuDF DataFrames; each label Series is wrapped
# as a single-column frame first.
X_train_gdf = cudf.DataFrame.from_pandas(X_train)
y_train_gdf = cudf.DataFrame.from_pandas(y_train.to_frame())
X_test_gdf = cudf.DataFrame.from_pandas(X_test)
y_test_gdf = cudf.DataFrame.from_pandas(y_test.to_frame())

In [None]:
# Show the GPU status again, now that the cuDF frames have been created.
!nvidia-smi

In [None]:
from cuml import RandomForestClassifier as cuRF
import time

In [None]:
# Hyper-parameters for the cuML random forest classifier.
cu_rf_params = dict(
    n_estimators=250,
    max_depth=8,
    n_streams=1,
    split_algo=0,
    # seed=1010  (left disabled, as before)
)
cu_rf = cuRF(**cu_rf_params)

In [None]:
# Fit the forest on the cuDF training frames and report the wall-clock duration.
start_time = time.time()
cu_rf.fit(X_train_gdf, y_train_gdf)
elapsed_seconds = time.time() - start_time
print("GPU Training Time with GPU dataframe: %s seconds" % elapsed_seconds)

In [None]:
# Predict labels for the held-out features with the fitted forest.
predvalue = cu_rf.predict(X_test_gdf)
# Display the entry at position/label 0 of the predictions as a quick sanity check.
predvalue[0]

In [None]:
# Score the model on the held-out set.  The labels are passed as a NumPy array here;
# with RAPIDS 0.11 the GPU frame can be used instead: cu_rf.score(X_test_gdf, y_test_gdf)
cu_rf.score(X_test_gdf, y_test.to_numpy())

In [None]:
# Keep an independent copy of the predictions.
pred_out = predvalue.copy()

In [None]:
 from sklearn.metrics import confusion_matrix
 import cupy as cp

In [None]:
# Confusion matrix of true vs. predicted labels, computed with scikit-learn.
# Newer cuDF releases dropped Series.to_array() in favour of Series.to_numpy(), and the
# install cell pulls an unpinned 'stable' release -- so prefer to_numpy() when it exists
# and fall back to to_array() on older installs.
predvalue_host = predvalue.to_numpy() if hasattr(predvalue, 'to_numpy') else predvalue.to_array()
confusion_matrix(y_test, predvalue_host)