#### Introduction 📝 🎯 Goal: Multi-class classification based on features

📖 Data:

train.csv / test.csv - the training and test sets

Submissions are evaluated on the accuracy of the multi-class classification.



In [None]:
import numpy as np 
import pandas as pd 
import math
import glob
import os
import gc
import plotly.express as px
import matplotlib.pyplot as plt
import seaborn as sns
import missingno as msno
import time
%matplotlib inline

In [None]:
# Let wide/long frames render without truncation (up to 500 rows and 500 columns).
for display_option in ('display.max_rows', 'display.max_columns'):
    pd.set_option(display_option, 500)

In [None]:
# Read the three competition files in one pass; the order of the paths matches
# the order of the names on the left-hand side.
train, test, sample_submission = (
    pd.read_csv(csv_path)
    for csv_path in (
        '../input/tabular-playground-series-dec-2021/train.csv',
        '../input/tabular-playground-series-dec-2021/test.csv',
        '../input/tabular-playground-series-dec-2021/sample_submission.csv',
    )
)

In [None]:
# Report the number of rows in each split.
for prefix, frame in (("Train has", train), ("Test has", test)):
    print(prefix, len(frame), "elements")

In [None]:
# Peek at the first ten training rows.
train.head(n=10)

### Let's check whether there are NA values in the data

In [None]:
# Share of missing values per column. Despite the column label, the values are
# fractions (NA count divided by row count), not percentages.
na_share = train.isna().sum() / len(train)
dd = na_share.to_frame(name='percentage_na').rename_axis('index')
# Transpose so the single metric is shown as one wide row.
dd.T

### XGBoost in RAPIDS

In [None]:
import cudf
import pandas as pd

import pynvml
import numpy as np
import xgboost as xgb

# Feature columns are taken positionally: columns 1..54 of `train` (column 0 is skipped).
features = train.iloc[:, 1:55].columns.tolist()

# Move the training frame to the GPU, then make the labels zero-based there.
# The shift is applied to the GPU copy (not to `train`) with plain vectorised
# arithmetic, so re-running this cell cannot shift the labels a second time.
# XGBoost's multi-class objectives expect labels in [0, num_class).
data = cudf.from_pandas(train)
data['Cover_Type'] = data['Cover_Type'] - 1

# Cast features one column at a time to float32.
for col in features:
    data[col] = data[col].astype('float32')

## splitting training and test set
from cuml import train_test_split
X = data[features]
# Select the label by name rather than by position so it stays tied to the
# column that was just shifted above.
y = data['Cover_Type']
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=123)

In [None]:
from cuml.ensemble import RandomForestClassifier

# Replace any missing feature values with 0 before fitting (done in place so
# later cells see the same filled frame).
X_train.fillna(value=0, inplace=True)

# cuML random forest baseline: 140 trees, each at most 10 levels deep.
rf_settings = {'n_estimators': 140, 'max_depth': 10}
model = RandomForestClassifier(**rf_settings)
model.fit(X_train, y_train)

In [None]:
from cuml.metrics import accuracy_score
from cupy import asnumpy

# Give the hold-out features the same in-place NA fill as the training
# features, then score them with the fitted random forest.
X_test.fillna(value=0, inplace=True)
rf_prediction = model.predict(X_test)



In [None]:

# Accuracy of the random forest on the hold-out split.
cu_score = accuracy_score(y_test, rf_prediction)

accuracy_pct = cu_score * 100.0
print("cuml Accuracy: %.2f%%" % accuracy_pct)

In [None]:
# Time DMatrix construction plus boosting.
start_rapids = time.time()

dtrain = xgb.DMatrix(X_train, y_train)
dtest = xgb.DMatrix(X_test, y_test)  # hold-out split; scored in a later cell

# Only parameters that XGBoost itself defines are listed here. The previous
# dict also carried scikit-learn style keys ('nround', 'loss', 'n_estimators',
# 'max_features', 'criterion', 'silent') that XGBoost does not recognise, and
# a misspelt 'max_leavs'. The number of rounds is set by `num_boost_round`
# below, and `verbose_eval` is an argument of `xgb.train`, not a booster
# parameter.
xgb_params = {
    'objective': 'multi:softprob',  # predict() returns one probability per class
    'num_class': 7,
    'max_depth': 4,
    # With max_depth=4 a tree has at most 2**4 = 16 leaves, so this cap is not
    # binding; it is kept (spelt correctly) to record the intended setting.
    'max_leaves': 2**8,
    'grow_policy': 'lossguide',
    'tree_method': 'gpu_hist',
}

## Train the model
trained_model = xgb.train(
    xgb_params,
    dtrain,
    num_boost_round=400,
    evals=[(dtrain, 'train')],
    verbose_eval=True,
)

end_rapids = time.time()
print("Total time taken", end_rapids-start_rapids, "seconds")

In [None]:
from cuml.metrics import accuracy_score
from cupy import asnumpy

# The booster returns one row of class probabilities per sample; the
# predicted class is the index of the largest probability in each row.
class_probabilities = trained_model.predict(dtest)
prediction = np.argmax(class_probabilities, axis=1)

cu_score = accuracy_score(y_test, prediction)
accuracy_pct = cu_score * 100.0

print("cuml Accuracy: %.2f%%" % accuracy_pct)

In [None]:
# Prepare the competition test set the same way as the training features:
# fill NAs with 0, cast the feature columns to float32, move them to the GPU.
test.fillna(value=0, inplace=True)
test[features] = test[features].astype('float32')
test_data = cudf.from_pandas(test[features])

# Unlabelled DMatrix for the booster.
dtest_actual = xgb.DMatrix(test_data)

# Boosted-tree prediction: take the most probable class index per row.
boost_pred = np.argmax(trained_model.predict(dtest_actual), axis=1)

# Random forest prediction on the same rows (not used by the cells that
# build the submission below).
rf_pred = model.predict(test_data)


In [None]:
# boost_pred holds zero-based class indices; convert them to integers and add
# 1 to produce the values written to the Cover_Type column.
final_preds = asnumpy(boost_pred)
test['Cover_Type'] = np.round(final_preds)
test['Cover_Type'] = test['Cover_Type'].astype(int) + 1

In [None]:
# Write only the Id and Cover_Type columns, without the index.
submission = test[['Id', 'Cover_Type']]
submission.to_csv('submission.csv', index=False)

### This is a WIP notebook. Please upvote if you find the notebook useful. I will be working on adding new features and visualizations.
