In [6]:
import pandas as pd
import numpy as np
from sklearn.ensemble import RandomForestClassifier
from xgboost import XGBClassifier, plot_importance

from sklearn.metrics import plot_confusion_matrix, classification_report, confusion_matrix, multilabel_confusion_matrix
from random import sample
from sklearn.model_selection import GridSearchCV
import matplotlib.pyplot as plt

# Read Data

In [7]:
def _read_split(csv_path):
    """Read one split CSV and separate it into features and labels.

    Returns a 3-tuple: the full DataFrame, a copy of every column except the
    last (the feature columns), and a copy of the last column (the labels).
    """
    frame = pd.read_csv(csv_path)
    features = frame.iloc[:, :-1].copy()
    labels = frame.iloc[:, -1].copy()
    return frame, features, labels


# Training split
trainDF, train_imagesF, train_labels = _read_split("HSV_LBPs_train.csv")

# Test split
testDF, test_imagesF, test_labels = _read_split("HSV_LBPs_test.csv")

# Validation split
valDF, val_imagesF, val_labels = _read_split("HSV_LBPs_val.csv")

In [21]:
def topk_Acc(k, RealLabel, PreLabelProb, classes=None):
    """
    Compute top-k accuracy: the fraction of samples whose true label is among
    the k highest-scoring classes.

    k: number of top-scoring classes to consider per sample; must be >= 1
    RealLabel: true labels, shape -> (num,) or (num, 1)
    PreLabelProb: per-class scores, shape -> (num, class_num)
    classes: optional sequence of length class_num giving the label that each
        column of PreLabelProb stands for (e.g. a fitted estimator's
        ``classes_``). When None, the column index itself is treated as the
        label, so labels must then be the integers 0..class_num-1.

    Returns the accuracy rounded to 4 decimal places.
    Raises ValueError if k < 1 or if the number of labels differs from the
    number of score rows.
    """
    if k < 1:
        # [:, -0:] would select every column and report a meaningless 100 %.
        raise ValueError("k must be a positive integer")

    scores = np.asarray(PreLabelProb)
    # Force a column vector: a 1-D label array would otherwise broadcast
    # against the (num, k) prediction matrix along the wrong axis and
    # silently produce an incorrect accuracy.
    truth = np.asarray(RealLabel).reshape(-1, 1)
    if truth.shape[0] != scores.shape[0]:
        raise ValueError("RealLabel and PreLabelProb must have the same number of rows")

    # Column indices of the k largest scores per row, best first.
    max_k_preds = scores.argsort(axis=1)[:, -k:][:, ::-1]
    if classes is not None:
        # Translate column indices into the labels they represent.
        max_k_preds = np.asarray(classes)[max_k_preds]

    # A sample counts as a hit when any of its top-k predictions equals its label.
    match_array = np.logical_or.reduce(max_k_preds == truth, axis=1)
    topk_acc_score = match_array.sum() / match_array.shape[0]

    return round(topk_acc_score, 4)

# Modeling

## Random Forest

In [5]:
# Random forest with library-default hyperparameters; the seed is fixed so the
# fit is repeatable, and tree building is spread over 15 parallel jobs.
forest = RandomForestClassifier(random_state=2021, n_jobs=15)
final_RF = forest.fit(X=train_imagesF, y=train_labels)

In [25]:
### validation
# reshape(-1, 1) infers the row count from the data, so the cell no longer
# breaks when a split does not contain exactly 450 samples.
val_real = np.array(val_labels).reshape(-1, 1)
val_pred_score = final_RF.predict_proba(val_imagesF)

### test
test_real = np.array(test_labels).reshape(-1, 1)
test_pred_score = final_RF.predict_proba(test_imagesF)

# NOTE(review): topk_Acc compares predict_proba column indices with the raw
# labels, which is only valid if the labels are the integers 0..n_classes-1
# (predict_proba columns follow final_RF.classes_) — confirm against the CSVs.
top1_Acc_val = topk_Acc(1, val_real, val_pred_score)
top5_Acc_val = topk_Acc(5, val_real, val_pred_score)

top1_Acc_test = topk_Acc(1, test_real, test_pred_score)
top5_Acc_test = topk_Acc(5, test_real, test_pred_score)

print("-----Random Forest-----")
print("Top 1 Validation Accuracy: ", top1_Acc_val)
print("Top 5 Validation Accuracy: ", top5_Acc_val)
print("Top 1 Test Accuracy: ", top1_Acc_test)
print("Top 5 Test Accuracy: ", top5_Acc_test)

-----Random Forest-----
Top 1 Validation Accuracy:  0.1178
Top 5 Validation Accuracy:  0.3289
Top 1 Test Accuracy:  0.0978
Top 5 Test Accuracy:  0.3178


## XGBoost

In [23]:
# Gradient-boosted trees with library-default hyperparameters; seed fixed for
# repeatability. The model is fitted on the raw NumPy feature matrix.
xbgc = XGBClassifier(n_jobs=15, random_state=2020)
xbgc.fit(X=train_imagesF.values, y=train_labels)





XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
              colsample_bynode=1, colsample_bytree=1, gamma=0, gpu_id=-1,
              importance_type='gain', interaction_constraints='',
              learning_rate=0.300000012, max_delta_step=0, max_depth=6,
              min_child_weight=1, missing=nan, monotone_constraints='()',
              n_estimators=100, n_jobs=15, num_parallel_tree=1,
              objective='multi:softprob', random_state=2020, reg_alpha=0,
              reg_lambda=1, scale_pos_weight=None, subsample=1,
              tree_method='exact', validate_parameters=1, verbosity=None)

In [26]:
### validation
val_real = np.array(val_labels).reshape(450,1)
val_pred_score = xbgc.predict_proba(val_imagesF)

### test
test_real = np.array(test_labels).reshape(450,1)
test_pred_score = xbgc.predict_proba(test_imagesF)

top1_Acc_val = topk_Acc(1,val_real,val_pred_score)
top5_Acc_val = topk_Acc(5, val_real, val_pred_score)

top1_Acc_test = topk_Acc(1, test_real, test_pred_score)
top5_Acc_test = topk_Acc(5, test_real, test_pred_score)

print("-----XGBoost-----")
print("Top 1 Validation Accuracy: ", top1_Acc_val)
print("Top 5 Validation Accuracy: ", top5_Acc_val)
print("Top 1 Test Accuracy: ", top1_Acc_test)
print("Top 5 Test Accuracy: ", top5_Acc_test)

-----XGBoost-----
Top 1 Validation Accuracy:  0.1244
Top 5 Validation Accuracy:  0.3444
Top 1 Test Accuracy:  0.1044
Top 5 Test Accuracy:  0.3289
