In [None]:
!pip install xgboost

In [None]:
# Required imports
import pandas as pd
import xgboost as xgb
import numpy as np
from sklearn.ensemble import RandomForestClassifier

from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score, accuracy_score, mean_absolute_percentage_error
from sklearn.metrics import classification_report

from sklearn.multiclass import OneVsOneClassifier

import copy

from sklearn.model_selection import train_test_split, GridSearchCV, RandomizedSearchCV

import matplotlib.pyplot as plt

In [None]:
# Mount Google Drive at /content/drive (requires the Google Colab runtime);
# the dataset path used below lives under this mount point.
from google.colab import drive
drive.mount('/content/drive')

In [None]:
# Class names, listed in the order of the integer codes 0-5 used in the 'emotions' column.
TARGET_NAMES = ['Alert', 'Neutral', 'Relaxed', 'Anger', 'Fear', 'Anxious']

# Folder on the mounted drive and the CSV file name (with its leading slash).
CSV_OUTPATH = '/content/drive/MyDrive/Coin Project 1/Development_2'
filename = '/final_key_points.csv'

# Load the table: dtype=int is requested for the data and the first CSV column becomes the index.
df = pd.read_csv(f'{CSV_OUTPATH}{filename}', dtype=int, index_col=0)
# Report the total number of rows.
print(df.shape[0])

In [None]:
def show_number_of_samples(df):
  """Print the number of rows per emotion class, followed by a separator line.

  Args:
    df: DataFrame with an 'emotions' column holding the class codes 0-5, which
      are reported as Alert, Neutral, Relaxed, Anger, Fear, Anxious (in that order).
  """
  class_names = ('Alert', 'Neutral', 'Relaxed', 'Anger', 'Fear', 'Anxious')
  for code, class_name in enumerate(class_names):
    rows_of_class = df[df['emotions'] == code]
    print('Number of Samples for Class ' + class_name + ': ', len(rows_of_class))
  print('---------------------------------------')

In [None]:
# Print the per-class sample counts of the dataset as loaded.
show_number_of_samples(df)

In [None]:
def normalize_df(df, random_state=None):
  """Balance the dataset by down-sampling every emotion class to the same size.

  The common size is the row count of the smallest non-empty class among the
  emotion codes 0-5. Classes without any rows are skipped instead of being
  sampled, since sampling from an empty frame raises a ValueError.

  Args:
    df: DataFrame with an 'emotions' column holding the class codes.
    random_state: optional seed forwarded to `DataFrame.sample`. The default
      (None) keeps the sampling non-deterministic, as before.

  Returns:
    A new DataFrame with the same columns as `df`, holding the same number of
    randomly chosen rows for each non-empty class (original index labels are
    kept). If no class 0-5 has any rows, an empty frame with the columns of
    `df` is returned.
  """
  rows_per_class = [df[df['emotions'] == code] for code in range(6)]
  rows_per_class = [rows for rows in rows_per_class if len(rows) > 0]
  if not rows_per_class:
    return df.iloc[0:0].copy()

  smallest_amount = min(len(rows) for rows in rows_per_class)

  # Reduce every class to smallest_amount rows. The pieces are concatenated in
  # one go: DataFrame.append no longer exists in pandas >= 2.0.
  sampled = [rows.sample(smallest_amount, random_state=random_state) for rows in rows_per_class]
  return pd.concat(sampled)

In [None]:
# Down-sample every class to the size of the smallest one, then print the
# resulting per-class counts as a check.
df_norm = normalize_df(df)
show_number_of_samples(df_norm)

In [None]:
# Train and Test split: 20% of the balanced rows are held out for testing;
# the fixed random_state makes the split itself repeatable for a given df_norm.
train, test = train_test_split(df_norm, test_size=0.2, random_state=42)

In [None]:
# Features: every row, every column except the first one.
X_train, X_test = train.values[:, 1:], test.values[:, 1:]

# Targets: every row, first column only (still 2-D, shape (n_rows, 1)).
# NOTE(review): presumably the first column is 'emotions' - confirm against the CSV layout.
y_train, y_test = train.values[:, :1], test.values[:, :1]

In [None]:
# Flatten the label arrays into 1-D vectors. reshape(-1) always yields a 1-D
# array, whereas reshape((1, n)).squeeze() collapses a single-sample array
# into a 0-d scalar array.
y_train = y_train.reshape(-1)
y_test = y_test.reshape(-1)

In [None]:
def _report_grid_search(predictor, grid_search, X_train, X_test, y_train, y_test, target_names, feature_names, top_n=10):
  """Print accuracy, classification report, importances and best params; plot the top features.

  Args:
    predictor: fitted object whose `predict` is used for the train/test predictions.
    grid_search: fitted GridSearchCV whose `best_estimator_.feature_importances_`
      and `best_params_` are reported.
    X_train, X_test, y_train, y_test: data used for the accuracy figures and the report.
    target_names: class names, used in the headline and the classification report.
    feature_names: names paired positionally with the feature importances.
    top_n: maximum number of features shown in the bar chart.
  """
  result_train = predictor.predict(X_train)
  result_test = predictor.predict(X_test)

  train_accuracy = accuracy_score(y_train, result_train)
  test_accuracy = accuracy_score(y_test, result_test)
  print(f'Model: {str(target_names)}; Accuracy for Train Set: {train_accuracy}; Accuracy for Test Set: {test_accuracy}')

  # Passing labels explicitly keeps the report from raising when a class is
  # missing from the test split.
  # NOTE(review): assumes class codes are 0..len(target_names)-1 in list order - confirm.
  class_codes = list(range(len(target_names)))
  print(classification_report(y_test, result_test, labels=class_codes, target_names=target_names))

  # Feature Importance, highest first
  importance_sorted = sorted(zip(grid_search.best_estimator_.feature_importances_, feature_names), reverse=True)
  print()
  print("Feature Importance: ", importance_sorted)

  print()
  print("Parameter: ", grid_search.best_params_)

  # Plot at most top_n features; slicing avoids an IndexError when fewer exist.
  top_features = importance_sorted[:top_n]
  feature_values = [value for value, _ in top_features]
  top_feature_names = [name for _, name in top_features]
  positions = range(len(top_features))

  plt.title('Feature Importances')
  plt.barh(positions, feature_values, color='b', align='center')
  plt.yticks(positions, top_feature_names)
  plt.xlabel('Relative Importance')
  plt.show()


def grid_search_cv(X_train, X_test, y_train, y_test, model, isOVO):
  """Tune an XGBoost classifier with a 4-fold grid search and report the results.

  Prints the best cross-validation accuracy, the train/test accuracy, a
  classification report, the sorted feature importances and the best
  hyperparameters, and shows a bar chart of the most important features.

  Args:
    X_train, X_test: feature matrices. Their columns are labelled positionally
      with the key-point names listed in `columns` below.
    y_train, y_test: 1-D class label vectors.
    model: despite its name, the list of class names; it is printed in the
      headline and passed to the classification report as `target_names`.
    isOVO: if True, the grid search is additionally wrapped in a
      OneVsOneClassifier, which is fitted and used for the predictions.

  Returns:
    None.
  """
  hyperparameter_grid = {'learning_rate': [0.001, 0.01, 0.10, 0.20, 0.30],
                          'max_depth': [3, 5, 8, 10, 20],
                          'gamma': [0.0, 0.1, 0.2, 0.5, 1],
                          'colsample_bytree': [0.3, 0.7, 1.0],
                          'reg_alpha': [0.05, 0.1, 0.5, 1, 5],
                          'n_estimators': [10, 50, 100, 250],
                          }

  columns = ['Nose_x','Nose_y','L_Eye_x','L_Eye_y','R_Eye_x','R_Eye_y','L_Ear_x','L_Ear_y','R_Ear_x','R_Ear_y','Throat_x','Throat_y','Withers_x','Withers_y','TailSet_x','TailSet_y','L_F_Paw_x','L_F_Paw_y','R_F_Paw_x','R_F_Paw_y','L_F_Wrist_x','L_F_Wrist_y','R_F_Wrist_x','R_F_Wrist_y','L_F_Elbow_x','L_F_Elbow_y','R_F_Elbow_x','R_F_Elbow_y','L_B_Paw_x','L_B_Paw_y','R_B_Paw_x','R_B_Paw_y','L_B_Hock_x','L_B_Hock_y','R_B_Hock_x','R_B_Hock_y','L_B_Stiffle_x','L_B_Stiffle_y','R_B_Stiffle_x','R_B_Stiffle_y', 'back_middle_x', 'back_middle_y', 'tail_tip_x', 'tail_tip_y', 'lip_upper_x', 'lip_upper_y','lip_lower_x','lip_lower_y', 'ear_tip_left_x', 'ear_tip_left_y', 'ear_tip_right_x', 'ear_tip_right_y']

  grid_search = GridSearchCV(estimator = xgb.XGBClassifier(random_state=42), param_grid = hyperparameter_grid, cv = 4, n_jobs = -1, verbose = 0, scoring="accuracy")
  # Fit it
  grid_search.fit(X_train, y_train)
  print("BEST ", grid_search.best_score_)

  if isOVO:
    # The grid search object itself is the base estimator of the one-vs-one wrapper.
    # NOTE(review): the importances and parameters reported afterwards are read
    # from `grid_search` (the multiclass search fitted above), not from the
    # estimators inside `ovo` - confirm this is the intended report.
    predictor = OneVsOneClassifier(grid_search)
    predictor.fit(X_train, y_train)
  else:
    predictor = grid_search

  _report_grid_search(predictor, grid_search, X_train, X_test, y_train, y_test, model, columns)

In [None]:
# Normal: predictions come directly from the fitted grid search (isOVO=False)
grid_search_cv(X_train, X_test, y_train, y_test, TARGET_NAMES, False)

In [None]:
# OVO: predictions come from a OneVsOneClassifier wrapped around the grid search (isOVO=True)
grid_search_cv(X_train, X_test, y_train, y_test, TARGET_NAMES, True)