# Monkey Species Identifier

## Initialization

### Imports

In [0]:
import numpy as np
import joblib

from google.colab import drive

from sklearn.metrics import precision_recall_fscore_support

In [0]:
# Make Google Drive available under the relative "drive" directory
drive.mount(mountpoint="drive")

Drive already mounted at drive; to attempt to forcibly remount, call drive.mount("drive", force_remount=True).


### Load Dataset

In [0]:
# NOTE(review): joblib archives are pickle-based and can execute code on load;
# only load files that come from a trusted source.
# Each archive is unpacked as an (inputs, labels) pair.
TRAIN_DATASET_PATH = "drive/My Drive/dataset/dataset_monkey.joblib"
X_train, Y_train = joblib.load(TRAIN_DATASET_PATH)

TEST_DATASET_PATH = "drive/My Drive/dataset/dataset_monkey_test.joblib"
X_test, Y_test = joblib.load(TEST_DATASET_PATH)

# Kept so anything that still reads DATASET_PATH sees the same final value as before.
DATASET_PATH = TEST_DATASET_PATH

# Fail fast if inputs and labels are misaligned instead of failing deep inside training.
assert len(X_train) == len(Y_train), "training inputs/labels length mismatch"
assert len(X_test) == len(Y_test), "test inputs/labels length mismatch"

### Set variables

In [0]:
# Sample count and per-image dimensions, read from the first four axes of the training array
TRAIN_SAMPLES, HEIGHT, WIDTH, CHANNELS = X_train.shape[:4]
TEST_SAMPLES = X_test.shape[0]

# Training hyper-parameters
CLASSES = 10
BATCH_SIZE = 32
EPOCHS = 50
EPOCH_STEPS = TRAIN_SAMPLES // BATCH_SIZE  # number of whole batches in one pass over the training set

# Shape of the feature map stored per image after the Xception stage
NN_OUTPUT_HEIGHT, NN_OUTPUT_WIDTH, NN_OUTPUT_CHANNELS = 7, 7, 2048

## Feature Extraction - Xception Network

In [0]:
from keras.applications.xception import Xception

Using TensorFlow backend.


In [0]:
# ImageNet-pretrained Xception built without its top (classification) layers,
# sized for the dataset's own image dimensions.
nn_model = Xception(
  include_top=False,
  weights="imagenet",
  input_shape=(HEIGHT, WIDTH, CHANNELS),
)

### Training dataset

In [0]:
# Run the training images through the convolutional base and keep the
# resulting feature maps, flattened, as the new training inputs.
X_train_reduced = np.zeros(
  (TRAIN_SAMPLES, NN_OUTPUT_HEIGHT, NN_OUTPUT_WIDTH, NN_OUTPUT_CHANNELS),
  dtype=np.float32
)

# Predict in mini-batches: one forward pass per BATCH_SIZE images instead of
# one per image, while still filling the preallocated buffer slice by slice.
for start in range(0, TRAIN_SAMPLES, BATCH_SIZE):
  stop = min(start + BATCH_SIZE, TRAIN_SAMPLES)
  X_train_reduced[start:stop] = nn_model.predict(X_train[start:stop], batch_size=BATCH_SIZE)

# Cleanup memory: the raw images are not used after this point
del X_train

# One row per sample; the row length comes from the same constants that sized
# the buffer, so the two can never drift apart.
X_train = np.reshape(
  X_train_reduced,
  (TRAIN_SAMPLES, NN_OUTPUT_HEIGHT * NN_OUTPUT_WIDTH * NN_OUTPUT_CHANNELS)
)

### Test dataset

In [0]:
# Run the test images through the convolutional base and keep the
# resulting feature maps, flattened, as the new test inputs.
X_test_reduced = np.zeros(
  (TEST_SAMPLES, NN_OUTPUT_HEIGHT, NN_OUTPUT_WIDTH, NN_OUTPUT_CHANNELS),
  dtype=np.float32
)

# Predict in mini-batches: one forward pass per BATCH_SIZE images instead of
# one per image, while still filling the preallocated buffer slice by slice.
for start in range(0, TEST_SAMPLES, BATCH_SIZE):
  stop = min(start + BATCH_SIZE, TEST_SAMPLES)
  X_test_reduced[start:stop] = nn_model.predict(X_test[start:stop], batch_size=BATCH_SIZE)

# Cleanup memory: the raw images are not used after this point
del X_test

# One row per sample; the row length comes from the same constants that sized
# the buffer, so the two can never drift apart.
X_test = np.reshape(
  X_test_reduced,
  (TEST_SAMPLES, NN_OUTPUT_HEIGHT * NN_OUTPUT_WIDTH * NN_OUTPUT_CHANNELS)
)

## Feature Extraction - PCA

In [0]:
from sklearn.decomposition import PCA

In [0]:
# Number of principal components to keep.
# NOTE(review): presumably chosen to match the training-set size — confirm.
PCA_COMPONENTS = 3291
pca = PCA(n_components=PCA_COMPONENTS)

### Training dataset

In [0]:
# Fit PCA on the training matrix and project it in the same step.
# Flatten with -1 instead of HEIGHT*WIDTH*CHANNELS: once the Xception cells have
# run, each row holds 7*7*2048 features rather than raw pixels, so the
# pixel-count reshape would raise a ValueError. -1 works for either input.
X_train = pca.fit_transform(X_train.reshape(TRAIN_SAMPLES, -1))

### Test dataset

In [0]:
# Project the test matrix with the already-fitted PCA (no refit on test data).
# Flatten with -1 instead of HEIGHT*WIDTH*CHANNELS: once the Xception cells have
# run, each row holds 7*7*2048 features rather than raw pixels, so the
# pixel-count reshape would raise a ValueError. -1 works for either input.
X_test = pca.transform(X_test.reshape(TEST_SAMPLES, -1))

## Classification - Neural Network

### Model

In [0]:
from keras.models import Sequential
from keras.layers import Dense, Activation, Dropout

In [0]:
# Fully connected classifier: one 512-unit ReLU hidden layer with dropout,
# followed by a softmax over the CLASSES output categories.
model = Sequential()
# Take the input width from the data itself so the network matches X_train
# whether it holds the flattened Xception features or the PCA-reduced ones.
model.add(Dense(512, activation='relu', input_dim=X_train.shape[1]))
model.add(Dropout(0.5))
model.add(Dense(CLASSES, activation='softmax'))
model.compile(optimizer='adam',
              loss='categorical_crossentropy',
              metrics=['acc'])

### Training

In [0]:
# Train the classifier on the reduced features for 25 shuffled epochs;
# the value returned by fit() is kept in `history`.
history = model.fit(
  x=X_train,
  y=Y_train,
  batch_size=BATCH_SIZE,
  epochs=25,
  shuffle=True,
)

Epoch 1/25
Epoch 2/25
Epoch 3/25
Epoch 4/25
Epoch 5/25
Epoch 6/25
Epoch 7/25
Epoch 8/25
Epoch 9/25
Epoch 10/25
Epoch 11/25
Epoch 12/25
Epoch 13/25
Epoch 14/25
Epoch 15/25
Epoch 16/25
Epoch 17/25
Epoch 18/25
Epoch 19/25
Epoch 20/25
Epoch 21/25
Epoch 22/25
Epoch 23/25
Epoch 24/25
Epoch 25/25


In [0]:
# Model output for every test sample (one row of class scores per sample)
Y_pred = model.predict(X_test)

# Convert score rows and label rows to class indices with a single
# vectorized argmax per array instead of a Python-level loop over rows.
# NOTE(review): assumes Y_test is a 2-D (samples, classes) array — confirm.
Y_pred_labels = np.argmax(Y_pred, axis=1)
Y_test_labels = np.argmax(Y_test, axis=1)

In [0]:
# Per-class precision, recall, F-score and support for the neural network predictions
print(precision_recall_fscore_support(y_true=Y_test_labels, y_pred=Y_pred_labels))

(array([0.875     , 0.94318182, 0.9625    , 0.94736842, 0.92682927,
       0.98823529, 1.        , 0.61029412, 0.83333333, 0.975     ]), array([0.98717949, 0.98809524, 0.95061728, 1.        , 0.97435897,
       1.        , 0.97435897, 0.98809524, 0.0617284 , 1.        ]), array([0.92771084, 0.96511628, 0.95652174, 0.97297297, 0.95      ,
       0.99408284, 0.98701299, 0.75454545, 0.11494253, 0.98734177]), array([78, 84, 81, 90, 78, 84, 78, 84, 81, 78]))


## Classification - Support Vector Machine

### Model

In [0]:
from sklearn.svm import SVC

In [0]:
# Support vector classifier with C=10.0; the RBF kernel is the library default,
# spelled out here for readability.
# NOTE: this rebinds `model`, which previously held the neural network.
model = SVC(kernel="rbf", C=10.0)

In [0]:
# Convert one-hot encoding to integer class labels with one vectorized argmax.
# NOTE(review): assumes Y_train is a 2-D (samples, classes) array — confirm.
Y_train_labels = np.argmax(Y_train, axis=1)

### Training

In [0]:
# Fit the SVM on the reduced features against the integer class labels
model.fit(X=X_train, y=Y_train_labels)

SVC(C=10.0, cache_size=200, class_weight=None, coef0=0.0,
  decision_function_shape='ovr', degree=3, gamma='auto', kernel='rbf',
  max_iter=-1, probability=False, random_state=None, shrinking=True,
  tol=0.001, verbose=False)

In [0]:
# Predicted class labels for every test sample
Y_pred_labels = model.predict(X_test)

# Convert test one-hot encoding to integer class labels with one vectorized
# argmax instead of a Python-level loop over rows.
# NOTE(review): assumes Y_test is a 2-D (samples, classes) array — confirm.
Y_test_labels = np.argmax(Y_test, axis=1)

In [0]:
# Per-class precision, recall, F-score and support for the SVM predictions
print(precision_recall_fscore_support(y_true=Y_test_labels, y_pred=Y_pred_labels))

(array([1.        , 1.        , 1.        , 1.        , 1.        ,
       1.        , 0.97402597, 0.95454545, 0.96385542, 0.98734177]), array([0.98717949, 1.        , 0.96296296, 0.97777778, 1.        ,
       1.        , 0.96153846, 1.        , 0.98765432, 1.        ]), array([0.99354839, 1.        , 0.98113208, 0.98876404, 1.        ,
       1.        , 0.96774194, 0.97674419, 0.97560976, 0.99363057]), array([78, 84, 81, 90, 78, 84, 78, 84, 81, 78]))
