In [1]:
import os
import pickle
import numpy as np

In [2]:
def unpickle(file):
    with open(file, 'rb') as fo:
        dict = pickle.load(fo, encoding='bytes')
    return dict


In [3]:
def rgb2gray(rgb):
    r, g, b = rgb[:,:,0], rgb[:,:,1], rgb[:,:,2]
    gray = 0.2989 * r + 0.5870 * g + 0.1140 * b
    return gray

def load_cifar100_data(data_dir):
    # Load training data
    train_file = os.path.join(data_dir, 'train')
    train_data_dict = unpickle(train_file)
    x_train = train_data_dict[b'data']
    y_train = train_data_dict[b'fine_labels']  # or b'coarse_labels' for coarse labels

    # Load testing data
    test_file = os.path.join(data_dir, 'test')
    test_data_dict = unpickle(test_file)
    x_test = test_data_dict[b'data']
    y_test = test_data_dict[b'fine_labels']  # or b'coarse_labels' for coarse labels

    # Load meta data
    meta_file = os.path.join(data_dir, 'meta')
    meta_data_dict = unpickle(meta_file)
    label_names = meta_data_dict[b'fine_label_names']  # or b'coarse_label_names'

    # Reshape and convert to grayscale
    x_train = x_train.reshape(-1, 3, 32, 32).transpose(0, 2, 3, 1)
    x_test = x_test.reshape(-1, 3, 32, 32).transpose(0, 2, 3, 1)
    x_train = np.array([rgb2gray(img) for img in x_train])
    x_test = np.array([rgb2gray(img) for img in x_test])

    x_train = x_train.astype('uint8')
    x_test = x_test.astype('uint8')
    return (x_train, y_train), (x_test, y_test), label_names


In [4]:
from util import Utils

Intel(R) Extension for Scikit-learn* enabled (https://github.com/intel/scikit-learn-intelex)


In [5]:
(cifar_x_train, cifar_y_train), (cifar_x_test, cifar_y_test), label_names = load_cifar100_data('cifar-100-python')

In [6]:
# Reshape the data: CIFAR-100 images are 32x32 pixels with 3 color channels (RGB)
cifar_x_train = cifar_x_train.reshape(-1, 32 * 32)
cifar_x_test = cifar_x_test.reshape(-1, 32 * 32)

# Normalize pixel values
cifar_x_train, cifar_x_test = cifar_x_train / 255.0, cifar_x_test / 255

In [7]:
cifar_x_train = cifar_x_train.astype('float32')
cifar_x_test = cifar_x_test.astype('float32')

In [8]:
from sklearn import model_selection
cifar_X_train, cifar_x_val, cifar_Y_train, cifar_y_val = model_selection.train_test_split(cifar_x_train, cifar_y_train, test_size=1/3, random_state=42)


In [13]:
best_cifar_model = Utils.find_best_rf_parameters(cifar_X_train, cifar_Y_train, cifar_x_val, cifar_y_val)

|   n_estimators | criterion   |   max_depth | max_features   |   score in validation set |
|---------------:|:------------|------------:|:---------------|--------------------------:|
|            100 | gini        |         100 | log2           |                 0.147957  |
|             50 | gini        |         100 | log2           |                 0.127677  |
|             10 | gini        |         100 | log2           |                 0.0813584 |
|            100 | gini        |         100 | sqrt           |                 0.148557  |
|             50 | gini        |         100 | sqrt           |                 0.130977  |
|             10 | gini        |         100 | sqrt           |                 0.0803984 |
|            100 | gini        |          50 | log2           |                 0.151257  |
|             50 | gini        |          50 | log2           |                 0.128517  |
|             10 | gini        |          50 | log2           |                 

In [15]:
Utils.accuracy_measure_rf(cifar_x_train, cifar_y_train, 
                          cifar_x_test, cifar_y_test, 
                          n_estimators = 100, criterion = 'gini', max_depth = 50, max_features = 'log2')

--- 24.400174379348755 seconds ---


(0.1619,
 (array([0.30701754, 0.15808824, 0.11214953, 0.09848485, 0.08130081,
         0.12745098, 0.07368421, 0.12571429, 0.23170732, 0.27433628,
         0.09782609, 0.15      , 0.13265306, 0.11363636, 0.04761905,
         0.05084746, 0.31944444, 0.14973262, 0.03225806, 0.07954545,
         0.31891892, 0.20754717, 0.27631579, 0.13392857, 0.28837209,
         0.07317073, 0.16981132, 0.0661157 , 0.29126214, 0.34328358,
         0.05389222, 0.10891089, 0.2       , 0.06358382, 0.05813953,
         0.01886792, 0.16197183, 0.1       , 0.09271523, 0.28333333,
         0.31578947, 0.54761905, 0.125     , 0.08108108, 0.06122449,
         0.06896552, 0.13636364, 0.12650602, 0.43137255, 0.11464968,
         0.09615385, 0.09734513, 0.187251  , 0.20224719, 0.12087912,
         0.1       , 0.16      , 0.26229508, 0.15217391, 0.08219178,
         0.23030303, 0.32786885, 0.07575758, 0.13986014, 0.10465116,
         0.05952381, 0.08571429, 0.06382979, 0.26060606, 0.19161677,
         0.10843373, 0.17

In [None]:
best_xgb_model = Utils.find_best_xgb_parameters(cifar_X_train, cifar_Y_train, cifar_x_val, cifar_y_val)

In [None]:
Utils.accuracy_measure_xgb(cifar_x_train, cifar_y_train, 
                          cifar_x_test, cifar_y_test, 
                           n_estimators = 100, learning_rate = 0.1, max_depth = 50, subsample = 0.8, colsample_bytree = 1)