# Vectorized Models

## Emotion Face Classifier Notebook 2

Vectorized models for 2-D image classification are simpler and less computationally expensive than more sophisticated image models (e.g., deep learning, CNNs).

This notebook generates NumPy arrays from the image data and applies vectorized models to them.

See `README.md` for more details on approach benefits and disadvantages. 

In [3]:
import os
import numpy as np
import pandas as pd
from sklearn.preprocessing import LabelEncoder

In [4]:
def check_directory_name(target_name) -> bool:
    """
    Walk up from the current working directory until a directory named
    target_name is found, changing the working directory along the way.

    Side effect: on success the process working directory is left at the
    matching directory. If no ancestor matches, the working directory is
    restored to where the search started, so a failed lookup does not
    strand the caller at the filesystem root.

    Args:
        target_name (str): The directory name to match.

    Returns:
        bool: True if the current directory or one of its ancestors is named
            target_name (and is now the working directory), False otherwise.
    """
    # Remember the starting point so a failed search can be undone
    start_dir = os.getcwd()
    current_dir = start_dir

    while True:
        # Compare only the last path component against the target
        if os.path.basename(current_dir) == target_name:
            print(f'Directory set to {current_dir}, matches target dir sting {target_name}.')
            return True

        # Move up a directory
        os.chdir('..')
        parent_dir = os.getcwd()

        # At the filesystem root, '..' resolves to the same directory
        if parent_dir == current_dir:
            os.chdir(start_dir)
            return False

        current_dir = parent_dir

In [5]:
# Move the working directory up to the repository root; later cells use
# relative paths such as 'data/fer2013/Training' and './configs/...'.
main_dir = 'EmotionFaceClassifier'
found_main_dir = check_directory_name(main_dir)

# Fail fast: continuing from the wrong directory would only surface later as
# confusing missing-file errors.
if not found_main_dir:
    raise FileNotFoundError(
        f"No directory named {main_dir!r} found at or above the working directory; "
        "run this notebook from inside the repository."
    )

found_main_dir

Directory set to /Users/dsl/Documents/GitHub/EmotionFaceClassifier, matches target dir sting EmotionFaceClassifier.


True

In [6]:
from utils.helpers import (
    load_config
)

from utils.helpers import (
    load_images_and_labels
)

# Create Numpy Arrays

In [11]:
# Load FER 2013 data
fer2013_train_imgs, fer2013_train_lbls = load_images_and_labels('data/fer2013/Training')
fer2013_test_imgs, fer2013_test_lbls = load_images_and_labels('data/fer2013/Testing')

In [13]:
# Load FRD 2020 dataset
frd2020_train_imgs, frd2020_train_lbls = load_images_and_labels('data/frd2020/Training')
frd2020_test_imgs, frd2020_test_lbls = load_images_and_labels('data/frd2020/Testing')

In [14]:
# Combine image datasets
combined_train_images = np.concatenate((fer2013_train_imgs, frd2020_train_imgs), axis=0)
combined_test_images = np.concatenate((fer2013_test_imgs, frd2020_test_imgs), axis=0)

In [15]:
# Encode and combine labels
le = LabelEncoder()

combined_train_labels = np.concatenate((fer2013_train_lbls, frd2020_train_lbls), axis=0)
combined_test_labels = np.concatenate((fer2013_test_lbls, frd2020_test_lbls), axis=0)

combined_train_labels_enc = le.fit_transform(combined_train_labels)
combined_test_labels_enc = le.transform(combined_test_labels)

In [16]:
# Set output paths
intermediate_data_path = os.path.join('data', 'intermediate_data')
os.makedirs(intermediate_data_path, exist_ok=True)

train_imgs_path = os.path.join(intermediate_data_path, 'combined_train_images.npy')
train_labels_path = os.path.join(intermediate_data_path, 'combined_train_labels.npy')

test_imgs_path = os.path.join(intermediate_data_path, 'combined_test_images.npy')
test_labels_path = os.path.join(intermediate_data_path, 'combined_test_labels.npy')

In [17]:
# Save the combined datasets
np.save(train_imgs_path, combined_train_images)
np.save(train_labels_path, combined_train_labels_enc)
np.save(test_imgs_path, combined_test_images)
np.save(test_labels_path, combined_test_labels_enc)

In [18]:
# # Example: reload the saved arrays (they live under data/intermediate_data)
# train_images = np.load(train_imgs_path)
# train_labels = np.load(train_labels_path)
# test_images = np.load(test_imgs_path)
# test_labels = np.load(test_labels_path)

# Vectorized Model Training

In [20]:
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeClassifier 
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import ConfusionMatrixDisplay

In [21]:
# Load model params from JSON
flat_models = load_config('./configs/vectorized_models.json')
print(flat_models.keys())

dict_keys(['LGBM', 'LogisticRegression', 'DecisionTree', 'RandomForest', 'XGBoost'])


In [34]:
# Show each model label followed by its config entry, one per line
for label, model in flat_models.items():
    print(label, model, sep='\n')

LGBM
{'module': 'lightgbm', 'class': 'LGBMClassifier', 'params': {}}
LogisticRegression
{'module': 'sklearn.linear_model', 'class': 'LogisticRegression', 'params': {'multi_class': 'multinomial', 'max_iter': 500}}
DecisionTree
{'module': 'sklearn.tree', 'class': 'DecisionTreeClassifier', 'params': {}}
RandomForest
{'module': 'sklearn.ensemble', 'class': 'RandomForestClassifier', 'params': {}}
XGBoost
{'module': 'xgboost', 'class': 'XGBClassifier', 'params': {'eval_metric': 'mlogloss'}}


In [None]:
# Fit every model on the training data, persist it, and save its training
# metrics and confusion matrix under <flat_model_dir>/<label>/.
#
# NOTE(review): no cell shown in this notebook defines models, flat_model_dir,
# X_train, y_train, save_model, get_classification_metrics, model_metrics or
# emo_dict - they must be defined before this cell can run. flat_models holds
# config dicts (module/class/params), not estimators, so it cannot simply be
# substituted for models.

# The class labels are the same for every model, so build them once.
# presumably emo_dict maps integer-like class ids to emotion names - TODO confirm
int_labels = [int(i) for i in emo_dict.keys()]
str_labels = list(emo_dict.values())

for label, model in models.items():
    print(f"Running {label} model...")
    # Set dirs and filepaths
    model_output_dir = os.path.join(flat_model_dir, label)
    model_output_path = os.path.join(model_output_dir, 'mdl.pkl')
    metrics_ouput_path = os.path.join(model_output_dir, 'train_metrics.csv')
    cm_ouput_path = os.path.join(model_output_dir, 'train_confusion_matrix.png')

    os.makedirs(model_output_dir, exist_ok=True)

    # fit, save, predict (predictions are made on the training data itself)
    model.fit(X_train, y_train)
    save_model(model, filename=model_output_path)
    model_preds = model.predict(X_train)
    model_results = get_classification_metrics(y_train, model_preds)

    # Aggregate metrics and save to model dir
    pd.DataFrame(model_results, index=[0]).to_csv(metrics_ouput_path)
    model_metrics.append({label: model_results})

    # Confusion matrix
    cm_disp = ConfusionMatrixDisplay.from_predictions(
        y_true=y_train,
        y_pred=model_preds,
        cmap='Blues',
        labels=int_labels,
        display_labels=str_labels
    )

    # Use the display's own figure instead of pyplot's "current figure":
    # plt is never imported in the cells shown.
    cm_fig = cm_disp.figure_
    cm_fig.tight_layout()
    # pad_inches is only honoured together with bbox_inches='tight', so the
    # original pad_inches=5 had no effect; a tight bounding box keeps the tick
    # labels from being clipped.
    cm_fig.savefig(cm_ouput_path, bbox_inches='tight')