# Vectorized Models

## Emotion Face Classifier Notebook 2

Vectorized models for 2-D image classification are simpler and less computationally expensive than more sophisticated image models (e.g., deep learning, CNNs).

This notebook generates a numpy array from the data and applies vectorized models to the data. 

See `README.md` for more details on the benefits and disadvantages of this approach.

In [None]:
import os
import numpy as np
import pandas as pd
from sklearn.preprocessing import LabelEncoder
from datascifuncs.tidbit_tools import load_json, write_json, check_directory_name

In [None]:
# Name of the project root folder, handed to datascifuncs' check_directory_name.
# NOTE(review): presumably this ensures the notebook runs from the project root so the
# relative paths used below resolve — confirm against the datascifuncs documentation.
main_dir = "EmotionFaceClassifier"
check_directory_name(main_dir)

In [None]:
from utils.helpers import (
    load_config,
    load_images_and_labels
)

from utils.vectorized_models import (
    create_train_test_splits
)

# Create Numpy Arrays

Load and combine data into numpy arrays.

Combined data is saved to numpy array files for future reference.

In [None]:
# Read the input mapping config and pull out the image directory entry
input_data_paths = load_config(
    './configs/input_mappings.json'
)
input_paths = input_data_paths["img_directories"]
print(input_paths)

In [None]:
# Training split: X_train holds the images, y_train the matching labels
X_train, y_train = create_train_test_splits(
    input_paths,
    usage='Training',
)

In [None]:
# Testing split: X_test holds the images, y_test the matching labels
X_test, y_test = create_train_test_splits(
    input_paths,
    usage='Testing',
)

In [None]:
# Some models require encoded y classes.
# Fit the encoder on the training labels only, then reuse that same mapping for the
# test labels. Re-fitting on the test split would rebuild the class -> integer mapping
# from the test labels, so the same integer could mean different classes in train and
# test whenever the two splits do not contain an identical set of labels, and `le`
# would end up describing the test split rather than the one the models were trained on.
# `transform` raises ValueError if y_test contains a label never seen in y_train.
le = LabelEncoder()
y_train_encoded = le.fit_transform(y_train)
y_test_encoded = le.transform(y_test)

In [None]:
# Output locations: one folder for intermediate data, created if missing,
# and one .npy file per (split, images/labels) combination inside it.
intermediate_data_path = os.path.join('data', 'intermediate_data')
os.makedirs(intermediate_data_path, exist_ok=True)

train_imgs_path, train_labels_path, test_imgs_path, test_labels_path = [
    os.path.join(intermediate_data_path, file_name)
    for file_name in (
        'combined_train_images.npy',
        'combined_train_labels.npy',
        'combined_test_images.npy',
        'combined_test_labels.npy',
    )
]

In [None]:
# Persist the image arrays and the encoded label arrays to the .npy paths set above
np.save(file=train_imgs_path, arr=X_train)
np.save(file=train_labels_path, arr=y_train_encoded)
np.save(file=test_imgs_path, arr=X_test)
np.save(file=test_labels_path, arr=y_test_encoded)

In [None]:
# # Example: reload the saved arrays from the paths they were written to above
# # (the label files contain the encoded labels)
# train_images = np.load(train_imgs_path)
# train_labels = np.load(train_labels_path)
# test_images = np.load(test_imgs_path)
# test_labels = np.load(test_labels_path)

# Vectorized Model Training

In [None]:
# Imports required by the training loop at the end of this notebook, which calls
# ConfusionMatrixDisplay.from_predictions and plt.*; without them that cell raises
# NameError on a fresh kernel.
import matplotlib.pyplot as plt
from sklearn.metrics import ConfusionMatrixDisplay

# Not referenced by any visible cell; left disabled.
# from sklearn.model_selection import train_test_split
# from sklearn.tree import DecisionTreeClassifier
# from sklearn.ensemble import RandomForestClassifier

In [None]:
# Load model params from JSON
vectorized_models = load_config('./configs/vectorized_models.json')
print(vectorized_models.keys())

In [None]:
# Display the full configuration object returned by load_config.
vectorized_models

In [None]:
# Inspect the configuration entry stored under the 'LGBM' key.
vectorized_models['LGBM']

In [None]:
# NOTE(review): `flat_models` is not assigned in any visible cell above — confirm it is
# defined before this cell runs on a fresh kernel.
# Print each label followed by its model, one item per line.
for label, model in flat_models.items():
    print(label, model, sep='\n')

In [None]:
# Fit every model on the training data, then save the fitted model, its training
# metrics (CSV) and a training confusion matrix (PNG) under <flat_model_dir>/<label>/.
#
# NOTE(review): `models`, `flat_model_dir`, `model_metrics`, `emo_dict`, `save_model` and
# `get_classification_metrics` are not defined or imported in the visible cells (the
# preceding cell iterates `flat_models`, not `models`) — confirm they exist before this
# cell runs on a fresh kernel.
# NOTE(review): the models are fit on `y_train`, not `y_train_encoded`, while the
# confusion matrix is given integer `labels`; confirm `y_train` holds integer class ids
# that match the keys of `emo_dict`.

# Class ids and display names for the confusion matrix. They do not depend on the
# model, so they are built once instead of on every iteration.
int_labels = [int(i) for i in emo_dict.keys()]
str_labels = list(emo_dict.values())

for label, model in models.items():
    print(f"Running {label} model...")
    # Set dirs and filepaths
    model_output_dir = os.path.join(flat_model_dir, label)
    model_output_path = os.path.join(model_output_dir, 'mdl.pkl')
    metrics_output_path = os.path.join(model_output_dir, 'train_metrics.csv')
    cm_output_path = os.path.join(model_output_dir, 'train_confusion_matrix.png')

    os.makedirs(model_output_dir, exist_ok=True)

    # fit, save, predict (predictions are made on the training data itself)
    model.fit(X_train, y_train)
    save_model(model, filename=model_output_path)
    model_preds = model.predict(X_train)
    model_results = get_classification_metrics(y_train, model_preds)

    # Aggregate metrics and save to model dir
    pd.DataFrame(model_results, index=[0]).to_csv(metrics_output_path)
    model_metrics.append({label: model_results})

    # Confusion matrix
    cm_disp = ConfusionMatrixDisplay.from_predictions(
        y_true=y_train,
        y_pred=model_preds,
        cmap='Blues',
        labels=int_labels,
        display_labels=str_labels
    )
    cm_fig = cm_disp.figure_
    cm_fig.tight_layout()
    # NOTE(review): per the matplotlib docs `pad_inches` only applies when
    # bbox_inches='tight' (or rcParams['savefig.bbox'] is 'tight'), so it likely has no
    # effect here; kept as-is to leave the saved image unchanged.
    cm_fig.savefig(cm_output_path, pad_inches=5)

    # Render this model's matrix inline, then release the figure so open figures do
    # not accumulate across iterations.
    plt.show()
    plt.close(cm_fig)