# Classification on `emnist`

## 1. Create `Readme.md` to document your work

Explain your choices, process, and outcomes.

## 2. Classify all symbols

### Choose a model

Your choice of model! Choose wisely...

### Train away!

Do you need to tune any parameters? Is the model expecting data in a different format?

### Evaluate the model

Evaluate the models on the test set, analyze the confusion matrix to see where the model performs well and where it struggles.

### Investigate subsets

On which classes does the model perform well? Poorly? Evaluate again, excluding easily confused symbols (such as 'O' and '0').

### Improve performance

Brainstorm for improving the performance. This could include trying different architectures, adding more layers, changing the loss function, or using data augmentation techniques.

## 2. Classify digits vs. letters model showdown

Perform a full showdown classifying digits vs letters:

1. Create a column for whether each row is a digit or a letter
2. Choose an evaluation metric 
3. Choose several candidate models to train
4. Divide data to reserve a validation set that will NOT be used in training/testing
5. K-fold train/test
    1. Create train/test splits from the non-validation dataset 
    2. Train each candidate model (best practice: use the same split for all models)
    3. Apply the model to the test split 
    4. (*Optional*) Perform hyper-parametric search
    5. Record the model evaluation metrics
    6. Repeat with a new train/test split
6. Promote winner, apply model to validation set
7. (*Optional*) Perform hyper-parametric search, if applicable
8. Report model performance

# Work

## Preparation

In [1]:
# Install required packages (once per virtual environment)
%pip install -q numpy pandas matplotlib seaborn scikit-learn tensorflow xgboost scikeras
%reset -f


[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m A new release of pip is available: [0m[31;49m23.3.2[0m[39;49m -> [0m[32;49m24.0[0m
[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m To update, run: [0m[32;49mpip install --upgrade pip[0m
Note: you may need to restart the kernel to use updated packages.


In [2]:
# Import packages
import os
import string
import random
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import emnist
from IPython.display import display, Markdown

Pyarrow will become a required dependency of pandas in the next major release of pandas (pandas 3.0),
(to allow more performant data types, such as the Arrow string type, and better interoperability with other libraries)
but was not found to be installed on your system.
If this would cause problems for you,
please provide us feedback at https://github.com/pandas-dev/pandas/issues/54466
        
  import pandas as pd


In [3]:
# ML packages
# Random Forest
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import confusion_matrix, accuracy_score, precision_score, recall_score, f1_score, classification_report
# Logistic Regression
from sklearn.linear_model import LogisticRegression
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import cross_val_score, KFold, cross_validate
# XGBoost (gradient-boosted decision trees)
from xgboost import XGBClassifier
# Deep Learning
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Flatten
from tensorflow.keras.losses import CategoricalCrossentropy
from tensorflow.keras.utils import to_categorical
# Keras formatting helper
from scikeras.wrappers import KerasClassifier

In [4]:
# Notebook-wide constants
REBUILD = True
SIZE = 28  # presumably the image side length in pixels (images are reshaped to 28x28 below)
# Level '2' limits TensorFlow's native logging to ERROR & FATAL messages.
# NOTE(review): tensorflow is imported in an earlier cell, so this may be set
# too late to silence import-time messages -- confirm.
os.environ.update({'TF_CPP_MIN_LOG_LEVEL': '2'})

In [5]:
# Define helper functions
def int_to_char(label):
    """Convert an integer class label to the character it stands for.

    Labels 0-9 map to the digits '0'-'9', labels 10-35 to the uppercase
    letters 'A'-'Z' and labels 36-61 to the lowercase letters 'a'-'z'.

    Args:
        label: integer label in the range 0-61 (plain or NumPy integer).

    Returns:
        A one-character string.

    Raises:
        ValueError: if ``label`` lies outside 0-61; without this check such
            labels would silently produce characters that are not classes.
    """
    if not 0 <= label < 62:
        raise ValueError(f'label must be in the range 0-61, got {label!r}')
    if label < 10:
        return str(label)
    if label < 36:
        return chr(label - 10 + ord('A'))
    return chr(label - 36 + ord('a'))
    
def int_to_type(label):
    """Return 'digit' for labels below 10 and 'letter' for every other label."""
    return 'digit' if label < 10 else 'letter'
    
def class_to_int(emnist_classes):
    """Translate class symbols into their integer labels.

    A symbol's label is its position in the sequence digits, uppercase
    letters, lowercase letters. ``emnist_classes`` is an iterable of symbols;
    the result is a list of labels in the same order. An unknown symbol
    raises ValueError (from ``list.index``).
    """
    symbols = list(string.digits + string.ascii_uppercase + string.ascii_lowercase)
    return [symbols.index(symbol) for symbol in emnist_classes]

def plot_accuracy(history):
    """Draw the per-epoch training and validation accuracy curves.

    ``history`` must expose a ``history`` mapping with the keys 'accuracy'
    and 'val_accuracy'. Epochs are numbered from 1 on the x axis.
    """
    train_curve = history.history['accuracy']
    valid_curve = history.history['val_accuracy']
    epoch_numbers = range(1, len(train_curve) + 1)
    # Training values as blue dots, validation values as a blue line
    for curve, fmt, caption in (
        (train_curve, 'bo', 'Training accuracy'),
        (valid_curve, 'b', 'Validation accuracy'),
    ):
        plt.plot(epoch_numbers, curve, fmt, label=caption)
    plt.title('Training and validation accuracy')
    plt.xlabel('Epochs')
    plt.ylabel('Accuracy')
    plt.legend()
    plt.show()

def plot_loss(history):
    """Draw the per-epoch training and validation loss curves.

    ``history`` must expose a ``history`` mapping with the keys 'loss' and
    'val_loss'. Epochs are numbered from 1 on the x axis.
    """
    train_curve = history.history['loss']
    valid_curve = history.history['val_loss']
    epoch_numbers = range(1, len(train_curve) + 1)
    # Training values as blue dots, validation values as a blue line
    for curve, fmt, caption in (
        (train_curve, 'bo', 'Training loss'),
        (valid_curve, 'b', 'Validation loss'),
    ):
        plt.plot(epoch_numbers, curve, fmt, label=caption)
    plt.title('Training and validation loss')
    plt.xlabel('Epochs')
    plt.ylabel('Loss')
    plt.legend()
    plt.show()

# This is a common preprocessing step for neural networks, but may not be necessary in all cases
def normalize_images(images):
    """Standardize pixel values to zero mean and unit variance.

    The mean and standard deviation are computed over every value in
    ``images`` taken together (one global pair, not one per image).

    Args:
        images: array-like of numeric pixel values.

    Returns:
        The standardized values as (nested) Python lists. Empty input is
        returned as an empty list. When all values are identical (standard
        deviation 0) the values are only centered, so the result is all
        zeros rather than NaN/inf from dividing by zero.
    """
    pixels = np.array(images)
    if pixels.size == 0:
        # mean()/std() of an empty array are NaN; nothing to normalize
        return pixels.tolist()
    centered = pixels - pixels.mean()
    spread = pixels.std()
    if spread == 0:
        return centered.tolist()
    return (centered / spread).tolist()

# exercise 1
def display_metrics_symbols(task, model_name, metrics_dict, class_labels=None):
    """Display performance metrics and the confusion matrix of a symbol model.

    Every NumPy-array entry of ``metrics_dict[task][model_name]`` is treated
    as the confusion matrix; every other entry becomes a column of a one-row
    metrics table.

    Args:
        task: key of the task inside ``metrics_dict``.
        model_name: key of the model inside ``metrics_dict[task]``.
        metrics_dict: nested mapping task -> model -> metric name -> value.
        class_labels: symbols naming the confusion-matrix rows/columns, in
            matrix order. Defaults to the 62 symbols digits, uppercase
            letters, lowercase letters.

    Raises:
        ValueError: if the confusion matrix is not square with one row per
            entry of ``class_labels``.
    """
    if class_labels is None:
        class_labels = list(string.digits + string.ascii_uppercase + string.ascii_lowercase)
    else:
        class_labels = list(class_labels)

    metrics_df = pd.DataFrame()
    cm_df = pd.DataFrame()
    for key, value in metrics_dict[task][model_name].items():
        if isinstance(value, np.ndarray):
            expected_shape = (len(class_labels), len(class_labels))
            if value.shape != expected_shape:
                # Fail with an actionable message instead of pandas' generic shape error
                raise ValueError(
                    f'confusion matrix has shape {value.shape} but {len(class_labels)} '
                    'class labels were given; build the matrix with a matching '
                    '`labels` argument or pass matching `class_labels`'
                )
            cm_df = pd.DataFrame(
                value,
                index=['actual {}'.format(i) for i in class_labels],
                columns=['predict {}'.format(i) for i in class_labels],
            )
        else:
            metrics_df[key] = [value]
    display(Markdown(f'# Performance Metrics: {model_name}'))
    display(metrics_df)
    display(Markdown(f'# Confusion Matrix: {model_name}'))
    display(cm_df)

# Define a function that takes row names and a labeled confusion matrix as input to generate a table of top classification classes 
def top_classes(row_names, source_df, class_n=5):
    """Display the most frequent classification classes for the given symbols.

    row_names = list of row names in the format of ['actual 1', 'actual A', 'actual a']
    source_df = labeled confusion matrix
    class_n = number of top classification classes, default set to 5

    For every row a heading and a table are displayed; the table lists the
    column names with the largest counts, the counts themselves and their
    share of the row total in percent. Nothing is returned.
    """
    for actuals in row_names:
        display(Markdown(f'## Top {class_n} classificaition classes for {actuals}'))
        row = source_df.loc[actuals,]
        # Rank the row once and reuse it for both the names and the counts
        leaders = row.sort_values(ascending=False).nlargest(class_n)
        class_df = pd.DataFrame()
        class_df['Class'] = leaders.index.tolist()
        class_df['Number'] = leaders.tolist()
        class_df['Percent(%)'] = class_df['Number']/sum(row)*100
        display(class_df)

# exercise 2
def display_metrics_type(task, model_name, metrics_dict):
    """Display performance metrics and the confusion matrix of a digit/letter model.

    Every NumPy-array entry of ``metrics_dict[task][model_name]`` is treated
    as the 2x2 confusion matrix (rows: actual digit/letter, columns:
    predicted digit/letter); every other entry becomes a column of a one-row
    metrics table.

    Args:
        task: key of the task inside ``metrics_dict``.
        model_name: key of the model inside ``metrics_dict[task]``.
        metrics_dict: nested mapping task -> model -> metric name -> value.
    """
    metrics_df = pd.DataFrame()
    cm_df = pd.DataFrame()
    for key, value in metrics_dict[task][model_name].items():
        # isinstance also accepts ndarray subclasses, unlike an exact type comparison
        if isinstance(value, np.ndarray):
            cm_df = pd.DataFrame(value, index=['actual digit', 'actual letter'], columns=['predicted digit', 'predicted letter'])
        else:
            metrics_df[key] = [value]
    display(Markdown(f'#### Performance Metrics: {model_name}'))
    display(metrics_df)
    display(Markdown(f'#### Confusion Matrix: {model_name}'))
    display(cm_df)

def display_candidate_model_metrics(task, candidate_metrics_dict):
    """Display the cross-validation metrics of every candidate model.

    For each model a table with one row per metric and one column per fold
    ('CV1', 'CV2', ...) plus a 'mean' column is displayed. A final summary
    table holds the per-metric fold means of all models side by side.

    The number of folds is taken from the recorded scores, so any k works.
    Models without recorded scores (still holding empty lists) are reported
    as such and left out of the summary instead of raising.

    Args:
        task: key of the task inside ``candidate_metrics_dict``.
        candidate_metrics_dict: nested mapping task -> model -> metric name
            -> sequence of per-fold scores.
    """
    metric_means_df = pd.DataFrame()
    for model_name, fold_scores in candidate_metrics_dict[task].items():
        n_folds = max((len(scores) for scores in fold_scores.values()), default=0)
        display(Markdown(f'#### Performance Metrics: {model_name}'))
        if n_folds == 0:
            display(Markdown('No cross-validation results recorded.'))
            continue
        fold_labels = [f'CV{fold}' for fold in range(1, n_folds + 1)]
        metrics_df = pd.DataFrame(fold_scores, index=fold_labels).T
        # Compute the fold mean once, before the 'mean' column exists, and reuse it
        fold_means = metrics_df.mean(axis=1)
        metrics_df['mean'] = fold_means
        metric_means_df[model_name] = fold_means
        display(metrics_df)
    display(Markdown('#### Summary Performance Metrics'))
    display(metric_means_df)

In [6]:
# Load the 'byclass' training samples into a DataFrame with one row per image
image, label = emnist.extract_training_samples('byclass')
train = pd.DataFrame()
train['class'] = np.array(list(map(int_to_char, label)))  # symbol for each label
train['image'] = list(image)
train['image_flat'] = train['image'].apply(lambda img: np.array(img).ravel())  # 1-D copy of each image
train['label'] = label

# Keep only the rows whose class is one of the letters A-G
train = train.loc[train['class'].isin(list('ABCDEFG'))]

# Non-null row count per column
train.count()

class         20379
image         20379
image_flat    20379
label         20379
dtype: int64

In [7]:
# Load the 'byclass' test samples into a DataFrame with one row per image
image, label = emnist.extract_test_samples('byclass')
valid = pd.DataFrame()
valid['class'] = np.array(list(map(int_to_char, label)))  # symbol for each label
valid['image'] = list(image)  # 28*28 array
valid['image_flat'] = valid['image'].apply(lambda img: np.array(img).ravel())  # array with length: 784
valid['label'] = label

# Keep only the rows whose class is one of the letters A-G
valid = valid.loc[valid['class'].isin(list('ABCDEFG'))]

# Non-null row count per column
valid.count()

class         3449
image         3449
image_flat    3449
label         3449
dtype: int64

## Problem 1: Classify all symbols 
### 1.a Choose a model
Classification will be done using Neural Network. 

In [8]:
# Metrics store: task -> model -> metric name -> value.
# Every metric starts out as its own empty list and is overwritten after evaluation.
metrics_dict = {
    task_name: {
        'neural_network': {
            metric_name: []
            for metric_name in ('confusion_matrix', 'accuracy', 'precision', 'recall', 'f1')
        }
    }
    for task_name in (
        'Classify_all_symbols',
        'Classify_all_symbols_validation_with_selected_data',
    )
}

### 1.b Train the model

In [9]:
# Comment if using whole dataset to train 
#train = train[:10000]
#print('Number of unique class values: ', len(train['class'].unique()))

# Comment if using whole dataset to validate 
#valid = valid[:3000]
#print('Number of unique class values: ', len(valid['class'].unique()))

In [10]:
# Neural Network: task/model identifiers used when storing the metrics
task = 'Classify_all_symbols from A to G'
model_name = 'neural_network'

# Fixed TensorFlow seed for reproducibility
tf.random.set_seed(42)

# Stack the images into (n, 28, 28, 1) arrays and scale the pixel values by 1/255
train_images = np.array([np.reshape(img, (28, 28, 1)) for img in train['image']]) / 255.0
valid_images = np.array([np.reshape(img, (28, 28, 1)) for img in valid['image']]) / 255.0

# Integer labels as plain NumPy arrays
train_labels = train['label'].to_numpy(copy=True)
valid_labels = valid['label'].to_numpy(copy=True)


In [11]:

# Build the network layer by layer:
# 28x28x1 input -> flatten -> 128-unit ReLU layer -> 62-unit softmax output
model = Sequential()
model.add(keras.layers.InputLayer(input_shape=(28, 28, 1)))
model.add(Flatten())
model.add(Dense(128, activation='relu'))
model.add(Dense(62, activation='softmax'))



: 

In [None]:
# Compile with the Adam optimizer; the sparse loss takes integer labels directly
model.compile(
    loss='sparse_categorical_crossentropy',
    optimizer='adam',
    metrics=['accuracy'],
)



In [None]:
# Fit for 10 epochs; `history` keeps the per-epoch training and validation metrics
history = model.fit(
    train_images,
    train_labels,
    batch_size=3500,
    epochs=10,
    validation_data=(valid_images, valid_labels),
)


### 1.c Evaluate the model
#### Overall evaluation

In [None]:
# Evaluate the model
loss, acc = model.evaluate(valid_images, valid_labels)
y_pred = np.argmax(model.predict(valid_images), axis=1)

# Calculate performance metrics
prec = precision_score(valid_labels, y_pred, average='weighted')
rec = recall_score(valid_labels, y_pred, average='weighted')
f1 = f1_score(valid_labels, y_pred, average='weighted')
# Index the confusion matrix by all 62 symbol labels so that its shape matches
# the 62 row/column names display_metrics_symbols applies, even when only a
# subset of the classes occurs in the data.
all_labels = np.arange(len(string.digits + string.ascii_uppercase + string.ascii_lowercase))
cm = confusion_matrix(valid_labels, y_pred, labels=all_labels)

# Store performance metrics in dictionary.
# `task` is not necessarily a predefined key of metrics_dict, so create the
# task entry on demand instead of raising KeyError.
metrics_dict.setdefault(task, {})[model_name] = {'accuracy': acc,
                                                 'precision': prec,
                                                 'recall': rec,
                                                 'f1': f1,
                                                 'confusion_matrix': cm}

# Display performance metrics
display_metrics_symbols(task, model_name, metrics_dict)

# Plot the training and validation accuracy and loss during the training of the model
plot_accuracy(history)
plot_loss(history)

### 1.d Investigate subsets

In [None]:
class_lab = list(string.digits + string.ascii_uppercase + string.ascii_lowercase)
# Pass the label ids explicitly: without `labels`, the report only covers the
# classes that occur in the data, and a `target_names` list of a different
# length is rejected. Classes without samples are reported with zeros.
class_report = classification_report(
    valid_labels,
    y_pred,
    labels=list(range(len(class_lab))),
    target_names=class_lab,
    zero_division=0,
)
print(class_report)

In [None]:
# Create the list of symbols with lowest recall `lowest_recall_char_rowname`.
# Recall is computed for the classes that occur in the validation labels, and
# each score is mapped back through `present_labels`; the position inside the
# score array equals the label id only when every class is present.
present_labels = np.unique(valid_labels)
recall_scores_nn = recall_score(valid_labels, y_pred, labels=present_labels, average=None)
lowest_recall_char = [int_to_char(present_labels[i]) for i in np.argsort(recall_scores_nn)]
lowest_recall_char_rowname = ['actual {}'.format(i) for i in lowest_recall_char]

# Create cm pandas dataframe; the matrix is built over all labels in `class_lab`
# so that it always matches the row/column names.
cm_all = confusion_matrix(valid_labels, y_pred, labels=np.arange(len(class_lab)))
cm_df = pd.DataFrame(cm_all, index=['actual {}'.format(i) for i in class_lab], columns=['pred {}'.format(i) for i in class_lab])

# Find top classification classes for n number of symbols with lowest recall.
# top_classes displays its tables itself and returns nothing, so it is called directly.
n=15
top_classes(lowest_recall_char_rowname[0:n], cm_df)
# Specifying symbol and number of classification classes
top_classes(['actual m'], cm_df, class_n=10)

Evaluate again, excluding easily confused symbols (such as 'O' and '0'): 


- Results show that accuracy increased by only including those that are easy to classify. 

In [None]:
# Re-evaluation: Exclude easily confused symbols

task = 'Classify_all_symbols_validation_with_selected_data'
model_name = 'neural_network'

# Exclude easily confused symbols in validation data
excluded = class_to_int(list('0oOiI1lZ2z'))
mask_valid = ~valid['label'].isin(excluded)
valid_nonconfused = valid[mask_valid]

# Convert subset validation data to tensor
nonconfused_images = np.array(valid_nonconfused['image'])
nonconfused_images = np.array(list(map(lambda x: np.reshape(x, (28, 28, 1)), nonconfused_images)))
nonconfused_images = nonconfused_images / 255.0
nonconfused_labels = np.array(valid_nonconfused['label'])

# Evaluate the model
loss, acc = model.evaluate(nonconfused_images, nonconfused_labels)
nonconfused_pred = np.argmax(model.predict(nonconfused_images), axis=1)

# Calculate performance metrics
prec = precision_score(nonconfused_labels, nonconfused_pred, average='weighted')
rec = recall_score(nonconfused_labels, nonconfused_pred, average='weighted')
f1 = f1_score(nonconfused_labels, nonconfused_pred, average='weighted')
# The confusion matrix covers every label that occurs among the true OR the
# predicted values. Fix that label set once and reuse it for the row/column
# names; taking only the predicted labels can leave the names out of step with
# the matrix.
cm_label_ids = np.union1d(nonconfused_labels, nonconfused_pred)
cm = confusion_matrix(nonconfused_labels, nonconfused_pred, labels=cm_label_ids)

# Store performance metrics in dictionary
metrics_dict.setdefault(task, {})[model_name] = {'accuracy': acc,
                                                 'precision': prec,
                                                 'recall': rec,
                                                 'f1': f1,
                                                 'confusion_matrix': cm}

# Make confusion matrix indices more readable
cm_labels = [int_to_char(i) for i in cm_label_ids]

# Display performance metrics and confusion matrix for a model.
metrics_df = pd.DataFrame()
cm_df = pd.DataFrame()
for key, value in metrics_dict[task][model_name].items():
    if isinstance(value, np.ndarray):
        cm_df = pd.DataFrame(value, index=['actual {}'.format(i) for i in cm_labels], columns=['predict {}'.format(i) for i in cm_labels])
    else:
        metrics_df[key] = [value]
display(Markdown(f'# Performance Metrics: {model_name}'))
display(metrics_df)
display(Markdown(f'# Confusion Matrix: {model_name}'))
display(cm_df)

### 1.e Improve performance
Since we know that there are sets of symbols that are commonly confused, such as `['1', 'l', 'I', 'i']` or `['o', 'O', '0']`, we can use an ensemble: a few models that each specialize in differentiating one set of commonly misclassified symbols, plus an overall model that handles the rest of the classification task. The final prediction would be a combination of the predictions from these models.

## 2. Classify digits vs. letters model showdown

### 1. Create a column for whether each row is a digit or a letter

In [None]:
# Load 'training/testing' data
image, label = emnist.extract_training_samples('byclass')
train = pd.DataFrame()
train['class'] = np.array(list(map(int_to_char, label)))
train['image'] = list(image)
train['image_flat'] = train['image'].apply(lambda img: np.array(img).ravel())
train['label'] = label
# 'type' holds the word 'digit' or 'letter' for every row
train['type'] = train['label'].map(int_to_type)
# 'isletter' is the numeric target: 0=digit (label below 10), 1=letter
train['isletter'] = train['label'].map(lambda lab: int(lab >= 10))


# Load testing data
image, label = emnist.extract_test_samples('byclass')
valid = pd.DataFrame()
valid['class'] = np.array(list(map(int_to_char, label)))
valid['image'] = list(image)  # 28*28 array
valid['image_flat'] = valid['image'].apply(lambda img: np.array(img).ravel())  # array with length: 784
valid['label'] = label
# 'type' holds the word 'digit' or 'letter' for every row
valid['type'] = valid['label'].map(int_to_type)
# 'isletter' is the numeric target: 0=digit (label below 10), 1=letter
valid['isletter'] = valid['label'].map(lambda lab: int(lab >= 10))

### 2. Choose an evaluation metric 
### 3. Choose several candidate models to train
Both the evaluation metrics and the candidate models are specified in the metrics dictionary below.

In [None]:
# Per-fold score store: task -> candidate model -> metric name -> list of scores.
# Each metric gets its own empty list, later replaced by the cross-validation results.
candidate_metrics_dict = {
    'digit_vs_letter': {
        candidate: {
            metric_name: []
            for metric_name in ('accuracy', 'precision', 'recall', 'f1')
        }
        for candidate in ('logistic_regression', 'xgboost', 'random_forest', 'neural_network')
    }
}

### 4. Divide data to reserve a validation set that will NOT be used in training/testing

In [None]:
# Merge the training and testing frames, then draw an equally sized sample from every class
merged = pd.concat([train, valid], ignore_index=True)

# One group per class symbol
grouped = merged.groupby('class')
# Samples per class: a tenth of all rows, spread evenly over 62 classes
n_sample = round(len(merged) / 10 / 62)
# Draw the same number of rows from every group with a fixed seed
merged_sampled = grouped.apply(
    lambda group: group.sample(n=n_sample, random_state=42)
).reset_index(drop=True)

# Display number samples
print('Number of samples in `merged_sampled`: ', len(merged_sampled))
# Check that all 62 classes are present
print('Number of unique class values: ', merged_sampled['class'].nunique())

In [None]:
# Reserve a validation set that will NOT be used in training/testing:
# shuffle all sampled rows with a fixed seed, then cut at the 70% mark
shuffled_df = merged_sampled.sample(frac=1, random_state=42).reset_index(drop=True)
split_index = round(len(shuffled_df) * 0.7)
train, valid = shuffled_df.iloc[:split_index], shuffled_df.iloc[split_index:]

# Check that all 62 classes are present
# In `train` (70% of the data)
print('Number of samples in `train`: ', len(train))
print('Number of unique class values: ', train['class'].nunique())
# In `valid` (30% of the data)
print('Number of samples in `valid`: ', len(valid))
print('Number of unique class values: ', valid['class'].nunique())

### 5. K-fold train/test
        1. Create train/test splits from the non-validation dataset 
        2. Train each candidate model (best practice: use the same split for all models)
        3. Apply the model to the test split 
        4. (*Optional*) Perform hyper-parametric search
        5. Record the model evaluation metrics
        6. Repeat with a new train/test split

In [None]:
# Whole/subset data selection

# Option A (disabled): quick experiments on a small slice -- uncomment to use
#train_subset = train[0:10000]
#valid_subset = valid[0:2000]

# Option B (active): use the complete training and validation frames
train_subset, valid_subset = train, valid

In [None]:
# Create train/test splits from the non-validation dataset
# One 3-fold splitter shared by all candidate models, so every model is
# cross-validated on the same folds of `train_subset`
kfold = KFold(
    shuffle=False,  # the rows were already shuffled when the validation set was split off
    n_splits=3,
)

In [None]:
# digit_vs_letter Classifier: RandomForest
task = 'digit_vs_letter'
model_name = 'random_forest'

# Random forest with 100 trees and a fixed seed
rf_clf = RandomForestClassifier(n_estimators=100, random_state=42)

# Cross-validate on the flattened images with the shared k-fold splitter
cvscore = cross_validate(
    rf_clf,
    train_subset['image_flat'].tolist(),
    train_subset['isletter'],
    scoring=('accuracy', 'precision', 'recall', 'f1'),
    cv=kfold,
    n_jobs=-2,
    return_indices=False,
)
# Per-fold test scores of each metric
acc, prec, rec, f1 = (
    cvscore[f'test_{metric}'] for metric in ('accuracy', 'precision', 'recall', 'f1')
)

# Store performance metrics in dictionary
candidate_metrics_dict[task][model_name] = dict(accuracy=acc, precision=prec, recall=rec, f1=f1)

In [None]:
# digit_vs_letter Classifier: Logistic Regression
from sklearn.pipeline import make_pipeline

task = 'digit_vs_letter'
model_name = 'logistic_regression'

# Initialize logistic regression classifier
lr_clf = LogisticRegression(max_iter=1000, random_state=42)

# Scale the data
# When running without scaling the data, the model does not converge.
# The scaler is part of a pipeline so that it is re-fit on the training part
# of every fold; fitting it on all rows up front would leak statistics of the
# held-out fold into training.
lr_pipeline = make_pipeline(StandardScaler(), lr_clf)

# Train and evaluate model: perform k-fold cross-validation
cvscore = cross_validate(lr_pipeline,
                         train_subset['image_flat'].tolist(),
                         train_subset['isletter'],
                         scoring=('accuracy', 'precision', 'recall', 'f1'),
                         cv=kfold, n_jobs=-2, return_indices=False)
acc = cvscore['test_accuracy']
prec = cvscore['test_precision']
rec = cvscore['test_recall']
f1 = cvscore['test_f1']

# Store performance metrics in dictionary
candidate_metrics_dict[task][model_name] = {'accuracy': acc,
                                            'precision': prec,
                                            'recall': rec,
                                            'f1': f1}

In [None]:
# digit_vs_letter Classifier: XGBoost
task = 'digit_vs_letter'
model_name = 'xgboost'

# XGBoost classifier with 100 estimators and a fixed seed
xgb_clf = XGBClassifier(n_estimators=100, random_state=42)

# Cross-validate on the flattened images with the shared k-fold splitter
cvscore = cross_validate(
    xgb_clf,
    train_subset['image_flat'].tolist(),
    train_subset['isletter'],
    scoring=('accuracy', 'precision', 'recall', 'f1'),
    cv=kfold,
    n_jobs=-2,
    return_indices=False,
)
# Per-fold test scores of each metric
acc, prec, rec, f1 = (
    cvscore[f'test_{metric}'] for metric in ('accuracy', 'precision', 'recall', 'f1')
)

# Store performance metrics in dictionary
candidate_metrics_dict[task][model_name] = dict(accuracy=acc, precision=prec, recall=rec, f1=f1)

In [None]:
# digit vs letter Classifier: Neural Network
task = 'digit_vs_letter'
model_name = 'neural_network'

# Set random seed for reproducibility
tf.random.set_seed(42)

# Convert data to tensor
train_images = np.array(train_subset['image'])
train_images = np.array(list(map(lambda x: np.reshape(x, (28, 28, 1)), train_images)))
train_images = train_images / 255.0
train_type = np.array(train_subset['isletter'])

# Initialize neural network model
def build_nn_model():
    """Build and compile the binary digit/letter network.

    Architecture: 28x28x1 input -> flatten -> 128-unit ReLU layer -> single
    sigmoid output, trained with binary cross-entropy and the Adam optimizer.
    """
    model = Sequential([
        keras.layers.InputLayer(input_shape=(28, 28, 1)),
        keras.layers.Flatten(),
        keras.layers.Dense(128, activation='relu'),
        keras.layers.Dense(1, activation='sigmoid')])
    model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])
    return model

# Wrap the Keras model inside a scikit-learn compatible wrapper.
# SciKeras takes the model-building callable through `model`; `build_fn` is the
# deprecated spelling of the same argument.
keras_wrapped_nnmodel = KerasClassifier(model=build_nn_model, epochs=5, verbose=1)

# Train and evaluate model: perform k-fold cross-validation
cvscore = cross_validate(keras_wrapped_nnmodel, train_images, train_type,
                         scoring=('accuracy', 'precision', 'recall', 'f1'),
                         cv=kfold, verbose=1, return_indices=False)

acc = cvscore['test_accuracy']
prec = cvscore['test_precision']
rec = cvscore['test_recall']
f1 = cvscore['test_f1']

# Store performance metrics in dictionary
candidate_metrics_dict[task][model_name] = {'accuracy': acc,
                                            'precision': prec,
                                            'recall': rec,
                                            'f1': f1}

In [None]:
# Show the per-fold tables of every candidate model and the summary of fold means
display_candidate_model_metrics(task=task, candidate_metrics_dict=candidate_metrics_dict)

### 6. Promote winner, apply model to validation set: 
**Random forest model** has the <u>highest recall</u>

In [None]:
# Promote the winner: fit on the training subset, then score on the reserved validation subset

# digit_vs_letter Classifier: RandomForest
task = 'digit_vs_letter_final_model'
model_name = 'random_forest'

# Same configuration as the cross-validated random forest candidate
rf_cl_dvl = RandomForestClassifier(n_estimators=100, random_state=42)

# Train on the flattened training images and predict the validation images
rf_cl_dvl.fit(train_subset['image_flat'].tolist(), train_subset['isletter'])
y_pred = rf_cl_dvl.predict(valid_subset['image_flat'].tolist())

# Compare the predictions with the true digit/letter flags
y_true = valid_subset['isletter']
acc = accuracy_score(y_true, y_pred)
prec = precision_score(y_true, y_pred)
rec = recall_score(y_true, y_pred)
f1 = f1_score(y_true, y_pred)
cm = confusion_matrix(y_true, y_pred)

# Store evaluation metrics in dictionary: task -> model -> metric name -> value
final_model_metrics = {
    task: {
        model_name: {
            'accuracy': acc,
            'precision': prec,
            'recall': rec,
            'f1': f1,
            'confusion_matrix': cm,
        }
    }
}

### 7. (*Optional*) Perform hyper-parametric search, if applicable
N/A

### 8. Report model performance

In [None]:
# Display performance metrics and confusion matrix for the final model.
# Array-valued entries are shown as the labeled 2x2 confusion matrix, all other
# entries become columns of a one-row metrics table.
metrics_df = pd.DataFrame()
cm_df = pd.DataFrame()
for key, value in final_model_metrics[task][model_name].items():
    # isinstance also accepts ndarray subclasses, unlike an exact type comparison
    if isinstance(value, np.ndarray):
        cm_df = pd.DataFrame(value, index=['actual digit', 'actual letter'], columns=['predicted digit', 'predicted letter'])
    else:
        metrics_df[key] = [value]
display(Markdown(f'# Performance Metrics: {model_name}'))
display(metrics_df)
display(Markdown(f'# Confusion Matrix: {model_name}'))
display(cm_df)