<a href="https://colab.research.google.com/github/ashsProjects/Handwriting_to_3DPrinted_Braille/blob/main/rf_model.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import numpy as np
import pandas as pd
import tensorflow as tf
import matplotlib.pyplot as plt
from PIL import Image
from tensorflow import keras
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder

from sklearn.ensemble import RandomForestClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score, precision_recall_fscore_support, confusion_matrix, classification_report

In [None]:
from google.colab import drive

# Attach Google Drive to the Colab VM; the Kaggle credentials file is copied
# from a folder below this mount point in a later cell.
drive_mount_point = "/content/drive"
drive.mount(drive_mount_point)

Mounted at /content/drive


In [None]:
# Drive folder that holds kaggle.json (the original value started with a stray double slash).
kaggle_creds_path = "/content/drive/MyDrive/CS370_Project/"

In [None]:
! pip install kaggle --quiet

In [None]:
! mkdir ~/.kaggle
! cp /content/drive/MyDrive/CS370_Project/kaggle.json ~/.kaggle/
! chmod 600 ~/.kaggle/kaggle.json

In [None]:
# Downloading dataset
# Fetches the crawford/emnist dataset with the Kaggle CLI into /usr/local and
# unzips it there; the CSV and mapping files read in later cells come from this step.
# NOTE(review): presumably relies on the kaggle.json copied to ~/.kaggle in the previous cell - confirm.
! kaggle datasets download -d crawford/emnist --path '/usr/local' --unzip

Downloading emnist.zip to /usr/local
 99% 1.23G/1.24G [00:17<00:00, 96.9MB/s]
100% 1.24G/1.24G [00:17<00:00, 77.3MB/s]


In [None]:
# Load the EMNIST "balanced" train and test CSV files.
# header=None: the first line of each file is read as data, not as column names.
train_df = pd.read_csv('/usr/local/emnist-balanced-train.csv', header=None)
test_df = pd.read_csv('/usr/local/emnist-balanced-test.csv', header=None)

(test_df.shape, train_df.shape)

((18800, 785), (112800, 785))

In [None]:
# Stack the test rows and the train rows into one frame with a fresh 0..n-1 index;
# the combined data is re-split into train/test further down.
frames_to_merge = [test_df, train_df]
df = pd.concat(frames_to_merge, ignore_index=True)
df.shape

NameError: ignored

In [None]:
# Read the label mapping file: each line is "<class id> <ASCII code>".
# The first column becomes the index, the remaining column holds the ASCII codes.
label_map = pd.read_csv("/usr/local/emnist-balanced-mapping.txt", delimiter = ' ', index_col = 0, header = None)
label_map = label_map.iloc[:, 0]

# Map class id -> character. The class id is taken from the file itself (the
# Series index) instead of the row position, so the mapping stays correct even
# if the file is not ordered 0..n-1.
label_dict = {int(class_id): chr(ascii_code) for class_id, ascii_code in label_map.items()}

#Visualize labels
#label_dict

In [None]:
# Preprocessing: clean the merged frame, then re-count each problem on the
# cleaned data as a sanity check.

# Rows containing any missing value are dropped
df = df.dropna()
num_missing_values = df.isnull().sum().sum()

# Exact duplicate rows are dropped (the first occurrence is kept)
df = df.drop_duplicates()
num_duplicates = df.duplicated().sum()

# Every value is limited to the 0-255 grayscale range
# (this is applied to the whole frame, label column 0 included)
df = df.clip(lower=0, upper=255)
out_of_range_mask = df.lt(0) | df.gt(255)
num_values_out_range = out_of_range_mask.sum().sum()

# Report the post-cleaning counts
print(f"# of missing values: {num_missing_values}")
print(f"# of duplicates: {num_duplicates}")
print(f"# of values outside of 0 - 255: {num_values_out_range}")

In [None]:
# Column 0 holds the class label; the remaining columns are the pixel values.
# (The frames were read with header=None, so column labels equal column positions.)
y = df.iloc[:, 0]
x = df.iloc[:, 1:]

# Shapes of features and labels
(x.shape, y.shape)

In [None]:
#Looking at sample image
# Row 50 as stored in the CSV, i.e. before the flip/rotate step applied later on.
sample_img = x.iloc[50]
sample_label = y.iloc[50]

plt.imshow(sample_img.values.reshape(28, 28), cmap = plt.cm.gray)
plt.title(f"Class id: {sample_label}")
# plt.show must be called; a bare `plt.show` only evaluates to the function object.
plt.show()

In [None]:
def flip_rotate(image):
  """Reshape a flat 784-element image to 28x28, mirror it and rotate it.

  The 28x28 grid is flipped left-to-right and then rotated by 90 degrees with
  np.rot90 (counter-clockwise by default). The net effect of the two steps is
  that rows and columns are swapped.

  Args:
    image: array with 784 elements that supports ``reshape``.

  Returns:
    The reoriented 28x28 array.
  """
  grid = image.reshape(28, 28)
  return np.rot90(np.fliplr(grid))

In [None]:
# Pixel columns as a plain NumPy array (one flat 784-pixel image per row)
xnp = x.to_numpy()
xnp.shape

In [None]:
#Reshaping images
# Every row is a flat 784-pixel image. Reshape to (n, 28, 28) and swap the two
# image axes in one vectorized step; this gives the same values as calling
# flip_rotate on each row (flip left-right + rot90 == transpose) without a
# Python-level loop over all rows.
# Starting from `x` instead of overwriting `xnp` with a function of itself
# keeps the cell re-runnable.
xnp = np.ascontiguousarray(np.asarray(x).reshape(-1, 28, 28).transpose(0, 2, 1))
xnp.shape

In [None]:
#Looking at sample image
# Same grayscale colormap as the earlier sample plot so both figures are comparable.
plt.imshow(xnp[400], cmap = plt.cm.gray)
plt.title("Sample 400 after flip/rotate")
plt.show()

In [None]:
# Collapse each image back into a single feature vector: one row per image
num_images = xnp.shape[0]
xf = xnp.reshape(num_images, -1)
xf.shape

In [None]:
# Split the data into a training set and a held-out test set.
# 30% of the samples go to the test set, stratified by label. No separate
# validation set is created in this cell.
# random_state is fixed so the split is identical on every run while testing;
# remove it when done.
x_train, x_test, y_train, y_test = train_test_split(
  xf,
  y,
  test_size=0.3,
  random_state=50,
  stratify=y,
)

(x_train.shape, x_test.shape)

In [None]:
# Prepare the target labels.
# The scikit-learn classifiers trained below expect one integer class id per
# sample. One-hot encoded targets would make them fit 47 independent binary
# outputs (a multilabel problem) instead of a single 47-way classifier, so the
# labels are kept as 1-D integer arrays.
# EMNIST Balanced has 47 classes: 10 digits, 26 uppercase and 11 lowercase letters.
NUM_CLASSES = 47
y_train = np.asarray(y_train).astype(int)
y_test = np.asarray(y_test).astype(int)

# Sanity check: 1-D label vectors with ids in [0, NUM_CLASSES)
assert y_train.ndim == 1 and y_test.ndim == 1
assert min(y_train.min(), y_test.min()) >= 0
assert max(y_train.max(), y_test.max()) < NUM_CLASSES

In [None]:
# Random forest baseline.
# A fixed random_state makes the fitted forest, and therefore the reported
# metrics, reproducible between runs (same seed as the train/test split).
# n_jobs=-1 builds the trees on all available cores.
rf_model = RandomForestClassifier(n_estimators=100, random_state=50, n_jobs=-1)

# Train the model on the training set
rf_model.fit(x_train, y_train)

# Predict the labels of the held-out test set
y_pred_rf = rf_model.predict(x_test)

# Calculate accuracy
accuracy_rf = accuracy_score(y_test, y_pred_rf)

# Calculate precision, recall, and f1 score (averaged over classes, weighted by support)
precision_rf, recall_rf, f1_rf, _ = precision_recall_fscore_support(y_test, y_pred_rf, average='weighted')

# Print the evaluation metrics
print("Accuracy:", accuracy_rf)
print("Precision:", precision_rf)
print("Recall:", recall_rf)
print("F1 Score:", f1_rf)

In [None]:
# k-nearest-neighbours baseline with k = 5, fitted on the training set
knn_model = KNeighborsClassifier(n_neighbors=5)
knn_model.fit(x_train, y_train)

# Predictions for the held-out test set
y_pred_knn = knn_model.predict(x_test)

# Accuracy plus support-weighted precision / recall / F1
accuracy_knn = accuracy_score(y_test, y_pred_knn)
knn_prf = precision_recall_fscore_support(y_test, y_pred_knn, average='weighted')
precision_knn, recall_knn, f1_knn = knn_prf[:3]

# Report the evaluation metrics
knn_report = (
  ("Accuracy:", accuracy_knn),
  ("Precision:", precision_knn),
  ("Recall:", recall_knn),
  ("F1 Score:", f1_knn),
)
for metric_name, metric_value in knn_report:
  print(metric_name, metric_value)

In [None]:
#Convert model to TF Lite
# TFLiteConverter.from_keras_model only accepts tf.keras models. rf_model is
# created above as a scikit-learn RandomForestClassifier, so fail early with an
# explicit message instead of an obscure error from inside the converter.
if not isinstance(rf_model, tf.keras.Model):
  raise TypeError(
    "tf.lite.TFLiteConverter.from_keras_model() needs a tf.keras.Model, got "
    f"{type(rf_model).__name__}; scikit-learn estimators cannot be converted with this API."
  )

converter = tf.lite.TFLiteConverter.from_keras_model(rf_model)
# Default optimizations enable post-training quantization of the converted model
converter.optimizations = [tf.lite.Optimize.DEFAULT]
tflite_quant_model = converter.convert()

In [None]:
# Write the converted model to a .tflite file under /tmp
import logging
import pathlib

# Output directory; created together with any missing parents, and an already
# existing directory is not an error
tflite_models_dir = pathlib.Path("/tmp/CS370_Project_tflite_models/")
tflite_models_dir.mkdir(parents=True, exist_ok=True)

# write_bytes returns the number of bytes written, which is shown as the cell output
tflite_model_file = tflite_models_dir / "CS370_Project.tflite"
tflite_model_file.write_bytes(tflite_quant_model)

In [None]:
#Download file
import shutil

from google.colab import files

# Copy the exported model into the working directory under the download name.
# Previously a new text file containing only the literal file name was created
# and downloaded, so the model bytes never reached the browser.
download_name = 's_CS370_Project.tflite'
shutil.copyfile(tflite_model_file, download_name)

files.download(download_name)

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

Datasets

https://www.kaggle.com/datasets/crawford/emnist

References
https://www.kaggle.com/code/khadijatagui/htr-character-recognition-using-machine-learning