<a href="https://colab.research.google.com/github/WDSEatBNL/Intro-to-Machine-Learning-and-AI-Files/blob/master/Machine_Learning_Type_in_Categories.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

Import the libraries that we need to train and test a neural network

In [None]:
import tensorflow as tf
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.model_selection import train_test_split
from sklearn.metrics import confusion_matrix
from skimage import io
import os
import ipywidgets as widgets
from IPython.display import display, Image, clear_output

Load our image files from GitHub

In [None]:
# IPython shell escape: clone the workshop repository into the current working
# directory. The labeling cell below reads images from
# /content/Intro-to-Machine-Learning-and-AI-Files/images, so this presumably
# has to run from /content (assumed to be the notebook's working directory).
!git clone https://github.com/WDSEatBNL/Intro-to-Machine-Learning-and-AI-Files

Read the files from the "images" folder (found in "/content/Intro-to-Machine-Learning-and-AI-Files") and ask the user to name the images based on the categories from their card game.

***Instructions:*** After running the cell below, you will see an image, an input box, and a submit button. Enter your label in the input box and click 'Submit Label' to proceed to the next image.

In [None]:
# Size (in pixels) every image is resized to when loaded, and the number of
# images per batch; both are reused by the dataset and model cells.
IMG_HEIGHT = 288
IMG_WIDTH = 288
BATCH_SIZE = 32

train_data_dir = r'/content/Intro-to-Machine-Learning-and-AI-Files/images'

# os.listdir() returns entries in arbitrary order, so sort them: the images are
# then presented for labeling in the same alphabetical order on every run.
# Only files with a recognised image extension (case-insensitive) are kept.
image_files = sorted(
    f for f in os.listdir(train_data_dir)
    if f.lower().endswith(('.png', '.jpg', '.jpeg', '.gif', '.bmp'))
)

# Labeling state shared with the widget callbacks below:
# filename -> label typed by the user, and the position of the image being shown.
collected_image_labels = {}
current_image_index = 0

# Widgets that make up the labeling UI.
image_widget = widgets.Image(width=300)
label_input = widgets.Text(description='Label:')
submit_button = widgets.Button(description='Submit Label')
output_area = widgets.Output()

def display_next_image():
    """Refresh the labeling UI for the image at ``current_image_index``.

    While images remain, load the current file's bytes into ``image_widget``,
    reset the text box, and print a progress line in ``output_area``.
    Once the index has moved past the last image, print every collected
    label and disable the input box and the submit button.
    """
    total_images = len(image_files)

    # Nothing left to label: show the summary and lock the controls.
    if current_image_index >= total_images:
        with output_area:
            clear_output(wait=True)
            print("Labeling complete!")
            print("Collected labels:")
            for image_name, assigned_label in collected_image_labels.items():
                print(f"  {image_name}: {assigned_label}")
        submit_button.disabled = True
        label_input.disabled = True
        return

    image_name = image_files[current_image_index]
    image_path = os.path.join(train_data_dir, image_name)

    # The Image widget takes the raw encoded bytes of the file.
    with open(image_path, 'rb') as image_file:
        raw_bytes = image_file.read()
    image_widget.value = raw_bytes

    label_input.value = ''  # Clear previous input
    label_input.placeholder = f"Enter label for {image_name}"

    with output_area:
        clear_output(wait=True)
        print(f"Labeling image {current_image_index + 1}/{total_images}: {image_name}")

def on_submit_button_clicked(b):
    """Handle a click on the submit button.

    Store the stripped text from ``label_input`` as the label of the image
    currently shown, then advance to the next image. If the text box is
    empty (or whitespace only), print a reminder instead and stay put.

    Args:
        b: The button instance passed in by ipywidgets (unused).
    """
    global current_image_index

    image_name = image_files[current_image_index]
    entered_label = label_input.value.strip()

    # Reject empty submissions without advancing.
    if not entered_label:
        with output_area:
            print("Please enter a label before submitting.")
        return

    collected_image_labels[image_name] = entered_label
    current_image_index += 1
    display_next_image()

# Hook the click handler up before anything is shown.
submit_button.on_click(on_submit_button_clicked)

# Layout: image on top, then the text box next to the button, then messages.
input_row = widgets.HBox([label_input, submit_button])
labeling_panel = widgets.VBox([image_widget, input_row, output_area])
display(labeling_panel)

# Populate the panel with the first image (or the completion summary).
display_next_image()


Translate the category names into integers for TensorFlow to read

In [None]:
# Every image needs a label before it can be encoded. Fail early with an
# actionable message instead of a bare KeyError from the lookup further down.
unlabeled_files = sorted(f for f in image_files if f not in collected_image_labels)
if unlabeled_files:
    raise ValueError(
        f"{len(unlabeled_files)} image(s) have no label yet: {unlabeled_files}. "
        "Finish labeling every image in the cell above, then run this cell again."
    )

# Distinct label strings in sorted order; a label's position is its integer id.
all_string_labels = list(collected_image_labels.values())
class_names = sorted(set(all_string_labels))

label_to_index = {label: i for i, label in enumerate(class_names)}

# One integer label per image, listed in alphabetical filename order. The
# dataset cell passes this list as `labels=`, which Keras documents as having
# to follow the alphanumeric order of the image file paths.
sorted_image_filenames = sorted(image_files)
ordered_integer_labels = [label_to_index[collected_image_labels[filename]] for filename in sorted_image_filenames]

print(f"Unique class names (sorted): {class_names}")
print(f"Number of classes: {len(class_names)}")

Split the data into a training set and a validation set (set aside 20% of the images for validation), then apply integer labels to each image

In [None]:
# Fraction of the images held out for validation.
validation_split_percentage = 0.2

# Arguments common to both loader calls. Keeping them in one place ensures the
# training and validation subsets are cut from the same split (same seed,
# same labels, same split fraction) and are resized and batched identically.
shared_loader_args = dict(
    image_size=(IMG_HEIGHT, IMG_WIDTH),
    batch_size=BATCH_SIZE,
    labels=ordered_integer_labels,
    validation_split=validation_split_percentage,
    seed=42,
)

# NOTE(review): `shuffle` is left at its default for both datasets, so their
# iteration order is presumably not stable between passes - confirm before
# relying on element order downstream.
train_ds = tf.keras.utils.image_dataset_from_directory(
    train_data_dir,
    subset='training',
    **shared_loader_args,
)

val_ds = tf.keras.utils.image_dataset_from_directory(
    train_data_dir,
    subset='validation',
    **shared_loader_args,
)

Set up the model for training (normalize images, adjust settings for the model, and compile the model)

In [None]:
# Layer that multiplies every pixel value by 1/255.
normalization_layer = tf.keras.layers.Rescaling(1./255)


def _rescale_images(images, labels):
    """Apply the rescaling layer to the images and pass the labels through."""
    return normalization_layer(images), labels


norm_train_ds = train_ds.map(_rescale_images)
norm_val_ds = val_ds.map(_rescale_images)

# One output unit per distinct label the user entered.
num_classes = len(class_names)

# Two convolution + pooling stages, then a dense classifier head whose
# softmax output gives one probability per class.
layer_stack = [
    tf.keras.layers.Conv2D(
        32, 3, activation='relu',
        input_shape=(IMG_HEIGHT, IMG_WIDTH, 3),
    ),
    tf.keras.layers.MaxPooling2D(),
    tf.keras.layers.Conv2D(64, 3, activation='relu'),
    tf.keras.layers.MaxPooling2D(),
    tf.keras.layers.Flatten(),
    tf.keras.layers.Dense(128, activation='relu'),
    tf.keras.layers.Dense(num_classes, activation='softmax'),
]
model = tf.keras.Sequential(layer_stack)

# Sparse categorical cross-entropy takes the integer class ids directly.
model.compile(
    optimizer='adam',
    loss=tf.keras.losses.SparseCategoricalCrossentropy(),
    metrics=['accuracy'],
)

Train the model and assign a category name to each image in the validation set (`epochs` is the number of full passes the model makes over the training images; accuracy on the validation set is checked after each pass)

In [None]:
# Number of full passes over the training set.
epochs = 10
history = model.fit(norm_train_ds, validation_data=norm_val_ds, epochs=epochs)

# Collect predictions and true labels in ONE pass over the validation set.
# The validation dataset was created with shuffling enabled (the Keras
# default), so each new iteration yields the images in a different order.
# Calling model.predict() on the dataset and then looping over it again for
# the labels would therefore pair predictions with the wrong labels.
batch_predictions = []
actual_classes = []
for images, labels in norm_val_ds:
    batch_predictions.append(model.predict_on_batch(images))
    actual_classes.append(labels.numpy())

# Class probabilities, one row per validation image.
prediction = np.concatenate(batch_predictions, axis=0)
concatenated_labels = np.concatenate(actual_classes, axis=0)

# Most probable class per image, as an integer id and as the user's label.
predicted_classes = np.argmax(prediction, axis=1)
predicted_class_names = [class_names[i] for i in predicted_classes]
actual_class_names = [class_names[i] for i in concatenated_labels]

Show each validation image with its predicted category

In [None]:
# Show the first validation batch, each image titled with its predicted label.
for images, labels in val_ds.take(1):
    # Predict on exactly the batch being displayed. The validation dataset is
    # reshuffled on every iteration, so predictions computed during an earlier
    # pass would not line up with the images drawn here.
    batch_probabilities = model.predict_on_batch(normalization_layer(images))
    batch_class_names = [class_names[i] for i in np.argmax(batch_probabilities, axis=1)]

    # Size the grid from the batch instead of assuming at most 12 images.
    n_cols = 3
    n_rows = -(-len(batch_class_names) // n_cols)  # ceiling division
    fig = plt.figure(figsize=(10, 2.5 * n_rows))

    for i, predicted_name in enumerate(batch_class_names):
        ax = plt.subplot(n_rows, n_cols, i + 1)
        # val_ds holds the un-normalized images, so cast to uint8 for display.
        plt.imshow(images[i].numpy().astype("uint8"))
        plt.title(predicted_name)
        plt.axis("off")
    plt.tight_layout()
    plt.show()

Print out the accuracy of the model in the form of percent correctly identified

In [None]:
# Count the positions where the predicted class id equals the true class id.
# NOTE(review): this is only meaningful if both arrays list the validation
# images in the same order - confirm against the cell that builds them.
num_correct = np.sum(concatenated_labels == predicted_classes)
num_predictions = len(predicted_classes)
print('Percentage correct: ', 100*num_correct/num_predictions)