In [1]:
# Importing libraries
import os
os.environ['TF_USE_LEGACY_KERAS']='1'
import random
from PIL import Image, ImageOps
import numpy as np
import random
import matplotlib.pyplot as plt
import tensorflow as tf
import pandas as pd
import sklearn
from sklearn.model_selection import train_test_split
from tensorflow import keras
import tensorflow_hub as hub
import datetime

# Path to dataest
folder_path = "C:/Users/Student/Desktop/IzzyL/Projects/MusicImages/dataset"
# List of classes
subfolders = os.listdir(folder_path)

# Variables
image_arrays = [] # The numerical data of the images
image_labels = [] # The labels for each image
img_rows, img_cols = 224, 224 # The desired size of each image

# Finds images of each instrument
for instrument in subfolders:
    images_names = os.path.join(folder_path, instrument)
    images = os.listdir(images_names)
    # Processes the images
    for file in images:
        image_label = instrument # Uses the folder name to append the label
        image_labels.append(image_label)
        image_path = os.path.join(folder_path, instrument, file) # Finds the path to each image
        img = Image.open(image_path) # Opens the image from the path
        padding_color = (255, 255, 255) # Sets the padding color to white
        width, height = img.size
        if width > 224:
            img = img.resize((224, 224), Image.Resampling.LANCZOS)
            if height > 224:
                img = img.resize((224, 224), Image.Resampling.LANCZOS)
            else:
                img = ImageOps.pad(img, (img_rows, img_cols), color=padding_color)
        else:
            img = ImageOps.pad(img, (img_rows, img_cols), color=padding_color) # Pads the image so they're all the same size
        image_array = np.array(img) # Converts images to numerical arrays
        if image_array.shape == (224, 224, 3):
            image_arrays.append(image_array) # Appends array to list
        else:
            pass
            
image_labels.pop(101)

image_labels = np.array(image_labels) # Converts labels to numpy array

print(len(image_arrays))
print(len(image_labels))

data = pd.DataFrame(zip(image_arrays, image_labels), columns=['Arrays', 'Labels']) # Creates dataframe from arrays and labels

num_classes = 4 # Number of classes
input_shape = (img_rows, img_cols, 3) # Sets the input shape for the model

y = data['Labels'] # Sets the variable for the labels

X = np.stack(data['Arrays'].values) # Consolidates training images
X = X.astype('float32') / 255.0 # Converts images to float32 and makes each value between 0 and 1
y = pd.get_dummies(y) # One-hot encodes labels

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.7, random_state=42) # Splits data into train and test

# Data augmentation

train_datagen = tf.keras.preprocessing.image.ImageDataGenerator(
        rotation_range = 60,
        shear_range = 0.4,
        zoom_range = 0.4,
        horizontal_flip = True,
        vertical_flip = True,
        brightness_range = (0.5, 1.5))

n_aug = 10

augmented_images = []
augmented_labels = []

for i in range(len(X_train)):
    for _ in range(n_aug):
        augmented = train_datagen.random_transform(X_train[i])
        augmented_images.append(augmented)
        augmented_labels.append(y_train.iloc[i])

augmented_images = np.array(augmented_images)
augmented_labels = np.array(augmented_labels)

X_combined = np.concatenate([X_train, augmented_images], axis=0)
y_combined = np.concatenate([y_train, augmented_labels], axis=0)

X_combined = tf.convert_to_tensor(X_combined, dtype=tf.float32)
y_combined = tf.convert_to_tensor(y_combined, dtype=tf.float32)
X_test = tf.convert_to_tensor(X_test, dtype=tf.float32)
y_test = tf.convert_to_tensor(y_test, dtype=tf.float32)

# Making the model

INPUT_SHAPE = [None, img_rows, img_cols, 3]
OUTPUT_SHAPE = 4
MODEL_URL = "https://kaggle.com/models/google/mobilenet-v2/TensorFlow2/130-224-classification/1"

def create_model():
    input_layer = tf.keras.Input(shape=(224, 224, 3))
    feature_extractor = hub.KerasLayer(
        "https://kaggle.com/models/google/mobilenet-v2/TensorFlow2/130-224-classification/1",
        trainable=False
    )(input_layer)
    output_layer = tf.keras.layers.Dense(4, activation="softmax")(feature_extractor)

    model = tf.keras.Model(inputs=input_layer, outputs=output_layer)

    model.compile(
        loss="categorical_crossentropy",
        optimizer="adam",
        metrics=["accuracy"]
    )

    return model

# Callback

%load_ext tensorboard

def create_tensorboard_callback():
    logdir = "C:/Users/Student/Desktop/IzzyL/Projects/MusicImages/logdir"
    datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S")
    return tf.keras.callbacks.TensorBoard(logdir)

early_stopping = tf.keras.callbacks.EarlyStopping(monitor="val_accuracy", patience=3)

# Training the model

def train_model():
    model = create_model()
    tensorboard = create_tensorboard_callback()
    model.fit(X_combined, y_combined, epochs=100, validation_data=(X_test, y_test), validation_freq=1, callbacks=[tensorboard, early_stopping])

    return model

model = train_model()

# Evaluating the model

test_loss, test_acc = model.evaluate(X_test, y_test, verbose=2)
scores = model.evaluate(X_test, y_test, verbose=0)

print("Test accuracy: ", scores[1])

# Saving the model
model.save("inst-classification.keras")

%tensorboard --logdir C:/Users/Student/Desktop/IzzyL/Projects/MusicImages/logdir

  from pkg_resources import parse_version


101
101
Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
3/3 - 1s - loss: 0.2582 - accuracy: 0.9155 - 814ms/epoch - 271ms/step
Test accuracy:  0.9154929518699646


Reusing TensorBoard on port 6006 (pid 17392), started 4:28:42 ago. (Use '!kill 17392' to kill it.)