In [4]:
import pandas as pd
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras.applications.resnet50 import ResNet50, preprocess_input, decode_predictions
from tensorflow.keras.preprocessing import image
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from keras.models import Sequential
from keras.layers import Conv2D, MaxPooling2D, Flatten, Dense, Dropout
from keras.regularizers import l2
from keras.optimizers import Adam
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix
import os
import numpy as np
import matplotlib.pyplot as plt

In [5]:
images_dir = "training/"
labels = pd.read_csv("training_labels.csv")

# add the directory to the filename
labels['ID'] = labels['ID'].apply(lambda x: os.path.join(images_dir, x))

# Initialize the ImageDataGenerator
# You can change the size of the validation split (0.25 is 25% of data used as validation set)
datagen = ImageDataGenerator(rescale=1./255, validation_split=0.25)

train_datagen = ImageDataGenerator(
    rescale=1./255,
    validation_split=0.25,
    rotation_range=10,
    width_shift_range=0.2,
    height_shift_range=0.2,
    shear_range=0.2,
    zoom_range=0.2,
    horizontal_flip=True,
    fill_mode='nearest'
)

# Create the training and validation generators
train_generator = train_datagen.flow_from_dataframe(
    dataframe=labels,
    directory=None,  
    x_col='ID',
    y_col='target',
    target_size=(224, 224), # You can change the size of the image
    batch_size=32, # You can change the batch_size
    class_mode='categorical',  
    subset='training'
)

validation_generator = datagen.flow_from_dataframe(
    dataframe=labels,
    directory=None,  
    x_col='ID',
    y_col='target',
    target_size=(224, 224), # Should match training size
    batch_size=32, # Should match training
    class_mode='categorical',  
    subset='validation'
)

Found 2591 validated image filenames belonging to 5 classes.
Found 863 validated image filenames belonging to 5 classes.


### ONLY RUN THIS TO SEE THE PICTURES. SKIP IT OTHERWISE

In [None]:
## Plot a few of the images

# Fetch a batch of images and their labels
images, labels = next(train_generator)

# Number of images to show
num_images = 8

plt.figure(figsize=(20, 10))
for i in range(num_images):
    ax = plt.subplot(2, 4, i + 1)
    plt.imshow(images[i])
    # The label for current image
    label_index = labels[i].argmax()  # Convert one-hot encoding to index
    label = list(train_generator.class_indices.keys())[label_index]  # Get label name from index
    plt.title(label)
    plt.axis('off')
plt.show()


### Training Code

In [7]:
base_mod = ResNet50(weights='imagenet')

# Freeze the base model layers
for layer in base_mod.layers[-5:]:
    layer.trainable = True

# Create the model
model = Sequential([
    base_mod,
    Flatten(),
    Dense(128, activation='relu', kernel_regularizer = l2(0.01)),
    Dropout(0.5),
    Dense(len(labels['target'].unique()), activation='softmax')  # Number of classes
])

# Compile the model
model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])

# Train the model
model.fit(
    train_generator,
    validation_data= validation_generator, 
    epochs = 10
)

Epoch 1/10
[1m81/81[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m606s[0m 7s/step - accuracy: 0.3362 - loss: 2.7462 - val_accuracy: 0.2317 - val_loss: 1.6296
Epoch 2/10
[1m81/81[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m540s[0m 7s/step - accuracy: 0.4429 - loss: 1.4742 - val_accuracy: 0.1587 - val_loss: 1.7674
Epoch 3/10
[1m53/81[0m [32m━━━━━━━━━━━━━[0m[37m━━━━━━━[0m [1m2:58[0m 6s/step - accuracy: 0.4222 - loss: 1.3760


KeyboardInterrupt



### Test Data!

In [None]:
test_images_dir = "test/"  # Directory for test images
test_labels = pd.read_csv("test_labels.csv")  # CSV with test image IDs

# Update paths for test images
test_labels['ID'] = test_labels['ID'].apply(lambda x: os.path.join(test_images_dir, x))

# Initialize the ImageDataGenerator for test data (no validation split or augmentation)
test_datagen = ImageDataGenerator(rescale=1./255)

# Create the test generator
test_generator = test_datagen.flow_from_dataframe(
    dataframe=test_labels,
    directory=None,
    x_col='ID',
    y_col=None,  # No labels for test data
    target_size=(224, 224),  # Same size as training
    batch_size=32,
    class_mode=None,  # Only images, no labels
    shuffle=False  # Keep order for mapping with IDs
)

# Mapping class indices back to class labels
class_labels = {v: k for k, v in train_generator.class_indices.items()}

# Generate predictions
predictions = model.predict(test_generator)
predicted_indices = np.argmax(predictions, axis=1)  # Get the class indices with the highest probability

# Convert indices to class labels
predicted_labels = [class_labels[idx] for idx in predicted_indices]

# Prepare predictions for CSV
output = pd.DataFrame({
    'ID': test_labels['ID'].apply(lambda x: os.path.basename(x)),  # Get just the filename
    'Prediction': predicted_labels
})

# Save to CSV
output.to_csv("predictions.csv", index=False)