# In [None]:  (notebook cell marker, kept as a comment so the file parses as Python)
import tensorflow as tf
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras import layers, models, optimizers
import matplotlib.pyplot as plt
import numpy as np
import os

# Locations of the image folders used below.
train_dir = '/content/train'
test_dir = '/content/test'

# Input geometry and training hyper-parameters.
IMG_HEIGHT = 180
IMG_WIDTH = 180
BATCH_SIZE = 32
EPOCHS = 20
NUM_CLASSES = 9

# Step 1: Dataset Creation and Preprocessing
# One generator serves both subsets: it multiplies every pixel by 1/255 and
# holds back 20% of the images found under train_dir for validation.
train_datagen = ImageDataGenerator(validation_split=0.2, rescale=1.0 / 255)

# Options shared by the training and validation iterators.
_flow_options = dict(
    target_size=(IMG_HEIGHT, IMG_WIDTH),
    batch_size=BATCH_SIZE,
    class_mode='categorical',
)

train_dataset = train_datagen.flow_from_directory(
    train_dir, subset='training', **_flow_options
)

val_dataset = train_datagen.flow_from_directory(
    train_dir, subset='validation', **_flow_options
)

# Step 2: Visualize one instance of each class
def visualize_classes(dataset):
    """Show one example image for each class of a Keras directory iterator.

    The iterator is scanned for at most one epoch (``len(dataset)`` batches)
    and every sample of every batch is inspected, so the function always
    terminates even if some class has no images. Classes for which no image
    was found get an empty, titled panel.

    Args:
        dataset: Iterator as returned by ``flow_from_directory`` with
            ``class_mode='categorical'``. It must expose ``class_indices``
            (name -> integer index), support ``len()`` and yield
            ``(images, one_hot_labels)`` batches.
    """
    class_indices = dataset.class_indices
    num_classes = len(class_indices)

    # Size the grid from the number of classes instead of assuming nine;
    # nine classes still produce the original 3x3, 10x10-inch figure.
    n_cols = 3
    n_rows = max(1, -(-num_classes // n_cols))  # ceiling division
    fig, axes = plt.subplots(
        n_rows, n_cols, figsize=(10, 10 * n_rows / 3), squeeze=False
    )
    axes = axes.flatten()

    # Single bounded pass: the Keras iterator itself never raises
    # StopIteration, so cap the scan at one epoch worth of batches.
    examples = {}
    for _, (images, labels) in zip(range(len(dataset)), dataset):
        for image, label in zip(images, np.argmax(labels, axis=1)):
            # Keep only the first image seen for each class index.
            examples.setdefault(int(label), image)
        if len(examples) >= num_classes:
            break

    for ax, (class_name, class_index) in zip(axes, class_indices.items()):
        image = examples.get(class_index)
        if image is not None:
            ax.imshow(image)
        ax.set_title(class_name)

    # Hide ticks and frames on every panel, including unused grid cells.
    for ax in axes:
        ax.axis('off')
    plt.show()

# Preview one sample per class from the training iterator.
visualize_classes(dataset=train_dataset)

# Step 3: Model Building
def create_model(num_classes=None, input_shape=None):
    """Build and compile the CNN image classifier.

    The network is four Conv2D + MaxPooling2D stages (32, 64, 128 and 128
    filters, 3x3 kernels, ReLU) followed by a 512-unit dense layer and a
    softmax output. It is compiled with the Adam optimizer, categorical
    cross-entropy loss and the accuracy metric.

    Args:
        num_classes: Number of output classes. Defaults to the module-level
            ``NUM_CLASSES`` when omitted.
        input_shape: ``(height, width, channels)`` of the input images.
            Defaults to ``(IMG_HEIGHT, IMG_WIDTH, 3)`` when omitted.

    Returns:
        The compiled ``Sequential`` model.
    """
    # Resolve defaults at call time so the globals are read when the model is
    # built, exactly as the parameterless version did.
    if num_classes is None:
        num_classes = NUM_CLASSES
    if input_shape is None:
        input_shape = (IMG_HEIGHT, IMG_WIDTH, 3)

    model = models.Sequential([
        layers.Conv2D(32, (3, 3), activation='relu', input_shape=input_shape),
        layers.MaxPooling2D((2, 2)),

        layers.Conv2D(64, (3, 3), activation='relu'),
        layers.MaxPooling2D((2, 2)),

        layers.Conv2D(128, (3, 3), activation='relu'),
        layers.MaxPooling2D((2, 2)),

        layers.Conv2D(128, (3, 3), activation='relu'),
        layers.MaxPooling2D((2, 2)),

        layers.Flatten(),
        layers.Dense(512, activation='relu'),
        layers.Dense(num_classes, activation='softmax')
    ])

    model.compile(optimizer='adam',
                  loss='categorical_crossentropy',
                  metrics=['accuracy'])
    return model

# Build the baseline network and print its layer summary.
model = create_model()
model.summary()

# Step 4: Model Training
# Fit on the training iterator for EPOCHS epochs, evaluating on the
# validation iterator; the returned history feeds the plots below.
history = model.fit(
    x=train_dataset,
    validation_data=val_dataset,
    epochs=EPOCHS,
)

# Step 5: Plot training results
def plot_training(history):
    """Plot training vs. validation accuracy and loss side by side.

    Args:
        history: Object returned by ``model.fit``. Its ``history`` dict must
            contain the keys 'accuracy', 'val_accuracy', 'loss' and
            'val_loss'.
    """
    metrics = history.history
    acc = metrics['accuracy']
    val_acc = metrics['val_accuracy']
    loss = metrics['loss']
    val_loss = metrics['val_loss']

    # Take the x axis from the recorded history, not the global EPOCHS: a run
    # trained for a different number of epochs would otherwise give x and y
    # of different lengths, which matplotlib refuses to plot.
    epochs_range = range(len(acc))

    plt.figure(figsize=(8, 8))
    plt.subplot(1, 2, 1)
    plt.plot(epochs_range, acc, label='Training Accuracy')
    plt.plot(epochs_range, val_acc, label='Validation Accuracy')
    plt.legend(loc='lower right')
    plt.title('Training and Validation Accuracy')

    plt.subplot(1, 2, 2)
    plt.plot(epochs_range, loss, label='Training Loss')
    plt.plot(epochs_range, val_loss, label='Validation Loss')
    plt.legend(loc='upper right')
    plt.title('Training and Validation Loss')
    plt.show()

# Curves for the baseline run.
plot_training(history)

# Analyze results for signs of overfitting or underfitting
# If there is overfitting, we will move to data augmentation next

# Step 6: Data Augmentation to Reduce Overfitting
# Random geometric transforms applied to training images on the fly.
_augmentation_options = dict(
    rotation_range=20,
    width_shift_range=0.2,
    height_shift_range=0.2,
    shear_range=0.2,
    zoom_range=0.2,
    horizontal_flip=True,
    fill_mode='nearest',
)

train_datagen_augmented = ImageDataGenerator(
    rescale=1.0 / 255,
    validation_split=0.2,
    **_augmentation_options
)

# Iterator settings shared by the two subsets below.
_augmented_flow_options = dict(
    target_size=(IMG_HEIGHT, IMG_WIDTH),
    batch_size=BATCH_SIZE,
    class_mode='categorical',
)

train_dataset_augmented = train_datagen_augmented.flow_from_directory(
    train_dir, subset='training', **_augmented_flow_options
)

# Validation deliberately comes from the plain `train_datagen` (rescale only),
# so validation images are not randomly transformed.
val_dataset_augmented = train_datagen.flow_from_directory(
    train_dir, subset='validation', **_augmented_flow_options
)

# Re-train a fresh model with augmented data
model_augmented = create_model()
history_augmented = model_augmented.fit(
    x=train_dataset_augmented,
    validation_data=val_dataset_augmented,
    epochs=EPOCHS,
)

# Plot training results for augmented data
plot_training(history_augmented)

# Step 7: Class Distribution and Handling Class Imbalance
import subprocess
import sys
from collections import Counter

import pandas as pd

# Count training images per class, keyed by class name rather than by the
# integer label index so the printed table is readable.
index_to_name = {
    index: name
    for name, index in train_dataset_augmented.class_indices.items()
}
class_counts = Counter(
    index_to_name[int(label)] for label in train_dataset_augmented.classes
)
class_distribution = pd.DataFrame.from_dict(class_counts, orient='index', columns=['Count'])
print("Class Distribution:")
print(class_distribution)

# Handle class imbalance with the `Augmentor` library.
# Install it on demand with plain Python; a `!pip` shell magic only works
# inside IPython/Jupyter and is a syntax error elsewhere.
try:
    import Augmentor
except ImportError:
    subprocess.check_call([sys.executable, "-m", "pip", "install", "Augmentor"])
    import Augmentor

# Augment the class with the fewest training samples (example for one class).
least_class_name = min(class_counts, key=class_counts.get)
least_class_dir = os.path.join(train_dir, least_class_name)

augmentor_pipeline = Augmentor.Pipeline(least_class_dir)
augmentor_pipeline.rotate(probability=0.7, max_left_rotation=10, max_right_rotation=10)
augmentor_pipeline.zoom_random(probability=0.5, percentage_area=0.8)
augmentor_pipeline.sample(500)  # Generate 500 samples to balance classes

# The existing iterators listed the files before Augmentor ran, so build new
# ones to pick up the generated images.
# NOTE(review): this assumes Augmentor writes into a sub-folder of the class
# directory and that flow_from_directory scans class folders recursively —
# confirm. The generated images may also land in the validation split.
train_dataset_balanced = train_datagen_augmented.flow_from_directory(
    train_dir,
    target_size=(IMG_HEIGHT, IMG_WIDTH),
    batch_size=BATCH_SIZE,
    class_mode='categorical',
    subset='training'
)

val_dataset_balanced = train_datagen.flow_from_directory(
    train_dir,
    target_size=(IMG_HEIGHT, IMG_WIDTH),
    batch_size=BATCH_SIZE,
    class_mode='categorical',
    subset='validation'
)

# After balancing, retrain the model
model_balanced = create_model()
history_balanced = model_balanced.fit(
    train_dataset_balanced,
    epochs=30,
    validation_data=val_dataset_balanced
)

# Plot training results for balanced dataset
plot_training(history_balanced)
