# In [None]:
import numpy as np
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Flatten, Dense, Dropout
from tensorflow.keras.utils import to_categorical
from sklearn.model_selection import train_test_split
import matplotlib.pyplot as plt

# Coarse CIFAR-100 superclass names (animals only) that are kept for
# training and testing.
animal_supergroups = [
    'aquatic_mammals',
    'fish',
    'insects',
    'large_carnivores',
    'large_omnivores_and_herbivores',
    'medium_mammals',
    'non-insect_invertebrates',
    'reptiles',
    'small_mammals',
]

# Helper function to load CIFAR-100 data
def unpickle(file):
    """Deserialize and return the pickled object stored at path `file`.

    The file is opened in binary mode and loaded with ``encoding='bytes'``,
    so 8-bit strings pickled by Python 2 (dict keys included) come back as
    ``bytes`` objects.

    NOTE: unpickling can execute arbitrary code; only call this on files
    that come from a trusted source.
    """
    import pickle

    with open(file, 'rb') as handle:
        return pickle.load(handle, encoding='bytes')

# Read the pickled training batch and the label metadata from disk
train_data = unpickle('Data/train')
meta_data = unpickle('Data/meta')

# Turn the byte-string superclass names into text, then record the position
# of every superclass that is one of the selected animal supergroups
coarse_label_names = [
    raw_name.decode('utf-8') for raw_name in meta_data[b'coarse_label_names']
]
animal_indices = [
    position
    for position, name in enumerate(coarse_label_names)
    if name in animal_supergroups
]

# Function to preprocess data
def preprocess_data(data, animal_indices):
    """Select the animal samples from a CIFAR-100 batch and prepare them for Keras.

    Args:
        data: Batch dict as returned by ``unpickle``. Reads ``data[b'data']``
            (one flat row per image) and ``data[b'coarse_labels']`` (one
            coarse label per image).
        animal_indices: Coarse-label values to keep. The position of a value
            in this sequence becomes its class index.

    Returns:
        Tuple ``(images, labels)``. ``images`` is a float32 array of shape
        ``(n, 32, 32, 3)`` with pixel values divided by 255. ``labels`` is
        the one-hot encoding produced by ``to_categorical`` with
        ``len(animal_supergroups)`` classes.
    """
    # Map each kept coarse label to the first position it occupies in
    # `animal_indices` (what list.index would return), with O(1) lookups.
    class_of = {}
    for position, coarse in enumerate(animal_indices):
        class_of.setdefault(coarse, position)

    coarse_labels = np.asarray(data[b'coarse_labels'])
    # Boolean mask over all samples instead of a Python-level loop.
    keep = np.isin(coarse_labels, list(class_of))

    # Each row is reshaped as (channels, height, width) and then moved to
    # channels-last, which is the layout the model's input_shape expects.
    images = np.asarray(data[b'data'])[keep]
    images = images.reshape(-1, 3, 32, 32).transpose(0, 2, 3, 1).astype('float32') / 255.0

    # Re-index the surviving coarse labels to contiguous class indices.
    labels = np.array([class_of[coarse] for coarse in coarse_labels[keep].tolist()])
    labels = to_categorical(labels, num_classes=len(animal_supergroups))
    return images, labels

# Keep only the animal samples of the training batch and encode them
filtered_images, filtered_labels = preprocess_data(train_data, animal_indices)

# Hold out 20% of the filtered samples for validation (fixed random seed)
X_train, X_val, y_train, y_val = train_test_split(
    filtered_images,
    filtered_labels,
    test_size=0.2,
    random_state=42,
)

# Define the CNN model: three Conv2D + MaxPooling2D stages followed by a
# dense classifier head with dropout.
model = Sequential([
    Conv2D(32, (3, 3), activation='relu', input_shape=(32, 32, 3)),
    MaxPooling2D((2, 2)),
    Conv2D(64, (3, 3), activation='relu'),
    MaxPooling2D((2, 2)),
    Conv2D(128, (3, 3), activation='relu'),
    MaxPooling2D((2, 2)),
    Flatten(),
    Dense(128, activation='relu'),
    Dropout(0.5),
    # One softmax output per animal supergroup
    Dense(len(animal_supergroups), activation='softmax'),
])

# Configure training: Adam optimizer, categorical cross-entropy loss,
# accuracy as the reported metric
model.compile(
    optimizer='adam',
    loss='categorical_crossentropy',
    metrics=['accuracy'],
)

# Fit on the training split and track metrics on the validation split
history = model.fit(
    X_train,
    y_train,
    epochs=100,
    batch_size=64,
    validation_data=(X_val, y_val),
)

# Draw the recorded training and validation accuracy curves on one figure
for metric_key, curve_label in (
    ('accuracy', 'Train Accuracy'),
    ('val_accuracy', 'Validation Accuracy'),
):
    plt.plot(history.history[metric_key], label=curve_label)
plt.title('Training and Validation Accuracy')
plt.xlabel('Epoch')
plt.ylabel('Accuracy')
plt.legend(loc='lower right')
plt.show()

# Read the test batch and run it through the same preprocessing as training
test_data = unpickle('Data/test')
test_images, test_labels = preprocess_data(test_data, animal_indices)

# Score the trained model on the test samples and report loss and accuracy
test_loss, test_accuracy = model.evaluate(
    test_images,
    test_labels,
    verbose=2,
)
print(f"Test Loss: {test_loss}")
print(f"Test Accuracy: {test_accuracy}")
