# Dataset link
https://www.kaggle.com/datasets/vencerlanz09/sea-animals-image-dataste

In [None]:
import os
import numpy as np
from PIL import Image
import torch
from torchvision import models, transforms


## Pre-processing

In [None]:
# Function to process each image and convert to a feature vector
def process_image(image_path):
    """Load one image file and return it as a preprocessed numpy array.

    Args:
        image_path: Path of the image file to open.

    Returns:
        The numpy array obtained from the tensor that the module-level
        ``preprocess_transform`` produces for the RGB version of the image.
    """
    # Open through a context manager so the underlying file is released
    # deterministically instead of whenever the Image object is collected.
    # ``convert`` returns a new, fully loaded image that outlives the block.
    with Image.open(image_path) as source:
        image = source.convert('RGB')  # Convert to RGB if grayscale
    # Apply the preprocessing transform
    image = preprocess_transform(image)
    # Convert to a numpy array
    return image.numpy()

# Map a dataset folder name onto its binary class label
def get_class_label(folder_name):
    """Return 1 if ``folder_name`` is listed in ``class_1_labels``, else 0."""
    return 1 if folder_name in class_1_labels else 0

# Main function to preprocess the dataset
def preprocess_dataset(base_directory):
    """Turn every JPEG under ``base_directory`` into a labelled feature row.

    Each sub-directory of ``base_directory`` is treated as one category; its
    name is mapped to 0 or 1 by ``get_class_label``. Every image is passed
    through ``process_image``, flattened, and the label is appended as the
    last element of the row.

    Args:
        base_directory: Directory whose sub-directories contain the images.

    Returns:
        A tuple ``(class_0_data, class_1_data)`` of numpy arrays, one row per
        image (flattened pixels followed by the label). Rows keep the dtype
        returned by ``process_image``.
    """
    class_0_data = []
    class_1_data = []

    # Sorted traversal makes the row order reproducible across filesystems.
    for folder in sorted(os.listdir(base_directory)):
        folder_path = os.path.join(base_directory, folder)
        if not os.path.isdir(folder_path):
            continue

        class_label = get_class_label(folder)  # 0 or 1 for this folder
        target = class_0_data if class_label == 0 else class_1_data

        for filename in sorted(os.listdir(folder_path)):
            # Accept JPEG files regardless of extension case or spelling.
            if not filename.lower().endswith((".jpg", ".jpeg")):
                continue

            image_path = os.path.join(folder_path, filename)
            pixels = np.asarray(process_image(image_path))

            # Append the label using the pixel dtype: np.append with a plain
            # Python int would promote the whole row to float64 and double
            # the memory and on-disk size of the dataset.
            label_cell = np.array([class_label], dtype=pixels.dtype)
            target.append(np.concatenate((pixels.ravel(), label_cell)))

    # Convert lists to numpy arrays
    class_0_data = np.array(class_0_data)
    class_1_data = np.array(class_1_data)

    return class_0_data, class_1_data

In [None]:
# Folder names that belong to class 1; every other folder is class 0
class_1_labels = ["Sea Urchins", "Puffers", "Sea Rays", "Eel", "Otter"]

# Image pipeline: resize to the 224x224 ResNet input size, convert to a
# tensor, then normalize each channel with the ResNet mean/std values
preprocess_transform = transforms.Compose(
    [
        transforms.Resize((224, 224)),
        transforms.ToTensor(),
        transforms.Normalize(
            mean=[0.485, 0.456, 0.406],
            std=[0.229, 0.224, 0.225],
        ),
    ]
)

# Root folder of the dataset (adjust to your environment)
base_directory = "./datasettoprocess"

# Build one array per class
class_0_array, class_1_array = preprocess_dataset(base_directory)

# Persist both arrays as .npy files so training can reload them later
for _npy_name, _npy_data in (
    ('class_0.npy', class_0_array),
    ('class_1.npy', class_1_array),
):
    np.save(_npy_name, _npy_data)

print("Preprocessing complete. Saved class_0.npy and class_1.npy.")

## Training 

In [None]:
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
from torchvision import models, transforms
from torch.utils.data import DataLoader, TensorDataset, random_split
from PIL import Image
from sklearn.metrics import accuracy_score
import matplotlib.pyplot as plt

# Read back the two per-class arrays written by the pre-processing step
class0, class1 = (np.load(npy_path) for npy_path in ('class_0.npy', 'class_1.npy'))

# Rebuild image tensors of shape [num_samples, channels, height, width].
# The rows written by the pre-processing step are flattened, already
# normalized [3, 224, 224] images followed by one label value, so they only
# need the label stripped and the pixels reshaped.
def preprocess_images(images):
    """Convert stored dataset rows (or raw image arrays) into a tensor batch.

    Args:
        images: Either a 2-D numpy array whose rows are flattened
            ``3 * 224 * 224`` pixel values, optionally followed by one label
            column, or an iterable of arrays that ``Image.fromarray`` accepts.

    Returns:
        A tensor of shape ``[num_samples, 3, 224, 224]``. Flattened rows are
        returned as float32 without further resizing or normalization.
    """
    image_shape = (3, 224, 224)
    flat_size = image_shape[0] * image_shape[1] * image_shape[2]

    if (
        isinstance(images, np.ndarray)
        and images.ndim == 2
        and images.shape[1] in (flat_size, flat_size + 1)
    ):
        # Drop the trailing label column (if present) so it is not treated as
        # a pixel, and copy to contiguous float32 for torch.from_numpy.
        pixels = np.ascontiguousarray(images[:, :flat_size], dtype=np.float32)
        # Explicit batch size: reshape(-1, ...) is ambiguous for zero rows.
        return torch.from_numpy(pixels).reshape(len(pixels), *image_shape)

    # Fallback for raw image arrays: resize, convert and normalize each one.
    processed_images = []
    for img in images:
        img = Image.fromarray(img)  # Convert to PIL image
        img = transforms.Resize((224, 224))(img)  # Resize to (224, 224)
        img = transforms.ToTensor()(img)  # Convert to tensor
        img = transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])(img)  # Normalize
        processed_images.append(img)
    return torch.stack(processed_images)

# Stack both classes into one array and build the matching 0/1 label vector
X = np.concatenate((class0, class1), axis=0)
y = np.concatenate((np.zeros(len(class0)), np.ones(len(class1))))

# Convert features and labels to tensors
X_tensor = preprocess_images(X)
y_tensor = torch.tensor(y, dtype=torch.long)

# 80% train / 10% validation; the test split receives whatever remains
dataset = TensorDataset(X_tensor, y_tensor)
total_samples = len(dataset)
train_size = int(0.8 * total_samples)
val_size = int(0.1 * total_samples)
test_size = total_samples - train_size - val_size
train_dataset, val_dataset, test_dataset = random_split(
    dataset,
    [train_size, val_size, test_size],
)

# Only the training loader shuffles its samples
batch_size = 32
train_loader = DataLoader(train_dataset, shuffle=True, batch_size=batch_size)
val_loader = DataLoader(val_dataset, shuffle=False, batch_size=batch_size)
test_loader = DataLoader(test_dataset, shuffle=False, batch_size=batch_size)

# Define the model
class CustomResNet50(nn.Module):
    """ResNet-50 whose final fully connected layer is replaced by a new head.

    Args:
        num_classes: Number of outputs of the replacement linear layer.
            Defaults to 2 (binary classification).
        pretrained: Whether to load the ImageNet weights for the backbone.
            Defaults to True.
    """

    def __init__(self, num_classes=2, pretrained=True):
        super().__init__()
        # Newer torchvision releases deprecate ``pretrained=`` in favour of
        # the weights enum; IMAGENET1K_V1 is the checkpoint ``pretrained=True``
        # used to load. Fall back to the old keyword on releases without it.
        weights_enum = getattr(models, "ResNet50_Weights", None)
        if weights_enum is not None:
            weights = weights_enum.IMAGENET1K_V1 if pretrained else None
            self.resnet50 = models.resnet50(weights=weights)
        else:
            self.resnet50 = models.resnet50(pretrained=pretrained)
        # Replace the final fully connected layer with a freshly initialized
        # one sized for this task.
        num_features = self.resnet50.fc.in_features
        self.resnet50.fc = nn.Linear(num_features, num_classes)

    def forward(self, x):
        """Run ``x`` through the wrapped ResNet-50 and return its output."""
        return self.resnet50(x)

# Use the GPU when one is available, otherwise stay on the CPU
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Instantiate the network and place it on the selected device
model = CustomResNet50().to(device)

# Cross-entropy loss, optimized with Adam over all model parameters
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=1e-4)

# Training loop: one pass over the training set and one over the validation
# set per epoch, keeping a snapshot of the weights with the lowest
# validation loss.
num_epochs = 10
train_losses = []
val_losses = []
best_val_loss = float('inf')
best_model_state = None

for epoch in range(num_epochs):
    model.train()
    running_loss = 0.0
    for inputs, labels in train_loader:
        inputs, labels = inputs.to(device), labels.to(device)
        optimizer.zero_grad()
        outputs = model(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()
        # Weight the batch-mean loss by batch size so the epoch value is a
        # per-sample average even when the last batch is smaller.
        running_loss += loss.item() * inputs.size(0)

    epoch_loss = running_loss / len(train_loader.dataset)
    train_losses.append(epoch_loss)

    # Validation
    model.eval()
    val_running_loss = 0.0
    with torch.no_grad():
        for inputs, labels in val_loader:
            inputs, labels = inputs.to(device), labels.to(device)
            outputs = model(inputs)
            loss = criterion(outputs, labels)
            val_running_loss += loss.item() * inputs.size(0)

    val_loss = val_running_loss / len(val_loader.dataset)
    val_losses.append(val_loss)

    # Save the model with the best validation loss. state_dict() returns
    # references to the live tensors, which later optimizer steps would
    # overwrite, so every tensor is cloned to get a real snapshot.
    if val_loss < best_val_loss:
        best_val_loss = val_loss
        best_model_state = {
            name: tensor.detach().clone()
            for name, tensor in model.state_dict().items()
        }

# Load best model (nothing to restore if no epoch produced a usable loss)
if best_model_state is not None:
    model.load_state_dict(best_model_state)

# Evaluate the model on the held-out test split
model.eval()
y_true, y_pred = [], []
with torch.no_grad():
    for inputs, labels in test_loader:
        inputs = inputs.to(device)
        labels = labels.to(device)
        outputs = model(inputs)
        # Predicted class = index of the largest output along dimension 1
        predicted = torch.max(outputs, 1)[1]
        y_true += list(labels.cpu().numpy())
        y_pred += list(predicted.cpu().numpy())

accuracy = accuracy_score(y_true, y_pred)
print(f'Test Accuracy: {accuracy:.4f}')

# Draw the per-epoch training and validation loss curves on one figure
plt.figure()
epoch_axis = range(num_epochs)
for loss_history, curve_label in (
    (train_losses, 'Training Loss'),
    (val_losses, 'Validation Loss'),
):
    plt.plot(epoch_axis, loss_history, label=curve_label)
plt.xlabel('Epoch')
plt.ylabel('Loss')
plt.legend()
plt.show()
