In [4]:
import os
import shutil
from collections import defaultdict
from sklearn.model_selection import train_test_split
from torchvision import datasets, transforms
from torch.utils.data import DataLoader
import random

# Define class-wise sample requirements
# fusar_subset_definitions = {
#     "easy": {"Cargo": 437, "Fishing": 117},
#     "moderate": {"Cargo": 314, "Fishing": 115, "Bulk": 81, "Tanker": 74},
#     "hard": {"Cargo": 109, "Fishing": 58, "Bulk": 54, "Tanker": 44, "Container": 45, "Dredging": 39, "Tug": 43, "GeneralCargo": 32, "Passenger": 29},
#     # "test": {"Cargo": 150, "Fishing": 58, "Bulk": 28, "Tanker": 18, "Container": 12, "Dredging": 10, "Tug": 8, "GeneralCargo": 7, "Passenger": 6},
# }

fusar_subset_definitions = {
        'easy': {'Cargo': 408, 'Fishing': 314}, 
        'moderate': {'Bulk': 80, 'Tanker': 74, 'Cargo': 170, 'Fishing': 138}, 
        'hard': {'Container': 52, 'Dredging': 44, 'Tug': 43, 'GeneralCargo': 32, 'Passenger': 29, 'Bulk': 48, 'Tanker': 46, 'Cargo': 136, 'Fishing': 110}
        }

balanced_fusar_subset_definitions = {
        'Easy': {'Cargo': 466, 'Fishing': 466}, 
        'Moderate': {'Tanker': 72, 'Bulk': 72, 'Cargo': 72, 'Fishing': 72}, 
        'Hard': {'Cargo': 29, 'Fishing': 29, 'Tanker': 29, 'Bulk': 29, 'Passenger': 29, 'Container': 29, 'Dredging': 29, 'Tug': 29, 'GeneralCargo': 29}}


# Initialize paths
fusar_data_dir = "c_fusar/"
fusar_output_dir = "c_fusar_ready_bal/"
os.makedirs(fusar_output_dir, exist_ok=True)


# Create output directories for subsets
for subset in ["easy", "moderate", "hard", "test", "validation"]:
    os.makedirs(os.path.join(fusar_output_dir, subset), exist_ok=True)

# Function to collect data by class
def collect_data_by_class(data_dir):
    data_by_class = defaultdict(list)
    for class_name in os.listdir(data_dir):
        class_dir = os.path.join(data_dir, class_name)
        if os.path.isdir(class_dir):
            for file_name in os.listdir(class_dir):
                data_by_class[class_name].append(os.path.join(class_dir, file_name))
    return data_by_class

# Split data into subsets
def create_curriculum_subsets(data_by_class, subset_definitions, output_dir):
    for class_name, file_paths in data_by_class.items():
        random.shuffle(file_paths)  # Shuffle files for randomness
        remaining = file_paths

        for subset, class_limits in subset_definitions.items():
            if class_name in class_limits:
                limit = class_limits[class_name]
                selected, remaining = remaining[:limit], remaining[limit:]
                print(subset, class_name, class_limits[class_name], len(remaining))
                save_files(selected, os.path.join(output_dir, subset, class_name))

        # Split remaining data into validation and test sets
        if remaining:
            validation_files, test_files = train_test_split(
                remaining, test_size=0.5, random_state=42
            )
            save_files(validation_files, os.path.join(output_dir, "validation", class_name))
            save_files(test_files, os.path.join(output_dir, "test", class_name))

# Function to save files into respective directories
def save_files(file_paths, dest_dir):
    os.makedirs(dest_dir, exist_ok=True)
    for file_path in file_paths:
        shutil.copy(file_path, os.path.join(dest_dir, os.path.basename(file_path)))


# FUSAR 
# Collect data
data_by_class = collect_data_by_class(fusar_data_dir)

# Create curriculum subsets
create_curriculum_subsets(data_by_class, balanced_fusar_subset_definitions, fusar_output_dir)

print(f"Curriculum datasets created and saved in {fusar_output_dir}")

Hard Passenger 29 8
Hard Dredging 29 27
Easy Fishing 466 319
Moderate Fishing 72 247
Hard Fishing 29 218
Easy Cargo 466 1227
Moderate Cargo 72 1155
Hard Cargo 29 1126
Hard GeneralCargo 29 12
Hard Container 29 36
Hard Tug 29 25
Moderate Bulk 72 201
Hard Bulk 29 172
Moderate Tanker 72 76
Hard Tanker 29 47
Curriculum datasets created and saved in c_fusar_ready_bal/


In [13]:
# fusar_subset_definitions

{'easy': {'Cargo': 437, 'Fishing': 274},
 'moderate': {'Cargo': 327,
  'Fishing': 175,
  'Bulk': 81,
  'Tanker': 59,
  'Container': 26,
  'Dredging': 22},
 'hard': {'Cargo': 109,
  'Fishing': 58,
  'Bulk': 54,
  'Tanker': 59,
  'Container': 26,
  'Dredging': 22,
  'Tug': 43,
  'GeneralCargo': 32,
  'Passenger': 29}}

In [1]:
# Fusar Loaders
from torchvision import transforms
from torch.utils.data import Dataset, DataLoader
from torchvision.datasets import ImageFolder
import os

transform_train = transforms.Compose([
    transforms.Resize((64, 64)),
    transforms.ToTensor(),
])

new_class_to_idx = {'Cargo': 0, 'Fishing': 1, 'Bluk': 2, 'Dredging': 3, 'Container': 4, 'Tanker': 5, 'GeneralCargo': 6, 'Passenger': 7, 'Tug': 8}

# Load datasets
curriculum_data_dir = "c_fusar_ready/"
easy_dataset = ImageFolder(os.path.join(curriculum_data_dir, "easy"), transform=transform_train)
moderate_dataset = ImageFolder(os.path.join(curriculum_data_dir, "moderate"), transform=transform_train)
hard_dataset = ImageFolder(os.path.join(curriculum_data_dir, "hard"), transform=transform_train)
validation_dataset = ImageFolder(os.path.join(curriculum_data_dir, "validation"), transform=transform_train)
test_dataset = ImageFolder(os.path.join(curriculum_data_dir, "test"), transform=transform_train)

easy_dataset.class_to_idx = new_class_to_idx
moderate_dataset.class_to_idx = new_class_to_idx
hard_dataset.class_to_idx = new_class_to_idx
validation_dataset.class_to_idx = new_class_to_idx
test_dataset.class_to_idx = new_class_to_idx

# Create DataLoaders
batch_size = 32
easy_loader = DataLoader(easy_dataset, batch_size=batch_size, shuffle=True)
moderate_loader = DataLoader(moderate_dataset, batch_size=batch_size, shuffle=True)
hard_loader = DataLoader(hard_dataset, batch_size=batch_size, shuffle=True)
validation_loader = DataLoader(validation_dataset, batch_size=batch_size, shuffle=False)
test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False)

print("DataLoaders created successfully.")

train_loaders = [easy_loader, moderate_loader, hard_loader]
test_loaders = [validation_loader, test_loader]

DataLoaders created successfully.


In [6]:
# easy_dataset.class_to_idx, moderate_dataset.class_to_idx, hard_dataset.class_to_idx

# new_class_to_idx = {'Cargo': 0, 'Fishing': 1, 'Dredging': 2, 'Container': 3, 'Tanker': 4, 'GeneralCargo': 5, 'Passenger': 6, 'Tug': 7}

In [None]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, Subset
from torchvision import datasets, transforms
import numpy as np
from torchvision import models


class VGGModel(nn.Module):
  def __init__(self, pretrained=True):
    super(VGGModel, self).__init__()
    self.features = models.vgg16(pretrained=pretrained).features  # Use VGG16 features
    
    # for param in self.features.parameters():
    #   param.requires_grad = False  # Freeze pre-trained layers
    self.avgpool = nn.AdaptiveAvgPool2d((7, 7))  # Global Average Pooling
    self.classifier = nn.Sequential(
            nn.Linear(512 * 7 * 7, 4096),
            nn.ReLU(inplace=True),
            nn.Dropout(),
            nn.Linear(4096, 3)  # 3 output classes
        )

  def forward(self, x):
    x = self.features(x)
    # print(x.shape)
    x = self.avgpool(x)
    x = torch.flatten(x, 1)
    # print(x.shape)
    x = self.classifier(x)
    return x

train_loaders = [easy_loader, moderate_loader, hard_loader]
test_loaders = [validation_loader, test_loader]

# Training function
def train_model(model, train_loader, optimizer, criterion, device):
    model.train()
    running_loss = 0.0
    for inputs, labels in train_loader:
        inputs, labels = inputs.to(device), labels.to(device)
        optimizer.zero_grad()
        outputs = model(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()
        running_loss += loss.item()
    return running_loss / len(train_loader)

# Training with curriculum
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = VGGModel().to(device)
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)

num_epochs = 5
for level, train_loader in enumerate(train_loaders):
    print(f"Training on curriculum level {level + 1}")
    # train_loader = DataLoader(dataset, batch_size=64, shuffle=True)
    
    for epoch in range(num_epochs):
        loss = train_model(model, train_loader, optimizer, criterion, device)
        print(f"Epoch [{epoch+1}/{num_epochs}], Loss: {loss:.4f}")
        
        # if level == 2:
        #     eval_loss = train_model(model, test_loaders[0], optimizer, criterion, device)
        #     print(f"Eval: Epoch [{epoch+1}/{num_epochs}], Loss: {loss:.4f}")


print("Training completed with curriculum learning.")


In [2]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader
from torchvision import datasets, transforms
from torchvision import models

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
# device = torch.device('cpu')


In [5]:
device

device(type='cuda')

In [8]:

class SimpleCNN(nn.Module):
    def __init__(self):
        super(SimpleCNN, self).__init__()
        self.conv1 = nn.Conv2d(3, 16, 3, padding=1)
        self.conv2 = nn.Conv2d(16, 32, 3, padding=1)
        self.fc1 = nn.Linear(32 * 8 * 8, 128)
        self.fc2 = nn.Linear(128, 9)

    def forward(self, x):
        print(f"Input shape to conv1: {x.shape}")
        x = torch.relu(self.conv1(x))
        x = torch.max_pool2d(x, 2)
        print(f"Shape after conv1: {x.shape}")
        x = torch.relu(self.conv2(x))
        x = torch.max_pool2d(x, 2)
        print(f"Shape after conv2: {x.shape}")
        x = x.view(x.size(0), -1)  # Ensure the batch dimension is preserved
        print(f"Shape after flattening: {x.shape}")
        x = torch.relu(self.fc1(x))
        return self.fc2(x)

class VGGModel(nn.Module):
  def __init__(self, pretrained=True):
    super(VGGModel, self).__init__()
    self.features = models.vgg16(pretrained=pretrained).features  # Use VGG16 features
    
    # for param in self.features.parameters():
    #   param.requires_grad = False  # Freeze pre-trained layers
    self.avgpool = nn.AdaptiveAvgPool2d((7, 7))  # Global Average Pooling
    self.classifier = nn.Sequential(
            nn.Linear(512 * 7 * 7, 4096),
            nn.ReLU(inplace=True),
            nn.Dropout(),
            nn.Linear(4096, 9)  # 3 output classes
        )

  def forward(self, x):
    x = self.features(x)
    # print(x.shape)
    x = self.avgpool(x)
    x = torch.flatten(x, 1)
    # print(x.shape)
    x = self.classifier(x)
    return x

# Training function
def train_model(model, train_loader, optimizer, criterion, device):
    model.train()
    running_loss = 0.0
    for inputs, labels in train_loader:
        inputs, labels = inputs.to(device), labels.to(device)
        optimizer.zero_grad()
        # print(type(inputs))
        outputs = model(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()
        running_loss += loss.item()
    return running_loss / len(train_loader)


def validate_model(model, val_loader, criterion, device):
    model.eval()
    running_loss = 0.0
    with torch.no_grad():
        for inputs, labels in val_loader:
            inputs, labels = inputs.to(device), labels.to(device)
            outputs = model(inputs)
            # Compute loss
            loss = criterion(outputs, labels)
            running_loss += loss.item()
    return running_loss / len(val_loader)


# Training with curriculum
model = VGGModel().to(device)
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)

num_epochs = 3
best_val_loss = float('inf')
early_stop_patience = 9
no_improve_epochs = 0

for level, train_loader in enumerate(train_loaders):
    print(f"Training on curriculum level {level + 1}")
    
    for epoch in range(num_epochs):
        train_loss = train_model(model, train_loader, optimizer, criterion, device)
        
        # if level == 2:
        val_loss = validate_model(model, test_loaders[0], criterion, device)
        
        print(f"Level {level+1}, Epoch [{epoch+1}/{num_epochs}] - Train Loss: {train_loss:.4f}, Val Loss: {val_loss:.4f}")
        
        # Early stopping logic
        if val_loss < best_val_loss:
            best_val_loss = val_loss
            no_improve_epochs = 0
        else:
            no_improve_epochs += 1
            if no_improve_epochs >= early_stop_patience:
                print(f"Early stopping at Level {level+1}, Epoch {epoch+1}")
                break

print("Training completed with curriculum learning.")


Training on curriculum level 1
Level 1, Epoch [1/3] - Train Loss: 2.8831, Val Loss: 3.2306
Level 1, Epoch [2/3] - Train Loss: 0.6823, Val Loss: 4.2193
Level 1, Epoch [3/3] - Train Loss: 0.7062, Val Loss: 3.2478
Training on curriculum level 2
Level 2, Epoch [1/3] - Train Loss: 2.0191, Val Loss: 1.4786
Level 2, Epoch [2/3] - Train Loss: 1.4514, Val Loss: 1.5151
Level 2, Epoch [3/3] - Train Loss: 1.4239, Val Loss: 1.4384
Training on curriculum level 3
Level 3, Epoch [1/3] - Train Loss: 2.4059, Val Loss: 2.1194
Level 3, Epoch [2/3] - Train Loss: 2.1456, Val Loss: 1.9714
Level 3, Epoch [3/3] - Train Loss: 2.0836, Val Loss: 1.5697
Training completed with curriculum learning.


In [1]:
! pip install pandas

'pip' is not recognized as an internal or external command,
operable program or batch file.
