In [33]:
!pip install torch torchvision matplotlib





In [73]:
# === Configuration and Imports ===
import torch
from torch.utils.data import DataLoader, random_split, ConcatDataset
import torchvision.transforms as transforms
import torchvision.datasets as datasets
from torchvision import datasets, transforms
from torchvision.models import resnet18, ResNet18_Weights
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import random_split, DataLoader

# Configuration
BATCH_SIZE = 64
LEARNING_RATE = 0.001
EPOCHS = 5
COCO_CLASSES = 80  # Adjust based on the specific COCO task

# Check for GPU availability
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print(f"Using device: {device}")

Using device: cuda


In [35]:
# === Data Augmentation and Normalization ===
transform_coco = transforms.Compose([
    transforms.Resize((128, 128)),  # Resize all images to 128x128
    transforms.RandomHorizontalFlip(),  # Data Augmentation: Random Horizontal Flip
    transforms.ToTensor(),  # Convert images to tensor format
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])  # Normalize image data
])

In [78]:
from torch.utils.data import random_split, DataLoader, ConcatDataset
from torchvision import datasets, transforms
import torch

# Define data transformations with 128x128 resize
transform_train = transforms.Compose([
    transforms.Resize(256),  # Resize the image while maintaining the aspect ratio
    transforms.RandomResizedCrop(128),  # Random crop to 128x128
    transforms.RandomHorizontalFlip(),  # Random horizontal flip
    transforms.ToTensor(),  # Convert to tensor
    transforms.Normalize(mean=[0.5, 0.5, 0.5], std=[0.5, 0.5, 0.5])  # Normalize
])

transform_test = transforms.Compose([
    transforms.Resize(256),  # Resize the image while maintaining the aspect ratio
    transforms.CenterCrop(128),  # Crop the image to 128x128 from the center
    transforms.ToTensor(),  # Convert to tensor
    transforms.Normalize(mean=[0.5, 0.5, 0.5], std=[0.5, 0.5, 0.5])  # Normalize
])

# Custom collate_fn to handle both CIFAR-10 and COCO datasets
def custom_collate_fn(batch):
    images = [item[0] for item in batch]
    
    # If the item is from CIFAR-10, labels are integers (class labels)
    if isinstance(batch[0][1], int):  # CIFAR-10
        labels = torch.tensor([item[1] for item in batch], dtype=torch.long)
    else:  # COCO
        # Extract the category IDs from the annotations (assumed to be in item[1])
        # COCO annotations are a list of dictionaries, each containing 'category_id'
        labels = torch.tensor([item[1][0]['category_id'] for item in batch], dtype=torch.long)  # Extracting category ID
    
    images = torch.stack(images, dim=0)  # Stack images into a tensor
    return images, labels

# === Load CIFAR-10 Dataset ===
train_data_cifar = datasets.CIFAR10(root='./data', train=True, download=True, transform=transform_train)
test_data_cifar = datasets.CIFAR10(root='./data', train=False, download=True, transform=transform_test)

# === Load COCO Dataset ===
train_data_coco = datasets.CocoDetection(
    root='/kaggle/input/coco-2017-dataset/coco2017/train2017',  # Path to training images
    annFile='/kaggle/input/coco-2017-dataset/coco2017/annotations/instances_train2017.json',  # Path to training annotations
    transform=transform_train
)

test_data_coco = datasets.CocoDetection(
    root='/kaggle/input/coco-2017-dataset/coco2017/val2017',  # Path to validation images
    annFile='/kaggle/input/coco-2017-dataset/coco2017/annotations/instances_val2017.json',  # Path to validation annotations
    transform=transform_test
)

# === Combine Datasets ===
train_data_combined = ConcatDataset([train_data_cifar, train_data_coco])  # Combine CIFAR-10 and COCO training data
test_data_combined = ConcatDataset([test_data_cifar, test_data_coco])    # Combine CIFAR-10 and COCO testing data

# === Split the Combined Dataset ===
train_size = int(0.7 * len(train_data_combined))  # 70% for training
test_size = len(train_data_combined) - train_size  # 30% for testing

train_dataset, test_dataset = random_split(train_data_combined, [train_size, test_size])

# === Create DataLoaders ===
BATCH_SIZE = 64  # Example batch size

# DataLoader with custom collate_fn to handle mixed formats (CIFAR-10 and COCO)
train_loader = DataLoader(train_dataset, batch_size=BATCH_SIZE, shuffle=True, collate_fn=custom_collate_fn)
test_loader = DataLoader(test_dataset, batch_size=BATCH_SIZE, shuffle=False, collate_fn=custom_collate_fn)

# Confirm the transformation is applied correctly
for images, labels in train_loader:
    print(images.shape)  # Should print [BATCH_SIZE, 3, 128, 128]
    print(type(labels))  # Should print <class 'torch.Tensor'>
    break


Files already downloaded and verified
Files already downloaded and verified
loading annotations into memory...
Done (t=18.67s)
creating index...
index created!
loading annotations into memory...
Done (t=0.50s)
creating index...
index created!


TypeError: 'int' object is not subscriptable

In [50]:
# === Model Definition ===
# Load ResNet-18 pre-trained on ImageNet
model = resnet18(weights=ResNet18_Weights.IMAGENET1K_V1)

# Modify the final layer to fit the combined task
model.fc = nn.Linear(model.fc.in_features, 10)  # Change 10 to your specific number of output classes
model.to(device)

ResNet(
  (conv1): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
  (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (relu): ReLU(inplace=True)
  (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
  (layer1): Sequential(
    (0): BasicBlock(
      (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
      (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    )
    (1): BasicBlock(
      (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
  

In [39]:
# === Define Model, Loss Function, and Optimizer ===
# Assuming 'model' is already defined
criterion = torch.nn.CrossEntropyLoss()  # Example loss function
LEARNING_RATE = 0.001  # Set a learning rate
optimizer = optim.Adam(model.parameters(), lr=LEARNING_RATE)  # Define optimizer


In [40]:
#### === Training Loop ===
for epoch in range(EPOCHS):
    model.train()
    running_loss = 0.0
    for images, annotations in train_loader:
        # Move images to the device
        images = images.to(device)
        
        # Generate predictions
        outputs = model(images)
        
        # Generate dummy labels (if not using annotations directly)
        labels = torch.zeros(len(images), dtype=torch.long).to(device)  # Adjust as needed
        
        # Compute loss
        loss = criterion(outputs, labels)
        
        # Backpropagation
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        
        running_loss += loss.item()
    print(f"Epoch {epoch + 1}/{EPOCHS}, Loss: {running_loss:.4f}")


Epoch 1/5, Loss: 9.2467
Epoch 2/5, Loss: 0.0067
Epoch 3/5, Loss: 0.0014
Epoch 4/5, Loss: 0.0003
Epoch 5/5, Loss: 0.0000


In [68]:
# === Loss Function and Optimizer ===
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=LEARNING_RATE)

# === Training Function ===
def train_model(model, loader, optimizer, criterion, device):
    model.train()
    running_loss = 0.0
    for images, labels in loader:
        images, labels = images.to(device), labels.to(device)

        # Forward pass
        outputs = model(images)
        loss = criterion(outputs, labels)

        # Backward pass
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        running_loss += loss.item()

    return running_loss / len(loader)

# === Evaluation Function ===
def evaluate_model(model, loader, device):
    model.eval()  # Set the model to evaluation mode
    
    correct = 0
    total = 0
    
    # Ensure the data loader is not empty
    if len(loader.dataset) == 0:
        print("The test loader is empty!")
        return 0
    
    with torch.no_grad():
        for images, annotations in loader:
            images = images.to(device)  # Move images to the device
            labels = annotations  # Extract labels (in COCO annotations, they could be complex)
            
            # If annotations are a list of dictionaries, extract 'category_id'
            if isinstance(labels, list):
                labels = [ann['category_id'] for ann in labels if isinstance(ann, dict)]
            
            # If labels are in a simple format (int or tensor), handle it correctly
            if isinstance(labels, int):
                labels = [labels]  # Convert to a list
            
            if len(labels) == 0:  # Check if labels are empty
                print(f"Warning: Empty labels found in this batch")
                continue  # Skip this batch

            labels = torch.tensor(labels).to(device)  # Convert to tensor and move to device
            
            # Forward pass
            outputs = model(images)
            _, predicted = torch.max(outputs, 1)
            
            # Check the shapes to debug
            print(f"Predicted shape: {predicted.shape}, Labels shape: {labels.shape}")
            
            # Count correct predictions
            total += labels.size(0)
            correct += (predicted == labels).sum().item()
    
    # Avoid division by zero
    if total == 0:
        print("No data found in the test loader!")
        return 0
    
    accuracy = (100 * correct / total)
    return accuracy


In [69]:
# Load the model
model = resnet18(weights=ResNet18_Weights.IMAGENET1K_V1)
model.fc = nn.Linear(model.fc.in_features, 10)  # Adjust the number of output classes
model.load_state_dict(torch.load('stegonet.pth'))
model.to(device)

# Evaluate the model to confirm it works as expected
accuracy = evaluate_model(model, test_loader, device)
print(f"Accuracy of the loaded model: {accuracy:.2f}%")


  model.load_state_dict(torch.load('stegonet.pth'))


No data found in the test loader!
Accuracy of the loaded model: 0.00%


In [42]:
# Save the trained model
torch.save(model.state_dict(), 'stegonet.pth')