In [1]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
for dirname, _, filenames in os.walk('/kaggle/input'):
    for filename in filenames:
        print(os.path.join(dirname, filename))

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

/kaggle/input/synndd/SynDDAnnotated/test_video/sceneGraphs.json
/kaggle/input/synndd/SynDDAnnotated/test_video/pose.json
/kaggle/input/synndd/SynDDAnnotated/test_video/imageEmbeddings.json
/kaggle/input/synndd/SynDDAnnotated/test_video/sceneGraphs/dev.json
/kaggle/input/synndd/SynDDAnnotated/test_video/sceneGraphs/train.json
/kaggle/input/synndd/SynDDAnnotated/test_video/sceneGraphs/test.json
/kaggle/input/synndd/SynDDAnnotated/test_video/imageEmbeddings/dev.json
/kaggle/input/synndd/SynDDAnnotated/test_video/imageEmbeddings/train.json
/kaggle/input/synndd/SynDDAnnotated/test_video/imageEmbeddings/test.json
/kaggle/input/synndd/SynDDAnnotated/test_video/frames/24026_1.0_2681.jpg
/kaggle/input/synndd/SynDDAnnotated/test_video/frames/24026_17.0_6120.jpg
/kaggle/input/synndd/SynDDAnnotated/test_video/frames/24026_17.0_6275.jpg
/kaggle/input/synndd/SynDDAnnotated/test_video/frames/24026_8.0_4697.jpg
/kaggle/input/synndd/SynDDAnnotated/test_video/frames/24026_2.0_4198.jpg
/kaggle/input/synn

In [2]:
import torch
import torchvision.transforms as transforms
import torchvision.models as models
from PIL import Image

import os
import json
import glob

# Train on classificaion task
# get images from the folder
# get the labels from the folder


# Check for GPU availability
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Load all images from sampleFrames folder
imagePaths = glob.glob(f'/kaggle/input/synndd/SynDDAnnotated/test_video/frames/*.jpg')
print(f"Number of images: {len(imagePaths)}")

def get_label(img_path):
    class_label = img_path.split('/')[-1].split('_')[1]
    class_label = int(float(class_label)) if class_label != "nan" else 0  
    return class_label

# Create a dataset
class ImageDataset(torch.utils.data.Dataset):
    def __init__(self, image_paths, transform=None):
        self.image_paths = image_paths
        self.transform = transform

    def __len__(self):
        return len(self.image_paths)

    def __getitem__(self, idx):
        image_path = self.image_paths[idx]
        image = Image.open(image_path)
        if self.transform:
            image = self.transform(image)
        label = get_label(image_path)
        return image, label
    
# Define the transformation
transform = transforms.Compose([
    transforms.Resize(256),
    transforms.CenterCrop(224),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
])

print("Creating the dataset and dataloader")
# Create the dataset
dataset = ImageDataset(imagePaths, transform=transform)

print("Creating the dataloader")
# Make train, test, validation splits
train_size = int(0.8 * len(dataset))
test_size = int(0.1 * len(dataset))
val_size = len(dataset) - train_size - test_size
train_dataset, test_dataset, val_dataset = torch.utils.data.random_split(dataset, [train_size, test_size, val_size])

train_dataloader = torch.utils.data.DataLoader(train_dataset, batch_size=4, shuffle=True)
test_dataloader = torch.utils.data.DataLoader(test_dataset, batch_size=4, shuffle=True)
val_dataloader = torch.utils.data.DataLoader(val_dataset, batch_size=4, shuffle=True)

print("Creating the model")
# Load the pre-trained VGG16 model and move it to GPU
model = models.vgg16(pretrained=True).to(device)
if torch.cuda.device_count() > 1:
    print(f"Using {torch.cuda.device_count()} GPUs")
    model = torch.nn.DataParallel(model)

# Freeze the feature parameters
for param in model.module.features.parameters():
    param.requires_grad = False

# Modify the classifier
model.module.classifier[6] = torch.nn.Linear(4096, 18).to(device)

# Define the loss function and optimizer
criterion = torch.nn.CrossEntropyLoss()

# Only train the classifier parameters
optimizer = torch.optim.SGD(model.module.classifier.parameters(), lr=0.001)

print("Training the model")
# Train the model
model.train()

for epoch in range(5):
    print(f"Epoch {epoch + 1}")
    running_loss = 0.0
    total = 0
    correct = 0
    for i, data in enumerate(train_dataloader, 0):
        inputs, labels = data[0].to(device), data[1].to(device)  # Move data to GPU
        optimizer.zero_grad()
        outputs = model(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        running_loss += loss.item()
        _, predicted = torch.max(outputs.data, 1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

        if i % 20 == 0:
            print(f"Train Loss: {running_loss / 20}")
            print(f"Train Accuracy: {100 * correct / total}%")
            running_loss = 0.0
            total = 0
            correct = 0

    for i, data in enumerate(val_dataloader, 0):
        inputs, labels = data[0].to(device), data[1].to(device)  # Move data to GPU
        outputs = model(inputs)
        _, predicted = torch.max(outputs.data, 1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

    print(f"Validation Accuracy: {100 * correct / total}%")

print("Finished Training")

# Save the model
os.makedirs("./models/", exist_ok=True)
torch.save(model.module.state_dict(), "./models/classifier.pth")

# Test the model
model.eval()
correct = 0
total = 0

with torch.no_grad():
    for data in test_dataloader:
        images, labels = data[0].to(device), data[1].to(device)  # Move data to GPU
        outputs = model(images)
        _, predicted = torch.max(outputs.data, 1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

print(f"Accuracy of the network on the {total} test images: {100 * correct / total}%")

Number of images: 10770
Creating the dataset and dataloader
Creating the dataloader
Creating the model


Downloading: "https://download.pytorch.org/models/vgg16-397923af.pth" to /root/.cache/torch/hub/checkpoints/vgg16-397923af.pth
100%|██████████| 528M/528M [00:03<00:00, 166MB/s]


Using 2 GPUs
Training the model
Epoch 1
Train Loss: 0.15787073373794555
Train Accuracy: 0.0%
Train Loss: 3.0033105492591856
Train Accuracy: 6.25%
Train Loss: 2.919947016239166
Train Accuracy: 7.5%
Train Loss: 2.8239803671836854
Train Accuracy: 16.25%
Train Loss: 2.5853911995887757
Train Accuracy: 23.75%
Train Loss: 2.500911009311676
Train Accuracy: 26.25%
Train Loss: 2.3271274626255036
Train Accuracy: 28.75%
Train Loss: 2.2535944163799284
Train Accuracy: 36.25%
Train Loss: 2.0325741946697233
Train Accuracy: 53.75%
Train Loss: 1.8451813817024232
Train Accuracy: 48.75%
Train Loss: 2.0930127918720247
Train Accuracy: 42.5%
Train Loss: 1.7757993757724762
Train Accuracy: 52.5%
Train Loss: 1.6289580196142197
Train Accuracy: 52.5%
Train Loss: 1.6055532932281493
Train Accuracy: 55.0%
Train Loss: 1.4615238696336745
Train Accuracy: 65.0%
Train Loss: 1.3952509373426438
Train Accuracy: 65.0%
Train Loss: 1.261175698041916
Train Accuracy: 73.75%
Train Loss: 1.2441672533750534
Train Accuracy: 71.25%
T

In [3]:
# Save the model
os.makedirs("./models/", exist_ok=True)
torch.save(model.state_dict(), "./models/model.pth")


In [4]:
for name, param in model.named_parameters():
    print(name, param.shape)




module.features.0.weight torch.Size([64, 3, 3, 3])
module.features.0.bias torch.Size([64])
module.features.2.weight torch.Size([64, 64, 3, 3])
module.features.2.bias torch.Size([64])
module.features.5.weight torch.Size([128, 64, 3, 3])
module.features.5.bias torch.Size([128])
module.features.7.weight torch.Size([128, 128, 3, 3])
module.features.7.bias torch.Size([128])
module.features.10.weight torch.Size([256, 128, 3, 3])
module.features.10.bias torch.Size([256])
module.features.12.weight torch.Size([256, 256, 3, 3])
module.features.12.bias torch.Size([256])
module.features.14.weight torch.Size([256, 256, 3, 3])
module.features.14.bias torch.Size([256])
module.features.17.weight torch.Size([512, 256, 3, 3])
module.features.17.bias torch.Size([512])
module.features.19.weight torch.Size([512, 512, 3, 3])
module.features.19.bias torch.Size([512])
module.features.21.weight torch.Size([512, 512, 3, 3])
module.features.21.bias torch.Size([512])
module.features.24.weight torch.Size([512, 512

In [5]:


# Load the VGGN model
model = models.vgg16(pretrained=True)
# Modify the classifier
model = torch.nn.DataParallel(model)

model.module.classifier[6] = torch.nn.Linear(4096, 18)

# Load model weights from .pth file
model.load_state_dict(torch.load('/kaggle/working/models/model.pth'))
model.to('cuda' if torch.cuda.is_available() else 'cpu')
model.eval()

DataParallel(
  (module): VGG(
    (features): Sequential(
      (0): Conv2d(3, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
      (1): ReLU(inplace=True)
      (2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
      (3): ReLU(inplace=True)
      (4): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
      (5): Conv2d(64, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
      (6): ReLU(inplace=True)
      (7): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
      (8): ReLU(inplace=True)
      (9): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
      (10): Conv2d(128, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
      (11): ReLU(inplace=True)
      (12): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
      (13): ReLU(inplace=True)
      (14): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
      (15): ReLU(inplace=True)
      

In [6]:
image = Image.open("/kaggle/input/synndd/SynDDAnnotated/test_video/frames/24026_0.0_1440.jpg")

# Preprocess the image
preprocess = transforms.Compose([
    transforms.Resize(256),
    transforms.CenterCrop(224),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
])
input_tensor = preprocess(image)
input_batch = input_tensor.unsqueeze(0).to(device)

In [7]:
# Generate the embedding vector
with torch.no_grad():
    features = model.module.features(input_batch)
    features = torch.flatten(features, 1)
    embedding = model.module.classifier[:4](features)

# Convert the tensor to a list
embedding = embedding.squeeze().tolist()

In [8]:
len(embedding)

4096

<a href="/kaggle/working/models/model.pth"> Download File </a>

In [9]:
from IPython.display import FileLink
FileLink(r'/kaggle/working/models/model.pth')