## Example: How to use the datasets and the feature extractor

In [1]:
import os

import torch
from torch.utils.data import DataLoader
from torchvision.transforms import transforms
import matplotlib.pyplot as plt
from PIL import Image
import json

In [2]:
from dataset import FrameDataSet, ActionDataset, parse_coco
from feature_extractor import feature_extractor

In [3]:
# Both locations share the same "mock data" folder; the frames sit in its
# "images" subfolder. parse_coco is pointed at the folder itself.
mock_data_parts = (".", "mock data")
ann_dir = os.path.join(*mock_data_parts)
root_dir = os.path.join(*mock_data_parts, "images")
data = parse_coco(ann_dir)

In [4]:
# Inspect the parsed annotations: a list with one dict per frame, holding the
# frame's 'file_name' and its 'bb_info' (a list of 4-integer boxes).
# NOTE(review): the boxes look like [x, y, width, height] — confirm against parse_coco.
data

[{'file_name': '.\\mock data\\images\\00000.jpg',
  'bb_info': [[35, 26, 817, 453],
   [114, 129, 725, 268],
   [224, 364, 513, 115],
   [189, 82, 155, 112],
   [565, 31, 119, 108],
   [0, 0, 852, 479]]},
 {'file_name': '.\\mock data\\images\\00001.jpg',
  'bb_info': [[36, 25, 816, 454],
   [115, 128, 723, 268],
   [225, 361, 513, 118],
   [190, 82, 153, 111],
   [566, 30, 119, 108],
   [0, 0, 852, 479]]},
 {'file_name': '.\\mock data\\images\\00002.jpg',
  'bb_info': [[35, 25, 817, 454],
   [114, 127, 724, 270],
   [224, 361, 513, 118],
   [189, 82, 153, 111],
   [565, 29, 119, 108],
   [0, 0, 852, 479]]},
 {'file_name': '.\\mock data\\images\\00003.jpg',
  'bb_info': [[35, 23, 817, 456],
   [114, 125, 724, 271],
   [224, 359, 514, 120],
   [189, 81, 153, 111],
   [565, 28, 119, 108],
   [0, 0, 852, 479]]},
 {'file_name': '.\\mock data\\images\\00004.jpg',
  'bb_info': [[35, 14, 817, 465],
   [113, 117, 725, 272],
   [224, 352, 512, 127],
   [187, 72, 153, 112],
   [565, 18, 120, 110]

In [5]:
# Preprocessing pipeline handed to the dataset: convert to a tensor first,
# then resize to 256x256.
preprocessing_steps = [
    transforms.ToTensor(),
    transforms.Resize((256, 256)),
]
transform = transforms.Compose(preprocessing_steps)

dataset = FrameDataSet(
    "activity recognition/mock data",
    data,
    transform=transform,
)
# Show the configured pipeline as a sanity check.
print(dataset.transform)

Compose(
    ToTensor()
    Resize(size=(256, 256), interpolation=bilinear, max_size=None, antialias=warn)
)


In [6]:
# Demo only: pull ONE cropped image out of the frame dataset and run it through
# the feature extractor. Do not use the frame dataset like this in real code.
# The batch size chosen here is irrelevant for training; the one that matters is
# set on the DataLoader that wraps ActionDataset.
dataloader = DataLoader(dataset, batch_size=1, shuffle=True)

# A single (random, because shuffle=True) batch is all the demo needs.
cropped_frame_dict = next(iter(dataloader))
cropped_frame_images = cropped_frame_dict["imgs"]
# Layout: batch size, number of cropped images, color channels, height, width
print(cropped_frame_images.shape)

# First frame of the batch, then the first cropped object of that frame.
batch = cropped_frame_images[0]
obj = batch[0]
print(obj.shape)

# No extra `/ 255` here: transforms.ToTensor() in the dataset's transform already
# scales pixel values to [0, 1]; dividing again would squash the input towards 0.
# unsqueeze(0) adds the batch dimension. Passing all crops of a frame at once
# would also work and would yield [num_images, 1000] instead of [1, 1000].
features = feature_extractor(img=obj.unsqueeze(0))
print(features.shape)



torch.Size([1, 5, 3, 256, 256])
torch.Size([3, 256, 256])
torch.Size([1, 1000])


In [7]:
# Look at a single dataset item: a dict with the frame's 'frame_name' and the
# 'imgs' tensor (float values, already within [0, 1] as the output shows).
dataset[0]

{'frame_name': '00000.jpg',
 'imgs': tensor([[[[0.2991, 0.3325, 0.4528,  ..., 0.3352, 0.3385, 0.3367],
           [0.4159, 0.4638, 0.4526,  ..., 0.3304, 0.3310, 0.3273],
           [0.3971, 0.4151, 0.4106,  ..., 0.3200, 0.3171, 0.3376],
           ...,
           [0.3764, 0.3727, 0.3711,  ..., 0.3195, 0.3242, 0.3200],
           [0.3833, 0.3781, 0.3798,  ..., 0.3246, 0.3260, 0.3031],
           [0.3843, 0.3825, 0.3792,  ..., 0.3333, 0.3180, 0.2988]],
 
          [[0.3988, 0.4632, 0.5976,  ..., 0.5274, 0.5307, 0.5211],
           [0.5539, 0.6272, 0.6240,  ..., 0.5225, 0.5232, 0.5116],
           [0.5695, 0.6136, 0.6078,  ..., 0.5121, 0.5093, 0.5219],
           ...,
           [0.5882, 0.5845, 0.5829,  ..., 0.5352, 0.5403, 0.5449],
           [0.5951, 0.5899, 0.5916,  ..., 0.5403, 0.5473, 0.5452],
           [0.5961, 0.5943, 0.5910,  ..., 0.5490, 0.5392, 0.5556]],
 
          [[0.4121, 0.4718, 0.6058,  ..., 0.4489, 0.4523, 0.4348],
           [0.5472, 0.6159, 0.6098,  ..., 0.4441, 0.444

In [8]:
# Load the action annotations of video1.
# JSON is exchanged as UTF-8, so the encoding is passed explicitly; otherwise
# open() falls back to the platform's locale encoding (e.g. cp1252 on Windows).
annotations_path = os.path.join(".", "Actions", "video1", "action_annotations.json")
with open(annotations_path, encoding="utf-8") as jf:
    action_annotations = json.load(jf)

In [9]:
# Combine the frame dataset with the action annotations, then serve the result
# in shuffled batches of two.
action_dataset = ActionDataset(dataset, action_annotations)
action_dataloader = DataLoader(
    dataset=action_dataset,
    batch_size=2,
    shuffle=True,
)

In [10]:
# Peek at the first batch only; each batch unpacks into (imgs, activity).
first_batch = next(iter(action_dataloader), None)
if first_batch is not None:
    imgs, activity = first_batch
    print(imgs)
    print(activity)

tensor([[[[[9.7907e-01, 9.5974e-01, 9.6064e-01,  ..., 3.2586e-01,
            3.2345e-01, 3.1190e-01],
           [9.8009e-01, 9.5907e-01, 9.4757e-01,  ..., 3.2528e-01,
            3.2381e-01, 3.1796e-01],
           [8.3006e-01, 7.7229e-01, 7.4139e-01,  ..., 3.2157e-01,
            3.2157e-01, 3.2400e-01],
           ...,
           [4.0784e-01, 4.0630e-01, 4.0136e-01,  ..., 3.3912e-01,
            3.3770e-01, 3.3968e-01],
           [4.0784e-01, 4.0630e-01, 4.0136e-01,  ..., 3.4823e-01,
            3.4192e-01, 3.3755e-01],
           [4.0112e-01, 4.0090e-01, 4.0239e-01,  ..., 3.5947e-01,
            3.5843e-01, 3.5458e-01]],

          [[1.0000e+00, 9.8944e-01, 9.8813e-01,  ..., 4.8272e-01,
            4.8031e-01, 4.7558e-01],
           [9.9802e-01, 9.7700e-01, 9.6440e-01,  ..., 4.8550e-01,
            4.8403e-01, 4.8208e-01],
           [8.3979e-01, 7.7959e-01, 7.4247e-01,  ..., 4.8627e-01,
            4.8627e-01, 4.8870e-01],
           ...,
           [4.9412e-01, 4.9258e-01, 4.8