In [None]:
import os
import cv2
import torch
import numpy as np
from torchvision import transforms, models
from sklearn.preprocessing import LabelEncoder

In [None]:
# Pre-processing applied to every frame before it is fed to the network:
# numpy image -> PIL image -> 224x224 resize -> float tensor -> per-channel
# normalization with the mean/std that torchvision's pre-trained models expect.
transform = transforms.Compose([
    transforms.ToPILImage(),
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
])

# Load the pre-trained ResNet-18 model
resnet = models.resnet18(pretrained=True)

# Remove the last layer of the ResNet model to obtain the feature extractor
resnet_feat = torch.nn.Sequential(*list(resnet.children())[:-1])

# Put the networks in inference mode so the BatchNorm layers use their stored
# running statistics instead of per-batch statistics. `resnet_feat` shares its
# layers with `resnet`, but it is the object that is actually called later, so
# it is switched explicitly rather than relying on that sharing.
# The trailing semicolon keeps the notebook from printing the full model repr.
resnet.eval()
resnet_feat.eval();

Downloading: "https://download.pytorch.org/models/resnet18-f37072fd.pth" to /root/.cache/torch/hub/checkpoints/resnet18-f37072fd.pth
100%|██████████| 44.7M/44.7M [00:00<00:00, 83.0MB/s]


ResNet(
  (conv1): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
  (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (relu): ReLU(inplace=True)
  (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
  (layer1): Sequential(
    (0): BasicBlock(
      (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
      (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    )
    (1): BasicBlock(
      (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
  

In [None]:
def has_less_than_30_images(folder_path, min_images=30):
    """Return True when ``folder_path`` holds fewer than ``min_images`` images.

    Only the direct entries of the folder are inspected. An entry counts as an
    image when its name ends with '.png', '.jpg' or '.jpeg' (case-sensitive).

    Args:
        folder_path: Directory whose entries are counted.
        min_images: Number of images from which the folder is considered
            large enough. Defaults to 30, matching the function name.

    Returns:
        bool: True if fewer than ``min_images`` images were found, else False.

    Raises:
        OSError: Propagated from ``os.listdir`` when the folder cannot be listed.
    """
    # str.endswith accepts a tuple, so no per-extension loop is needed
    image_extensions = ('.png', '.jpg', '.jpeg')

    # Any folder (even an empty one) has at least zero images
    if min_images <= 0:
        return False

    image_count = 0
    for file_name in os.listdir(folder_path):
        if file_name.endswith(image_extensions):
            image_count += 1
            # Stop scanning as soon as the threshold is reached
            if image_count >= min_images:
                return False

    return True

def _label_for_folder(folder_name):
    """Return the class id encoded in a folder name, or None if there is none."""
    # "hand_flapping" is checked first, as in the original if/elif chain
    if "hand_flapping" in folder_name:
        return 0
    if "arm_flapping" in folder_name:
        return 1
    return None


def frame_2_data(data_dir):
    """Convert every folder of frames under ``data_dir`` into a labelled sample.

    Each sub-folder is treated as one sample. Its images are loaded with
    OpenCV, pre-processed with the module-level ``transform`` and passed
    through the module-level ``resnet_feat`` extractor in a single batch.

    Folders are skipped (with a printed message) when their name contains
    neither "hand_flapping" (label 0) nor "arm_flapping" (label 1), or when
    ``has_less_than_30_images`` reports too few images. Images that OpenCV
    cannot read are skipped individually.

    Args:
        data_dir: Directory containing one sub-folder of frames per sample.

    Returns:
        list: ``(features, label)`` tuples in shuffled order, where
        ``features`` is a 2-D numpy array with one row per frame and
        ``label`` is 0 or 1.
    """
    samples = []
    # Loop over the folders in the dataset
    for image_folder in os.listdir(data_dir):
        if image_folder == '.DS_Store':
            continue

        image_folder_path = os.path.join(data_dir, image_folder)
        # Stray files next to the sample folders cannot be listed; ignore them
        if not os.path.isdir(image_folder_path):
            continue

        print("image_folder : ", image_folder)

        # Without this check an unknown folder would either raise
        # UnboundLocalError or silently inherit the previous folder's label.
        label = _label_for_folder(image_folder)
        if label is None:
            print("skipping folder without a known label : ", image_folder)
            continue

        # Checked once per folder (not once per file) and the folder is really
        # skipped, so torch.stack never receives an empty list.
        if has_less_than_30_images(image_folder_path):
            print("skipping folder with too few images : ", image_folder)
            continue

        frames = []
        # Sorted so the frame order is deterministic; os.listdir order is arbitrary.
        # NOTE(review): plain lexicographic sort; assumes frame names sort in the
        # intended order (e.g. zero-padded indices) - confirm against the data.
        for image_file in sorted(os.listdir(image_folder_path)):
            if not image_file.endswith(('.png', '.jpg', '.jpeg')):  # check for image files
                continue

            image_path = os.path.join(image_folder_path, image_file)
            image = cv2.imread(image_path)
            # cv2.imread signals failure by returning None instead of raising
            if image is None:
                print("skipping unreadable image : ", image_path)
                continue

            # OpenCV loads BGR; the transform pipeline expects RGB
            image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
            frames.append(transform(image))

        if not frames:
            print("skipping folder with no readable images : ", image_folder)
            continue

        # Stack the frames into a tensor of shape (num_frames, 3, 224, 224)
        frames_tensor = torch.stack(frames, dim=0)
        # Extract the features using the pre-trained ResNet feature extractor
        with torch.no_grad():
            features_tensor = resnet_feat(frames_tensor)
        # Flatten everything after the frame dimension into one feature vector
        features_tensor = torch.flatten(features_tensor, start_dim=1)
        # Convert the features tensor to a numpy array
        features = features_tensor.cpu().numpy()
        # Append the features and label to the samples list
        samples.append((features, label))

    np.random.shuffle(samples)
    return samples

In [None]:
train_data_dir = '/content/drive/MyDrive/train'
test_data_dir = '/content/drive/MyDrive/test'

# Extract one (features, label) sample per folder for each split
train_samples = frame_2_data(train_data_dir)
test_samples = frame_2_data(test_data_dir)

# Split the list of pairs into parallel tuples of features and labels
train_features, train_labels = zip(*train_samples)
test_features, test_labels = zip(*test_samples)


# Convert the labels to numerical labels using a LabelEncoder.
# The encoder is fitted on the training labels only and then reused for the
# test labels, so both splits share one label mapping. Refitting on the test
# split could assign different codes (e.g. when it contains a single class).
# transform() raises ValueError if the test split has a label unseen in training.
le = LabelEncoder()
train_numerical_labels = le.fit_transform(train_labels)
test_numerical_labels = le.transform(test_labels)
# Convert the features and labels to numpy arrays
train_features = np.array(train_features)
train_labels = train_numerical_labels
test_features = np.array(test_features)
test_labels = test_numerical_labels

# Print the shapes of the features and labels arrays
print("Train Features shape:", train_features.shape)
print("Train Labels shape:", train_labels.shape)
print("Test Features shape:", test_features.shape)
print("Test Labels shape:", test_labels.shape)

# Save the features and labels to .npy files in the current working directory
np.save('train_features.npy', train_features)
np.save('train_labels.npy', train_labels)
np.save('test_features.npy', test_features)
np.save('test_labels.npy', test_labels)

image_folder :  12_hand_flapping_1_1
image_folder :  25_arm_flapping_1_1
image_folder :  27_arm_flapping_1_1
image_folder :  28_hand_flapping_1_1
image_folder :  19_arm_flapping_1_1
image_folder :  1_arm_flapping_1_1
image_folder :  14_hand_flapping_1_1
Train Features shape: (5, 30, 512)
Train Labels shape: (5,)
Test Features shape: (2, 30, 512)
Test Labels shape: (2,)
