In [1]:
from __future__ import print_function

import glob
from itertools import chain
import os
import random
import zipfile
import shutil

import av
import imageio

import numpy as np
import torch

from torch.utils.data import DataLoader, Dataset
from torchvision.io import read_video
from torchvision.transforms import Compose, Resize, ToTensor
from sklearn.model_selection import train_test_split
from torch.utils.data import DataLoader, Dataset
from torchvision import datasets, transforms
from tqdm.notebook import tqdm

# Builds a CPU device object and lets the notebook display it. The result is
# not bound to a name here, so it is not stored for later use.
torch.device('cpu')


device(type='cpu')

In [2]:
# Seed value handed to seed_everything() for the random number generators.
seed = 42

In [3]:
def seed_everything(seed):
    """Seed every random number generator this notebook relies on.

    Seeds Python's ``random`` module, NumPy's legacy global generator and
    PyTorch (CPU and all CUDA devices), and configures cuDNN for repeatable
    kernel selection.

    Args:
        seed: Integer seed applied to all generators.
    """
    random.seed(seed)
    # NOTE(review): the interpreter reads PYTHONHASHSEED at start-up, so this
    # assignment presumably only influences child processes -- confirm.
    os.environ['PYTHONHASHSEED'] = str(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    # The CUDA seeding calls are silently ignored when CUDA is unavailable.
    torch.cuda.manual_seed(seed)
    torch.cuda.manual_seed_all(seed)
    torch.backends.cudnn.deterministic = True
    # Autotuning may pick a different convolution algorithm on each run, which
    # defeats the deterministic flag above; switch it off as well.
    torch.backends.cudnn.benchmark = False

# Seed all generators with the notebook-wide `seed` value.
seed_everything(seed)

In [4]:
def make_datasets(root_dir='./dataset',
                  classes=('slip', 'wriggle'),
                  subsets=('train', 'validation', 'test'),
                  split_ratios=(0.7, 0.15, 0.15),
                  seed=None):
    """Copy the per-class source videos into disjoint subset folders.

    For every class, the ``.avi`` files found in ``<root_dir>/<class>`` are
    shuffled once and cut into consecutive, non-overlapping slices, one per
    subset. Each slice is copied to ``<root_dir>/<subset>/<class>``. A file
    therefore lands in at most one subset, so no video is shared between the
    training, validation and test data.

    Each subset receives ``int(n_files * ratio)`` files; files left over by
    that truncation are not copied anywhere.

    Copies are added to whatever the target folders already contain. Clear
    the subset folders before re-running with a different shuffle, otherwise
    files from the earlier split remain and the subsets can overlap again.

    Args:
        root_dir: Folder holding the class source folders; the subset folders
            are created inside it.
        classes: Class names, each also the name of its source folder.
        subsets: Subset folder names, in the order the slices are assigned.
        split_ratios: Fraction of each class assigned to the matching subset.
        seed: Optional seed for a private shuffle generator. When ``None``
            the global ``random`` module state is used.

    Raises:
        ValueError: If ``subsets`` and ``split_ratios`` differ in length, or
            the ratios add up to more than 1.
    """
    if len(subsets) != len(split_ratios):
        raise ValueError("subsets and split_ratios must have the same length")
    if sum(split_ratios) > 1.0 + 1e-9:
        raise ValueError("split_ratios must not add up to more than 1")

    shuffler = random.Random(seed) if seed is not None else random

    os.makedirs(root_dir, exist_ok=True)

    for class_name in classes:
        source_videos = os.path.join(root_dir, class_name)
        # Sort first so the shuffle result depends only on the generator
        # state, not on the order the file system happens to report.
        video_files = sorted(
            f for f in os.listdir(source_videos) if f.endswith('.avi')
        )
        shuffler.shuffle(video_files)  # One shuffle per class, shared by all subsets

        start = 0
        for subset, split_ratio in zip(subsets, split_ratios):
            class_dir = os.path.join(root_dir, subset, class_name)
            os.makedirs(class_dir, exist_ok=True)

            # Continue where the previous subset stopped to keep slices disjoint.
            stop = start + int(len(video_files) * split_ratio)
            for video_file in video_files[start:stop]:
                src_path = os.path.join(source_videos, video_file)
                dest_path = os.path.join(class_dir, video_file)
                shutil.copy(src_path, dest_path)
            start = stop

    print("Directory structure and video copying completed.")



In [5]:
# make_datasets()  # Uncomment to copy the source videos into the train/validation/test folders.

In [6]:
class VideoDataset(Dataset):
    """Dataset of ``.avi`` clips stored as ``<data_dir>/<class_name>/<clip>.avi``.

    Every non-hidden sub-directory of ``data_dir`` is one class; class indices
    follow the alphabetical order of the directory names. Indexing returns
    ``(video_tensor, label)`` where ``video_tensor`` is laid out as
    (channels, frames, height, width). No batch axis is added here.

    NOTE(review): default DataLoader collation stacks the samples, so clips
    presumably need equal frame counts and frame sizes -- confirm against the
    data.

    Args:
        data_dir: Folder containing one sub-directory per class.
        transform: Optional callable applied to each decoded frame (an
            H x W x 3 array). When omitted, raw frames are returned unscaled.
    """

    def __init__(self, data_dir, transform=None):
        self.data_dir = data_dir
        # Only real, non-hidden directories are classes; stray files or
        # folders such as .ipynb_checkpoints would otherwise become labels
        # or make the per-class listing fail.
        self.classes = sorted(
            entry for entry in os.listdir(data_dir)
            if not entry.startswith('.')
            and os.path.isdir(os.path.join(data_dir, entry))
        )
        self.class_to_idx = {cls: idx for idx, cls in enumerate(self.classes)}
        self.videos = self._load_videos()
        self.transform = transform

    def _load_videos(self):
        """Return a list of ``(video_path, class_index)`` for every ``.avi`` file."""
        videos = []
        for class_name in self.classes:
            class_dir = os.path.join(self.data_dir, class_name)
            # Sorted so that sample indices are stable across file systems.
            for video_file in sorted(os.listdir(class_dir)):
                if video_file.endswith('.avi'):
                    video_path = os.path.join(class_dir, video_file)
                    videos.append((video_path, self.class_to_idx[class_name]))
        return videos

    def __len__(self):
        """Number of video clips found."""
        return len(self.videos)

    def __getitem__(self, idx):
        """Decode one clip and return ``(video_tensor, label)``.

        Raises:
            ValueError: If no frame could be read from the file.
        """
        video_path, label = self.videos[idx]

        video = imageio.get_reader(video_path, 'ffmpeg')  # Open video with imageio
        try:
            # Keep only the first three channels (RGB) of every frame.
            frames = [frame[:, :, :3] for frame in video]
        finally:
            # Release the reader even when decoding fails part-way.
            video.close()

        if not frames:
            raise ValueError("No frames could be read from {}".format(video_path))

        if self.transform:
            # Assumes the transform yields (channels, height, width) tensors,
            # as ToTensor does: stack -> (frames, C, H, W) -> (C, frames, H, W).
            video_tensor = torch.stack([self.transform(frame) for frame in frames])
            return video_tensor.permute(1, 0, 2, 3), label

        # No transform: stack the raw frames (frames, H, W, C) without any
        # scaling and move channels first so the layout matches the branch above.
        video_tensor = torch.from_numpy(np.asarray(np.stack(frames)))
        return video_tensor.permute(3, 0, 1, 2), label

# Per-frame preprocessing pipeline; currently a single ToTensor step.
data_transform = Compose([ToTensor()])

In [7]:
# Locations of the three subset folders underneath the dataset root.
root_dir = './dataset'
train_dir, val_dir, test_dir = (
    os.path.join(root_dir, subset_name)
    for subset_name in ('train', 'validation', 'test')
)

In [8]:
# One VideoDataset per subset folder, all sharing the same frame transform.
train_dataset, val_dataset, test_dataset = (
    VideoDataset(subset_dir, transform=data_transform)
    for subset_dir in (train_dir, val_dir, test_dir)
)

In [9]:
batch_size = 4

# Shuffled training batches; num_workers=0 keeps loading in the main process.
train_loader = DataLoader(
    train_dataset,
    batch_size=batch_size,
    shuffle=True,
    num_workers=0,
)
#val_loader = DataLoader(val_dataset, batch_size=batch_size, num_workers=0,shuffle=True)
#test_loader = DataLoader(test_dataset, batch_size=batch_size, num_workers=0, shuffle=True)

In [10]:
# Sample usage of the DataLoader
#for videos, labels in train_loader:
    # videos will have the shape: (batch, channels, frames, height, width)
#    print("Video batch shape:", videos.shape)
#    print("Label batch:", labels)
#    break  # Stop after the first batch for demonstration purposes

In [11]:
# Display the class names discovered by the training dataset.
train_loader.dataset.classes


['slip', 'wriggle']