In [None]:
# Notebook parameters.
# NOTE(review): the angle-bracket values look like placeholders that must be
# replaced with real values before running — confirm how they are injected.

# Names of the per-video sub-folders to process; each name is also used as the
# prefix of the files written to output_dir.
filenames = ["<filenames>"]
# Root folder containing one sub-folder per entry of filenames, each expected
# to hold a 'features.mat.gz' archive.
input_dir = "<input_dir>"
# Folder that receives the decompressed '<name>_features.mat' files and the
# saved '.pt' tensors.
output_dir = "<output_dir>"

In [None]:
import tarfile
import os
import shutil
import gzip

def extract_map_gz(input_path: str, output_path: str) -> None:
    """Decompress a gzip-compressed file to ``output_path``.

    The data is first written to a temporary sibling file and only moved to
    ``output_path`` once decompression has finished, so a corrupt or
    interrupted archive never leaves a truncated output file behind.

    Args:
        input_path: Path of the ``.gz`` file to decompress.
        output_path: Path of the decompressed file to create (overwritten if
            it already exists). Missing parent folders are created.

    Raises:
        FileNotFoundError: If ``input_path`` is not an existing file.
        OSError: If the archive cannot be read or the output cannot be written.
    """
    # Ensure the input file exists
    if not os.path.isfile(input_path):
        raise FileNotFoundError(f"Input file not found: {input_path}")

    # open(..., 'wb') does not create folders, so make sure the target one exists
    parent_dir = os.path.dirname(output_path)
    if parent_dir:
        os.makedirs(parent_dir, exist_ok=True)

    partial_path = output_path + '.part'
    try:
        # Stream the decompressed bytes so large files are never held in memory
        with gzip.open(input_path, 'rb') as f_in, open(partial_path, 'wb') as f_out:
            shutil.copyfileobj(f_in, f_out)
        # Publish the result only after a complete, successful decompression
        os.replace(partial_path, output_path)
    finally:
        # Still present only when something above failed: discard the fragment
        if os.path.exists(partial_path):
            os.remove(partial_path)
    print(f"Extracted '{input_path}' to '{output_path}'")

In [None]:
# Make sure the destination folder exists; opening a file for writing does not
# create missing folders, so a fresh run would otherwise fail on the first video.
if output_dir:
    os.makedirs(output_dir, exist_ok=True)

for f in filenames:
    # Each video has its own sub-folder holding a gzip-compressed 'features.mat.gz'
    mat_file = os.path.join(input_dir, f, 'features.mat.gz')

    # Report videos whose archive is absent instead of aborting the whole run
    if os.path.isfile(mat_file):
        # Decompress to '<output_dir>/<video name>_features.mat'
        extract_map_gz(mat_file, os.path.join(output_dir, f + '_features.mat'))
    else:
        print(f"File not found: {mat_file}")



Extracted 'D:\bobsl_v1_4_features_i3d\bobsl\v1.4\video_features\i3d_c2281_16f_m8_-15_4_d0.8_-3_22\5086465773912997411\features.mat.gz' to 'C:\Users\taylo\OneDrive\Documents\Dissertation_Work\BOBSL\i3d_features_data\5086465773912997411_features.mat'
Extracted 'D:\bobsl_v1_4_features_i3d\bobsl\v1.4\video_features\i3d_c2281_16f_m8_-15_4_d0.8_-3_22\5087953980081062580\features.mat.gz' to 'C:\Users\taylo\OneDrive\Documents\Dissertation_Work\BOBSL\i3d_features_data\5087953980081062580_features.mat'
Extracted 'D:\bobsl_v1_4_features_i3d\bobsl\v1.4\video_features\i3d_c2281_16f_m8_-15_4_d0.8_-3_22\5090546422340930233\features.mat.gz' to 'C:\Users\taylo\OneDrive\Documents\Dissertation_Work\BOBSL\i3d_features_data\5090546422340930233_features.mat'
Extracted 'D:\bobsl_v1_4_features_i3d\bobsl\v1.4\video_features\i3d_c2281_16f_m8_-15_4_d0.8_-3_22\5090916219025116054\features.mat.gz' to 'C:\Users\taylo\OneDrive\Documents\Dissertation_Work\BOBSL\i3d_features_data\5090916219025116054_features.mat'
Extr

In [12]:
from scipy.io import loadmat
# Exploratory load of a 'features.mat' located in the current working directory.
# os.path.join is used instead of a hard-coded '\\' so the path is also valid
# on non-Windows systems.
mat = loadmat(os.path.join(os.getcwd(), 'features.mat'))

In [13]:
#!/usr/bin/env python3
"""
extract_i3d_features: Load I3D feature data from a .mat file and convert specific
variables into NumPy arrays and PyTorch tensors for model processing.
"""
import numpy as np
from scipy.io import loadmat

def extract_i3d_features(mat_path, name, out_dir=None):
    """Convert the arrays stored in one features ``.mat`` file into saved PyTorch tensors.

    Reads the ``preds``, ``clip_gt``, ``clip_ix`` and ``video_names`` variables,
    prints a short summary of them and writes ``<name>_preds_tensor.pt``,
    ``<name>_clip_gt_tensor.pt`` and ``<name>_clip_ix_tensor.pt``.

    Args:
        mat_path: Path of the ``.mat`` file, loaded with ``scipy.io.loadmat``.
        name: Prefix used for the three output file names.
        out_dir: Folder to write the tensors into. When omitted, the
            notebook-level ``output_dir`` variable is used. The folder is
            created if it does not exist.

    Raises:
        ImportError: If PyTorch is not installed.
        KeyError: If any of the four expected variables is missing.
    """
    # Fail fast, before any file is read, when torch is unavailable
    try:
        import torch
    except ImportError as err:
        raise ImportError("PyTorch is required for tensor conversion. Please install torch.") from err

    # Load the .mat file
    mat = loadmat(mat_path)

    # Validate that all required variables are present
    required = ('preds', 'clip_gt', 'clip_ix', 'video_names')
    missing = [key for key in required if mat.get(key) is None]
    if missing:
        raise KeyError(f"Missing variables in MAT file: {missing}")

    # Drop singleton axes; the label/index vectors are kept at least 1-D so a
    # file with a single clip does not degrade to 0-d scalars.
    preds_np = np.squeeze(np.array(mat['preds']))
    clip_gt_np = np.atleast_1d(np.squeeze(np.array(mat['clip_gt'])))
    clip_ix_np = np.atleast_1d(np.squeeze(np.array(mat['clip_ix'])))
    video_names_arr = np.squeeze(np.array(mat['video_names'], dtype=object))

    # With exactly one clip, squeeze also removes the clip axis of preds;
    # restore it so the result is always (clips, features).
    if clip_ix_np.size == 1 and preds_np.ndim < 2:
        preds_np = preds_np.reshape(1, -1)

    # Convert video_names to a Python list of strings
    if video_names_arr.ndim == 0:
        video_names_list = [str(video_names_arr)]
    else:
        video_names_list = video_names_arr.tolist()

    # Features become float32; clip_gt and clip_ix are assumed to be integer
    # labels/indices and become int64.
    preds_tensor = torch.from_numpy(preds_np).float()
    clip_gt_tensor = torch.from_numpy(clip_gt_np).long()
    clip_ix_tensor = torch.from_numpy(clip_ix_np).long()

    # Print summaries
    print(f"preds_tensor: shape={preds_tensor.shape}, dtype={preds_tensor.dtype}")
    print(f"clip_gt_tensor: shape={clip_gt_tensor.shape}, dtype={clip_gt_tensor.dtype}")
    print(f"clip_ix_tensor: shape={clip_ix_tensor.shape}, dtype={clip_ix_tensor.dtype}")
    print(f"video_names_list: {video_names_list}")

    # Fall back to the notebook-level output_dir only when no folder was given
    target_dir = output_dir if out_dir is None else out_dir
    if target_dir:
        os.makedirs(target_dir, exist_ok=True)

    torch.save(preds_tensor, os.path.join(target_dir, name + '_preds_tensor.pt'))
    torch.save(clip_gt_tensor, os.path.join(target_dir, name + '_clip_gt_tensor.pt'))
    torch.save(clip_ix_tensor, os.path.join(target_dir, name + '_clip_ix_tensor.pt'))


In [15]:
for f in filenames:
    # Location of the already-decompressed features file for this video
    mat_file = os.path.join(output_dir, f + '_features.mat')

    # Report a missing input and move on to the next video
    if not os.path.isfile(mat_file):
        print(f"File not found: {mat_file}")
        continue

    # Convert the stored arrays to tensors and save them, prefixed with the video name
    extract_i3d_features(mat_file, f)

preds_tensor: shape=torch.Size([11268, 1024]), dtype=torch.float32
clip_gt_tensor: shape=torch.Size([11268]), dtype=torch.int64
clip_ix_tensor: shape=torch.Size([11268]), dtype=torch.int64
video_names_list: ['5086465773912997411.mp4']
preds_tensor: shape=torch.Size([18714, 1024]), dtype=torch.float32
clip_gt_tensor: shape=torch.Size([18714]), dtype=torch.int64
clip_ix_tensor: shape=torch.Size([18714]), dtype=torch.int64
video_names_list: ['5087953980081062580.mp4']
preds_tensor: shape=torch.Size([18806, 1024]), dtype=torch.float32
clip_gt_tensor: shape=torch.Size([18806]), dtype=torch.int64
clip_ix_tensor: shape=torch.Size([18806]), dtype=torch.int64
video_names_list: ['5090546422340930233.mp4']
preds_tensor: shape=torch.Size([21831, 1024]), dtype=torch.float32
clip_gt_tensor: shape=torch.Size([21831]), dtype=torch.int64
clip_ix_tensor: shape=torch.Size([21831]), dtype=torch.int64
video_names_list: ['5090916219025116054.mp4']
preds_tensor: shape=torch.Size([22463, 1024]), dtype=torch.f