In [None]:
# Install dependencies
# NOTE(review): no versions are pinned, so a fresh environment may resolve to
# different releases than the ones this notebook was developed against.
%pip install tensorflow opencv-python scipy pandas numpy matplotlib scikit-learn

import warnings
# Installs a catch-all "ignore" filter: every Python warning category is
# suppressed from here on, including deprecation notices raised by the
# libraries used below. Remove or narrow this when debugging odd behaviour.
warnings.filterwarnings('ignore')


In [None]:
# Import libraries
import os
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import (
    TimeDistributed, Conv2D, MaxPooling2D, Flatten, Dense, Dropout, 
    LSTM, BatchNormalization, GlobalAveragePooling2D
)
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.callbacks import EarlyStopping, ReduceLROnPlateau
from tensorflow.keras.utils import Sequence
import gc

# Report library versions so a run can be matched to its environment later.
print(f"TensorFlow version: {tf.__version__}")
print(f"Keras version: {tf.keras.__version__}")

# GPU setup
# `tf.config.list_physical_devices` is the stable name of the former
# `tf.config.experimental.list_physical_devices` alias.
physical_devices = tf.config.list_physical_devices('GPU')
print(f"🔥 Found {len(physical_devices)} GPU(s)")
for gpu in physical_devices:
    try:
        # Allocate GPU memory on demand instead of reserving it all up front.
        tf.config.experimental.set_memory_growth(gpu, True)
    except RuntimeError as err:
        # Memory growth can only be changed before the GPUs are initialised.
        # Re-running this cell in a live kernel would otherwise abort here.
        print(f"⚠️ Could not enable memory growth for {gpu.name}: {err}")


In [None]:
# Sanity-check the on-disk dataset layout relative to the working directory.
print("📁 Dataset Structure Check:")
print(f"Working directory: {os.getcwd()}")
print("\nFolders:")
for folder in ('clips', 'dataframes', 'flow_clips'):
    print(f"✅ {folder}/ exists" if os.path.exists(folder) else f"❌ {folder}/ missing")

# Annotation tables: every *.csv directly inside dataframes/.
# (os.listdir raises FileNotFoundError if the folder reported missing above
# really is absent.)
dataframe_files = [entry for entry in os.listdir('dataframes') if entry.endswith('.csv')]
print(f"\n📄 Found {len(dataframe_files)} dataframe files:")
for df_file in sorted(dataframe_files):
    print(f"   - {df_file}")

# Clip folders: every sub-directory of clips/, with its count of .npy files.
clips_folders = [entry for entry in os.listdir('clips') if os.path.isdir(f'clips/{entry}')]
print(f"\n🎬 Found {len(clips_folders)} video folders:")
for folder in sorted(clips_folders):
    clip_files = sum(1 for item in os.listdir(f'clips/{folder}') if item.endswith('.npy'))
    print(f"   - {folder}: {clip_files} clips")


In [None]:
# Analyze class distribution
#
# Builds `all_clip_data`, a list of (clip_path, zero_based_label) pairs, and
# counts how many clips fall into each action class.

# Action class names; list position is the 0-indexed label used for training.
action_classes = [
    'Reach To Shelf',
    'Retract From Shelf', 
    'Hand In Shelf',
    'Inspect Product',
    'Inspect Shelf'
]

# Size the histogram from the class list so the two cannot drift apart.
class_distribution = [0] * len(action_classes)
total_clips = 0
all_clip_data = []

print("📊 Analyzing class distribution...")
for df_file in sorted(dataframe_files):
    df = pd.read_csv(f'dataframes/{df_file}')

    # The video id is the text between the first '_' and the following '.'
    # of the CSV file name; it is constant per file, so compute it once.
    video_num = df_file.split('_')[1].split('.')[0]

    # CSV labels are 1-indexed; convert to 0-indexed plain Python ints.
    labels = (df['class'].astype(int) - 1).tolist()

    # Reject out-of-range labels explicitly: a label of -1 would otherwise be
    # counted in the last bucket through negative indexing and stored as a
    # training label.
    invalid = sorted({lab + 1 for lab in labels if not 0 <= lab < len(action_classes)})
    if invalid:
        raise ValueError(
            f"{df_file}: class values {invalid} are outside the expected "
            f"range 1..{len(action_classes)}"
        )

    # Iterate over columns rather than rows: faster, and values keep their
    # column dtype instead of being upcast per row.
    for clip_name, class_label in zip(df['name'], labels):
        clip_path = f"clips/video_{video_num}/{clip_name}.npy"
        all_clip_data.append((clip_path, class_label))
        class_distribution[class_label] += 1

    total_clips += len(df)
    print(f"   📄 {df_file}: {len(df)} clips")

# Percentage share per class; an empty dataset reports 0% instead of raising
# ZeroDivisionError.
percentages = [
    (count / total_clips * 100) if total_clips else 0.0
    for count in class_distribution
]

print(f"\n📈 Class Distribution:")
for i, (class_name, count, percentage) in enumerate(
        zip(action_classes, class_distribution, percentages)):
    print(f"   {i}: {class_name:<20} = {count:3d} clips ({percentage:5.1f}%)")

print(f"\n📊 Total clips: {total_clips}")
print(f"📊 Balance std: {np.std(percentages):.2f}%")


In [None]:
class MerlActionDataGenerator(Sequence):
    """
    Keras ``Sequence`` yielding batches of fixed-length clips for the MERL
    Shopping dataset.

    Each entry of ``clip_data`` is a ``(clip_path, label)`` pair. A clip is
    read with ``np.load`` and is treated as an array whose first axis is time;
    the padding and flip logic assumes a 4-D layout with width on axis 2.

    Args:
        clip_data: Indexable collection of ``(clip_path, label)`` pairs.
        sequence_length: Number of frames every returned clip has.
        batch_size: Clips per batch (must be >= 1).
        target_size: ``(height, width)`` the stored frames are expected to
            have. Frames are NOT resized here; the value only sizes the
            all-zero placeholder used when a clip fails to load.
        shuffle: Shuffle the clip order at construction and after each epoch.
        augment: Randomly apply a horizontal flip to roughly half the clips.
        **kwargs: Forwarded to the Keras base class (for example ``workers``
            on Keras versions that accept it).

    Raises:
        ValueError: If ``batch_size`` is smaller than 1.
    """
    def __init__(self, clip_data, sequence_length=10, batch_size=4, 
                 target_size=(224, 224), shuffle=True, augment=False, **kwargs):
        # Keras 3 warns when a subclass skips the base-class constructor, and
        # needs it to register options such as ``workers``.
        super().__init__(**kwargs)

        if batch_size < 1:
            raise ValueError(f"batch_size must be >= 1, got {batch_size}")

        self.clip_data = clip_data
        self.sequence_length = sequence_length
        self.batch_size = batch_size
        self.target_size = target_size
        self.shuffle = shuffle
        self.augment = augment
        self.indices = np.arange(len(self.clip_data))
        
        if self.shuffle:
            np.random.shuffle(self.indices)
            
        print(f"📊 Data Generator initialized:")
        print(f"   - Total clips: {len(self.clip_data)}")
        print(f"   - Sequence length: {self.sequence_length}")
        print(f"   - Batch size: {self.batch_size}")
        print(f"   - Batches per epoch: {len(self)}")
    
    def __len__(self):
        """Return the number of full batches; a trailing partial batch is dropped."""
        return len(self.clip_data) // self.batch_size
    
    def __getitem__(self, idx):
        """
        Build batch number ``idx``.

        Returns:
            ``(clips, labels)`` as NumPy arrays stacked along a new first axis.
            A clip that cannot be loaded is reported on stdout and replaced by
            an all-zero clip with label 0, so one bad file does not abort an
            epoch.
        """
        start = idx * self.batch_size
        batch_indices = self.indices[start:start + self.batch_size]
        
        batch_clips = []
        batch_labels = []
        
        for i in batch_indices:
            clip_path, label = self.clip_data[i]
            
            try:
                clip = self._load_clip(clip_path)
            except Exception as e:
                # Deliberate best-effort: log and substitute a placeholder.
                print(f"⚠️ Error loading {clip_path}: {e}")
                clip = np.zeros((self.sequence_length, *self.target_size, 3))
                label = 0
            
            batch_clips.append(clip)
            batch_labels.append(label)
        
        return np.array(batch_clips), np.array(batch_labels)
    
    def on_epoch_end(self):
        """Reshuffle the clip order between epochs when shuffling is enabled."""
        if self.shuffle:
            np.random.shuffle(self.indices)

    def _load_clip(self, clip_path):
        """Load one clip, fit it to ``sequence_length`` frames and maybe flip it."""
        clip = self._fit_length(np.load(clip_path))
        
        # Simple augmentation: the random draw only happens when augment is on.
        if self.augment and np.random.random() > 0.5:
            clip = np.flip(clip, axis=2)  # Horizontal flip
        
        return clip

    def _fit_length(self, clip):
        """Centre-crop a long clip, or loop a short one, to ``sequence_length`` frames."""
        n_frames = len(clip)
        if n_frames == 0:
            # Would otherwise surface as an opaque ZeroDivisionError below.
            raise ValueError("clip contains no frames")
        
        if n_frames >= self.sequence_length:
            # Take the middle portion if the clip is longer.
            start_idx = (n_frames - self.sequence_length) // 2
            return clip[start_idx:start_idx + self.sequence_length]
        
        # Repeat the whole clip enough times to cover the target, then trim.
        repeat_factor = self.sequence_length // n_frames + 1
        return np.tile(clip, (repeat_factor, 1, 1, 1))[:self.sequence_length]


In [None]:
# Instantiate the training generator over every clip collected so far.
print("🔄 Creating data generator...")
train_generator = MerlActionDataGenerator(
    all_clip_data,
    sequence_length=10,      # frames per returned clip
    batch_size=4,            # kept small to limit memory use
    target_size=(224, 224),
    shuffle=True,
    augment=True,
)

# Smoke test: pull the first batch and report its shape, value range and labels.
# Any failure is reported on stdout rather than raised.
print("\n🧪 Testing data generator...")
try:
    X_test, y_test = train_generator[0]
    print(f"✅ Batch shape: X={X_test.shape}, y={y_test.shape}")
    print(f"   X range: [{X_test.min():.3f}, {X_test.max():.3f}]")
    print(f"   Labels: {y_test}")
    print(f"   Unique labels: {np.unique(y_test)}")
except Exception as err:
    print(f"❌ Generator test failed: {err}")
