In [1]:
import tensorflow as tf
# Limit TF to grow GPU memory as needed (optional but recommended)
gpus = tf.config.list_physical_devices('GPU')
if gpus:
    for gpu in gpus:
        tf.config.experimental.set_memory_growth(gpu, True)
    print(f"{len(gpus)} GPU(s) found. Memory growth enabled.")

# Enable mixed precision for faster training on RTX 4070
from tensorflow.keras import mixed_precision
mixed_precision.set_global_policy("mixed_float16")
print("Mixed precision policy set to:", mixed_precision.global_policy())


The dtype policy mixed_float16 may run slowly because this machine does not have a GPU. Only Nvidia GPUs with compute capability of at least 7.0 run quickly with mixed_float16.
Mixed precision policy set to: <Policy "mixed_float16">


In [1]:
import tensorflow as tf
print("Built with CUDA:", tf.test.is_built_with_cuda())
print("GPU devices:", tf.config.list_physical_devices('GPU'))


Built with CUDA: False
GPU devices: []


In [None]:
import os, cv2, numpy as np

# Path to the UCF-101 dataset directory (update this to your actual path)
data_dir = r"C:\Users\acer\OneDrive\Documents\Dev Work\AI\ProjectX\ProjectX\UCF-101"

class_names = sorted([d for d in os.listdir(data_dir) if os.path.isdir(os.path.join(data_dir, d))])
num_classes = len(class_names)
print(f"Found {num_classes} classes: {class_names}")


# List all action classes
class_names = sorted([d for d in os.listdir(data_dir) if os.path.isdir(os.path.join(data_dir, d))])
num_classes = len(class_names)
print(f"Found {num_classes} classes.")  # Expect 101 for full UCF-101

# Gather all video file paths and their class labels
video_paths, labels = [], []
video_paths = []
labels = []
for label, class_name in enumerate(class_names):
    class_dir = os.path.join(data_dir, class_name)
    for file in os.listdir(class_dir):
        if file.endswith(".avi"):
            video_paths.append(os.path.join(class_dir, file))
            labels.append(label)
print(f"Found {len(video_paths)} videos.")

# Define parameters for frame extraction
FRAMES_PER_VIDEO = 16
IMG_HEIGHT, IMG_WIDTH = 112, 112

def extract_frames_from_video(video_path, num_frames=FRAMES_PER_VIDEO):
    """Read a video file and extract `num_frames` frames, resized to IMG_HEIGHT x IMG_WIDTH."""
    cap = cv2.VideoCapture(video_path)
    frames = []
    total_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT)) or 0
    if total_frames <= 0:
        cap.release()
        return None  # unable to read video
    # Determine frame indices to sample (evenly spaced)
    interval = max(total_frames // num_frames, 1)
    for i in range(num_frames):
        frame_index = int(i * interval)
        cap.set(cv2.CAP_PROP_POS_FRAMES, frame_index)
        ret, frame = cap.read()
        if not ret:
            break  # end of video
        # Resize frame to target size and normalize pixel values
        frame = cv2.resize(frame, (IMG_WIDTH, IMG_HEIGHT))
        frame = frame.astype("float32") / 255.0  # normalize to [0,1]
        frames.append(frame)
    cap.release()
    # If video is shorter than num_frames, pad with black frames
    while len(frames) < num_frames:
        frames.append(np.zeros((IMG_HEIGHT, IMG_WIDTH, 3), dtype="float32"))
    return np.array(frames)  # shape: (num_frames, IMG_HEIGHT, IMG_WIDTH, 3)


In [None]:
# Shuffle and split into train and test sets (80/20 split)
random_indices = np.random.permutation(len(video_paths))
split_idx = int(0.8 * len(video_paths))
train_idx, test_idx = random_indices[:split_idx], random_indices[split_idx:]
train_videos = [video_paths[i] for i in train_idx]
train_labels = [labels[i] for i in train_idx]
test_videos  = [video_paths[i] for i in test_idx]
test_labels  = [labels[i] for i in test_idx]
print(f"Training videos: {len(train_videos)}, Testing videos: {len(test_videos)}")


In [None]:
import random, tensorflow as tf

def frame_generator(video_list, label_list, training=False):
    """Generator that yields (frames, label) for each video in the list."""
    indices = list(range(len(video_list)))
    if training:
        random.shuffle(indices)  # shuffle order each epoch for training
    for idx in indices:
        frames = extract_frames_from_video(video_list[idx], FRAMES_PER_VIDEO)
        if frames is None:
            continue  # skip unreadable video
        # Data augmentation: random horizontal flip for training data
        if training and random.random() < 0.5:
            frames = np.flip(frames, axis=2)  # flip frames horizontally
        yield frames, label_list[idx]

# Create tf.data Datasets for training and testing
train_ds = tf.data.Dataset.from_generator(
    lambda: frame_generator(train_videos, train_labels, training=True),
    output_signature=(tf.TensorSpec(shape=(FRAMES_PER_VIDEO, IMG_HEIGHT, IMG_WIDTH, 3), dtype=tf.float32),
                      tf.TensorSpec(shape=(), dtype=tf.int32))
)
test_ds = tf.data.Dataset.from_generator(
    lambda: frame_generator(test_videos, test_labels, training=False),
    output_signature=(tf.TensorSpec(shape=(FRAMES_PER_VIDEO, IMG_HEIGHT, IMG_WIDTH, 3), dtype=tf.float32),
                      tf.TensorSpec(shape=(), dtype=tf.int32))
)

# Batch and prefetch the datasets for performance
BATCH_SIZE = 8
train_ds = train_ds.batch(BATCH_SIZE).prefetch(tf.data.AUTOTUNE)
test_ds = test_ds.batch(BATCH_SIZE).prefetch(tf.data.AUTOTUNE)

# Inspect one batch of training data
for frames_batch, labels_batch in train_ds.take(1):
    print("Batch frames shape:", frames_batch.shape)
    print("Batch labels:", labels_batch)


In [None]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv3D, MaxPooling3D, BatchNormalization, Flatten, Dense, Dropout

# Define model parameters
FRAMES_PER_VIDEO = 16
IMG_HEIGHT, IMG_WIDTH = 112, 112
num_classes = 101  # For UCF-101

model = Sequential([
    # First Conv3D: Pool only spatially to preserve temporal dimension.
    Conv3D(32, kernel_size=(3, 3, 3), activation='relu', padding='same',
           input_shape=(FRAMES_PER_VIDEO, IMG_HEIGHT, IMG_WIDTH, 3)),
    MaxPooling3D(pool_size=(1, 2, 2)),  # Temporal dimension remains 16
    BatchNormalization(),
    
    # Second Conv3D: now pool in all dimensions
    Conv3D(64, kernel_size=(3, 3, 3), activation='relu', padding='same'),
    MaxPooling3D(pool_size=(2, 2, 2)),  # Temporal: 16/2 = 8
    BatchNormalization(),
    
    # Third Conv3D: pool in all dimensions
    Conv3D(128, kernel_size=(3, 3, 3), activation='relu', padding='same'),
    MaxPooling3D(pool_size=(2, 2, 2)),  # Temporal: 8/2 = 4
    BatchNormalization(),
    
    Flatten(),
    Dense(512, activation='relu'),
    Dropout(0.5),
    Dense(num_classes, activation='softmax')
])

model.summary()


In [None]:
tf.config.list_physical_devices('GPU')
for frames_batch, labels_batch in train_ds.take(1):
    print("Frames shape:", frames_batch.shape)
    print("Labels shape:", labels_batch.shape)



In [None]:
import tensorflow as tf
print("Physical GPUs:", tf.config.list_physical_devices('GPU'))


In [None]:
from tensorflow.keras.optimizers import Adam
model.compile(loss="sparse_categorical_crossentropy",
              optimizer=Adam(learning_rate=1e-4),
              metrics=["accuracy"])


In [None]:
import tensorflow as tf
print("Built with CUDA:", tf.test.is_built_with_cuda())
print("Built with GPU support:", tf.test.is_gpu_available())
print(tf.config.list_physical_devices('GPU'))

In [None]:
EPOCHS = 10
history = model.fit(train_ds, epochs=EPOCHS)


In [None]:
test_loss, test_accuracy = model.evaluate(test_ds)
print(f"Test Accuracy: {test_accuracy*100:.2f}%")


In [None]:
model.compile(..., metrics=["accuracy", tf.keras.metrics.TopKCategoricalAccuracy(k=5)])


In [None]:
model.save("ucf101_3dcnn.h5")
# This creates a file 'ucf101_3dcnn.h5' with the model.
from tensorflow.keras.models import load_model
loaded_model = load_model("ucf101_3dcnn.h5")


In [None]:
# Suppose we have a new video file "new_video.mp4" that we want to classify
new_video_path = "new_video.mp4"
frames = extract_frames_from_video(new_video_path, num_frames=FRAMES_PER_VIDEO)
if frames is None:
    print("Could not read video or video too short.")
else:
    frames = np.expand_dims(frames, axis=0)  # shape becomes (1, num_frames, H, W, 3)
    predictions = loaded_model.predict(frames)  # model expects batch input
    predicted_label = np.argmax(predictions[0])
    print("Predicted action:", class_names[predicted_label])
