In [48]:
import numpy as np

def generate_videos_from_images(images, labels, n_videos, frames, augment_noise=True):
    """
    Create synthetic videos:
      - Each video is 'frames' images sampled (with replacement) from the same class
    Returns:
      videos: uint8 array (n_videos, frames, H, W, C)
      video_labels: int array (n_videos,)
    """
    num_classes = int(labels.max()) + 1
    IMG_H, IMG_W, IMG_C = images.shape[1], images.shape[2], images.shape[3]
    videos = np.zeros((n_videos, frames, IMG_H, IMG_W, IMG_C), dtype=np.uint8)
    video_labels = np.zeros((n_videos,), dtype=np.int32)

    # precompute indices per class
    class_indices = {c: np.where(labels == c)[0] for c in range(num_classes)}

    per_class = n_videos // num_classes
    idx = 0

    for c in range(num_classes):
        idxs = class_indices[c]
        for _ in range(per_class):
            chosen = np.random.choice(idxs, size=frames, replace=True)
            clip = images[chosen].copy()
            if augment_noise:
                # tiny random noise and brightness jitter
                noise = (np.random.randn(*clip.shape) * 4).astype(np.int16)
                clip = clip.astype(np.int16) + noise
                brightness = np.random.randint(-10, 11)
                clip = clip + brightness
                clip = np.clip(clip, 0, 255).astype(np.uint8)
            videos[idx] = clip
            video_labels[idx] = c
            idx += 1

    # handle any remaining videos (if n_videos not divisible by num_classes)
    while idx < n_videos:
        c = np.random.randint(0, num_classes)
        idxs = class_indices[c]
        chosen = np.random.choice(idxs, size=frames, replace=True)
        clip = images[chosen].copy()
        if augment_noise:
            noise = (np.random.randn(*clip.shape) * 4).astype(np.int16)
            clip = clip.astype(np.int16) + noise
            clip = np.clip(clip, 0, 255).astype(np.uint8)
        videos[idx] = clip
        video_labels[idx] = c
        idx += 1

    return videos, video_labels


In [49]:
# quick synthetic test to ensure function runs
images = np.random.randint(0, 256, size=(200, 32, 32, 3), dtype=np.uint8)
labels = np.random.randint(0, 10, size=(200,), dtype=np.int32)

videos, vlabels = generate_videos_from_images(images, labels, n_videos=20, frames=8)
print("videos.shape:", videos.shape)   # expected (20, 8, 32, 32, 3)
print("vlabels.shape:", vlabels.shape)


videos.shape: (20, 8, 32, 32, 3)
vlabels.shape: (20,)


In [50]:
# Assume you already ran: generate_videos_from_images

videos, vlabels = generate_videos_from_images(images, labels, n_videos=200, frames=8)

print("Videos:", videos.shape)       # (200, 8, 32, 32, 3)
print("Labels:", vlabels.shape)      # (200,)


Videos: (200, 8, 32, 32, 3)
Labels: (200,)


In [51]:
from sklearn.model_selection import train_test_split

X_train, X_test, y_train, y_test = train_test_split(videos, vlabels, test_size=0.3, random_state=42, stratify=vlabels)


In [52]:
import tensorflow as tf
from tensorflow.keras import layers, models

num_classes = len(np.unique(vlabels))

model = models.Sequential([
    layers.Conv3D(32, (3,3,3), activation='relu', input_shape=(X_train.shape[1], X_train.shape[2], X_train.shape[3], X_train.shape[4])),
    layers.MaxPooling3D((1,2,2)),
    layers.Conv3D(64, (3,3,3), activation='relu'),
    layers.MaxPooling3D((1,2,2)), # Adjusted pooling to (1,2,2)
    layers.Conv3D(64, (3,3,3), activation='relu'),
    layers.MaxPooling3D((1,2,2)), # Adjusted pooling to (1,2,2)
    layers.Flatten(),
    layers.Dense(128, activation='relu'),
    layers.Dense(num_classes, activation='softmax')
])

model.compile(optimizer='adam',
              loss='sparse_categorical_crossentropy',
              metrics=['accuracy'])

model.summary()

  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


In [53]:
history = model.fit(
    X_train, y_train,
    validation_data=(X_test, y_test),
    epochs=10,
    batch_size=16
)


Epoch 1/10
[1m9/9[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m7s[0m 580ms/step - accuracy: 0.0953 - loss: 59.4463 - val_accuracy: 0.1000 - val_loss: 13.9630
Epoch 2/10
[1m9/9[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 567ms/step - accuracy: 0.0971 - loss: 8.6641 - val_accuracy: 0.0667 - val_loss: 2.7385
Epoch 3/10
[1m9/9[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 463ms/step - accuracy: 0.1011 - loss: 2.5725 - val_accuracy: 0.0833 - val_loss: 2.3895
Epoch 4/10
[1m9/9[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 448ms/step - accuracy: 0.1118 - loss: 2.2879 - val_accuracy: 0.1500 - val_loss: 2.4041
Epoch 5/10
[1m9/9[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 602ms/step - accuracy: 0.2378 - loss: 2.1263 - val_accuracy: 0.1333 - val_loss: 2.4609
Epoch 6/10
[1m9/9[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m9s[0m 455ms/step - accuracy: 0.3760 - loss: 1.9437 - val_accuracy: 0.1000 - val_loss: 2.5274
Epoch 7/10
[1m9/9[0m [32m━━━━━━━━━

In [54]:
from sklearn.metrics import classification_report, confusion_matrix
import numpy as np

# Predictions
y_pred = np.argmax(model.predict(X_test), axis=1)

# Metrics
print("Classification Report:")
print(classification_report(y_test, y_pred))

print("Confusion Matrix:")
print(confusion_matrix(y_test, y_pred))


[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 302ms/step
Classification Report:
              precision    recall  f1-score   support

           0       0.00      0.00      0.00         6
           1       0.43      0.50      0.46         6
           2       0.00      0.00      0.00         6
           3       0.00      0.00      0.00         6
           4       0.00      0.00      0.00         6
           5       0.00      0.00      0.00         6
           6       0.00      0.00      0.00         6
           7       0.00      0.00      0.00         6
           8       0.10      0.17      0.12         6
           9       0.08      0.17      0.11         6

    accuracy                           0.08        60
   macro avg       0.06      0.08      0.07        60
weighted avg       0.06      0.08      0.07        60

Confusion Matrix:
[[0 0 0 1 1 1 1 0 0 2]
 [0 3 1 0 0 0 1 0 0 1]
 [1 0 0 0 1 0 2 1 1 0]
 [1 0 0 0 0 2 0 0 2 1]
 [1 1 0 0 0 0 0 0 2 2]
 [3 0 0 0 1 