In [3]:
import zipfile
zip_ref=zipfile.ZipFile('/content/organized_dataset.zip','r')
zip_ref.extractall('/content/Dataset')
zip_ref.close()

In [4]:
import os
import random
import shutil
from collections import defaultdict

original_dataset = 'Dataset'
output_dataset = 'FinalDataset'
non_g3_classes = ['g1', 'g2', 'g4', 'g11']
g3_class = 'g3'

non_g3_path = os.path.join(output_dataset, 'NON_G3')
g3_path = os.path.join(output_dataset, 'G3')
os.makedirs(non_g3_path, exist_ok=True)
os.makedirs(g3_path, exist_ok=True)

g3_videos = [f for f in os.listdir(os.path.join(original_dataset, g3_class)) if f.lower().endswith(('.mp4', '.avi', '.mov'))]
num_g3 = len(g3_videos)
print(f"G3 has {num_g3} videos")

for fname in g3_videos:
    src = os.path.join(original_dataset, g3_class, fname)
    dst = os.path.join(g3_path, fname)
    shutil.copy2(src, dst)

non_g3_video_dict = defaultdict(list)
for cls in non_g3_classes:
    class_path = os.path.join(original_dataset, cls)
    videos = [os.path.join(class_path, f) for f in os.listdir(class_path) if f.lower().endswith(('.mp4', '.avi', '.mov'))]
    non_g3_video_dict[cls] = videos

all_non_g3_videos = sum(non_g3_video_dict.values(), [])
random.shuffle(all_non_g3_videos)

per_class_quota = num_g3 // len(non_g3_classes)
selected_non_g3 = []

for cls in non_g3_classes:
    videos = non_g3_video_dict[cls]
    random.shuffle(videos)
    selected_non_g3.extend(videos[:per_class_quota])

remainder = num_g3 - len(selected_non_g3)
if remainder > 0:
    leftovers = []
    for cls in non_g3_classes:
        leftovers += non_g3_video_dict[cls][per_class_quota:]
    random.shuffle(leftovers)
    selected_non_g3 += leftovers[:remainder]

for i, src in enumerate(selected_non_g3):
    fname = f"non_g3_{i}_{os.path.basename(src)}"
    dst = os.path.join(non_g3_path, fname)
    shutil.copy2(src, dst)

print(f"NON_G3 has {len(selected_non_g3)} videos")


G3 has 452 videos
NON_G3 has 452 videos


In [5]:
import os

organized_dir = 'FinalDataset'
class_counts = {}

for class_name in os.listdir(organized_dir):
    class_path = os.path.join(organized_dir, class_name)
    if os.path.isdir(class_path):
        num_files = len([
            f for f in os.listdir(class_path)
            if os.path.isfile(os.path.join(class_path, f))
        ])
        class_counts[class_name] = num_files

for class_name, count in class_counts.items():
    print(f"Class '{class_name}': {count} videos")

total_videos = sum(class_counts.values())
print(f"\nTotal videos counted: {total_videos}")


Class 'G3': 452 videos
Class 'NON_G3': 452 videos

Total videos counted: 904


In [6]:
import os
import cv2
import numpy as np
import tensorflow as tf
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import train_test_split

class VideoDataGenerator(tf.keras.utils.Sequence):
    def __init__(self, video_paths, labels, batch_size=4, frames_per_clip=16, img_size=(112, 112), shuffle=True):
        self.video_paths = video_paths
        self.labels = labels
        self.batch_size = batch_size
        self.frames_per_clip = frames_per_clip
        self.img_size = img_size
        self.shuffle = shuffle
        self.on_epoch_end()

    def __len__(self):
        return int(np.ceil(len(self.video_paths) / self.batch_size))

    def __getitem__(self, index):
        batch_paths = self.video_paths[index * self.batch_size:(index + 1) * self.batch_size]
        batch_labels = self.labels[index * self.batch_size:(index + 1) * self.batch_size]

        X, y = self.__data_generation(batch_paths, batch_labels)
        return X, y

    def on_epoch_end(self):
        self.indexes = np.arange(len(self.video_paths))
        if self.shuffle:
            np.random.shuffle(self.indexes)
            self.video_paths = [self.video_paths[i] for i in self.indexes]
            self.labels = [self.labels[i] for i in self.indexes]

    def __data_generation(self, video_paths, labels):
        X = []
        y = []

        for video_path, label in zip(video_paths, labels):
            cap = cv2.VideoCapture(video_path)
            total_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
            if total_frames == 0:
                frames = [np.zeros((*self.img_size, 1), dtype=np.uint8) for _ in range(self.frames_per_clip)]
            else:
                frame_idxs = np.linspace(0, total_frames - 1, self.frames_per_clip).astype(int)
                frames = []
                for idx in frame_idxs:
                    cap.set(cv2.CAP_PROP_POS_FRAMES, idx)
                    ret, frame = cap.read()
                    if ret:
                        frame = cv2.resize(frame, self.img_size)
                        frame = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
                        frame = np.expand_dims(frame, axis=-1)
                        frames.append(frame)
                    else:
                        frames.append(np.zeros((*self.img_size, 1), dtype=np.uint8))
            cap.release()

            clip = np.array(frames) / 255.0  # Normalize to [0,1]
            X.append(clip)
            y.append(label)

        X = np.array(X)
        y = tf.keras.utils.to_categorical(y, num_classes=len(set(self.labels)))
        return X, y

def collect_paths_and_labels(base_path):
    video_paths = []
    labels = []
    class_names = sorted(os.listdir(base_path))
    le = LabelEncoder()
    le.fit(class_names)

    for label_name in class_names:
        class_dir = os.path.join(base_path, label_name)
        if not os.path.isdir(class_dir):
            continue
        for fname in os.listdir(class_dir):
            if fname.lower().endswith(('.mp4', '.avi', '.mov')):
                video_paths.append(os.path.join(class_dir, fname))
                labels.append(label_name)

    labels_encoded = le.transform(labels)
    return video_paths, labels_encoded, le
dataset_path = "FinalDataset"

video_paths, video_labels, label_encoder = collect_paths_and_labels(dataset_path)
train_paths, val_paths, train_labels, val_labels = train_test_split(
    video_paths, video_labels, test_size=0.2, stratify=video_labels, random_state=42)

print(f"Number of training videos: {len(train_paths)}")
print(f"Number of validation videos: {len(val_paths)}")

train_generator = VideoDataGenerator(train_paths, train_labels, batch_size=4)
val_generator = VideoDataGenerator(val_paths, val_labels, batch_size=4)

Number of training videos: 723
Number of validation videos: 181


In [7]:
import tensorflow as tf
from tensorflow.keras import layers, models

def build_conv3d_model_with_bn(input_shape=(16, 112, 112, 1), num_classes=2):
    model = models.Sequential()

    model.add(layers.Conv3D(32, kernel_size=(3, 3, 3), padding='same', input_shape=input_shape))
    model.add(layers.BatchNormalization())
    model.add(layers.Activation('relu'))
    model.add(layers.MaxPooling3D(pool_size=(1, 2, 2), padding='same'))

    model.add(layers.Conv3D(64, kernel_size=(3, 3, 3), padding='same'))
    model.add(layers.BatchNormalization())
    model.add(layers.Activation('relu'))
    model.add(layers.MaxPooling3D(pool_size=(2, 2, 2), padding='same'))

    model.add(layers.Conv3D(128, kernel_size=(3, 3, 3), padding='same'))
    model.add(layers.BatchNormalization())
    model.add(layers.Activation('relu'))
    model.add(layers.MaxPooling3D(pool_size=(2, 2, 2), padding='same'))

    model.add(layers.Flatten())

    model.add(layers.Dense(256))
    model.add(layers.BatchNormalization())
    model.add(layers.Activation('relu'))
    model.add(layers.Dropout(0.5))

    model.add(layers.Dense(128))
    model.add(layers.BatchNormalization())
    model.add(layers.Activation('relu'))
    model.add(layers.Dropout(0.5))

    model.add(layers.Dense(num_classes, activation='sigmoid'))

    return model

num_classes = 2
input_shape = (16, 112, 112, 1)

model = build_conv3d_model_with_bn(input_shape=input_shape, num_classes=num_classes)
model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])
model.summary()


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


In [8]:
model.fit(train_generator,epochs=15,validation_data=val_generator)

  self._warn_if_super_not_called()


Epoch 1/15
[1m181/181[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m217s[0m 1s/step - accuracy: 0.5781 - loss: 0.8226 - val_accuracy: 0.4972 - val_loss: 0.7093
Epoch 2/15
[1m181/181[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m246s[0m 1s/step - accuracy: 0.6162 - loss: 0.7093 - val_accuracy: 0.5635 - val_loss: 0.6599
Epoch 3/15
[1m181/181[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m193s[0m 1s/step - accuracy: 0.7109 - loss: 0.5974 - val_accuracy: 0.7182 - val_loss: 0.6810
Epoch 4/15
[1m181/181[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m192s[0m 1s/step - accuracy: 0.7426 - loss: 0.5424 - val_accuracy: 0.7956 - val_loss: 0.4599
Epoch 5/15
[1m181/181[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m195s[0m 1s/step - accuracy: 0.7740 - loss: 0.4967 - val_accuracy: 0.8453 - val_loss: 0.3306
Epoch 6/15
[1m181/181[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m212s[0m 1s/step - accuracy: 0.7931 - loss: 0.4839 - val_accuracy: 0.8177 - val_loss: 0.4140
Epoch 7/15
[1m181/181

<keras.src.callbacks.history.History at 0x7fcaa0730890>

In [None]:
model.save("Identification_Of_G3.h5")
from google.colab import files
files.download("Identification_Of_G3.h5")