In [1]:
import os, cv2, pandas as pd
from tqdm import tqdm

# Input dataset locations and the root of the extracted-frame output tree.
DATASET_PATH = "/kaggle/input/liveness-detection-zalo-2022"
OUTPUT_PATH = "/kaggle/working/liveness_frames"
VIDEO_DIR = os.path.join(DATASET_PATH, "train/train/videos")
LABEL_FILE = os.path.join(DATASET_PATH, "train/train/label.csv")

# One sub-folder per class; exist_ok keeps this cell safe to re-run.
for class_name in ("real", "fake"):
    os.makedirs(os.path.join(OUTPUT_PATH, class_name), exist_ok=True)

# Load the label table (one row per video file) and preview its first rows.
labels = pd.read_csv(LABEL_FILE)
print(labels.head())

   fname  liveness_score
0  1.mp4               0
1  2.mp4               1
2  3.mp4               1
3  5.mp4               0
4  7.mp4               1


In [2]:
def extract_frames(video_path, label, save_dir, fractions=(0.2, 0.5, 0.8)):
    """Save a few frames spread across a video as JPEG files.

    Args:
        video_path: Path of the video file to sample.
        label: Class label; used as the prefix of every output file name.
        save_dir: Directory the JPEG files are written to (must exist).
        fractions: Relative positions (0 = first frame, 1 = last frame) of
            the frames to grab. Defaults to the 20 %, 50 % and 80 % marks.

    Returns:
        None. Files are named ``{label}_{video stem}_{k}.jpg`` where ``k``
        counts the frames written so far. A video that cannot be opened, or
        that reports no frames, produces no files.
    """
    cap = cv2.VideoCapture(video_path)
    try:
        if not cap.isOpened():
            return
        total = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
        if total <= 0:
            return

        # splitext removes only the extension, so "a.b.mp4" keeps the stem
        # "a.b" instead of collapsing to "a" and colliding with "a.c.mp4".
        stem = os.path.splitext(os.path.basename(video_path))[0]

        # Clamp to the last valid index and de-duplicate (order preserved):
        # very short clips map several fractions onto the same frame, which
        # would otherwise be written out more than once.
        frame_ids = dict.fromkeys(
            min(int(total * x), total - 1) for x in fractions
        )

        count = 0
        for fid in frame_ids:
            cap.set(cv2.CAP_PROP_POS_FRAMES, fid)
            ret, frame = cap.read()
            if not ret:
                continue
            name = f"{label}_{stem}_{count}.jpg"
            # imwrite reports failure through its return value; only advance
            # the counter for frames that actually reached the disk.
            if cv2.imwrite(os.path.join(save_dir, name), frame):
                count += 1
    finally:
        # Always free the decoder handle, even if reading or writing raised.
        cap.release()

# Extract frames from every labelled video into the folder of its class.
missing_videos = []
for fname, score in tqdm(zip(labels['fname'], labels['liveness_score']),
                         total=len(labels)):
    video_path = os.path.join(VIDEO_DIR, fname)
    if not os.path.exists(video_path):
        # Record the gap instead of skipping silently, so a wrong VIDEO_DIR
        # or an incomplete download is visible after the run.
        missing_videos.append(fname)
        continue
    label = int(score)
    # liveness_score 1 goes to "real"; every other value goes to "fake".
    save_dir = os.path.join(OUTPUT_PATH, "real" if label == 1 else "fake")
    extract_frames(video_path, label, save_dir)

if missing_videos:
    print(f"Skipped {len(missing_videos)} labelled video(s) not found in "
          f"{VIDEO_DIR}; first few: {missing_videos[:5]}")

100%|██████████| 1168/1168 [12:49<00:00,  1.52it/s]


In [3]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Dropout, Flatten, Dense, BatchNormalization, Input
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.callbacks import EarlyStopping, ModelCheckpoint
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.utils import set_random_seed

# Training configuration: everything a reader might want to tune.
IMG_SIZE = (160, 160)
BATCH_SIZE = 32
EPOCHS = 10
SEED = 42
train_dir = "/kaggle/working/liveness_frames"

# Seed Python, NumPy and TensorFlow so weight initialisation and batch
# shuffling are repeatable across runs.
set_random_seed(SEED)

# Pixels are rescaled to [0, 1]; 20 % of the images are held out for validation.
datagen = ImageDataGenerator(rescale=1./255, validation_split=0.2)

# flow_from_directory assigns class indices in alphanumeric folder order,
# so "fake" -> 0 and "real" -> 1.
train_gen = datagen.flow_from_directory(
    train_dir, target_size=IMG_SIZE, batch_size=BATCH_SIZE,
    class_mode='binary', subset='training', seed=SEED
)

# Validation data needs no shuffling; a fixed order keeps evaluation stable.
val_gen = datagen.flow_from_directory(
    train_dir, target_size=IMG_SIZE, batch_size=BATCH_SIZE,
    class_mode='binary', subset='validation', shuffle=False
)

# Small CNN: three conv/batch-norm/pool stages, then a dense classifier head.
# The input shape is derived from IMG_SIZE so the two can never disagree, and
# an explicit Input layer replaces the `input_shape=` argument on the first
# Conv2D, which newer Keras versions warn about.
model = Sequential([
    Input(shape=IMG_SIZE + (3,)),
    Conv2D(32, (3,3), activation='relu'),
    BatchNormalization(), MaxPooling2D(2,2),
    Conv2D(64, (3,3), activation='relu'),
    BatchNormalization(), MaxPooling2D(2,2),
    Conv2D(128, (3,3), activation='relu'),
    BatchNormalization(), MaxPooling2D(2,2),
    Flatten(), Dropout(0.4),
    Dense(128, activation='relu'),
    Dense(1, activation='sigmoid')  # single sigmoid unit for the binary label
])

model.compile(optimizer=Adam(1e-4),
              loss='binary_crossentropy',
              metrics=['accuracy'])

# Both callbacks track validation loss (spelled out rather than left to the
# default): stop after 3 epochs without improvement and keep the best weights
# both in memory and on disk.
callbacks = [
    EarlyStopping(monitor='val_loss', patience=3, restore_best_weights=True),
    ModelCheckpoint("/kaggle/working/liveness_detector_zalo.h5",
                    monitor='val_loss', save_best_only=True)
]

history = model.fit(train_gen, validation_data=val_gen, epochs=EPOCHS, callbacks=callbacks)

2025-11-06 01:45:48.083516: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:477] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
E0000 00:00:1762393548.295195      19 cuda_dnn.cc:8310] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
E0000 00:00:1762393548.350150      19 cuda_blas.cc:1418] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered


Found 2804 images belonging to 2 classes.
Found 700 images belonging to 2 classes.


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)
I0000 00:00:1762393560.609842      19 gpu_device.cc:2022] Created device /job:localhost/replica:0/task:0/device:GPU:0 with 13942 MB memory:  -> device: 0, name: Tesla T4, pci bus id: 0000:00:04.0, compute capability: 7.5
I0000 00:00:1762393560.610523      19 gpu_device.cc:2022] Created device /job:localhost/replica:0/task:0/device:GPU:1 with 13942 MB memory:  -> device: 1, name: Tesla T4, pci bus id: 0000:00:05.0, compute capability: 7.5
  self._warn_if_super_not_called()


Epoch 1/10


I0000 00:00:1762393566.339659    4747 service.cc:148] XLA service 0x7dc198003970 initialized for platform CUDA (this does not guarantee that XLA will be used). Devices:
I0000 00:00:1762393566.340539    4747 service.cc:156]   StreamExecutor device (0): Tesla T4, Compute Capability 7.5
I0000 00:00:1762393566.340564    4747 service.cc:156]   StreamExecutor device (1): Tesla T4, Compute Capability 7.5
I0000 00:00:1762393566.800323    4747 cuda_dnn.cc:529] Loaded cuDNN version 90300


[1m 1/88[0m [37m━━━━━━━━━━━━━━━━━━━━[0m [1m13:02[0m 9s/step - accuracy: 0.6250 - loss: 0.7611

I0000 00:00:1762393571.470991    4747 device_compiler.h:188] Compiled cluster using XLA!  This line is logged at most once for the lifetime of the process.


[1m88/88[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m39s[0m 346ms/step - accuracy: 0.7240 - loss: 0.7054 - val_accuracy: 0.6086 - val_loss: 0.6465
Epoch 2/10
[1m88/88[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m25s[0m 282ms/step - accuracy: 0.9493 - loss: 0.1445 - val_accuracy: 0.5129 - val_loss: 1.6397
Epoch 3/10
[1m88/88[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m24s[0m 270ms/step - accuracy: 0.9848 - loss: 0.0504 - val_accuracy: 0.5614 - val_loss: 1.3927
Epoch 4/10
[1m88/88[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m23s[0m 267ms/step - accuracy: 0.9926 - loss: 0.0278 - val_accuracy: 0.6029 - val_loss: 1.4798


In [4]:
# Write the in-memory model out again, this time as a ".keras" file
# (the checkpoint callback above stored it as ".h5").
import keras

model.save(filepath="/kaggle/working/liveness_detector_zalo.keras")