In [1]:
import os, cv2, pandas as pd
from tqdm import tqdm

# Input locations inside the Kaggle dataset and the writable output root.
DATASET_PATH = "/kaggle/input/liveness-detection-zalo-2022"
VIDEO_DIR = os.path.join(DATASET_PATH, "train/train/videos")
LABEL_FILE = os.path.join(DATASET_PATH, "train/train/label.csv")
OUTPUT_PATH = "/kaggle/working/liveness_frames"

# One sub-folder per class, created in the order "real" then "fake".
# exist_ok=True makes the cell safe to run again.
for _class_dir in ("real", "fake"):
    os.makedirs(os.path.join(OUTPUT_PATH, _class_dir), exist_ok=True)

# Load the per-video label table and print its first rows.
labels = pd.read_csv(LABEL_FILE)
print(labels.head())

   fname  liveness_score
0  1.mp4               0
1  2.mp4               1
2  3.mp4               1
3  5.mp4               0
4  7.mp4               1


In [2]:
def extract_frames(video_path, label, save_dir):
    """Save up to three frames of one video as JPEG files.

    Frames are picked at 20 %, 50 % and 80 % of the frame count reported by
    OpenCV. Every frame that is read successfully is written to ``save_dir``
    as ``{label}_{stem}_{k}.jpg``, where ``stem`` is the part of the file
    name before its first dot and ``k`` counts the frames saved so far,
    starting at 0.

    Args:
        video_path: Path of the video file to sample.
        label: Value used as the file-name prefix.
        save_dir: Directory that receives the JPEG files; it is not created
            here.

    Returns:
        None. A video that cannot be opened produces no files.
    """
    cap = cv2.VideoCapture(video_path)
    try:
        if not cap.isOpened():
            return

        # Clamp so that a backend reporting a negative/unknown count still
        # yields a valid (zero) frame index.
        total = max(int(cap.get(cv2.CAP_PROP_FRAME_COUNT)), 0)

        # Very short videos (or an unknown frame count) map several fractions
        # to the same index; drop repeats, keeping order, so the same frame is
        # not saved more than once.
        frame_ids = list(dict.fromkeys(int(total * x) for x in (0.2, 0.5, 0.8)))

        stem = os.path.basename(video_path).split('.')[0]
        count = 0
        for fid in frame_ids:
            cap.set(cv2.CAP_PROP_POS_FRAMES, fid)
            ret, frame = cap.read()
            if ret:
                name = f"{label}_{stem}_{count}.jpg"
                cv2.imwrite(os.path.join(save_dir, name), frame)
                count += 1
    finally:
        # Release the capture even when seeking, decoding or writing raises.
        cap.release()

# Walk the label table; for every listed video that exists on disk, dump its
# sampled frames into the folder of its class (1 -> "real", otherwise "fake").
for fname, score in tqdm(zip(labels['fname'], labels['liveness_score']), total=len(labels)):
    video_path = os.path.join(VIDEO_DIR, fname)
    if os.path.exists(video_path):
        label = int(score)
        save_dir = os.path.join(OUTPUT_PATH, "real" if label == 1 else "fake")
        extract_frames(video_path, label, save_dir)

100%|██████████| 1168/1168 [12:58<00:00,  1.50it/s]


In [3]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Dropout, Flatten, Dense, BatchNormalization
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.callbacks import EarlyStopping, ModelCheckpoint
from tensorflow.keras.optimizers import Adam

# Size every image is resized to before it enters the network.
IMG_SIZE = (160, 160)
train_dir = "/kaggle/working/liveness_frames"

# Rescale pixel values by 1/255 and reserve 20% of the files for validation.
datagen = ImageDataGenerator(rescale=1./255, validation_split=0.2)

# Both generators read the same directory with the same settings and differ
# only in which subset of the split they serve.
flow_kwargs = dict(target_size=IMG_SIZE, batch_size=32, class_mode='binary')
train_gen = datagen.flow_from_directory(train_dir, subset='training', **flow_kwargs)
val_gen = datagen.flow_from_directory(train_dir, subset='validation', **flow_kwargs)

# Three Conv -> BatchNorm -> MaxPool stages (32, 64, 128 filters); only the
# first convolution carries the input shape, derived from IMG_SIZE.
stack = []
for stage, filters in enumerate((32, 64, 128)):
    first_layer_kwargs = {'input_shape': (*IMG_SIZE, 3)} if stage == 0 else {}
    stack.append(Conv2D(filters, (3,3), activation='relu', **first_layer_kwargs))
    stack.append(BatchNormalization())
    stack.append(MaxPooling2D(2,2))

# Classifier head ending in a single sigmoid unit.
stack.extend([
    Flatten(),
    Dropout(0.4),
    Dense(128, activation='relu'),
    Dense(1, activation='sigmoid'),
])
model = Sequential(stack)

model.compile(
    optimizer=Adam(1e-4),
    loss='binary_crossentropy',
    metrics=['accuracy'],
)

# Stop after 3 epochs without improvement (restoring the best weights) and
# keep only the best model on disk.
early_stop = EarlyStopping(patience=3, restore_best_weights=True)
checkpoint = ModelCheckpoint("/kaggle/working/liveness_detector_zalo.h5", save_best_only=True)
callbacks = [early_stop, checkpoint]

history = model.fit(train_gen, validation_data=val_gen, epochs=10, callbacks=callbacks)

2025-11-17 19:08:15.292598: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:477] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
E0000 00:00:1763406495.456821      39 cuda_dnn.cc:8310] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
E0000 00:00:1763406495.505960      39 cuda_blas.cc:1418] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered


Found 2804 images belonging to 2 classes.
Found 700 images belonging to 2 classes.


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)
I0000 00:00:1763406507.059594      39 gpu_device.cc:2022] Created device /job:localhost/replica:0/task:0/device:GPU:0 with 13942 MB memory:  -> device: 0, name: Tesla T4, pci bus id: 0000:00:04.0, compute capability: 7.5
I0000 00:00:1763406507.060267      39 gpu_device.cc:2022] Created device /job:localhost/replica:0/task:0/device:GPU:1 with 13942 MB memory:  -> device: 1, name: Tesla T4, pci bus id: 0000:00:05.0, compute capability: 7.5
  self._warn_if_super_not_called()


Epoch 1/10


I0000 00:00:1763406513.296207    4793 service.cc:148] XLA service 0x79f9f4002950 initialized for platform CUDA (this does not guarantee that XLA will be used). Devices:
I0000 00:00:1763406513.296898    4793 service.cc:156]   StreamExecutor device (0): Tesla T4, Compute Capability 7.5
I0000 00:00:1763406513.296920    4793 service.cc:156]   StreamExecutor device (1): Tesla T4, Compute Capability 7.5
I0000 00:00:1763406513.825959    4793 cuda_dnn.cc:529] Loaded cuDNN version 90300


[1m 1/88[0m [37m━━━━━━━━━━━━━━━━━━━━[0m [1m14:37[0m 10s/step - accuracy: 0.3438 - loss: 2.3039

I0000 00:00:1763406518.915403    4793 device_compiler.h:188] Compiled cluster using XLA!  This line is logged at most once for the lifetime of the process.


[1m88/88[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m44s[0m 384ms/step - accuracy: 0.6785 - loss: 1.0088 - val_accuracy: 0.4886 - val_loss: 1.0243
Epoch 2/10
[1m88/88[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m29s[0m 333ms/step - accuracy: 0.9418 - loss: 0.1651 - val_accuracy: 0.6657 - val_loss: 0.6218
Epoch 3/10
[1m88/88[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m29s[0m 334ms/step - accuracy: 0.9798 - loss: 0.0691 - val_accuracy: 0.6171 - val_loss: 1.0294
Epoch 4/10
[1m88/88[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m29s[0m 325ms/step - accuracy: 0.9899 - loss: 0.0367 - val_accuracy: 0.6471 - val_loss: 1.2467
Epoch 5/10
[1m88/88[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m29s[0m 324ms/step - accuracy: 0.9946 - loss: 0.0267 - val_accuracy: 0.6543 - val_loss: 1.2590


In [4]:
# Save the in-memory `model` a second time, now as a `.keras` file under
# /kaggle/working, next to the `.h5` checkpoint written by ModelCheckpoint
# during training.
# NOTE(review): the `keras` import below is not referenced in this cell.
import keras

model.save("/kaggle/working/liveness_detector_zalo.keras") 

In [11]:
import numpy as np
from sklearn.metrics import confusion_matrix, classification_report

def predict_video(video_path):
    """Score one video with the global ``model``.

    Every fifth frame is converted from BGR to RGB, resized to ``IMG_SIZE``
    and scaled by 1/255. All sampled frames are scored in a single batched
    ``model.predict`` call and the mean output is thresholded at 0.5.

    Args:
        video_path: Path of the video file to score.

    Returns:
        Tuple ``(avg, pred)``: ``avg`` is the mean model output over the
        sampled frames as a float, and ``pred`` is 1 (Real) when
        ``avg >= 0.5`` and 0 (Fake) otherwise. When no frame could be
        sampled, a warning is issued and ``(nan, 0)`` is returned.
    """
    import warnings

    cap = cv2.VideoCapture(video_path)
    sampled = []
    frames_read = 0
    try:
        while True:
            ret, frame = cap.read()
            if not ret:
                break
            frames_read += 1

            # sample every 5 frames (the 5th, 10th, 15th, ... frame read).
            # Counting locally selects the same frames as checking the
            # capture position after each read, without depending on the
            # backend reporting CAP_PROP_POS_FRAMES.
            if frames_read % 5 == 0:
                frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
                frame = cv2.resize(frame, IMG_SIZE)
                sampled.append((frame / 255.0).astype(np.float32))
    finally:
        # Release the capture even if decoding or preprocessing raises.
        cap.release()

    if not sampled:
        # Unreadable file or fewer than 5 frames: keep the previous outcome
        # (NaN score, label 0) but say why instead of letting np.mean([])
        # emit an opaque "Mean of empty slice" warning.
        warnings.warn(f"No frames sampled from {video_path}; returning NaN score")
        return float("nan"), 0

    # One batched call instead of one predict() call per frame.
    preds = model.predict(np.stack(sampled), verbose=0)[:, 0]
    avg = float(np.mean(preds))
    return avg, 1 if avg >= 0.5 else 0  # 1=Real, 0=Fake


def evaluate_folder(folder, label):
    """Run ``predict_video`` on every ``.mp4`` file of a folder.

    Files are visited in sorted name order; entries whose name does not end
    with ``.mp4`` are ignored. One line per scored video is printed.

    Args:
        folder: Path to the directory holding the videos.
        label: Ground-truth label recorded for every video in the folder
            (0=Fake, 1=Real).

    Returns:
        Tuple ``(y_true, y_pred)`` of equally long lists: ``label`` repeated
        once per scored video, and the predicted label of each video.
    """
    entries = sorted(os.listdir(folder))
    print(f"\n=== Evaluating {folder} ===")

    truths, guesses = [], []
    for entry in entries:
        if not entry.endswith(".mp4"):
            continue
        prob, pred = predict_video(os.path.join(folder, entry))
        print(f"{entry} → Prob={prob:.4f}, Pred={pred}, True={label}")
        truths.append(label)
        guesses.append(pred)

    return truths, guesses


# ---- Edit these according to your dataset ----
# NOTE(review): evaluate_folder records ONE ground-truth label for every video
# in a folder, so the metrics below are only meaningful if each folder really
# contains a single class — confirm this for the two folders used here.
fake_folder = "/kaggle/input/liveness-detection-zalo-2022/public_test/public/videos"       # change if needed
real_folder = "/kaggle/input/liveness-detection-zalo-2022/public_test_2/public_test_2/videos"

y_true_fake, y_pred_fake = evaluate_folder(fake_folder, 0)
y_true_real, y_pred_real = evaluate_folder(real_folder, 1)

y_true = y_true_fake + y_true_real
y_pred = y_pred_fake + y_pred_real


# ------------------ Classification Report ------------------
# labels=[0, 1] pins the class order to match target_names; without it the
# report raises when only one class shows up in y_true/y_pred (for example when
# one folder has no .mp4 files and every prediction agrees).
print("\n=== Classification Report ===")
print(classification_report(
    y_true, y_pred,
    labels=[0, 1],
    target_names=["Fake", "Real"],
    zero_division=0,
))

# ------------------ Confusion Matrix -----------------------
# Same pinning here so the matrix is always 2x2 with rows/columns [Fake, Real].
cm = confusion_matrix(y_true, y_pred, labels=[0, 1])
print("\n=== Confusion Matrix ===")
print(cm)


=== Evaluating /kaggle/input/liveness-detection-zalo-2022/public_test/public/videos ===
0.mp4 → Prob=0.4452, Pred=0, True=0
100.mp4 → Prob=0.8813, Pred=1, True=0
1001.mp4 → Prob=0.6689, Pred=1, True=0
1005.mp4 → Prob=0.8564, Pred=1, True=0
1022.mp4 → Prob=0.3484, Pred=0, True=0
1023.mp4 → Prob=0.6515, Pred=1, True=0
1045.mp4 → Prob=0.6973, Pred=1, True=0
1048.mp4 → Prob=0.8133, Pred=1, True=0
1049.mp4 → Prob=0.4794, Pred=0, True=0
1062.mp4 → Prob=0.5871, Pred=1, True=0
1079.mp4 → Prob=0.5946, Pred=1, True=0
1092.mp4 → Prob=0.2268, Pred=0, True=0
1102.mp4 → Prob=0.4032, Pred=0, True=0
1116.mp4 → Prob=0.9059, Pred=1, True=0
1122.mp4 → Prob=0.8962, Pred=1, True=0
1132.mp4 → Prob=0.6542, Pred=1, True=0
1133.mp4 → Prob=0.3957, Pred=0, True=0
1141.mp4 → Prob=0.4101, Pred=0, True=0
1151.mp4 → Prob=0.5560, Pred=1, True=0
1155.mp4 → Prob=0.3853, Pred=0, True=0
1156.mp4 → Prob=0.3823, Pred=0, True=0
1159.mp4 → Prob=0.4038, Pred=0, True=0
1178.mp4 → Prob=0.3753, Pred=0, True=0
118.mp4 → Prob=0.7