In [1]:
import cv2
import numpy as np
import os
from tensorflow.keras.preprocessing.image import ImageDataGenerator

import tensorflow as tf
from tensorflow.keras.layers import Input, Conv2D, MaxPooling2D, Flatten, Dense, Lambda
from tensorflow.keras.models import Model
from tensorflow.keras import backend as K
import torch


# One shot learning (Siamese or prototype) network

## Augmentasi data

In [2]:
# Path ke video dan gambar target
# video_path = os.path.join('repository_lab_cv', 'proyek_kelompok', 'assets', 'test_video', 'OTV3.mp4')
template_path = os.path.join('assets', 'dataset', 'Mario-Target.png')
template_image = cv2.imread(template_path)
template_image = cv2.resize(template_image, (64, 64))  # Resize ke ukuran tetap

output_path = 'mario_detection_orb.mp4'  # Path untuk menyimpan video hasil

# Augmentasi data
datagen = ImageDataGenerator(
    rotation_range=45,
    width_shift_range=0.2,
    height_shift_range=0.2,
    shear_range=0.2,
    zoom_range=0.2,
    horizontal_flip=True,
    vertical_flip=False,
    fill_mode='nearest'
)

# Generate augmented images
template_image = np.expand_dims(template_image, axis=0)
augmented_images = datagen.flow(template_image, batch_size=1)

for i in range(5):  # Simpan 5 augmented images
    aug_image = next(augmented_images)[0].astype('uint8')
    cv2.imwrite(f'augmented_data/augmented_mario_{i}.png', aug_image)

In [3]:
def build_siamese_network(input_shape):
    # Arsitektur CNN untuk ekstraksi fitur
    input = Input(input_shape)
    x = Conv2D(64, (7, 7), activation='relu')(input)
    x = MaxPooling2D((2, 2))(x)
    x = Conv2D(256, (3, 3), activation='relu')(x)
    x = Flatten()(x)
    x = Dense(256, activation='sigmoid')(x)
    model = Model(input, x)
    return model

# Input size
input_shape = (64, 64, 3)

# Create Siamese branches
siamese_base = build_siamese_network(input_shape)

# Define inputs
input_a = Input(shape=input_shape)
input_b = Input(shape=input_shape)

# Extract features
feature_a = siamese_base(input_a)
feature_b = siamese_base(input_b)

# Compute L1 distance
l1_distance = Lambda(lambda tensors: K.abs(tensors[0] - tensors[1]))([feature_a, feature_b])

# Output layer
output = Dense(1, activation='sigmoid')(l1_distance)

# Siamese model
siamese_model = Model([input_a, input_b], output)
siamese_model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])


In [4]:
def create_pairs(positive_images, negative_images):
    pairs = []
    labels = []

    # Positive pairs
    for i in range(len(positive_images)):
        for j in range(i + 1, len(positive_images)):
            pairs.append([positive_images[i], positive_images[j]])
            labels.append(1)

    # Negative pairs
    for i in range(len(positive_images)):
        for j in range(len(negative_images)):
            pairs.append([positive_images[i], negative_images[j]])
            labels.append(0)

    return np.array(pairs), np.array(labels)


In [5]:
import torch
from torchvision import datasets, transforms
from torch.utils.data import DataLoader
import numpy as np
import cv2

# Transformasi untuk dataset
transform = transforms.Compose([
    transforms.Resize((64, 64)),  # Resize gambar ke 64x64
    transforms.ToTensor()  # Konversi ke tensor
])

# Unduh CIFAR-10 dataset
cifar10_data = datasets.CIFAR10(root='cifar', train=True, transform=transform, download=True)

# Inisialisasi DataLoader
dataloader = DataLoader(cifar10_data, batch_size=1, shuffle=True)

# Load beberapa gambar negatif
negative_images = []
for i, (image, label) in enumerate(dataloader):
    if i >= 100:  # Ambil 100 gambar negatif
        break
    # Konversi tensor ke numpy (untuk OpenCV)
    img = image[0].permute(1, 2, 0).numpy() * 255  # Permute dimensi untuk OpenCV
    img = img.astype(np.uint8)  # Pastikan format uint8
    negative_images.append(img)
    print("Loop ke: ")
    print(i)

negative_images = np.array(negative_images)

# Verifikasi hasil
print(f"Loaded {len(negative_images)} negative images.")


Files already downloaded and verified
Loop ke: 
0
Loop ke: 
1
Loop ke: 
2
Loop ke: 
3
Loop ke: 
4
Loop ke: 
5
Loop ke: 
6
Loop ke: 
7
Loop ke: 
8
Loop ke: 
9
Loop ke: 
10
Loop ke: 
11
Loop ke: 
12
Loop ke: 
13
Loop ke: 
14
Loop ke: 
15
Loop ke: 
16
Loop ke: 
17
Loop ke: 
18
Loop ke: 
19
Loop ke: 
20
Loop ke: 
21
Loop ke: 
22
Loop ke: 
23
Loop ke: 
24
Loop ke: 
25
Loop ke: 
26
Loop ke: 
27
Loop ke: 
28
Loop ke: 
29
Loop ke: 
30
Loop ke: 
31
Loop ke: 
32
Loop ke: 
33
Loop ke: 
34
Loop ke: 
35
Loop ke: 
36
Loop ke: 
37
Loop ke: 
38
Loop ke: 
39
Loop ke: 
40
Loop ke: 
41
Loop ke: 
42
Loop ke: 
43
Loop ke: 
44
Loop ke: 
45
Loop ke: 
46
Loop ke: 
47
Loop ke: 
48
Loop ke: 
49
Loop ke: 
50
Loop ke: 
51
Loop ke: 
52
Loop ke: 
53
Loop ke: 
54
Loop ke: 
55
Loop ke: 
56
Loop ke: 
57
Loop ke: 
58
Loop ke: 
59
Loop ke: 
60
Loop ke: 
61
Loop ke: 
62
Loop ke: 
63
Loop ke: 
64
Loop ke: 
65
Loop ke: 
66
Loop ke: 
67
Loop ke: 
68
Loop ke: 
69
Loop ke: 
70
Loop ke: 
71
Loop ke: 
72
Loop ke: 
73
Loop ke: 


In [6]:
import cv2
import os
import numpy as np

# Path ke direktori gambar augmented Mario
positive_folder = './augmented_data'  # Folder root (di mana augmented_mario_X.png berada)

# Load semua gambar augmented Mario
positive_images = []
for filename in sorted(os.listdir(positive_folder)):  # Pastikan file terurut
    if filename.startswith('augmented_mario') and filename.endswith('.png'):  # Filter nama file
        img_path = os.path.join(positive_folder, filename)
        img = cv2.imread(img_path)  # Baca gambar
        if img is not None:
            img = cv2.resize(img, (64, 64))  # Pastikan ukuran sama
            positive_images.append(img)

positive_images = np.array(positive_images)

In [7]:
# Load data (augmented positive samples and negative samples)
pairs, labels = create_pairs(positive_images, negative_images)

# Train model
siamese_model.fit([pairs[:, 0], pairs[:, 1]], labels, batch_size=32, epochs=10)


Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


<keras.callbacks.History at 0x1e802681ea0>

# Detect in Video

In [8]:
# Load template Mario
import cv2
template_path = './assets/dataset/Mario-Target.png'  # Path ke salah satu gambar Mario
template_image = cv2.imread(template_path)
template_image = cv2.resize(template_image, (64, 64))  # Pastikan ukuran sesuai dengan training

In [9]:
import numpy as np

def non_max_suppression(boxes, scores, iou_threshold=0.5):
    """
    Non-Maximum Suppression untuk menggabungkan bounding box yang tumpang tindih.
    """
    if len(boxes) == 0:
        return []

    boxes = np.array(boxes)
    scores = np.array(scores)

    # Koordinat kotak
    x1 = boxes[:, 0]
    y1 = boxes[:, 1]
    x2 = boxes[:, 2]
    y2 = boxes[:, 3]

    # Area bounding box
    areas = (x2 - x1 + 1) * (y2 - y1 + 1)
    order = scores.argsort()[::-1]  # Urutkan skor secara menurun

    keep = []
    while order.size > 0:
        i = order[0]
        keep.append(i)

        xx1 = np.maximum(x1[i], x1[order[1:]])
        yy1 = np.maximum(y1[i], y1[order[1:]])
        xx2 = np.minimum(x2[i], x2[order[1:]])
        yy2 = np.minimum(y2[i], y2[order[1:]])

        w = np.maximum(0, xx2 - xx1 + 1)
        h = np.maximum(0, yy2 - yy1 + 1)
        overlap = (w * h) / areas[order[1:]]

        # Hanya pertahankan kotak yang overlap-nya kurang dari threshold
        order = order[np.where(overlap <= iou_threshold)[0] + 1]

    return boxes[keep].astype(int)


In [10]:
def predict_in_batches(siamese_model, template_input, patches_input, batch_size=32):
    """
    Prediksi batch secara bertahap untuk menghindari kehabisan memori.
    """
    scores = []
    for i in range(0, len(patches_input), batch_size):
        batch_patches = patches_input[i:i + batch_size]
        batch_templates = np.repeat(template_input, len(batch_patches), axis=0)
        batch_scores = siamese_model.predict([batch_templates, batch_patches], verbose=0)
        scores.extend(batch_scores)
    return np.array(scores)

In [11]:
def detect_mario_in_frame(frame, siamese_model, template_image, threshold=0.7, iou_threshold=0.5):
    h, w, _ = frame.shape
    patch_size = template_image.shape[:2]
    stride = 64  # Langkah sliding window

    detected_boxes = []
    scores = []
    patches = []
    coordinates = []

    # Sliding window
    for y in range(0, h - patch_size[0], stride):
        for x in range(0, w - patch_size[1], stride):
            patch = frame[y:y + patch_size[0], x:x + patch_size[1]]
            patch = cv2.resize(patch, (patch_size[1], patch_size[0]))
            patches.append(patch)
            coordinates.append((x, y, x + patch_size[1], y + patch_size[0]))

    # Preprocess template dan patches
    template_input = np.expand_dims(template_image.astype(np.float32) / 255.0, axis=0)
    patches_input = np.array(patches).astype(np.float32) / 255.0

    # Predict in batches
    batch_size = 32
    pred_scores = predict_in_batches(siamese_model, template_input, patches_input, batch_size=batch_size)

    # Filter berdasarkan threshold
    for i, score in enumerate(pred_scores):
        if score[0] > threshold:
            detected_boxes.append(coordinates[i])
            scores.append(score[0])

    # Gunakan NMS untuk menggabungkan bounding box
    if len(detected_boxes) > 0:
        final_boxes = non_max_suppression(detected_boxes, scores, iou_threshold=iou_threshold)
    else:
        final_boxes = []

    return final_boxes

**Test**

In [12]:
import cv2
from tqdm import tqdm

# Path ke video input dan output
video_path = './assets/test_video/OTV1.mp4'
output_path = './output_video/test1.mp4'

# Load video
cap = cv2.VideoCapture(video_path)

# Dapatkan properti video
frame_width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
frame_height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
fps = int(cap.get(cv2.CAP_PROP_FPS))
total_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))  # Total frame dalam video

# Video Writer untuk menyimpan hasil
fourcc = cv2.VideoWriter_fourcc(*'H264')
out = cv2.VideoWriter(output_path, fourcc, fps, (frame_width, frame_height))

# Progress bar
progress_bar = tqdm(total=total_frames, desc="Processing Video", unit="frame")

# Proses setiap frame
while cap.isOpened():
    ret, frame = cap.read()
    if not ret:
        break

    frame = cv2.resize(frame, (frame_width // 2, frame_height // 2))

    # Deteksi Mario pada frame
    boxes = detect_mario_in_frame(frame, siamese_model, template_image, threshold=0.7)

    # Gambar bounding box pada frame
    for box in boxes:
        x1, y1, x2, y2 = box
        cv2.rectangle(frame, (x1, y1), (x2, y2), (0, 255, 0), 2)  # Bounding box hijau

    # Simpan frame hasil
    out.write(frame)

    # Update progress bar
    progress_bar.update(1)

# Release resources
progress_bar.close()
cap.release()
out.release()
cv2.destroyAllWindows()

print("Video processing completed.")


Processing Video: 100%|██████████| 1008/1008 [04:19<00:00,  3.88frame/s]

Video processing completed.





### Predict

In [13]:
import cv2
import numpy as np

def preprocess_image(image, target_size):
    """
    Preprocess gambar untuk prediksi:
    - Resize ke target size.
    - Normalisasi pixel value ke [0, 1].
    - Tambahkan dimensi batch.
    """
    image = cv2.resize(image, target_size)  # Resize
    image = image.astype(np.float32) / 255.0  # Normalisasi
    image = np.expand_dims(image, axis=0)  # Tambahkan dimensi batch
    return image

def predict_similarity(template_path, target_path, siamese_model):
    """
    Prediksi kesamaan antara template dan gambar target.
    """
    # Load template dan gambar target
    template_image = cv2.imread(template_path)
    target_image = cv2.imread(target_path)

    if template_image is None or target_image is None:
        print("Error: Cannot load one or both images.")
        return

    # Preprocess kedua gambar
    target_size = (64, 64)  # Sesuaikan dengan input model Anda
    template_processed = preprocess_image(template_image, target_size)
    target_processed = preprocess_image(target_image, target_size)

    # Prediksi kesamaan menggunakan model Siamese
    similarity_score = siamese_model.predict([template_processed, target_processed])[0][0]

    print(f"Similarity Score: {similarity_score}")
    return similarity_score


In [14]:
template_path = './assets/dataset/Mario-Target.PNG'  # Path ke template Mario
target_path = './assets/dataset/video.PNG'  # Path ke gambar target

# Prediksi kesamaan
similarity_score = predict_similarity(template_path, target_path, siamese_model)

# Keputusan berdasarkan threshold
threshold = 0.7
if similarity_score > threshold:
    print("Mario Detected!")
else:
    print("Not Mario!")

similarity_score


Similarity Score: 0.4636431932449341
Not Mario!


0.4636432