In [6]:
#!/usr/bin/env python3
import os
import cv2
import numpy as np
import tensorflow as tf
from tensorflow.keras.models import load_model
from tensorflow.keras.preprocessing.image import img_to_array
import joblib
import sys

In [8]:
# -------------------------------
# Load model and label encoder
# -------------------------------
model_path = '../problem_1/efficientnet_best_model.h5'
encoder_path = '../problem_1/label_encoder.pkl'

if not os.path.exists(model_path):
    print(f"Error: Model not found at {model_path}")
    sys.exit(1)

if not os.path.exists(encoder_path):
    print(f"Error: Label encoder not found at {encoder_path}")
    sys.exit(1)

model = load_model(model_path)
label_encoder = joblib.load(encoder_path)

num_classes = len(label_encoder.classes_)
print(f"Loaded model with {num_classes} classes: {label_encoder.classes_}")



Loaded model with 26 classes: ['4011' '4015' '4088' '4196' '7020097009819' '7020097026113'
 '7023026089401' '7035620058776' '7037203626563' '7037206100022'
 '7038010009457' '7038010013966' '7038010021145' '7038010054488'
 '7038010068980' '7039610000318' '7040513000022' '7040513001753'
 '7040913336684' '7044610874661' '7048840205868' '7071688004713'
 '7622210410337' '90433917' '90433924' '94011']


In [9]:
def preprocess_image(img, target_size=(224, 224)):
    """
    Preprocess the image for the model.
    Args:
        img: Input image (numpy array).
        target_size: Target size for resizing the image.
    Returns:
        Preprocessed image (numpy array).
    """
    img = cv2.resize(img, target_size)
    img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
    img_array = img_to_array(img)
    img_array = tf.keras.applications.efficientnet.preprocess_input(img_array)
    return np.expand_dims(img_array, axis=0)

In [10]:
# -------------------------------
# Parameters
# -------------------------------
video_path = 'videos/Noen få enkle varer 480P.mp4'
output_receipt_file = 'generated_receipt.txt'

area_threshold = 2000
distance_threshold = 60
disappearance_threshold = 1.0
confidence_threshold = 0.9

In [11]:
# -------------------------------
# Open video
# -------------------------------
cap = cv2.VideoCapture(video_path)
fps = cap.get(cv2.CAP_PROP_FPS)

ret, first_frame = cap.read()
if not ret:
    print("Error: Cannot read the video.")
    cap.release()
    sys.exit(1)

frame_height, frame_width = first_frame.shape[:2]
print(f"Frame dimensions: {frame_width} x {frame_height}")
print(f"Video FPS: {fps}")

Frame dimensions: 852 x 480
Video FPS: 30.0


In [12]:
# -------------------------------
# Define ROI (Region of Interest)
# -------------------------------
roi_y = int(frame_height * 0.5)
roi_h = int(frame_height * 0.5)
roi_x = int(frame_width * 0.333)
roi_w = int(frame_width * 0.333)

print(f"Using ROI -> X:{roi_x}, Y:{roi_y}, Width:{roi_w}, Height:{roi_h}")

Using ROI -> X:283, Y:240, Width:283, Height:240


In [13]:
# -------------------------------
# Initialize background subtractor
# -------------------------------
fgbg = cv2.createBackgroundSubtractorMOG2(history=500, varThreshold=50, detectShadows=True)

active_detections = []
detections_by_frame = []

receipt = {}

def euclidean_distance(p1, p2):
    return np.linalg.norm(np.array(p1) - np.array(p2))

In [14]:
# -------------------------------
# Main loop
# -------------------------------
frame_number = 0
cap.set(cv2.CAP_PROP_POS_FRAMES, 0)

while True:
    ret, frame = cap.read()
    if not ret:
        break

    frame_number += 1
    current_time = frame_number / fps
    roi_frame = frame[roi_y:roi_y+roi_h, roi_x:roi_x+roi_w]

    fgmask = fgbg.apply(roi_frame)
    kernel = cv2.getStructuringElement(cv2.MORPH_ELLIPSE, (5, 5))
    fgmask = cv2.morphologyEx(fgmask, cv2.MORPH_OPEN, kernel)
    fgmask = cv2.morphologyEx(fgmask, cv2.MORPH_DILATE, kernel, iterations=2)

    contours, _ = cv2.findContours(fgmask, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)

    detections_in_frame = []
    for cnt in contours:
        area = cv2.contourArea(cnt)
        if area > area_threshold:
            x, y, w, h = cv2.boundingRect(cnt)
            crop = roi_frame[y:y+h, x:x+w]
            if crop.size == 0:
                continue
            processed_crop = preprocess_image(crop)
            preds = model.predict(processed_crop, verbose=0)
            conf = np.max(preds)
            if conf < confidence_threshold:
                continue
            pred_class = np.argmax(preds, axis=1)[0]
            label = label_encoder.classes_[pred_class]
            centroid = (x + w // 2, y + h // 2)
            detections_in_frame.append({'label': label, 'centroid': centroid, 'bbox': (x, y, w, h), 'conf': conf})

            real_x = x + roi_x
            real_y = y + roi_y
            cv2.rectangle(frame, (real_x, real_y), (real_x+w, real_y+h), (0, 255, 0), 2)
            cv2.putText(frame, f"{label} ({conf:.2f})", (real_x, real_y - 10),
                        cv2.FONT_HERSHEY_SIMPLEX, 0.9, (0, 255, 0), 2)
            
    detections_by_frame.append(([{'label': det['label'], 'conf': det['conf']} for det in detections_in_frame], current_time))

    for det in detections_in_frame:
        matched = False
        for active in active_detections:
            if det['label'] == active['label']:
                distance = euclidean_distance(det['centroid'], active['centroid'])
                if distance < distance_threshold:
                    active['centroid'] = det['centroid']
                    active['last_seen_time'] = current_time
                    matched = True
                    break
        if not matched:
            active_detections.append({
                'label': det['label'],
                'centroid': det['centroid'],
                'last_seen_time': current_time,
                'bbox': det['bbox']
            })

    for active in active_detections.copy():
        if current_time - active['last_seen_time'] > disappearance_threshold:
            product_label = active['label']
            receipt[product_label] = receipt.get(product_label, 0) + 1
            active_detections.remove(active)

    cv2.imshow('Full Frame with Detections', frame)
    cv2.imshow('ROI Frame', roi_frame)
    cv2.imshow('Foreground Mask', fgmask)

    if cv2.waitKey(1) & 0xFF == ord('q'):
        break


cap.release()
cv2.destroyAllWindows()

# Finalize lingering detections
for active in active_detections:
    product_label = active['label']
    receipt[product_label] = receipt.get(product_label, 0) + 1

In [None]:
print(detections_by_frame)

[([], 0.03333333333333333), ([], 0.06666666666666667), ([], 0.1), ([], 0.13333333333333333), ([], 0.16666666666666666), ([], 0.2), ([], 0.23333333333333334), ([], 0.26666666666666666), ([], 0.3), ([], 0.3333333333333333), ([], 0.36666666666666664), ([], 0.4), ([], 0.43333333333333335), ([], 0.4666666666666667), ([], 0.5), ([], 0.5333333333333333), ([], 0.5666666666666667), ([], 0.6), ([], 0.6333333333333333), ([], 0.6666666666666666), ([], 0.7), ([], 0.7333333333333333), ([], 0.7666666666666667), ([], 0.8), ([], 0.8333333333333334), ([], 0.8666666666666667), ([], 0.9), ([], 0.9333333333333333), ([], 0.9666666666666667), ([], 1.0), ([], 1.0333333333333334), ([], 1.0666666666666667), ([], 1.1), ([], 1.1333333333333333), ([], 1.1666666666666667), ([], 1.2), ([], 1.2333333333333334), ([], 1.2666666666666666), ([], 1.3), ([], 1.3333333333333333), ([], 1.3666666666666667), ([], 1.4), ([], 1.4333333333333333), ([], 1.4666666666666666), ([], 1.5), ([], 1.5333333333333334), ([], 1.5666666666666

: 

In [None]:
# function to chedck future frames for the same product
def check_future_frames(label, frame_number, frame_depth=10):
    # check average confidence for the label in future 10 frames
    future_frames = detections_by_frame[frame_number:frame_number + frame_depth]
    confidences = []
    for ff in future_frames:
        for detection in ff[0]:
            if detection['label'] == label:
                confidences.append(detection['conf'])
    return np.mean(confidences) if confidences else 0

In [None]:
frame_number = 0
enccountered_safe_period = False

while frame_number < len(detections_by_frame):

    if not enccountered_safe_period:
        #...

    dbf = detections_by_frame[frame_number]
    current_detections = dbf[0]
    time = dbf[1]

    confidences = []
    for detection in current_detections:
        label = detection['label']
        future_conf = check_future_frames(label, frame_number)
        confidences.append((label, future_conf))
    
    # if argmax of is greateter than 0.8 add the label with the highest confidence to the receipt
    if confidences:
        max_label = max(confidences, key=lambda x: x[1])
        if max_label[1] > 0.8:
            receipt[max_label[0]] = receipt.get(max_label[0], 0) + 1
            frame_number += 10
    else:
        frame_number += 1
    




In [None]:
# -------------------------------
# Save receipt
# -------------------------------
print("\nGenerated Receipt:")
for product, count in receipt.items():
    print(f"{product}: {count}")

with open(output_receipt_file, 'w') as f:
    f.write("Generated Receipt:\n")
    for product, count in receipt.items():
        f.write(f"{product}: {count}\n")
print(f"Receipt saved to {output_receipt_file}")


Generated Receipt:
90433917: 8
7040913336684: 2
90433924: 1
7023026089401: 1
4011: 2
94011: 1
7622210410337: 2
7044610874661: 1
7048840205868: 1
7071688004713: 1
7038010013966: 1
Receipt saved to generated_receipt.txt


: 