In [1]:
import cv2
import os

In [8]:
# Create the main dataset directory and one subfolder per digit class.
main_folder = "mydigits_dataset"
subfolders = ['1', '2', '3', '4']

# exist_ok=True replaces the separate "exists?" check: the cell can be re-run
# safely and there is no window between checking and creating the directory.
os.makedirs(main_folder, exist_ok=True)

# Create subfolders
for subfolder in subfolders:
    folder_path = os.path.join(main_folder, subfolder)
    os.makedirs(folder_path, exist_ok=True)

In [3]:
# Detection settings: library defaults, nothing tuned.
arucoParams = cv2.aruco.DetectorParameters()

# Marker family to look for: the original ArUco dictionary.
arucoDict = cv2.aruco.getPredefinedDictionary(cv2.aruco.DICT_ARUCO_ORIGINAL)

# Detector object used by the capture function and by the live inference loop.
detector = cv2.aruco.ArucoDetector(
    arucoDict,
    arucoParams,
)

In [4]:
img_height, img_width = 224,224

def resize_frame(frame):
    """Resize an image to the classifier's input resolution.

    Args:
        frame: image array accepted by ``cv2.resize``.

    Returns:
        The image resized to ``img_width`` x ``img_height`` pixels.
    """
    # cv2.resize takes its target size as (width, height), not (height, width).
    # The two only coincide while the target is square.
    resized = cv2.resize(frame, (img_width, img_height))

    return resized

In [5]:
# Helper: cut out the regions framed by complete ArUco marker groups in one frame
def _crop_marker_groups(frame, corners, ids, marker_ids=(1, 2, 3), markers_per_group=4):
    """Return the crops delimited by each fully detected marker group.

    A group is the set of detected markers sharing one ID. Its region is the
    axis-aligned box spanned by the bottom-left corners of its markers.

    Args:
        frame: image the markers were detected in.
        corners: marker corner arrays as returned by ``detector.detectMarkers``.
        ids: marker IDs as returned by ``detector.detectMarkers`` (``None``
            when nothing was detected).
        marker_ids: IDs of the groups to look for, in output order.
        markers_per_group: number of markers that must be seen for a group to
            count as complete.

    Returns:
        A list of crops (views into ``frame``), one per complete group, in
        ``marker_ids`` order. Groups with a different marker count or an empty
        region are skipped.
    """
    if ids is None or len(corners) == 0:
        return []

    # Bottom-left corner of every detected marker, grouped by marker ID.
    anchors = {marker_id: [] for marker_id in marker_ids}
    for marker_corners, marker_id in zip(corners, ids.flatten()):
        marker_id = int(marker_id)
        if marker_id in anchors:
            # Corners are ordered top-left, top-right, bottom-right, bottom-left.
            bottom_left = marker_corners.reshape((4, 2))[3]
            anchors[marker_id].append((int(bottom_left[0]), int(bottom_left[1])))

    crops = []
    for marker_id in marker_ids:
        points = anchors[marker_id]
        if len(points) != markers_per_group:
            continue
        xs = [point[0] for point in points]
        ys = [point[1] for point in points]
        # Clamp the lower bounds so a negative coordinate cannot wrap around
        # through numpy's negative indexing.
        crop = frame[max(min(ys), 0):max(ys), max(min(xs), 0):max(xs)]
        # Collinear anchors give a zero-area region, which cv2.resize rejects.
        if crop.size > 0:
            crops.append(crop)
    return crops


# Function to capture and save frames from video, with start/stop functionality
def capture_and_save_frames_manual(folder_name):
    """Record marker-framed digit crops from the webcam into a class folder.

    Shows the live camera feed. Press 'r' to start recording, 's' to stop and
    'q' to quit. While recording, every frame is scanned for ArUco markers and
    each region framed by a complete marker group is resized with
    ``resize_frame`` and written once as
    ``<main_folder>/<folder_name>/frame_<n>.png``.

    NOTE: numbering restarts at 0 on every call, so calling this again for the
    same folder overwrites the earlier files.

    Args:
        folder_name: name of the subfolder of ``main_folder`` to write into.

    Returns:
        None.
    """
    cap = cv2.VideoCapture(0)

    if not cap.isOpened():
        print("Error: Could not open webcam.")
        cap.release()
        return

    recording = False
    frame_count = 0

    try:
        # cv2.imwrite does not create missing directories.
        output_dir = os.path.join(main_folder, folder_name)
        os.makedirs(output_dir, exist_ok=True)

        print("Press 'r' to start recording, 's' to stop, and 'q' to quit.")

        while True:
            ret, frame = cap.read()
            if not ret:
                print("Failed to grab frame.")
                break

            # Display the live feed
            cv2.imshow('Camera', frame)

            key = cv2.waitKey(1) & 0xFF

            if key == ord('r'):  # Start recording
                print("Recording started...")
                recording = True

            elif key == ord('s'):  # Stop recording
                print("Recording stopped.")
                recording = False

            elif key == ord('q'):  # Quit the program
                print("Quitting...")
                break

            if not recording:
                continue

            (corners, ids, rejected) = detector.detectMarkers(frame)

            # Groups are evaluated only after every marker has been seen, so
            # each region is written exactly once per frame.
            for crop in _crop_marker_groups(frame, corners, ids):
                frame_path = os.path.join(output_dir, f"frame_{frame_count}.png")
                if cv2.imwrite(frame_path, resize_frame(crop)):
                    frame_count += 1
    finally:
        # Always free the camera and close the preview, even after an error.
        cap.release()
        cv2.destroyAllWindows()

In [9]:
# Record webcam crops for class '1' into mydigits_dataset/1 (interactive: r / s / q keys).
capture_and_save_frames_manual('1')

Press 'r' to start recording, 's' to stop, and 'q' to quit.
Recording started...
Recording stopped.
Quitting...


In [10]:
# Record webcam crops for class '2' into mydigits_dataset/2 (interactive: r / s / q keys).
capture_and_save_frames_manual('2')

Press 'r' to start recording, 's' to stop, and 'q' to quit.
Recording started...
Recording started...
Recording stopped.
Recording stopped.
Recording stopped.
Recording stopped.
Quitting...


In [11]:
# Record webcam crops for class '3' into mydigits_dataset/3 (interactive: r / s / q keys).
capture_and_save_frames_manual('3')

Press 'r' to start recording, 's' to stop, and 'q' to quit.
Recording started...
Recording started...
Recording stopped.
Recording stopped.
Recording stopped.
Quitting...


In [12]:
# Record webcam crops for class '4' into mydigits_dataset/4 (interactive: r / s / q keys).
capture_and_save_frames_manual('4')

Press 'r' to start recording, 's' to stop, and 'q' to quit.
Recording started...
Recording stopped.
Recording stopped.
Recording stopped.
Recording stopped.
Quitting...


In [14]:
import tensorflow as tf
from tensorflow.keras.applications import EfficientNetB0
from tensorflow.keras.applications import MobileNetV2
from tensorflow.keras.layers import Dense, GlobalAveragePooling2D, Dropout
from tensorflow.keras.models import Model
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.callbacks import TensorBoard
import datetime

2025-02-04 19:25:19.140501: I tensorflow/core/util/port.cc:153] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.
2025-02-04 19:25:19.149235: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:477] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
E0000 00:00:1738693519.160286   91390 cuda_dnn.cc:8310] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
E0000 00:00:1738693519.163645   91390 cuda_blas.cc:1418] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
2025-02-04 19:25:19.175264: I tensorflow/core/platform/cpu_feature_guard.cc:210] This TensorFlow binary is optimized to use available CPU instr

In [15]:
# Backbone: MobileNetV2 with ImageNet weights and without its classification top.
# EfficientNetB0 is kept below as a commented-out alternative with the same arguments.
#base_model = EfficientNetB0(weights='imagenet', include_top=False, input_shape=(224, 224, 3))
base_model = MobileNetV2(
    weights='imagenet',
    include_top=False,
    input_shape=(224, 224, 3),
)

# Keep the pre-trained weights fixed so only the new head below is trained.
for backbone_layer in base_model.layers:
    backbone_layer.trainable = False

# Classification head: pooled backbone features -> 128-unit hidden layer -> softmax,
# with dropout before and after the hidden layer to limit overfitting.
x = GlobalAveragePooling2D()(base_model.output)
x = Dropout(0.3)(x)
x = Dense(128, activation='relu')(x)
x = Dropout(0.3)(x)
# Four outputs: one per class folder of the dataset (digits 1-4).
output = Dense(4, activation='softmax')(x)

I0000 00:00:1738693526.247160   91390 gpu_device.cc:2022] Created device /job:localhost/replica:0/task:0/device:GPU:0 with 1162 MB memory:  -> device: 0, name: NVIDIA GeForce RTX 4060 Laptop GPU, pci bus id: 0000:01:00.0, compute capability: 8.9


In [16]:
# Join the backbone input to the classification head output.
model = Model(inputs=base_model.input, outputs=output)

# Adam with a fixed 1e-3 learning rate. The loss is categorical cross-entropy
# over the softmax outputs, and accuracy is tracked as the metric.
model.compile(
    loss='categorical_crossentropy',
    metrics=['accuracy'],
    optimizer=Adam(learning_rate=0.001),
)

In [17]:
path_to_dataset = 'mydigits_dataset'

# Scale pixel values by 1/255 and reserve 20% of the images for validation.
train_datagen = ImageDataGenerator(rescale=1./255, validation_split=0.2)

# Settings shared by the training and validation iterators.
flow_options = dict(
    target_size=(224, 224),
    batch_size=32,
    class_mode='categorical',
)

# Both iterators read the same folder (one subfolder per digit class) and
# differ only in which side of the split they serve.
train_generator = train_datagen.flow_from_directory(
    path_to_dataset,
    subset='training',
    **flow_options,
)

validation_generator = train_datagen.flow_from_directory(
    path_to_dataset,
    subset='validation',
    **flow_options,
)

Found 505 images belonging to 4 classes.
Found 125 images belonging to 4 classes.


In [18]:
# Number of passes over the training data; adjust based on your dataset size.
epochs = 10

# Train the model, evaluating on the validation subset as well.
history = model.fit(
    train_generator,
    epochs=epochs,
    validation_data=validation_generator,
)

# Save the trained model to disk; the inference cell loads this same file.
model.save('imagenet_digit_classifier.keras')

Epoch 1/10


  self._warn_if_super_not_called()
I0000 00:00:1738693569.061488   97376 service.cc:148] XLA service 0x7c35800026d0 initialized for platform CUDA (this does not guarantee that XLA will be used). Devices:
I0000 00:00:1738693569.061506   97376 service.cc:156]   StreamExecutor device (0): NVIDIA GeForce RTX 4060 Laptop GPU, Compute Capability 8.9
2025-02-04 19:26:09.120690: I tensorflow/compiler/mlir/tensorflow/utils/dump_mlir_util.cc:268] disabling MLIR crash reproducer, set env var `MLIR_CRASH_REPRODUCER_DIRECTORY` to enable.
I0000 00:00:1738693569.494043   97376 cuda_dnn.cc:529] Loaded cuDNN version 90600
E0000 00:00:1738693571.641352   97376 gpu_timer.cc:82] Delay kernel timed out: measured time has sub-optimal accuracy. There may be a missing warmup execution, please investigate in Nsight Systems.
E0000 00:00:1738693571.753114   97376 gpu_timer.cc:82] Delay kernel timed out: measured time has sub-optimal accuracy. There may be a missing warmup execution, please investigate in Nsight 

[1m 1/16[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m2:11[0m 9s/step - accuracy: 0.1875 - loss: 1.6990

I0000 00:00:1738693575.258835   97376 device_compiler.h:188] Compiled cluster using XLA!  This line is logged at most once for the lifetime of the process.
E0000 00:00:1738693578.885224   97376 gpu_timer.cc:82] Delay kernel timed out: measured time has sub-optimal accuracy. There may be a missing warmup execution, please investigate in Nsight Systems.
E0000 00:00:1738693578.975382   97376 gpu_timer.cc:82] Delay kernel timed out: measured time has sub-optimal accuracy. There may be a missing warmup execution, please investigate in Nsight Systems.


[1m16/16[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 384ms/step - accuracy: 0.6040 - loss: 0.9223







E0000 00:00:1738693586.116822   97376 gpu_timer.cc:82] Delay kernel timed out: measured time has sub-optimal accuracy. There may be a missing warmup execution, please investigate in Nsight Systems.
E0000 00:00:1738693586.226040   97376 gpu_timer.cc:82] Delay kernel timed out: measured time has sub-optimal accuracy. There may be a missing warmup execution, please investigate in Nsight Systems.
E0000 00:00:1738693587.986274   97376 gpu_timer.cc:82] Delay kernel timed out: measured time has sub-optimal accuracy. There may be a missing warmup execution, please investigate in Nsight Systems.
E0000 00:00:1738693588.076315   97376 gpu_timer.cc:82] Delay kernel timed out: measured time has sub-optimal accuracy. There may be a missing warmup execution, please investigate in Nsight Systems.


[1m16/16[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m23s[0m 940ms/step - accuracy: 0.6159 - loss: 0.8972 - val_accuracy: 1.0000 - val_loss: 0.0334
Epoch 2/10
[1m16/16[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 60ms/step - accuracy: 0.9675 - loss: 0.0878 - val_accuracy: 0.9920 - val_loss: 0.0223
Epoch 3/10
[1m16/16[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 58ms/step - accuracy: 0.9889 - loss: 0.0348 - val_accuracy: 1.0000 - val_loss: 0.0115
Epoch 4/10
[1m16/16[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 61ms/step - accuracy: 1.0000 - loss: 0.0176 - val_accuracy: 1.0000 - val_loss: 0.0052
Epoch 5/10
[1m16/16[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 55ms/step - accuracy: 0.9995 - loss: 0.0188 - val_accuracy: 1.0000 - val_loss: 0.0031
Epoch 6/10
[1m16/16[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 69ms/step - accuracy: 0.9960 - loss: 0.0138 - val_accuracy: 1.0000 - val_loss: 0.0058
Epoch 7/10
[1m16/16[0m [32m━━━━━━━━━━━━━

In [21]:
import cv2
import numpy as np
from tensorflow.keras.models import load_model

# Input resolution fed to the classifier.
img_height = img_width = 224

# Load the model file written by the training cell.
model = load_model("imagenet_digit_classifier.keras")

# Digit displayed for each of the model's four output indices (1, 2, 3, 4).
class_labels = list(range(1, 5))

# Function to preprocess the image before feeding into the model
def preprocess_frame(frame):
    """Turn a BGR crop from OpenCV into a batch of one image for the model.

    Args:
        frame: 3-channel image in OpenCV's BGR channel order.

    Returns:
        Array of shape ``(1, img_height, img_width, 3)`` with RGB values
        scaled to the 0-1 range.
    """
    # cv2.resize takes its target size as (width, height), not (height, width).
    resized = cv2.resize(frame, (img_width, img_height))

    # The training images were read from disk by Keras, which loads RGB by
    # default, while OpenCV frames are BGR. Match the training channel order.
    rgb = cv2.cvtColor(resized, cv2.COLOR_BGR2RGB)

    # Same 1/255 scaling as the training generator.
    normalized = rgb / 255.0

    # Add the batch axis; reshape also fails loudly if the crop is not 3-channel.
    reshaped = normalized.reshape(1, img_height, img_width, 3)

    return reshaped


# Initialize camera capture
cap = cv2.VideoCapture(0)

try:
    while True:
        # Capture frame-by-frame from the camera
        ret, frame = cap.read()

        if not ret:
            print("Failed to capture frame")
            break

        (corners, ids, rejected) = detector.detectMarkers(frame)

        # Bottom-left corner of every detected marker, grouped by marker ID.
        # IDs 1, 2 and 3 each frame one digit region with four markers
        # (presumably units, tens and hundreds, going by the original debug
        # window names).
        anchors = {1: [], 2: [], 3: []}

        # A digit region needs four markers, so fewer detections cannot complete one.
        if ids is not None and len(corners) >= 4:
            for (markerCorner, markerID) in zip(corners, ids.flatten()):
                markerID = int(markerID)
                if markerID in anchors:
                    # Corners are ordered top-left, top-right, bottom-right, bottom-left.
                    bottomLeft = markerCorner.reshape((4, 2))[3]
                    anchors[markerID].append((int(bottomLeft[0]), int(bottomLeft[1])))

        # Classify every complete region exactly once, and before any text is
        # drawn: the crops are views into `frame`, so drawing first would put
        # the overlay into the classifier input.
        frame_probabilities = []
        for markerID in sorted(anchors):
            points = anchors[markerID]
            if len(points) != 4:
                continue
            xs = [point[0] for point in points]
            ys = [point[1] for point in points]
            # Clamp the lower bounds so a negative coordinate cannot wrap
            # around through numpy's negative indexing.
            dst = frame[max(min(ys), 0):max(ys), max(min(xs), 0):max(xs)]
            # Collinear anchors give a zero-area crop, which cv2.resize rejects.
            if dst.size == 0:
                continue

            # Perform inference
            predictions = model.predict(preprocess_frame(dst), verbose=0)

            # One probability per class (digits 1, 2, 3, 4)
            frame_probabilities.append(predictions[0])

        # Display the probabilities on the frame. A single region is drawn at
        # the top-left as before; further regions are stacked below it in
        # marker-ID order instead of being drawn over each other.
        for block_index, probabilities in enumerate(frame_probabilities):
            # Find the index of the highest probability
            max_index = np.argmax(probabilities)

            for i, prob in enumerate(probabilities):
                text = f"Digit {class_labels[i]}: {prob:.2f}"

                # Set text color: green for the highest probability, blue for others
                if i == max_index:
                    color = (0, 255, 0)  # Green
                else:
                    color = (255, 0, 0)  # Blue

                row = block_index * len(probabilities) + i
                cv2.putText(frame, text, (10, 30 + row * 30), cv2.FONT_HERSHEY_SIMPLEX, 1, color, 2)

        # Show the frame with probabilities
        cv2.imshow('Digit Prediction', frame)

        # Break the loop if 'q' is pressed
        if cv2.waitKey(1) & 0xFF == ord('q'):
            break
finally:
    # Release the capture and close windows, even if the loop raised.
    cap.release()
    cv2.destroyAllWindows()