## Tratamiento dataset para hacerlo en formato YOLO

In [None]:
import numpy as np 
import pandas as pd 

import os
for dirname, _, filenames in os.walk('.'):
    for filename in filenames:
        print(os.path.join(dirname, filename))

import matplotlib.pyplot as plt
import torch

print(f'{torch.cuda.is_available() = }')
print(f'{torch.cuda.device_count() = }')


dataset_path = './plates'

import re

def the_number_in_the_string(filename):
    match=re.search(('\d+'),filename)
    if match:
        return int(match.group(0))
    else:
        return 0
print(the_number_in_the_string("file123.txt"))
print(the_number_in_the_string("no_numbers"))

labels_dict = dict(
    img_path=[],
    xmin=[],
    xmax=[],
    ymin=[],
    ymax=[],
    img_w=[],
    img_h=[]
)

from glob import glob

#get XML files from the annotations directory
xml_files = glob(f'{dataset_path}/annotations/*.xml')
xml_files

import xml.etree.ElementTree as xet

import cv2

for filename in sorted(xml_files, key=the_number_in_the_string):

    info = xet.parse(filename)
    root = info.getroot()

    member_object = root.find('object')
    labels_info = member_object.find('bndbox')
    xmin = int(labels_info.find('xmin').text)
    xmax = int(labels_info.find('xmax').text)
    ymin = int(labels_info.find('ymin').text)
    ymax = int(labels_info.find('ymax').text)

    img_name = root.find('filename').text
    img_path = os.path.join(dataset_path, 'images', img_name)

    labels_dict['img_path'].append(img_path)
    labels_dict['xmin'].append(xmin)
    labels_dict['xmax'].append(xmax)
    labels_dict['ymin'].append(ymin)
    labels_dict['ymax'].append(ymax)

    height, width, _ = cv2.imread(img_path).shape
    labels_dict['img_w'].append(width)
    labels_dict['img_h'].append(height)

    alldata = pd.DataFrame(labels_dict)


from sklearn.model_selection import train_test_split


train, test = train_test_split(alldata, test_size=1/10, random_state=42)
train, val = train_test_split(train, train_size=8/9, random_state=42)

print(f'''
      len(train) = {len(train)}
      len(val) = {len(val)}
      len(test) = {len(test)}
''')

import shutil

if os.path.exists('datasets'):
    shutil.rmtree('datasets')

def make_split_folder_in_yolo_format(split_name, split_df):
    labels_path = os.path.join('datasets', 'cars_license_plate_new', split_name, 'labels')
    images_path = os.path.join('datasets', 'cars_license_plate_new', split_name, 'images')


    os.makedirs(labels_path)
    os.makedirs(images_path)


    for _, row in split_df.iterrows():
        img_name, img_extension = os.path.splitext(os.path.basename(row['img_path']))


        x_center = (row['xmin'] + row['xmax']) / 2 / row['img_w']
        y_center = (row['ymin'] + row['ymax']) / 2 / row['img_h']
        width = (row['xmax'] - row['xmin']) / row['img_w']
        height = (row['ymax'] - row['ymin']) / row['img_h']

        label_path = os.path.join(labels_path, f'{img_name}.txt')
        with open(label_path, 'w') as file:
            file.write(f"0 {x_center:.4f} {y_center:.4f} {width:.4f} {height:.4f}\n")


        shutil.copy(row['img_path'], os.path.join(images_path, img_name + img_extension))

    print(f"Created '{images_path}' and '{labels_path}'")

make_split_folder_in_yolo_format("train", train)
make_split_folder_in_yolo_format("val", val)
make_split_folder_in_yolo_format("test", test)

image_dir = 'datasets/cars_license_plate_new/train/images'
label_dir = 'datasets/cars_license_plate_new/train/labels'

image_files = sorted(os.listdir(image_dir))
fifth_image_file = image_files[10]
image_path = os.path.join(image_dir, fifth_image_file)
label_path = os.path.join(label_dir, os.path.splitext(fifth_image_file)[0] + '.txt')

image = cv2.imread(image_path)

image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)


with open(label_path, 'r') as f:
    lines = f.readlines()


for line in lines:

    class_id, x_center, y_center, width, height = map(float, line.strip().split())
    img_height, img_width, _ = image.shape

    x_center *= img_width
    y_center *= img_height
    width *= img_width
    height *= img_height


    x1 = int(x_center - width / 2)
    y1 = int(y_center - height / 2)
    x2 = int(x_center + width / 2)
    y2 = int(y_center + height / 2)

    cv2.rectangle(image, (x1, y1), (x2, y2), (0, 255, 0), 2)


plt.imshow(image)
plt.axis('off')
plt.show()



# Practica 4

In [3]:
import cv2  
import math 
from matplotlib import pyplot as plt
from ultralytics import YOLO
from collections import defaultdict
import numpy as np
import easyocr
import csv


output_video_path = "detection_result.mp4"



lineaSalida = ""

exampleOut = [0, "", 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ""]
outs = []

# Nombre del archivo
nombre_archivo = 'out_matriculas.csv'


def detectOCR(img):
    #Carga del modelo de lengua
    reader = easyocr.Reader(['es']) 
    gray_img = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY) 

    imagen_escalada = cv2.resize(gray_img, None, fx=2, fy=2, interpolation=cv2.INTER_LINEAR)

    #Reconocimiento de una imagen
    res = reader.readtext(imagen_escalada)
    bestT = ""
    bestP = 0
    for (bbox, text, prob) in res:
        # Coordenadas en orden 
        (top_left, top_right, bottom_right, bottom_left) = bbox
        if prob > bestP:
            bestT = text
            bestP = prob

    return bestT 

model = YOLO('yolo11n.pt') #Contenedores

modeloMatriculas = YOLO('best.pt') #Matriculas

# Etiqueta de las distintas clases
classNames = ["person", "bicycle", "car", "motorbike", "aeroplane", "bus", "train", "truck", "boat",
              "traffic light", "fire hydrant", "stop sign", "parking meter", "bench", "bird", "cat",
              "dog", "horse", "sheep", "cow", "elephant", "bear", "zebra", "giraffe", "backpack", "umbrella",
              "handbag", "tie", "suitcase", "frisbee", "skis", "snowboard", "sports ball", "kite", "baseball bat",
              "baseball glove", "skateboard", "surfboard", "tennis racket", "bottle", "wine glass", "cup",
              "fork", "knife", "spoon", "bowl", "banana", "apple", "sandwich", "orange", "broccoli",
              "carrot", "hot dog", "pizza", "donut", "cake", "chair", "sofa", "pottedplant", "bed",
              "diningtable", "toilet", "tvmonitor", "laptop", "mouse", "remote", "keyboard", "cell phone",
              "microwave", "oven", "toaster", "sink", "refrigerator", "book", "clock", "vase", "scissors",
              "teddy bear", "hair drier", "toothbrush"
              ]


# Captura desde la webcam
filename = "C0142.MP4"
#filename = "Prueba.mp4"
vid = cv2.VideoCapture(filename)
track_history = defaultdict(lambda: [])
foto = 0  
numCoches = 0
regCoches = []
numPersonas = 0
regPersonas = []

# Configuración del VideoWriter para volcar el vídeo procesado a disco
frame_width = int(vid.get(cv2.CAP_PROP_FRAME_WIDTH))
frame_height = int(vid.get(cv2.CAP_PROP_FRAME_HEIGHT))
fps = vid.get(cv2.CAP_PROP_FPS)

# Inicializa el VideoWriter con el codec y la configuración deseada
fourcc = cv2.VideoWriter_fourcc(*'XVID')
out = cv2.VideoWriter(output_video_path, fourcc, fps, (frame_width, frame_height))

with open(nombre_archivo, mode='a', newline='') as archivo:
    escritor = csv.writer(archivo)

    while(True):      
        # fotograma a fotograma
        ret, img = vid.read()
    
        # si hay imagen válida
        if ret:
            
            # Seguimiento, con persistencia entre fotogramas
            results = model.track(img, persist=True, classes = [0,2],  verbose=False )
            
            # Para cada detección
            for r in results:
                boxes = r.boxes

                outLine = exampleOut.copy()

                outLine[0] = foto

                for box in boxes:
                    # Contenedor
                    x1, y1, x2, y2 = box.xyxy[0]
                    x1, y1, x2, y2 = int(x1), int(y1), int(x2), int(y2) # convert to int values

                    

                    #Etiqueta de seguimiento
                    if box.id is not None:
                        track_id = str(int(box.id[0].tolist()))
                    else:
                        track_id = ''
                    
                    # Confianza
                    confidence = math.ceil((box.conf[0]*100))/100
                    #print("Confianza --->",confidence)

                    # Clase
                    cls = int(box.cls[0])

                    if cls == 0 and track_id not in regPersonas:
                        numPersonas += 1
                        regPersonas.append(track_id)

                    if cls == 2 and track_id not in regCoches:
                        numCoches += 1
                        regCoches.append(track_id)    

                    if x2 - x1 < 200 and cls == 2:
                        continue
                    #print("Clase -->", classNames[cls])

                    # Convierte identificador numérico de clase a un color RGB
                    escala = int((cls / len(classNames)) * 255 * 3)
                    if escala >= 255*2:
                        R = 255
                        G = 255
                        B = escala - 255*2
                    else:
                        if escala >= 255:
                            R = 255
                            G = escala - 255
                            B = 0
                        else:
                            R = escala
                            G = 0
                            B = 0

                    area = img[y1:y2, x1:x2]

                    # Dibuja el contenedor y clase
                    cv2.rectangle(img, (x1, y1), (x2, y2), (R, G, B), 3)
                    cv2.putText(img, track_id + ' ' + classNames[cls] , [x1, y1], cv2.FONT_HERSHEY_SIMPLEX, 1, (255, 0, B), 2)

                    outLine[1] = classNames[cls]
                    outLine[2] = confidence
                    outLine[3] = track_id
                    outLine[4] = x1
                    outLine[5] = y1
                    outLine[6] = x2
                    outLine[7] = y2

                    if cls == 2:
                        outLine[8] = 1 
                    else:
                        outLine[8] = 0
                        escritor.writerow(outLine)
                        continue  

                    resultsMatriculas = modeloMatriculas(area,  verbose=False)

                    ocr_det = False

                    for re in resultsMatriculas:
                        detecciones = re.boxes

                        for boxe in detecciones:
                        # Contenedor
                            confidenceM = math.ceil((boxe.conf[0]*100))/100
                            if confidenceM < 0.5:
                                continue
                            X1, Y1, X2, Y2 = map(int, boxe.xyxy[0])
                            
                            outLine[9] = confidenceM
                            outLine[10] = X1
                            outLine[11] = Y1
                            outLine[12] = X2
                            outLine[13] = Y2

                            if ocr_det == False:

                                areaOCR = area[Y1:Y2, X1:X2]
                                resOCR = detectOCR(areaOCR)

                                outLine[14] = resOCR
                                escritor.writerow(outLine)
                                ocr_det = True

                            cv2.rectangle(img, (x1+X1, y1+Y1), (x1+X2, y1+Y2), (R, G, B), 3)    

            foto += 1  
            # Muestra fotograma
            cv2.imshow('Vid', img)
            out.write(img)
        else:
            break    
        
        # Detenemos pulsado ESC
        if cv2.waitKey(20) == 27:
            break

    escritor.writerow(["Numero Personas",numPersonas])
    escritor.writerow(["Numero Coches",numCoches])    

out.release()
# Libera el objeto de captura
vid.release()
# Destruye ventanas
cv2.destroyAllWindows()



OpenCV: FFMPEG: tag 0x44495658/'XVID' is not supported with codec id 12 and format 'mp4 / MP4 (MPEG-4 Part 14)'
OpenCV: FFMPEG: fallback to use tag 0x7634706d/'mp4v'
