#%% Load labels
import glob
import os

import pandas as pd

# Build one row per annotated box from the label files of the validation split.
# Every non-blank line must hold exactly five whitespace-separated fields:
# class id, x-centre, y-centre, width, height.
LABEL_COLUMNS = ['filename', 'class', 'xc', 'yc', 'width', 'height']

records = []
# sorted() makes the row order reproducible; glob returns files in arbitrary order
for txt in sorted(glob.glob("data/valid/labels/*.txt")):
    # Swap only the extension (str.replace would touch every '.txt' in the name)
    fname = os.path.splitext(os.path.basename(txt))[0] + '.jpg'
    with open(txt) as f:
        for line in f:
            fields = line.split()
            if not fields:
                # Blank lines carry no annotation; skip them instead of
                # crashing on the 5-way unpack below
                continue
            # Any other field count still raises ValueError on purpose:
            # it means the file is not in the expected format
            cls, xc, yc, w, h = fields
            records.append({'filename': fname, 'class': int(cls),
                            'xc': float(xc), 'yc': float(yc),
                            'width': float(w), 'height': float(h)})

# Explicit columns keep df['class'] etc. valid even when no labels were found
df = pd.DataFrame(records, columns=LABEL_COLUMNS)
print("Label counts:", df['class'].value_counts())

#%% EDA plots
import matplotlib.pyplot as plt

# Histogram of the box widths stored in df (20 bins), shown as its own figure
bbox_widths = df['width']
plt.hist(bbox_widths, bins=20)
plt.title("Normalized bbox width distribution")
plt.show()

#%% Visual overlay example
import cv2
import random

# Sanity check: draw one randomly sampled label box on top of its image
row = df.sample(1).iloc[0]
img_path = f"data/valid/images/{row['filename']}"
img = cv2.imread(img_path)
if img is None:
    # cv2.imread reports a missing or unreadable file by returning None
    # instead of raising, so fail here with a message that names the file
    raise FileNotFoundError(f"Could not read image: {img_path}")
h, w = img.shape[:2]

# Labels are fractions of the image size (centre + extent); scale them by
# the image width/height to get the pixel corners of the rectangle
x1 = int((row['xc'] - row['width'] / 2) * w)
y1 = int((row['yc'] - row['height'] / 2) * h)
x2 = int((row['xc'] + row['width'] / 2) * w)
y2 = int((row['yc'] + row['height'] / 2) * h)
cv2.rectangle(img, (x1, y1), (x2, y2), (0, 255, 0), 2)

# OpenCV loads images as BGR; matplotlib expects RGB
plt.imshow(cv2.cvtColor(img, cv2.COLOR_BGR2RGB))
plt.axis('off')
plt.show()

#%% YOLOv5 training via Ultralytics
!pip install ultralytics
from ultralytics import YOLO
model = YOLO('yolov5s.pt')
model.train(data={'train':'data/valid/images','val':'data/valid/images'},
            imgsz=640, epochs=10, batch=8)

#%% Evaluation
# Validate the trained model; with no arguments val() uses the settings the
# model was trained with.
metrics = model.val()
# The box metrics object exposes mean precision / recall as `mp` / `mr`;
# it has no `precision` or `recall` attributes.
print(metrics.box.map50, metrics.box.mp, metrics.box.mr)
