# Smart Farm

## Init

### Imports

In [1]:
import os
import shutil
import sys
from random import sample
from traceback import print_tb
import time
import math

### Global

In [2]:
# Number of image ids in the dataset; get_split() enumerates ids
# 0 .. DATASET_SIZE - 1 and new_batch() sizes its batches as fractions of it.
DATASET_SIZE = 200
# Nominal train/validation/test proportions.
# NOTE(review): not referenced by any code visible in this notebook.
TRAIN_PROP = 0.6
VAL_PROP = 0.2
TEST_PROP = 0.2
# Root directory that every dataset/model path below is joined onto.
# NOTE: this name shadows the builtin dir() for the rest of the notebook.
dir='.'

## Auxiliary Functions

In [3]:
def get_split(train_size=0.6, val_size=0.2, test_size=0.2):
    """Draw a random train/validation split of image ids, excluding test ids.

    Image ids are the integers ``0 .. DATASET_SIZE - 1``. Every id that
    appears as a file stem in ``<dir>/dataset/official/test`` (ignoring
    ``classes.txt``) is removed from the pool before sampling.

    Args:
        train_size: Fraction of ``DATASET_SIZE`` to place in the train split.
        val_size: Fraction of ``DATASET_SIZE`` to place in the validation split.
        test_size: Unused. The test set is defined by the contents of the
            test directory; the parameter is kept so existing calls still work.

    Returns:
        A ``(train, val)`` tuple of disjoint lists of integer image ids.

    Raises:
        ValueError: If more ids are requested than remain after removing the
            test ids, or if a test file name does not start with an integer.
    """
    test_dir = os.path.join(dir, "dataset/official/test")
    # A set gives O(1) membership tests and absorbs the .jpg/.txt duplicates.
    test_ids = {
        int(file.split('.')[0])
        for file in os.listdir(test_dir)
        if file != "classes.txt"
    }

    # Split sizes are fractions of the FULL dataset, not of what is left once
    # the test ids have been removed.
    train_val_count = int((train_size + val_size) * DATASET_SIZE)
    val_count = int(val_size * DATASET_SIZE)

    candidates = [num for num in range(DATASET_SIZE) if num not in test_ids]
    if train_val_count > len(candidates):
        raise ValueError(
            f"Requested {train_val_count} train+val ids but only "
            f"{len(candidates)} remain after removing the test set"
        )

    train_val = sample(candidates, train_val_count)
    val = sample(train_val, val_count)
    val_ids = set(val)
    train = [num for num in train_val if num not in val_ids]

    return train, val

def rm_from_dir():
    """Empty the working split folders under ``<dir>/dataset/run``.

    For each of ``train``, ``test`` and ``val`` (in that order) every regular
    file directly inside the folder is deleted. Sub-directories are left
    alone, and the three folders must already exist.
    """
    run_root = os.path.join(dir, 'dataset/run')
    for split_name in ('train', 'test', 'val'):
        folder = os.path.join(run_root, split_name)
        for entry in os.listdir(folder):
            candidate = os.path.join(folder, entry)
            if not os.path.isfile(candidate):
                # Skip anything that is not a plain file (e.g. a directory).
                continue
            os.remove(candidate)

def get_avg_conf(file_path, mode='confidence'):
    """Score one detection label file.

    Each line of the file is expected to hold whitespace-separated fields with
    the confidence as the sixth field (index 5).

    Args:
        file_path: Path of the label ``.txt`` file to read.
        mode: ``'count'`` returns the number of lines in the file. Any other
            value returns the mean of the confidence column. The value also
            selects the fallback score used when the file cannot be read or
            parsed: ``0`` for ``'confidence'`` and ``'count'``, ``1`` otherwise.

    Returns:
        The line count, the mean confidence (``0`` for an empty file), or the
        fallback score described above.
    """
    # Score reported when the file is missing, unreadable or malformed.
    fallback = 0 if mode in ['confidence', 'count'] else 1

    try:
        with open(file_path, "r") as file:
            lines = file.readlines()
    except (OSError, UnicodeDecodeError):
        return fallback

    if mode == 'count':
        return len(lines)
    if not lines:
        return 0

    try:
        total_conf = sum(float(line.split()[5]) for line in lines)
    except (IndexError, ValueError):
        # A line with fewer than six fields or a non-numeric confidence.
        return fallback
    return total_conf / len(lines)
    
    
def avg_conf(mode='RANDOM', AL_mode='confidence', project='batch', name='name_batch'):
    """List the images not yet in the run's train/val folders, optionally ranked.

    The candidate pool is every file in ``<dir>/dataset/train_val_images``
    whose name is not already present as a ``.jpg`` in
    ``<dir>/dataset/run/train`` or ``<dir>/dataset/run/val``.

    Args:
        mode: For ``'AL'`` or ``'APPROACH'`` the candidates are ranked by the
            score of their label file; for any other value they are returned
            unranked, in directory-listing order.
        AL_mode: Currently unused.
            NOTE(review): ``mode`` (not ``AL_mode``) is what gets forwarded to
            ``get_avg_conf``, so a missing label file scores 1 rather than 0.
            Confirm which scoring mode was intended before changing it.
        project: Directory (relative to ``dir``) holding the detection run.
        name: Name of the detection run inside ``project``.

    Returns:
        A list of file names with their last four characters (the extension)
        removed. In ranked modes the list is sorted by ascending score, read
        from ``<dir>/<project>/<name>/labels/<stem>.txt``.
    """
    train_dir = os.path.join(dir, 'dataset/run/train')
    val_dir = os.path.join(dir, "dataset/run/val")
    # One set for both splits: O(1) membership instead of scanning two lists.
    already_used = {file for file in os.listdir(train_dir) if file.endswith('.jpg')}
    already_used.update(file for file in os.listdir(val_dir) if file.endswith('.jpg'))

    train_val_images_dir = os.path.join(dir, 'dataset/train_val_images')
    to_eval = [
        file[:-4]
        for file in os.listdir(train_val_images_dir)
        if file not in already_used
    ]

    if mode not in ['AL', 'APPROACH']:
        return to_eval

    labels_dir = os.path.join(dir, project, name, 'labels')
    scored = [
        (stem, get_avg_conf(os.path.join(labels_dir, f'{stem}.txt'), mode=mode))
        for stem in to_eval
    ]
    # Stable sort: candidates with equal scores keep directory-listing order.
    scored.sort(key=lambda item: item[1])

    return [stem for stem, _ in scored]


def new_batch(weights="''", project="batch", name="name_batch", mode="RANDOM", train_size=0.05, val_size=0.05, split_random=True, dataset="improved"):
    """Add a new batch of images to the run's train/val folders.

    In ``'AL'`` / ``'APPROACH'`` mode the current model is first run over
    ``dataset/train_val_images/`` and the lowest-scoring unused images are
    taken. In every other mode the batch is drawn uniformly at random from
    the unused images.

    Args:
        weights: Weights file handed to ``detect`` (model-driven modes only).
        project: Project directory handed to ``detect``.
        name: Run name handed to ``detect``.
        mode: Selection strategy, see above.
        train_size: Fraction of ``DATASET_SIZE`` to add to the train folder.
        val_size: Fraction of ``DATASET_SIZE`` to add to the validation folder.
        split_random: In model-driven modes, whether the selected batch is
            divided between train and val at random (True) or in ranked
            order (False).
        dataset: Sub-directory of ``<dir>/dataset/official`` that the image
            and label files are copied from.

    Raises:
        ValueError: If fewer unused images remain than the batch requires.
        FileNotFoundError: If a selected image or label file is missing.
    """
    uses_model = mode in ["AL", "APPROACH"]
    if uses_model:
        detect(weights=weights, project=project, name=name, source="dataset/train_val_images/")

    # detect() saves its run under "<project>/detect/<name>", so the label
    # files must be looked up there. Reading "<project>/<name>/labels" found
    # no files, which silently gave every image the same fallback score.
    batch = avg_conf(mode=mode, project=os.path.join(project, "detect"), name=name)

    n_train = int(DATASET_SIZE * train_size)
    n_val = int(DATASET_SIZE * val_size)
    n_batch = int(DATASET_SIZE * (train_size + val_size))

    if not uses_model:
        batch = sample(batch, n_batch)
        train = batch[:n_train]
        val = batch[n_train:n_train + n_val]
    else:
        batch = batch[:n_batch]
        if split_random:
            train = sample(batch, n_train)
            chosen = set(train)
            val = [im for im in batch if im not in chosen]
        else:
            train = batch[:n_train]
            val = batch[n_train:n_train + n_val]

    train_dst_dir = os.path.join(dir, "dataset/run/train")
    val_dst_dir = os.path.join(dir, "dataset/run/val")
    official_dir = os.path.join(dir, "dataset/official", dataset)

    # Copy the label and the image of every selected file into its split.
    for files, dst_dir in ((train, train_dst_dir), (val, val_dst_dir)):
        for file in files:
            for file_type in ['.txt', '.jpg']:
                src = os.path.join(official_dir, file + file_type)
                dst = os.path.join(dst_dir, file + file_type)
                shutil.copy(src, dst)


def split_dataset(dataset="improved", train_split=(), val_split=(), test_split=()):
    """Reset the run folders and fill them with the given splits.

    All files in ``<dir>/dataset/run/{train,test,val}`` are removed first.
    Then, for every id in each split, ``<id>.jpg`` and ``<id>.txt`` are copied
    into the matching run folder.

    Args:
        dataset: Sub-directory of ``<dir>/dataset`` that train and validation
            files are copied from. Test files always come from ``improved``.
        train_split: Ids (file stems) to copy into the train folder.
        val_split: Ids (file stems) to copy into the validation folder.
        test_split: Ids (file stems) to copy into the test folder.

    Raises:
        FileNotFoundError: If an image or label file for an id is missing.

    NOTE(review): this reads from ``dataset/<dataset>`` while ``new_batch``
    reads from ``dataset/official/<dataset>``; confirm both locations are
    intended.
    """
    rm_from_dir()

    run_dir = os.path.join(dir, 'dataset/run')
    splits = (('train', train_split), ('val', val_split), ('test', test_split))
    for split, file_list in splits:
        # The source directory depends only on the split, so build it once.
        src_dir = os.path.join(dir, 'dataset', dataset if split != 'test' else 'improved')
        split_dir = os.path.join(run_dir, split)
        for file in file_list:
            for file_type in ('.jpg', '.txt'):
                src = os.path.join(src_dir, f'{file}{file_type}')
                dst = os.path.join(split_dir, f'{file}{file_type}')
                shutil.copy(src, dst)

## Object Detection

In [4]:
# #pull YOLOv5
#!git clone https://github.com/ultralytics/yolov5
#%cd $dir/yolov5
#!git pull
#!pip install -qr 'requirements.txt'  # install dependencies
#%cd $dir

In [5]:
#> /dev/null 2>&1
def train(img=1280, epochs=50, data='data.yaml', name='new_train', project='train', weights="''"):
    """Launch ``yolov5/train.py`` through an IPython shell command.

    Args:
        img: Value passed as ``--img``.
        epochs: Value passed as ``--epochs``.
        data: Value passed as ``--data``.
        name: Run name passed as ``--name``.
        project: Parent directory; the script receives ``<project>/train``
            as ``--project``.
        weights: Value passed as ``--weights``. The default is a literal pair
            of single quotes, which the shell turns into an empty argument
            (presumably "no pretrained weights"; confirm against train.py).

    The command also fixes ``--cache ram``, ``--batch -1``,
    ``--cfg yolov5s.yaml`` and ``--device 0``. Both stdout and stderr are
    redirected to /dev/null, so a failed training run produces no visible
    error here.
    """
    !python {dir}/yolov5/train.py \
    --cache ram --batch -1 --epochs {epochs} --img {img} \
    --data {data} \
    --name {name} \
    --project {project}/train \
    --cfg yolov5s.yaml --weights {weights} \
    --device 0 > /dev/null 2>&1

def detect(img=1280, source='imgs/', name='new_detect', project='detect', weights='best.pt'):
    """Launch ``yolov5/detect.py`` through an IPython shell command.

    Args:
        img: Value passed as ``--img``.
        source: Path relative to ``dir``; passed as ``--source <dir>/<source>``.
        name: Run name passed as ``--name``.
        project: Parent directory; the script receives ``<project>/detect``
            as ``--project``.
        weights: Value passed as ``--weights``.

    The command enables ``--save-txt --save-conf`` and fixes ``--device 0``.
    Both stdout and stderr are redirected to /dev/null, so a failed detection
    run produces no visible error here.
    """
    !python {dir}/yolov5/detect.py \
    --save-txt --save-conf \
    --source {dir}/{source} \
    --weights {weights} \
    --img {img} \
    --name {name} \
    --project {project}/detect \
    --device 0 > /dev/null 2>&1

def test(img=1280, data='data.yaml', name='new_test', project='test', weights="''"):
    """Launch ``yolov5/val.py`` with ``--task test`` through an IPython shell command.

    Args:
        img: Value passed as ``--img``.
        data: Path relative to ``dir``; passed as ``--data <dir>/<data>``.
        name: Run name passed as ``--name``.
        project: Parent directory; the script receives ``<project>/test``
            as ``--project``.
        weights: Value passed as ``--weights``.

    The command fixes ``--device 0`` and passes ``--exist-ok``. Both stdout
    and stderr are redirected to /dev/null, so a failed evaluation produces
    no visible error here.
    """
    !python {dir}/yolov5/val.py \
    --data {dir}/{data} \
    --weights {weights} \
    --img {img} --task test \
    --name {name} \
    --project {project}/test \
    --device 0 --exist-ok > /dev/null 2>&1


## Execute

In [None]:
from tqdm import tqdm

def run_all():
    """Run the full incremental-training experiment.

    For each of ``num_runs`` runs and each selection mode, the run folders are
    reset to one shared initial split, a model is trained and evaluated, and
    then ``num_splits - 1`` further batches are added with ``new_batch``, each
    followed by another train/evaluate round. One record per round
    (run, mode, image count, training seconds) is appended to
    ``<project>.txt``.
    """
    train_proportion = 0.6
    num_splits = 8
    train_split_proportion = round(train_proportion / num_splits, 3)
    # No validation images are added per batch; validation comes only from
    # the initial split drawn below.
    val_split_proportion = 0
    num_runs = 30
    split_type = True
    epochs = 500

    # Number of images counted for every split (initial one included).
    images_per_split = math.floor(DATASET_SIZE * (train_split_proportion + val_split_proportion))

    # Get initial train and validation splits.
    # NOTE(review): drawn once, so every run and mode starts from the same
    # initial split; confirm this is intended.
    train_split, val_split = get_split(train_size=train_split_proportion, val_size=0.2)
    # NOTE(review): the directory name says 250 while ``epochs`` is 500.
    experiment_name = '250eppochs'
    project_name = f'train_output/{experiment_name}'

    def train_log_test(project, name, run, mode, images, weights):
        """Train one model, append its timing record, then evaluate it."""
        start = time.time()
        train(project=project, name=name, epochs=epochs, weights=weights)
        elapsed = time.time() - start

        # The record ends with a tab and no newline.
        # NOTE(review): presumably the evaluation step completes the line;
        # confirm against the val.py in use.
        with open(f"{project}.txt", 'a') as file:
            file.write(f"{run}\t{mode}\t{images}\t{elapsed}\t")

        test(name=project, weights=f"{dir}/{project}/train/{name}/weights/best.pt")

    # Loop through each run
    for run in tqdm(range(num_runs), desc="Runs", leave=True, position=0):
        project = f'{project_name}_{run}'

        # Loop through each mode
        for mode in tqdm(["TL", "APPROACH", "RANDOM", "AL", "MA"], desc="Modes", leave=True, position=1):
            name = f"{mode}_split0"
            dataset = "original" if mode in ["RANDOM", "AL", "TL"] else "improved"

            # Reset the run folders to the shared initial split
            split_dataset(dataset=dataset, train_split=train_split, val_split=val_split)

            # Modes other than RANDOM/AL/MA start from the bruno.pt checkpoint
            weights = "''" if mode in ["RANDOM", "AL", "MA"] else f"{dir}/bruno.pt"

            # Train, log and evaluate the initial model
            images = images_per_split
            train_log_test(project, name, run, mode, images, weights)

            # Loop through each additional split
            for num in tqdm(range(1, num_splits), desc="Splits", leave=False, position=2):
                images += images_per_split

                # Select the next batch using the model trained in the previous round
                new_batch(weights=f"{dir}/{project}/train/{name}/weights/best.pt", project=f"{project}/batch", name=name, mode=mode, split_random=split_type, train_size=train_split_proportion, val_size=val_split_proportion, dataset=dataset)

                name = f"{mode}_split{num+1}"

                # Retrain from the same starting weights on the enlarged set
                train_log_test(project, name, run, mode, images, weights)


# Start the full experiment sweep (all runs, modes and splits).
run_all()

Runs:   0%|                                              | 0/30 [00:00<?, ?it/s]
Modes:   0%|                                              | 0/5 [00:00<?, ?it/s][A

Splits:   0%|                                             | 0/7 [00:00<?, ?it/s][A[A

Splits:  14%|█████▏                              | 1/7 [08:13<49:20, 493.39s/it][A[A

Splits:  29%|██████████▎                         | 2/7 [17:52<45:17, 543.59s/it][A[A

Splits:  43%|███████████████▍                    | 3/7 [29:07<40:15, 603.83s/it][A[A

Splits:  57%|████████████████████▌               | 4/7 [41:07<32:29, 649.74s/it][A[A

Splits:  71%|█████████████████████████▋          | 5/7 [53:29<22:45, 682.79s/it][A[A

Splits:  86%|█████████████████████████████▏    | 6/7 [1:08:12<12:31, 751.06s/it][A[A

Splits: 100%|██████████████████████████████████| 7/7 [1:24:37<00:00, 827.53s/it][A[A

                                                                                [A[A
Modes:  20%|██████▍                        

In [2]:
# The model class is exported as ``YOLO``; importing ``yolo`` left the name
# ``YOLO`` used below undefined.
from ultralytics import YOLO


# Path to the trained model weights
# NOTE(review): this checkpoint appears to come from the standalone yolov5
# training script; confirm that the ultralytics package can load it.
model_path = "/home/dinis/data-efficiency/RECPAD_250epochs__0/train/ALL_split16/weights/best.pt"

# Load the model
model = YOLO(model_path)

# Path to the image to run inference on
image_path = "/home/dinis/data-efficiency/dataset/only_images/165.jpg"

# Perform inference; predict() returns a list with one result per input image
results = model.predict(image_path)

# Display each output image with its predictions
for result in results:
    result.show()

ModuleNotFoundError: No module named 'ultralytics'

In [11]:
#!pip install ultralytics
# Run yolov5/detect.py on a video with the trained weights at image size 1280
# and confidence threshold 0.25, saving the detections (with confidences) as
# text files.
!python yolov5/detect.py \
  --weights /home/dinis/data-efficiency/RECPAD_250epochs__0/train/ALL_split16/weights/best.pt \
  --source /home/dinis/data-efficiency/VID_20221007_152743.mp4 \
  --img 1280 \
  --conf 0.25 \
  --save-txt \
  --save-conf

[34m[1mdetect: [0mweights=['/home/dinis/data-efficiency/RECPAD_250epochs__0/train/ALL_split16/weights/best.pt'], source=/home/dinis/data-efficiency/VID_20221007_152743.mp4, data=yolov5/data/coco128.yaml, imgsz=[1280, 1280], conf_thres=0.25, iou_thres=0.45, max_det=1000, device=, view_img=False, save_txt=True, save_format=0, save_csv=False, save_conf=True, save_crop=False, nosave=False, classes=None, agnostic_nms=False, augment=False, visualize=False, update=False, project=yolov5/runs/detect, name=exp, exist_ok=False, line_thickness=3, hide_labels=False, hide_conf=False, half=False, dnn=False, vid_stride=1
YOLOv5 🚀 v7.0-371-g6629839d Python-3.10.12 torch-2.7.0+cu126 CUDA:0 (NVIDIA GeForce RTX 4090, 24215MiB)

Fusing layers... 
YOLOv5s summary: 157 layers, 7012822 parameters, 0 gradients, 15.8 GFLOPs
video 1/1 (1/388) /home/dinis/data-efficiency/VID_20221007_152743.mp4: 736x1280 13 WFs, 17.3ms
video 1/1 (2/388) /home/dinis/data-efficiency/VID_20221007_152743.mp4: 736x1280 12 WFs, 2.3m

In [1]:
from utils import train_yolov12
from utils import get_split, split_dataset

# Draw a train/validation split and populate the run folders from the
# "improved" dataset; no test ids are copied.
# NOTE: the names `train` and `val` are rebound to the returned splits here.
train,val = get_split(train_size=0.6, val_size=0.2, test_size=0.2)
split_dataset(train_split=train, val_split=val, test_split=[], dataset="improved")

# Start training with the helper's default settings.
train_yolov12()

Ultralytics 8.3.159 🚀 Python-3.11.5 torch-2.7.1 CPU (Apple M3 Pro)
[34m[1mengine/trainer: [0magnostic_nms=False, amp=True, augment=False, auto_augment=randaugment, batch=2, bgr=0.0, box=7.5, cache=False, cfg=None, classes=None, close_mosaic=10, cls=0.5, conf=None, copy_paste=0.0, copy_paste_mode=flip, cos_lr=False, cutmix=0.0, data=dataset/data.yaml, degrees=0.0, deterministic=True, device=cpu, dfl=1.5, dnn=False, dropout=0.0, dynamic=False, embed=None, epochs=50, erasing=0.4, exist_ok=False, fliplr=0.5, flipud=0.0, format=torchscript, fraction=1.0, freeze=None, half=False, hsv_h=0.015, hsv_s=0.7, hsv_v=0.4, imgsz=128, int8=False, iou=0.7, keras=False, kobj=1.0, line_width=None, lr0=0.01, lrf=0.01, mask_ratio=4, max_det=300, mixup=0.0, mode=train, model=yolo12n.pt, momentum=0.937, mosaic=1.0, multi_scale=False, name=new_train3, nbs=64, nms=False, opset=None, optimize=False, optimizer=auto, overlap_mask=True, patience=100, perspective=0.0, plots=True, pose=12.0, pretrained=True, prof

[34m[1mtrain: [0mScanning /Users/ddcosta/Desktop/IbPRIA2015---Data-Efficient-Strategies-for-Object-Detection/dataset/run/train... 120 images, 0 backgrounds, 0 corrupt: 100%|██████████| 120/120 [00:00<00:00, 4029.66it/s]

[34m[1mtrain: [0mNew cache created: /Users/ddcosta/Desktop/IbPRIA2015---Data-Efficient-Strategies-for-Object-Detection/dataset/run/train.cache
[34m[1mval: [0mFast image access ✅ (ping: 0.0±0.0 ms, read: 2017.2±333.9 MB/s, size: 879.3 KB)



[34m[1mval: [0mScanning /Users/ddcosta/Desktop/IbPRIA2015---Data-Efficient-Strategies-for-Object-Detection/dataset/run/val... 40 images, 0 backgrounds, 0 corrupt: 100%|██████████| 40/40 [00:00<00:00, 4358.62it/s]

[34m[1mval: [0mNew cache created: /Users/ddcosta/Desktop/IbPRIA2015---Data-Efficient-Strategies-for-Object-Detection/dataset/run/val.cache





Plotting labels to train/new_train3/labels.jpg... 
[34m[1moptimizer:[0m 'optimizer=auto' found, ignoring 'lr0=0.01' and 'momentum=0.937' and determining best 'optimizer', 'lr0' and 'momentum' automatically... 
[34m[1moptimizer:[0m AdamW(lr=0.002, momentum=0.9) with parameter groups 113 weight(decay=0.0), 120 weight(decay=0.0005), 119 bias(decay=0.0)
Image sizes 128 train, 128 val
Using 0 dataloader workers
Logging results to [1mtrain/new_train3[0m
Starting training for 50 epochs...

      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


       1/50         0G    0.03777     0.6068   0.007247          2        128: 100%|██████████| 60/60 [00:13<00:00,  4.30it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 10/10 [00:02<00:00,  3.40it/s]

                   all         40       3061          0          0          0          0






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


       2/50         0G    0.04973     0.5703   0.009716         35        128: 100%|██████████| 60/60 [00:14<00:00,  4.17it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 10/10 [00:01<00:00,  7.07it/s]

                   all         40       3061          0          0          0          0






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


       3/50         0G     0.1079     0.6052    0.02101          3        128: 100%|██████████| 60/60 [00:13<00:00,  4.47it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 10/10 [00:01<00:00,  7.03it/s]

                   all         40       3061          0          0          0          0






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


       4/50         0G     0.2193     0.6386     0.0354          9        128:  70%|███████   | 42/60 [00:09<00:04,  4.35it/s]


KeyboardInterrupt: 