# Tests on YoloV8 for fAIr
Using model V2 from Omdena results.
Mocking `test_yolo_v2.py`

## Data import and variables definition

In [29]:
# Standard library imports
import os
import time
import warnings
import ultralytics
import yaml
import csv
import pandas as pd

# Expose the current working directory as RAMP_HOME.
# (The former `os.environ.update(os.environ)` copied the environment onto
# itself and had no effect, so it was dropped.)
# NOTE(review): presumably read by the RAMP / hot_fair_utilities code imported
# in the next cell — keep this assignment before those imports.
os.environ["RAMP_HOME"] = os.getcwd()

In [12]:
# Reader imports
from hot_fair_utilities import polygonize, predict, preprocess
from hot_fair_utilities.preprocessing.yolo_v8_v2.yolo_format_anna import yolo_format
from hot_fair_utilities.training.yolo_v8_v2 import train as train_yolo

warnings.simplefilter(action="ignore", category=FutureWarning)


In [103]:
# Define basic variables
# base_path = f"{os.getcwd()}/ramp-data/sample_2"
base_path = '/Users/azanchetta/fAIr_metric'
data_path = f'{base_path}/training_results'
preprocessed_ramp_data_path = f'{base_path}/metric_data'
# Root of the pre-processed datasets received from Kshitij; the conversion
# cell reads `<k_data_path>/dataset_<id>/preprocessed` from it.
k_data_path = f'{base_path}/anna-dataset'


def _list_subdirs(parent):
    """Return the names of the immediate sub-directories of *parent*."""
    return [
        entry
        for entry in os.listdir(parent)
        if os.path.isdir(os.path.join(parent, entry))
    ]


# Folder names found under the RAMP-preprocessed data directory.
cities_list = _list_subdirs(preprocessed_ramp_data_path)
# Folder names found under Kshitij's dataset directory.
# These used to be chain-assigned to `cities_list` as well, which silently
# discarded the list built on the line above; the two are now kept separate.
datasets_list = _list_subdirs(k_data_path)

In [None]:
# Show how many folders were collected, then their names one per line.
print(len(cities_list))
if cities_list:
    print("\n".join(str(folder_name) for folder_name in cities_list))

In [14]:

class print_time:
    """Context manager that prints how long the wrapped block took.

    ``with print_time("label"): ...`` prints ``"label took <s> seconds"`` when
    the block finishes, with the elapsed time rounded to two decimals.
    Exceptions raised inside the block are not suppressed; the timing line is
    printed before they propagate.
    """

    def __init__(self, name):
        self.name = name
        self.start = None  # filled in by __enter__

    def __enter__(self):
        self.start = time.perf_counter()
        return self

    def __exit__(self, exc_type, exc_value, exc_tb):
        # Parameter names no longer shadow the builtin `type`.
        elapsed = time.perf_counter() - self.start
        print(f"{self.name} took {round(elapsed, 2)} seconds")
        return False  # never swallow an exception from the timed block


# Wall-clock reference for the "Total Process Completed" line printed at the
# end of the prediction cell.
start_time = time.time()

---

ONLY RUN THE CELL BELOW ONCE

## Generate Yolo format input files
Note: need to re-run the preprocessing, can't use the preprocessed Ramp data

There is a problem with the data: the only data I have is the RAMP-preprocessed set, and I can't run the YOLO pre-processing on my own, so Kshitij sent me preprocessed data (he did that in the backend).

`anna-dataset` has this structure:
```
model name
    |
     - preprocessed
        |
         - binarymasks/
         - chips/
         - inputs/
         - labels/
     - yolo_v1 
        |
         - images/
         - labels/
         - yolo_dataset.yaml
```

I need to restructure this to be consistent with my folder structure... or could just decide to use it like it is :P

**IMPORTANT** I need to rerun the preprocessing anyway, because the train/val/test split must be the same as the one used for RAMP.

--- renaming the folder `yolo_v2_dataset` 

In [89]:
# Load the look-up table (LUT) that links models, training datasets and
# dataset codes.
lut_csvfile = f'{base_path}/cities_lut.csv'
lut = pd.read_csv(lut_csvfile)

In [90]:
lut.head()

Unnamed: 0,id,id_model,id_train,ds_size,urban_region,country,continent,id_dataset,urban_type,density,roof_type
0,1,51,364,399,Kakuma,Kenya,Africa,58,refugee camp,sparse,metal
1,2,95,370,168,Denver,USA,America North,135,peri-urban,grid,shingles
2,3,97,372,420,Montevideo,Uruguay,America South,137,urban,grid,cement
3,4,98,373,399,Montevideo dense,Uruguay,America South,138,urban,dense,cement
4,5,102,391,231,Kutupalong,Bangladesh,Asia,144,refugee camp,dense,mixed


In [None]:
print(lut.dtypes)

In [None]:
print(lut)

In [91]:
# Cast every column to str: the loops further down compare `id_dataset`
# against the id parsed (as a string) from each dataset folder name.
lut = lut.astype(str)

In [None]:
#  '/Users/azanchetta/fAIr_metric/training_results/model108_td385/train/fair_split_train.csv'"
#  /Users/azanchetta/fAIr_metric/training_results/model162_td519/train/fair_split_train.csv'

In [None]:
datasets_list

In [None]:
# Sanity check: resolve each dataset folder to its model / training-dataset ids.
for idx, dataset in enumerate(datasets_list):
    print(f'original name to be split {dataset}')
    dataset_name = dataset.split("_")[1]
    print(f'{idx}) dataset {dataset} and dataset_name {dataset_name}')
    # Filter the LUT once and read both ids from the same row.
    lut_rows = lut.loc[lut['id_dataset'] == dataset_name, ['id_model', 'id_train']]
    if lut_rows.empty:
        # Report the gap instead of dying on a bare IndexError from `.values[0]`,
        # so the remaining datasets are still checked.
        print(f'no LUT entry for dataset {dataset_name}, skipping')
        continue
    model_name, td_name = lut_rows.iloc[0].tolist()
    print(f'model {model_name}, td {td_name}, dataset {dataset_name}')

In [None]:
# cities_list = ['modelfake', 'model149_td489'] # sample of names, for tests
# datasets_list = ['dataset_205']

In [None]:
# Convert every folder in `datasets_list` with `yolo_format`.
# Inputs are the already-preprocessed folders under `k_data_path`;
# hot_fair_utilities.preprocess is not run in this cell.
# NOTE(review): `csv_path` points at the RAMP training folder of the matching
# model (the one holding fair_split_train.csv in the paths noted earlier) —
# presumably so the train/val split mirrors RAMP's; confirm in yolo_format.
yolo_data_dir = f'{base_path}/yolo_v2_preprocessed'  # common output root
for dataset in datasets_list:
    dataset_name = dataset.split("_")[1]

    # Filter the LUT once and read both ids from the same row.
    lut_rows = lut.loc[lut['id_dataset'] == dataset_name, ['id_model', 'id_train']]
    if lut_rows.empty:
        # Same exception type as the former `.values[0]`, but with a message
        # that says which folder is missing from the LUT.
        raise IndexError(
            f'dataset id {dataset_name!r} (folder {dataset!r}) has no row in the LUT'
        )
    model_name, td_name = lut_rows.iloc[0].tolist()
    print(f'_________\nDataset {dataset}, model {model_name}, training dataset {td_name}\n')

    # Folder name used for this model in the RAMP training results.
    city = f'model{model_name}_td{td_name}'
    csv_file_basepath = f'{data_path}/{city}/train'
    print(f'\n---\nNow working on {city}\n---')

    dataset_foldername = f'dataset_{dataset_name}'
    city_data_dir = f'{k_data_path}/{dataset_foldername}/preprocessed'  # name as it appears in Kshitij's folder

    print(f'city is {city}')
    with print_time("yolo conversion"):
        print('\n___ Starting yolo files conversion\n')
        yolo_format(
            input_path=city_data_dir,
            csv_path=csv_file_basepath,
            output_path=yolo_data_dir,
            city_name=city,
        )


---

## Training

In [111]:
# Restrict training to a hand-picked sample of folder names (for tests).
# Other samples tried: ['modelfake', 'model149_td489'] and ['modelfake'].
cities_list = ['model149_td489']
# To train on every folder instead (the directory variable defined in the
# variables cell is `preprocessed_ramp_data_path`):
# cities_list = [item for item in os.listdir(preprocessed_ramp_data_path) if os.path.isdir(os.path.join(preprocessed_ramp_data_path, item))]

In [113]:
# Training: one `train_yolo` run per entry of `cities_list`.
yolo_output_path = f'{base_path}/yolo_v2_training'
yolo_data_dir = f'{base_path}/yolo_v2_preprocessed'

# Template dataset YAML. Only its `path` entry changes between cities, so it is
# read once here and a per-city copy is written next to each city's data.
basic_yaml_file_name_with_path = '/Users/azanchetta/fAIr-utilities/ramp-data/sample_2/yolo_v2/yolo_dataset.yaml'
with open(basic_yaml_file_name_with_path, 'r', encoding='utf-8') as file:
    yaml_template = yaml.safe_load(file)

with print_time("yolo training"):
    for city in cities_list:
        city_yolodata_path = f'{yolo_data_dir}/{city}'
        city_output_path = f'{yolo_output_path}/{city}'
        yaml_file_path_for_city = f'{city_yolodata_path}/yolo_dataset.yaml'

        # Per-city dataset YAML: the template with `path` pointing at this city.
        yamlfile = {**yaml_template, 'path': city_yolodata_path}
        with open(yaml_file_path_for_city, 'w', encoding='utf-8') as file:
            yaml.dump(yamlfile, file)

        # Run the training.
        # NOTE: `output_model_path` is overwritten on every iteration, so the
        # prediction cell uses the checkpoint of the last city trained.
        # NOTE(review): the captured trainer log reports batch=16 although
        # batch_size=8 is passed here — check how train_yolo forwards it.
        output_model_path, output_model_iou_accuracy = train_yolo(
            data=city_yolodata_path,
            weights=f"{os.getcwd()}/yolov8s_v2-seg.pt",
            # gpu="cpu",
            epochs=20,
            batch_size=8,
            pc=2.0,
            output_path=city_output_path,
            dataset_yaml_path=yaml_file_path_for_city,
        )
        print(output_model_iou_accuracy)

Backbone: s, Dataset: yolo_v2_preprocessed, Epochs: 20
New https://pypi.org/project/ultralytics/8.3.48 available 😃 Update with 'pip install -U ultralytics'
Ultralytics 8.3.26 🚀 Python-3.12.4 torch-2.2.2 CPU (Intel Core(TM) i9-9980HK 2.40GHz)
[34m[1mengine/trainer: [0mtask=segment, mode=train, model=/Users/azanchetta/fAIr-utilities/yolov8s_v2-seg.pt, data=/Users/azanchetta/fAIr_metric/yolo_v2_preprocessed/model149_td489/yolo_dataset.yaml, epochs=20, time=None, patience=100, batch=16, imgsz=256, save=True, save_period=-1, cache=True, device=cpu, workers=8, project=/Users/azanchetta/fAIr_metric/yolo_v2_training/model149_td489/checkpoints, name=yolov8s-seg_yolo_v2_preprocessed_ep20_bs8_pc2.0, exist_ok=False, pretrained=True, optimizer=auto, verbose=True, seed=0, deterministic=False, single_cls=False, rect=False, cos_lr=False, close_mosaic=10, resume=False, amp=True, fraction=1.0, profile=False, freeze=None, multi_scale=False, overlap_mask=False, mask_ratio=4, dropout=0.0, val=True, spli

[34m[1mtrain: [0mScanning /Users/azanchetta/fAIr_metric/yolo_v2_preprocessed/model149_td489/labels/train.cache... 103 images, 2 backgrounds, 0 corrupt: 100%|██████████| 103/103 [00:00<?, ?it/s]
[34m[1mtrain: [0mCaching images (0.0GB RAM): 100%|██████████| 103/103 [00:00<00:00, 1401.19it/s]
[34m[1mval: [0mScanning /Users/azanchetta/fAIr_metric/yolo_v2_preprocessed/model149_td489/labels/val.cache... 22 images, 5 backgrounds, 0 corrupt: 100%|██████████| 22/22 [00:00<?, ?it/s]
[34m[1mval: [0mCaching images (0.0GB RAM): 100%|██████████| 22/22 [00:00<00:00, 1611.00it/s]


Plotting labels to /Users/azanchetta/fAIr_metric/yolo_v2_training/model149_td489/checkpoints/yolov8s-seg_yolo_v2_preprocessed_ep20_bs8_pc2.0/labels.jpg... 
[34m[1moptimizer:[0m 'optimizer=auto' found, ignoring 'lr0=0.00854' and 'momentum=0.95275' and determining best 'optimizer', 'lr0' and 'momentum' automatically... 
[34m[1moptimizer:[0m AdamW(lr=0.002, momentum=0.9) with parameter groups 66 weight(decay=0.0), 77 weight(decay=0.00058), 76 bias(decay=0.0)
[34m[1mTensorBoard: [0mmodel graph visualization added ✅
Image sizes 256 train, 256 val
Using 0 dataloader workers
Logging results to [1m/Users/azanchetta/fAIr_metric/yolo_v2_training/model149_td489/checkpoints/yolov8s-seg_yolo_v2_preprocessed_ep20_bs8_pc2.0[0m
Starting training for 20 epochs...

      Epoch    GPU_mem   box_loss   seg_loss   cls_loss   dfl_loss  Instances       Size


       1/20         0G      1.079      2.027      3.311      1.141         31        256: 100%|██████████| 7/7 [00:15<00:00,  2.26s/it]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95)     Mask(P          R      mAP50  mAP50-95): 100%|██████████| 1/1 [00:01<00:00,  1.55s/it]

                   all         22        110      0.745      0.718      0.773      0.634      0.722      0.745       0.78      0.595






      Epoch    GPU_mem   box_loss   seg_loss   cls_loss   dfl_loss  Instances       Size


       2/20         0G     0.9099      1.603      2.364      1.042         17        256: 100%|██████████| 7/7 [00:15<00:00,  2.15s/it]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95)     Mask(P          R      mAP50  mAP50-95): 100%|██████████| 1/1 [00:01<00:00,  1.46s/it]

                   all         22        110      0.763      0.587       0.72       0.59      0.648      0.709      0.732      0.553






      Epoch    GPU_mem   box_loss   seg_loss   cls_loss   dfl_loss  Instances       Size


       3/20         0G     0.8467      1.418        1.9       1.02         74        256: 100%|██████████| 7/7 [00:18<00:00,  2.64s/it]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95)     Mask(P          R      mAP50  mAP50-95): 100%|██████████| 1/1 [00:02<00:00,  2.84s/it]

                   all         22        110      0.782      0.536      0.694      0.551      0.673      0.609      0.706      0.522






      Epoch    GPU_mem   box_loss   seg_loss   cls_loss   dfl_loss  Instances       Size


       4/20         0G     0.7982      1.292      1.719     0.9857         50        256: 100%|██████████| 7/7 [00:14<00:00,  2.00s/it]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95)     Mask(P          R      mAP50  mAP50-95): 100%|██████████| 1/1 [00:01<00:00,  1.34s/it]

                   all         22        110      0.769      0.591      0.737      0.579      0.789      0.578      0.741      0.556






      Epoch    GPU_mem   box_loss   seg_loss   cls_loss   dfl_loss  Instances       Size


       5/20         0G     0.7896      1.285      1.736     0.9957         21        256: 100%|██████████| 7/7 [00:15<00:00,  2.15s/it]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95)     Mask(P          R      mAP50  mAP50-95): 100%|██████████| 1/1 [00:01<00:00,  1.40s/it]

                   all         22        110      0.721      0.755      0.796      0.638      0.721      0.755      0.807      0.608






      Epoch    GPU_mem   box_loss   seg_loss   cls_loss   dfl_loss  Instances       Size


       6/20         0G     0.7635      1.236      1.461     0.9566         52        256: 100%|██████████| 7/7 [00:14<00:00,  2.03s/it]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95)     Mask(P          R      mAP50  mAP50-95): 100%|██████████| 1/1 [00:01<00:00,  1.40s/it]

                   all         22        110      0.739      0.782        0.8      0.653      0.739      0.782      0.804       0.62






      Epoch    GPU_mem   box_loss   seg_loss   cls_loss   dfl_loss  Instances       Size


       7/20         0G     0.7508      1.136       1.25     0.9462         40        256: 100%|██████████| 7/7 [00:16<00:00,  2.31s/it]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95)     Mask(P          R      mAP50  mAP50-95): 100%|██████████| 1/1 [00:01<00:00,  1.85s/it]

                   all         22        110      0.755      0.814      0.823      0.656      0.755      0.814      0.823      0.636






      Epoch    GPU_mem   box_loss   seg_loss   cls_loss   dfl_loss  Instances       Size


       8/20         0G     0.7524      1.211      1.222     0.9531         89        256: 100%|██████████| 7/7 [00:14<00:00,  2.01s/it]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95)     Mask(P          R      mAP50  mAP50-95): 100%|██████████| 1/1 [00:01<00:00,  1.41s/it]

                   all         22        110      0.784       0.76      0.835       0.66      0.753      0.803      0.836      0.646






      Epoch    GPU_mem   box_loss   seg_loss   cls_loss   dfl_loss  Instances       Size


       9/20         0G     0.7141      1.106      1.154     0.9442         78        256: 100%|██████████| 7/7 [00:14<00:00,  2.04s/it]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95)     Mask(P          R      mAP50  mAP50-95): 100%|██████████| 1/1 [00:01<00:00,  1.58s/it]

                   all         22        110      0.779      0.773      0.837      0.671      0.775      0.785      0.849      0.655






      Epoch    GPU_mem   box_loss   seg_loss   cls_loss   dfl_loss  Instances       Size


      10/20         0G     0.7243      1.103      1.108     0.9359         47        256: 100%|██████████| 7/7 [00:16<00:00,  2.42s/it]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95)     Mask(P          R      mAP50  mAP50-95): 100%|██████████| 1/1 [00:01<00:00,  1.67s/it]

                   all         22        110        0.8      0.773      0.829      0.669        0.8      0.773      0.841      0.632





Closing dataloader mosaic

      Epoch    GPU_mem   box_loss   seg_loss   cls_loss   dfl_loss  Instances       Size


      11/20         0G     0.6992       1.03      1.081     0.9394         33        256: 100%|██████████| 7/7 [00:20<00:00,  2.87s/it]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95)     Mask(P          R      mAP50  mAP50-95): 100%|██████████| 1/1 [00:01<00:00,  1.73s/it]

                   all         22        110      0.853      0.687      0.834      0.678      0.853      0.687      0.839      0.631






      Epoch    GPU_mem   box_loss   seg_loss   cls_loss   dfl_loss  Instances       Size


      12/20         0G     0.6775      1.034      1.047     0.9243         55        256: 100%|██████████| 7/7 [00:15<00:00,  2.21s/it]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95)     Mask(P          R      mAP50  mAP50-95): 100%|██████████| 1/1 [00:01<00:00,  1.49s/it]

                   all         22        110      0.832      0.721      0.841      0.686      0.832      0.721      0.845      0.638






      Epoch    GPU_mem   box_loss   seg_loss   cls_loss   dfl_loss  Instances       Size


      13/20         0G     0.6818      1.021      1.011     0.9308         51        256: 100%|██████████| 7/7 [00:14<00:00,  2.05s/it]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95)     Mask(P          R      mAP50  mAP50-95): 100%|██████████| 1/1 [00:01<00:00,  1.39s/it]

                   all         22        110      0.869      0.723      0.847      0.684      0.869      0.723      0.851      0.636






      Epoch    GPU_mem   box_loss   seg_loss   cls_loss   dfl_loss  Instances       Size


      14/20         0G      0.683      1.063      1.033     0.9506         31        256: 100%|██████████| 7/7 [00:14<00:00,  2.06s/it]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95)     Mask(P          R      mAP50  mAP50-95): 100%|██████████| 1/1 [00:01<00:00,  1.42s/it]

                   all         22        110      0.872      0.745      0.849      0.692      0.872      0.745      0.855      0.647






      Epoch    GPU_mem   box_loss   seg_loss   cls_loss   dfl_loss  Instances       Size


      15/20         0G     0.6588      1.023      1.007     0.9158         95        256: 100%|██████████| 7/7 [00:14<00:00,  2.05s/it]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95)     Mask(P          R      mAP50  mAP50-95): 100%|██████████| 1/1 [00:01<00:00,  1.41s/it]

                   all         22        110      0.791      0.809      0.852      0.684      0.791      0.809      0.858      0.648






      Epoch    GPU_mem   box_loss   seg_loss   cls_loss   dfl_loss  Instances       Size


      16/20         0G     0.6525      1.001      1.026     0.9398         11        256: 100%|██████████| 7/7 [00:16<00:00,  2.30s/it]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95)     Mask(P          R      mAP50  mAP50-95): 100%|██████████| 1/1 [00:01<00:00,  1.62s/it]

                   all         22        110      0.815      0.773      0.843      0.675      0.824      0.782      0.856      0.637






      Epoch    GPU_mem   box_loss   seg_loss   cls_loss   dfl_loss  Instances       Size


      17/20         0G     0.6631     0.9446      1.077     0.9093         23        256: 100%|██████████| 7/7 [00:14<00:00,  2.14s/it]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95)     Mask(P          R      mAP50  mAP50-95): 100%|██████████| 1/1 [00:01<00:00,  1.50s/it]

                   all         22        110      0.791      0.791      0.845      0.674        0.8        0.8      0.858      0.633






      Epoch    GPU_mem   box_loss   seg_loss   cls_loss   dfl_loss  Instances       Size


      18/20         0G     0.6487     0.9699     0.9644     0.9162         23        256: 100%|██████████| 7/7 [00:15<00:00,  2.19s/it]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95)     Mask(P          R      mAP50  mAP50-95): 100%|██████████| 1/1 [00:01<00:00,  1.49s/it]

                   all         22        110      0.823      0.762      0.846      0.677      0.833      0.771       0.86      0.634






      Epoch    GPU_mem   box_loss   seg_loss   cls_loss   dfl_loss  Instances       Size


      19/20         0G     0.6554     0.9879     0.9563     0.9198         27        256: 100%|██████████| 7/7 [00:15<00:00,  2.19s/it]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95)     Mask(P          R      mAP50  mAP50-95): 100%|██████████| 1/1 [00:01<00:00,  1.54s/it]

                   all         22        110       0.81      0.782      0.848      0.689       0.82      0.791      0.862      0.646






      Epoch    GPU_mem   box_loss   seg_loss   cls_loss   dfl_loss  Instances       Size


      20/20         0G     0.6411     0.9411     0.9172     0.9245         35        256: 100%|██████████| 7/7 [00:15<00:00,  2.20s/it]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95)     Mask(P          R      mAP50  mAP50-95): 100%|██████████| 1/1 [00:01<00:00,  1.59s/it]

                   all         22        110      0.826      0.777      0.848      0.692      0.836      0.785      0.862      0.653






20 epochs completed in 0.101 hours.
Optimizer stripped from /Users/azanchetta/fAIr_metric/yolo_v2_training/model149_td489/checkpoints/yolov8s-seg_yolo_v2_preprocessed_ep20_bs8_pc2.0/weights/last.pt, 23.8MB
Optimizer stripped from /Users/azanchetta/fAIr_metric/yolo_v2_training/model149_td489/checkpoints/yolov8s-seg_yolo_v2_preprocessed_ep20_bs8_pc2.0/weights/best.pt, 23.8MB

Validating /Users/azanchetta/fAIr_metric/yolo_v2_training/model149_td489/checkpoints/yolov8s-seg_yolo_v2_preprocessed_ep20_bs8_pc2.0/weights/best.pt...
Ultralytics 8.3.26 🚀 Python-3.12.4 torch-2.2.2 CPU (Intel Core(TM) i9-9980HK 2.40GHz)
YOLOv8s-seg summary (fused): 195 layers, 11,779,987 parameters, 0 gradients, 42.4 GFLOPs


                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95)     Mask(P          R      mAP50  mAP50-95): 100%|██████████| 1/1 [00:01<00:00,  1.29s/it]


                   all         22        110      0.826      0.776      0.848      0.692      0.836      0.786      0.862      0.653
Speed: 0.4ms preprocess, 52.3ms inference, 0.0ms loss, 0.5ms postprocess per image
Results saved to [1m/Users/azanchetta/fAIr_metric/yolo_v2_training/model149_td489/checkpoints/yolov8s-seg_yolo_v2_preprocessed_ep20_bs8_pc2.0[0m
Ultralytics 8.3.26 🚀 Python-3.12.4 torch-2.2.2 CPU (Intel Core(TM) i9-9980HK 2.40GHz)
YOLOv8s-seg summary (fused): 195 layers, 11,779,987 parameters, 0 gradients, 42.4 GFLOPs


[34m[1mval: [0mScanning /Users/azanchetta/fAIr_metric/yolo_v2_preprocessed/model149_td489/labels/val.cache... 22 images, 5 backgrounds, 0 corrupt: 100%|██████████| 22/22 [00:00<?, ?it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95)     Mask(P          R      mAP50  mAP50-95): 100%|██████████| 2/2 [00:02<00:00,  1.45s/it]


                   all         22        110      0.826      0.776      0.848      0.692      0.836      0.786      0.862      0.648
Speed: 0.8ms preprocess, 115.0ms inference, 0.0ms loss, 0.9ms postprocess per image
Results saved to [1m/Users/azanchetta/fAIr-utilities/runs/segment/val9[0m
Ultralytics 8.3.26 🚀 Python-3.12.4 torch-2.2.2 CPU (Intel Core(TM) i9-9980HK 2.40GHz)
YOLOv8s-seg summary (fused): 195 layers, 11,779,987 parameters, 0 gradients, 42.4 GFLOPs

[34m[1mPyTorch:[0m starting from '/Users/azanchetta/fAIr_metric/yolo_v2_training/model149_td489/checkpoints/yolov8s-seg_yolo_v2_preprocessed_ep20_bs8_pc2.0/weights/best.pt' with input shape (1, 3, 256, 256) BCHW and output shape(s) ((1, 37, 1344), (1, 32, 64, 64)) (22.7 MB)

[34m[1mONNX:[0m starting export with onnx 1.17.0 opset 17...
[34m[1mONNX:[0m slimming with onnxslim 0.1.42...
[34m[1mONNX:[0m export success ✅ 5.7s, saved as '/Users/azanchetta/fAIr_metric/yolo_v2_training/model149_td489/checkpoints/yolov8s-se

In [None]:
# Alternative training run: 2 epochs, batch size 16, all cities writing to the
# same output folder and using the shared sample YAML.
yolo_output_path = f'{base_path}/yolo_v2_predictions'
yolo_data_dir = f'{base_path}/yolo_v2'
with print_time("yolo training"):
    for city in cities_list:
        city_yolodata_path = f'{base_path}/yolo_v2_data/{city}'
        print(f'city path: {city_yolodata_path}')
        train_options = dict(
            data=city_yolodata_path,
            weights=f"{os.getcwd()}/yolov8s_v2-seg.pt",
            # gpu="cpu",
            epochs=2,
            batch_size=16,
            pc=2.0,
            output_path=yolo_output_path,
            # This YAML path is only a placeholder; per the original note, the
            # variables are overwritten in the code.
            dataset_yaml_path='/Users/azanchetta/fAIr-utilities/ramp-data/sample_2/yolo_v2/yolo_dataset.yaml',
        )
        output_model_path, output_model_iou_accuracy = train_yolo(**train_options)
        print(output_model_iou_accuracy)

## Prediction

In [None]:
# Prediction: run inference with the checkpoint returned by training, then
# polygonize the prediction folder into a GeoJSON file.
prediction_output = f"{base_path}/prediction/output"
with print_time("inference"):
    predict(
        checkpoint_path=output_model_path,
        input_path=f"{base_path}/prediction/input",
        prediction_path=prediction_output,
    )

geojson_output = f"{prediction_output}/prediction.geojson"
with print_time("polygonization"):
    polygonize(
        input_path=prediction_output,
        output_path=geojson_output,
        remove_inputs=False,
    )

# Total wall-clock time since `start_time` was recorded.
elapsed_total = time.time() - start_time
print(f"\n Total Process Completed in : {elapsed_total} sec")

In [None]:
# # Deal with the csv files with list of train/val/pred images used in RAMP
# #  testing function ... this goes inside `fined_files` in yolo_format_anna.py
# city="model51_td364"
# city_folder_name=f'{data_path}/{city}/train'
# csv_file_name = f'fair_split_train.csv'
# csv_file_path = f'{city_folder_name}/{csv_file_name}'
# print(f'CSV file is {csv_file_name}')
# print(f'CSV file is {csv_file_path}')
# csv_raw_list = []

# with open(csv_file_path, "r") as file_obj:
#     heading = next(file_obj)
#     reader_obj = csv.reader(file_obj, delimiter="\t")
#     for row in reader_obj:
#         csv_raw_list.append(row)
# print(f'this is the list from the csv file:\n{csv_raw_list}')
# csv_nested_list = []
# for ccc in csv_raw_list:
#     # print(ccc)
#     nested = ccc[0]
#     # print(f'nested {nested}')
#     name_csv = nested.split('/')[-1]
#     csv_nested_list.append(name_csv)

# # filenames_from_csv = [csvi.split("/",1)[-1] for csvi in csv_nested_list] # this is to get the last element of the string (i.e. the file name)
# # print(f'filenames hopefully {filenames_from_csv}')
# print(f'is this the names? {csv_nested_list}')