In [None]:
#-- Install ultralytics for YOLO --------------------------------------------------------------------------------
!pip install ultralytics

from IPython import display
display.clear_output()

import ultralytics
ultralytics.checks()

In [None]:
#-- Install GroundingDINO for Zero-Shot ------------------------------------------------------------------------
%cd /kaggle/working/  

!git clone https://github.com/IDEA-Research/GroundingDINO.git

%cd GroundingDINO/
!pip install -e .

!mkdir weights
%cd weights
!wget -q https://github.com/IDEA-Research/GroundingDINO/releases/download/v0.1.0-alpha/groundingdino_swint_ogc.pth

%cd /kaggle/working/GroundingDINO    

#-- clear output --
from IPython import display
display.clear_output()  

!python -c "import groundingdino" && echo "Module installed successfully" || echo "Module installation failed"

In [None]:
#-- Import -----------------------------------------------------------------------------------------------------
%cd /kaggle/working/GroundingDINO
from groundingdino.util.inference import load_model as dn_load_model
from groundingdino.util.inference import load_image as dn_load_image
from groundingdino.util.inference import predict as dn_predict
from groundingdino.util.inference import annotate as dn_annotate
%cd /kaggle/working


from ultralytics import YOLO
import yaml

import torch

import numpy as np
import pandas as pd

import random

import cv2
from IPython import display
import matplotlib.pyplot as plt
import PIL
import cv2
import matplotlib.pyplot as plt

import shutil
import os

In [None]:
#-- Download  Pothole Dataset From Roboflow --------------------------------------------------------------------
!curl -L "https://public.roboflow.com/ds/xmNJAw8Mjg?key=YOUR_API_KEY" > roboflow.zip; unzip -o roboflow.zip; rm roboflow.zip
display.clear_output()
print('DS successfully downloaded :)')

In [None]:
#-- Initialize -------------------------------------------------------------------------------------------------
#-- root folder used as prefix for every path below --
output_path = '/kaggle/working/'

#-- GroundingDINO config and checkpoint --
dino_config_file = f'{output_path}GroundingDINO/groundingdino/config/GroundingDINO_SwinT_OGC.py'
dino_weights_file = f'{output_path}GroundingDINO/weights/groundingdino_swint_ogc.pth'

#-- ground-truth dataset: image folders --
gt_train_dir = f'{output_path}train/images/'
gt_val_dir = f'{output_path}valid/images/'
gt_test_dir = f'{output_path}test/images/'

#-- ground-truth dataset: label folders --
gt_train_lbl_dir = f'{output_path}train/labels/'
gt_val_lbl_dir = f'{output_path}valid/labels/'
gt_test_lbl_dir = f'{output_path}test/labels/'

#-- DINO-annotated dataset: image and label folders --
dino_train_dir = f'{output_path}dino_results/train/images/'
dino_val_dir = f'{output_path}dino_results/valid/images/'
dino_train_lbl_dir = f'{output_path}dino_results/train/labels/'
dino_val_lbl_dir = f'{output_path}dino_results/valid/labels/'

#-- dataset config files --
gt_data_config_file = f'{output_path}data.yaml'
dino_data_config_file = f'{output_path}dino_data.yaml'
test_data_config_file = f'{output_path}test_data.yaml'

#-- training / evaluation settings --
NUM_EPOCHS = 100
NUMBER_OF_FREEZED_LAYERS = 7
CONF_THRESHOLDS = [0.25, 0.5, 0.75, 0.9]
IOU_THRESHOLDS = [0.5, 0.6, 0.7, 0.8, 0.9]

#-- use the GPU when one is available, otherwise the CPU --
if torch.cuda.is_available():
    DEVICE = torch.device('cuda')
else:
    DEVICE = torch.device('cpu')
print('device:' , DEVICE)



In [None]:
#-- Run DINO and predict Bounding Boxes  -----------------------------------------------------------------------
def run_dino(model_dino, image, text_prompt='pothole', box_threshold=0.4, text_threshold=0.1, device=None):
    """Run GroundingDINO on a single image and return its raw predictions.

    Args:
        model_dino: model forwarded to `dn_predict` as `model`.
        image: image forwarded to `dn_predict` as `image`.
        text_prompt: caption describing the object to detect.
        box_threshold: forwarded to `dn_predict` as `box_threshold`.
        text_threshold: forwarded to `dn_predict` as `text_threshold`.
        device: device forwarded to `dn_predict`. When None, 'cuda' is used if
            available, otherwise 'cpu'.

    Returns:
        The `(boxes, logits, phrases)` triple returned by `dn_predict`.
    """
    if device is None:
        # GroundingDINO's predict() runs on 'cuda' unless told otherwise, so pick
        # the device explicitly to keep CPU-only sessions working.
        device = 'cuda' if torch.cuda.is_available() else 'cpu'

    boxes, logits, phrases = dn_predict(
        model = model_dino,
        image = image,
        caption = text_prompt,
        box_threshold = box_threshold,
        text_threshold = text_threshold,
        device = device
    )

    return boxes, logits, phrases

In [None]:
#-- Plot Bounding Boxes for all Object Detected by DINO --------------------------------------------------------
def plot_boxes(image, boxes, logits, phrases):
    """Draw the given detections on `image` with `dn_annotate` and show the result.

    Args:
        image: source image forwarded to `dn_annotate` as `image_source`.
        boxes, logits, phrases: detection outputs forwarded unchanged to `dn_annotate`.
    """
    drawn = dn_annotate(image_source=image, boxes=boxes, logits=logits, phrases=phrases)

    #-- swap the channel order (BGR -> RGB) before handing the frame to matplotlib --
    rgb_frame = cv2.cvtColor(drawn, cv2.COLOR_BGR2RGB)

    plt.imshow(rgb_frame, interpolation='bicubic')
    #-- hide the axis ticks --
    plt.xticks([])
    plt.yticks([])
    plt.show()

In [None]:
#-- Annotate All Images with predicted bounding boxes ----------------------------------------------------------
def annotate(dino, image_files, result_imgs_path, result_lbls_path, sampled_to_show):
    """Resize every image to 640x640, run DINO on it and write one label file per image.

    For each input file a resized '.jpg' copy is saved to `result_imgs_path` and a
    '.txt' file with the same base name is written to `result_lbls_path`. Each line
    of the label file is '0' (the single class id) followed by the box values
    returned by `run_dino` (presumably normalized cx cy w h, as YOLO expects --
    TODO confirm against GroundingDINO's output format).

    Args:
        dino: DINO model forwarded to `run_dino`.
        image_files: paths of the images to annotate.
        result_imgs_path: folder receiving the resized images (created if missing).
        result_lbls_path: folder receiving the label files (created if missing).
        sampled_to_show: paths (taken from `image_files`) whose detections are plotted.
    """
    # `import PIL` alone does not guarantee that the PIL.Image submodule is loaded.
    from PIL import Image

    os.makedirs(result_imgs_path, exist_ok=True)
    os.makedirs(result_lbls_path, exist_ok=True)

    to_show = set(sampled_to_show)

    for i, img_file in enumerate(image_files, start=1):

        #-- log --
        if i==1 or i%100==0:
            print(f'annotating {i}th image -------------')

        #-- strip only the final extension, so a '.jpg' inside the name cannot truncate it --
        title = os.path.splitext(os.path.basename(img_file))[0]
        image_path = os.path.join(result_imgs_path, title + '.jpg')
        label_path = os.path.join(result_lbls_path, title + '.txt')

        #-- the context manager closes the source file once the resized copy is saved --
        with Image.open(img_file) as img:
            img.resize((640, 640)).save(image_path)

        image_source, image = dn_load_image(image_path)
        boxes, logits, phrases = run_dino(dino, image)

        label = '\n'.join('0 ' + ' '.join(map(str, b)) for b in boxes.tolist())
        with open(label_path, 'w') as f:
            f.write(label)

        if img_file in to_show:
            plot_boxes(image_source, boxes, logits, phrases)

In [None]:
#-- Create a dino model ----------------------------------------------------------------------------------------
#-- build the model from its config file and checkpoint on the selected device --
model_dino = dn_load_model(
    dino_config_file,
    dino_weights_file,
    device=DEVICE,
)

In [None]:
#-- Annotate All Images in train and validation sets -----------------------------------------------------------

def _collect_jpg_files(images_dir):
    """Return the paths of all '.jpg' files found under `images_dir` (searched recursively)."""
    return [os.path.join(root, file)
            for root, _dirs, files in os.walk(images_dir)
            for file in files
            if file.endswith('.jpg')]


#-- number of annotated images per split that are plotted for visual inspection --
NUM_IMAGES_TO_SHOW = 10

train_image_files = _collect_jpg_files(gt_train_dir)
val_image_files = _collect_jpg_files(gt_val_dir)

#-- random.sample raises ValueError when asked for more items than exist, so cap the sample size --
sampled_train_images_to_show = random.sample(train_image_files,
                                             min(NUM_IMAGES_TO_SHOW, len(train_image_files)))
sampled_val_images_to_show = random.sample(val_image_files,
                                           min(NUM_IMAGES_TO_SHOW, len(val_image_files)))

total_train_images = len(train_image_files)
print('total_train_images:' , total_train_images)

total_val_images = len(val_image_files)
print('total_val_images:' , total_val_images)

#-- log --
print('Annotating Train Images --------------------------------------')
annotate(dino = model_dino ,
         image_files = train_image_files,
         result_imgs_path = dino_train_dir,
         result_lbls_path = dino_train_lbl_dir,
         sampled_to_show = sampled_train_images_to_show)

#-- log --
print('Annotating Val Images --------------------------------------')
annotate(dino = model_dino ,
         image_files = val_image_files,
         result_imgs_path = dino_val_dir,
         result_lbls_path = dino_val_lbl_dir,
         sampled_to_show = sampled_val_images_to_show)

In [None]:
#-- Create Config File to Train On DINO Results -----------------------------------------------------------
#-- single class 'pothole'; train/val point at the DINO-annotated images, test at 'test/images' --
config = dict(
    names=['pothole'],
    nc=1,
    train='dino_results/train/images',
    val='dino_results/valid/images/',
    test='test/images',
)

with open(dino_data_config_file, 'w') as yaml_out:
    yaml.dump(config, yaml_out)

In [None]:
#-- Set test dir as val for evaluating models on test ds --------------------------------------------------
#-- same single-class setup, but 'val' points at the test images --
config = dict(
    names=['pothole'],
    nc=1,
    train='dino_results/train/images',
    val='test/images',
)

with open(test_data_config_file, 'w') as yaml_out:
    yaml.dump(config, yaml_out)

In [None]:
#-- Train, Evaluate, and Save Results -----------------------------------------------------
def _box_metrics_record(metrics, split, conf=None, iou=None):
    """Pack the box mAP values of one `val()` result into a results-table record."""
    return {'val_or_test': split,
            'conf': conf,
            'iou': iou,
            'map_50_95': metrics.box.map,
            'map_50': metrics.box.map50,
            'map_75': metrics.box.map75,
            'maps': metrics.box.maps} #--a list contains map50-95 of each category --


def Run(model ,number_of_freezed_layers, results_file, is_gt= True):
    """Train `model`, evaluate the best checkpoint and save metrics and predictions.

    Steps:
        1. Train on the ground-truth config (`is_gt=True`) or the DINO config.
        2. Reload `train/weights/best.pt` and validate it on the same config.
        3. Validate it on the test config for every pair of CONF_THRESHOLDS x
           IOU_THRESHOLDS and write all metrics to `output_path + results_file`.
        4. Run prediction on the test images for every threshold pair, saving
           the annotated results.

    All run artefacts go below `output_path/<project_name>`, where the project name
    is 'run_on_gt' or 'run_on_dino'.

    Args:
        model: YOLO model to train.
        number_of_freezed_layers: value passed to `model.train` as `freeze`.
        results_file: file name of the metrics CSV (relative to `output_path`).
        is_gt: True to train on ground-truth labels, False to train on DINO labels.
    """
    #--  create new folder for each run --
    if is_gt:
        project_name = 'run_on_gt'
        data_cfg = gt_data_config_file
    else:
        project_name = 'run_on_dino'
        data_cfg = dino_data_config_file

    #-- Anchor the run folder on output_path so that the location the trainer writes to and
    #-- the best.pt path read back below agree whatever the current working directory is --
    project_dir = os.path.join(output_path, project_name)

    #-- Per-evaluation frames; concatenated once at the end instead of growing a DataFrame in a loop --
    cols_names = ['val_or_test','conf', 'iou', 'map_50_95', 'map_50', 'map_75', 'maps']
    result_frames = []

    #-- Train --
    print('Training Model ---------------------------------------------------------')
    model.train(data = data_cfg,
              epochs = NUM_EPOCHS,
              freeze = number_of_freezed_layers,
              device = DEVICE,
              val = True,
              save = True,
              exist_ok = True,
              plots=True,
              project = project_dir,
              name = 'train')

    #-- load best model --
    best_model_file = os.path.join(project_dir, 'train', 'weights', 'best.pt')
    best_model = YOLO(best_model_file)

    #-- Evaluate Model on Val Data --
    print('Evaluating Model On Val Data --------------------------------------------')
    metrics = best_model.val(data = data_cfg,
                             device = DEVICE,
                             project = project_dir,
                             name = 'validation')

    results = _box_metrics_record(metrics, 'val')
    #-- 'maps' is array-like, so the scalar fields are broadcast to one row per category --
    result_frames.append(pd.DataFrame(results))

    print(f"map_50_95:{results['map_50_95']}\nmap_50:{results['map_50']}\nmap_75:{results['map_75']}\nmaps:{results['maps']}")

    #-- Evaluate Model Test Data --
    print('Evaluating Model On Test Data --------------------------------------------')
    for conf in CONF_THRESHOLDS:
        for iou in IOU_THRESHOLDS:
            #-- log --
            print(f'\n\tConf={conf} - IoU={iou} ...................')

            name = 'test/test_' + str(conf) + '_' + str(iou)
            metrics_1 = best_model.val(data = test_data_config_file,
                                       conf = conf,
                                       iou = iou,
                                       device = DEVICE,
                                       project = project_dir,
                                       name = name)

            results = _box_metrics_record(metrics_1, 'test', conf=conf, iou=iou)
            result_frames.append(pd.DataFrame(results))

            print(f"\t\tmap_50_95:{results['map_50_95']}\nmap_50:{results['map_50']}\nmap_75:{results['map_75']}\nmaps:{results['maps']}")

    #-- Save DF Results --
    df_results = pd.concat(result_frames, ignore_index=True)[cols_names]
    df_results.to_csv(output_path + results_file, index=False)

    #-- Run Best Model on Test Images and Save Results --
    print('Running Model on Test Images and Saving Results ----------------------------------')
    for conf in CONF_THRESHOLDS:
        for iou in IOU_THRESHOLDS:
            #-- log --
            print(f'\n\tConf={conf} - IoU={iou} ......................')

            name = 'predictions_' + str(conf) + '_' + str(iou)

            best_model.predict(source = gt_test_dir,
                               conf = conf,
                               iou = iou,
                               show = False,
                               save= True,
                               project= os.path.join(project_dir, 'prediction_results'),
                               name=name)

In [None]:
#-- Create and Run YOLO on Ground Truth Data --------------------------------------------------------------
#-- load the yolov8m.pt weights, then train/evaluate on the ground-truth labels --
model = YOLO("yolov8m.pt")
results_file = 'gt_results.csv'

Run(model,
    NUMBER_OF_FREEZED_LAYERS,
    results_file,
    is_gt=True)

In [None]:
#-- Create and Run YOLO on Dino Results Data --------------------------------------------------------------
#-- load the yolov8m.pt weights again, then train/evaluate on the DINO-generated labels --
model = YOLO("yolov8m.pt")
results_file = 'dino_results.csv'

Run(model,
    NUMBER_OF_FREEZED_LAYERS,
    results_file,
    is_gt=False)