1- Install Detectron

In [None]:
!python -m pip install pyyaml==5.1
import sys, os, distutils.core
# Note: This is a faster way to install detectron2 in Colab, but it does not include all functionalities.
# See https://detectron2.readthedocs.io/tutorials/install.html for full installation instructions
!git clone 'https://github.com/facebookresearch/detectron2'
dist = distutils.core.run_setup("./detectron2/setup.py")
!python -m pip install {' '.join([f"'{x}'" for x in dist.install_requires])}
sys.path.insert(0, os.path.abspath('./detectron2'))

In [2]:
import torch, detectron2
!nvcc --version
TORCH_VERSION = ".".join(torch.__version__.split(".")[:2])
CUDA_VERSION = torch.__version__.split("+")[-1]
print("torch: ", TORCH_VERSION, "; cuda: ", CUDA_VERSION)
print("detectron2:", detectron2.__version__)

nvcc: NVIDIA (R) Cuda compiler driver
Copyright (c) 2005-2021 NVIDIA Corporation
Built on Sun_Feb_14_21:12:58_PST_2021
Cuda compilation tools, release 11.2, V11.2.152
Build cuda_11.2.r11.2/compiler.29618528_0
torch:  1.13 ; cuda:  cu116
detectron2: 0.6


2- Make sure detectron workds by **importing** needed libs

In [3]:
import detectron2
from detectron2.utils.logger import setup_logger
setup_logger()

# import some common libraries
import numpy as np
import cv2
import random
from google.colab.patches import cv2_imshow

# import some common detectron2 utilities
from detectron2 import model_zoo
from detectron2.engine import DefaultPredictor
from detectron2.config import get_cfg
from detectron2.utils.visualizer import Visualizer
from detectron2.data import MetadataCatalog
from pathlib import Path
from detectron2.data.datasets import register_coco_instances
from detectron2.data import DatasetCatalog, MetadataCatalog

3- Download Competition data from google drive

In [4]:
import gdown
url='https://docs.google.com/uc?export=download&id=1ULqYtd9yomeGz53WBhgRdPRFB37ppeDU&confirm=t'
#https://drive.google.com/file/d/1fx80PtbueBXMmGlVxNk2NMAyLZBUTmRH/view?usp=share_link
#https://drive.google.com/file/d/1h8yBDGDe49HhJa5Mf-jMAgc6aHEgPtEs/view?usp=share_link
#https://drive.google.com/file/d/1ULqYtd9yomeGz53WBhgRdPRFB37ppeDU/view
output='dataset.zip'
gdown.download(url, output, quiet=False) #, use_cookies=True)

Downloading...
From: https://docs.google.com/uc?export=download&id=1ULqYtd9yomeGz53WBhgRdPRFB37ppeDU&confirm=t
To: /content/dataset.zip
100%|██████████| 2.34G/2.34G [00:10<00:00, 220MB/s]


'dataset.zip'

4-Extract data folder

In [None]:
import os
os.chdir('/content/')  #change dir
!unzip -u dataset.zip -d dataset/

5- Load training data to convert it to coco-format

In [6]:
import pandas as pd
all_data_df = pd.read_csv("/content/dataset/dataset/train.csv")
all_data_df

Unnamed: 0,class,image_path,name,xmax,xmin,ymax,ymin
0,3.0,4a48c42c9579ec0399e6c5a3e825e765.jpg,GARBAGE,797.0,701.0,262.0,211.0
1,3.0,4a48c42c9579ec0399e6c5a3e825e765.jpg,GARBAGE,932.0,786.0,329.0,238.0
2,3.0,4a48c42c9579ec0399e6c5a3e825e765.jpg,GARBAGE,736.0,657.0,275.0,229.0
3,7.0,ea906a663da6321bcef78be4b7d1afff.jpg,BAD_BILLBOARD,986.0,786.0,136.0,0.0
4,8.0,1c7d48005a12d1b19261b8e71df7cafe.jpg,SAND_ON_ROAD,667.0,549.0,228.0,179.0
...,...,...,...,...,...,...,...
19945,4.0,081e7bb3832ec5bb25276db161a96274.jpg,CONSTRUCTION_ROAD,1025.0,600.0,408.0,148.0
19946,2.0,1ff38a7af7f13b1201d17c6e1829373a.jpg,POTHOLES,657.0,418.0,364.0,282.0
19947,2.0,1ff38a7af7f13b1201d17c6e1829373a.jpg,POTHOLES,507.0,338.0,436.0,283.0
19948,7.0,ac97490f13140fc1bfe613ec69301b34.jpg,BAD_BILLBOARD,956.0,713.0,110.0,25.0


Remove bad_streetlight entry

In [7]:
all_data_df.loc[all_data_df['name'] == "BAD_STREETLIGHT"]


Unnamed: 0,class,image_path,name,xmax,xmin,ymax,ymin
19353,6.0,53d3797457a0d2e3afe146e2f797e77e.jpg,BAD_STREETLIGHT,985.0,359.0,161.0,-47.0


In [8]:
all_data_df.drop(all_data_df.index[[19353]], inplace=True)
all_data_df.loc[all_data_df['name'] == "BAD_STREETLIGHT"]


Unnamed: 0,class,image_path,name,xmax,xmin,ymax,ymin


Fix boxes ...


In [9]:
def adjust_box_x (value):
  if value < 0:
    return 0
  elif value > 1920:
    return 1920
  else:
    return 2*value

def adjust_box_y (value):
  if value < 0:
    return 0
  elif value > 1080:
    return 1080
  else:
    return 2*value


all_data_df["xmin"] = all_data_df["xmin"].apply(adjust_box_x)
all_data_df["xmax"] = all_data_df["xmax"].apply(adjust_box_x)
all_data_df["ymin"] = all_data_df["ymin"].apply(adjust_box_y)
all_data_df["ymax"] = all_data_df["ymax"].apply(adjust_box_y)


all_data_df

Unnamed: 0,class,image_path,name,xmax,xmin,ymax,ymin
0,3.0,4a48c42c9579ec0399e6c5a3e825e765.jpg,GARBAGE,1594.0,1402.0,524.0,422.0
1,3.0,4a48c42c9579ec0399e6c5a3e825e765.jpg,GARBAGE,1864.0,1572.0,658.0,476.0
2,3.0,4a48c42c9579ec0399e6c5a3e825e765.jpg,GARBAGE,1472.0,1314.0,550.0,458.0
3,7.0,ea906a663da6321bcef78be4b7d1afff.jpg,BAD_BILLBOARD,1972.0,1572.0,272.0,0.0
4,8.0,1c7d48005a12d1b19261b8e71df7cafe.jpg,SAND_ON_ROAD,1334.0,1098.0,456.0,358.0
...,...,...,...,...,...,...,...
19945,4.0,081e7bb3832ec5bb25276db161a96274.jpg,CONSTRUCTION_ROAD,2050.0,1200.0,816.0,296.0
19946,2.0,1ff38a7af7f13b1201d17c6e1829373a.jpg,POTHOLES,1314.0,836.0,728.0,564.0
19947,2.0,1ff38a7af7f13b1201d17c6e1829373a.jpg,POTHOLES,1014.0,676.0,872.0,566.0
19948,7.0,ac97490f13140fc1bfe613ec69301b34.jpg,BAD_BILLBOARD,1912.0,1426.0,220.0,50.0


In [10]:
df_images = pd.DataFrame(all_data_df["image_path"].value_counts())
df_images = df_images.reset_index()
df_images.columns = ['image_path', 'number_of_objects']
df_images

Unnamed: 0,image_path,number_of_objects
0,291d3a6d38674da0e4b05135008bceb1.jpg,22
1,e76cb6343518dde6aaf5105e5250ebeb.jpg,19
2,0a2bc0dc2371794509f4b776aff0dd88.jpg,19
3,fec90b848c9b6c4bcc1ec562597c55d8.jpg,19
4,8b3335738dc291770ffb3613810128f4.jpg,19
...,...,...
7868,b8c5a771ec3f46006cac8ece1108d5d5.jpg,1
7869,4aacf426bd007ef10fcda6fd5ba875c4.jpg,1
7870,ddf523beb18a11c77462c3fa072b41bd.jpg,1
7871,8fdd463cacbdf40d481232cc30846064.jpg,1


Data Spllitting , 85% train and 15% validation 

In [11]:
from sklearn.model_selection import train_test_split
train_set, val_set = train_test_split(df_images, test_size=0.15, random_state=42)

In [12]:
train_imgs = train_set["image_path"]
train_data = all_data_df.loc[all_data_df['image_path'].isin(train_imgs)]
train_data.reset_index(drop=True, inplace=True)
train_data

Unnamed: 0,class,image_path,name,xmax,xmin,ymax,ymin
0,7.0,ea906a663da6321bcef78be4b7d1afff.jpg,BAD_BILLBOARD,1972.0,1572.0,272.0,0.0
1,8.0,1c7d48005a12d1b19261b8e71df7cafe.jpg,SAND_ON_ROAD,1334.0,1098.0,456.0,358.0
2,8.0,1c7d48005a12d1b19261b8e71df7cafe.jpg,SAND_ON_ROAD,1598.0,1256.0,476.0,352.0
3,8.0,8ca1b825716ea6755180fde347ac79c1.jpg,SAND_ON_ROAD,1064.0,868.0,392.0,294.0
4,0.0,8ca1b825716ea6755180fde347ac79c1.jpg,GRAFFITI,1098.0,942.0,354.0,262.0
...,...,...,...,...,...,...,...
16875,3.0,f2d628379c60843a907f26d1f9d86f91.jpg,GARBAGE,1208.0,1098.0,536.0,440.0
16876,2.0,1ff38a7af7f13b1201d17c6e1829373a.jpg,POTHOLES,1314.0,836.0,728.0,564.0
16877,2.0,1ff38a7af7f13b1201d17c6e1829373a.jpg,POTHOLES,1014.0,676.0,872.0,566.0
16878,7.0,ac97490f13140fc1bfe613ec69301b34.jpg,BAD_BILLBOARD,1912.0,1426.0,220.0,50.0


In [13]:
val_imgs = val_set["image_path"]
val_data = all_data_df.loc[all_data_df['image_path'].isin(val_imgs)]
val_data.reset_index(drop=True, inplace=True)
val_data

Unnamed: 0,class,image_path,name,xmax,xmin,ymax,ymin
0,3.0,4a48c42c9579ec0399e6c5a3e825e765.jpg,GARBAGE,1594.0,1402.0,524.0,422.0
1,3.0,4a48c42c9579ec0399e6c5a3e825e765.jpg,GARBAGE,1864.0,1572.0,658.0,476.0
2,3.0,4a48c42c9579ec0399e6c5a3e825e765.jpg,GARBAGE,1472.0,1314.0,550.0,458.0
3,3.0,7fb40d10dde6d5643aa8e197b6b46c2e.jpg,GARBAGE,1702.0,1382.0,448.0,314.0
4,3.0,7fb40d10dde6d5643aa8e197b6b46c2e.jpg,GARBAGE,1330.0,1232.0,440.0,326.0
...,...,...,...,...,...,...,...
3064,9.0,9d20fda7ea773092aa59494e662b676a.jpg,CLUTTER_SIDEWALK,902.0,0.0,632.0,420.0
3065,9.0,9d20fda7ea773092aa59494e662b676a.jpg,CLUTTER_SIDEWALK,1932.0,554.0,628.0,380.0
3066,4.0,081e7bb3832ec5bb25276db161a96274.jpg,CONSTRUCTION_ROAD,1020.0,0.0,852.0,270.0
3067,4.0,081e7bb3832ec5bb25276db161a96274.jpg,CONSTRUCTION_ROAD,1588.0,604.0,836.0,304.0


Save train data and validation data

In [14]:
train_data.to_csv("/content/dataset/dataset/train_data.csv")
val_data.to_csv("/content/dataset/dataset/val_data.csv")

Convert csv to coco-format

In [15]:
import numpy as np
import json
import pandas as pd

def create_coco (path,save_json_path):
  data = pd.read_csv(path)

  images = []
  categories = []
  annotations = []

  category = {}
  category["supercategory"] = 'none'
  category["id"] = 0
  category["name"] = 'None'
  categories.append(category)

  data['fileid'] = data['image_path'].astype('category').cat.codes
  #print(data['fileid'])
  data['categoryid']= pd.Categorical(data['name'],ordered= True).codes
  data['categoryid'] = data['categoryid']+1
  data['annid'] = data.index


  def image(row):
      image = {}
      image["height"] = 1080 #row.height
      image["width"] = 1920 #row.width
      image["id"] = row.fileid
      image["file_name"] = row.image_path
      return image

  def category(row):
      category = {}
      category["supercategory"] = 'None'
      category["id"] = row.categoryid
      category["name"] = row[4]
      return category

  def annotation(row):
      margin_x = 400
      margin_y = 400
      annotation = {}
  
      xcenter = row.xmin+abs((row.xmax-row.xmin)/2)
      ycenter = row.ymin+abs((row.ymin-row.ymax)/2)
      xlength = abs((row.xmax-row.xmin)/2) 
      ylength = abs((row.ymin-row.ymax)/2) 

      xmin = xcenter + xlength/2
      ymin = ycenter + ylength/2

      xmax = xcenter -  xlength/2
      ymax = ycenter -  ylength/2

    
      area = (xmax - xmin)*( ymax -  ymin)
      annotation["segmentation"] = []
      annotation["iscrowd"] = 0
      annotation["area"] = area
      annotation["image_id"] = row.fileid

      annotation["bbox"] = [xmin, ymin, xmax - xmin,ymax - ymin ]

      annotation["category_id"] = row.categoryid
      annotation["id"] = row.annid
      return annotation

  for row in data.itertuples():
      annotations.append(annotation(row))

  imagedf = data.drop_duplicates(subset=['fileid']).sort_values(by='fileid')
  for row in imagedf.itertuples():
      images.append(image(row))

  catdf = data.drop_duplicates(subset=['categoryid']).sort_values(by='categoryid')
  for row in catdf.itertuples():
      categories.append(category(row))

  data_coco = {}
  data_coco["images"] = images
  data_coco["categories"] = categories
  data_coco["annotations"] = annotations


  return json.dump(data_coco, open(save_json_path, "w"), indent=4)


Create coco sets and register it.

In [16]:
create_coco("/content/dataset/dataset/train_data.csv","/content/dataset/dataset/images/train_coco.json")
create_coco("/content/dataset/dataset/val_data.csv","/content/dataset/dataset/images/val_coco.json")
#create_coco("/content/dataset/dataset/train_data.csv","/content/dataset/dataset/train_coco.json")
#create_coco("/content/dataset/dataset/val_data.csv","/content/dataset/dataset/val_coco.json")

In [17]:
dataDir=Path('/content/dataset/dataset/images')
register_coco_instances('smartathon_train',{}, '/content/dataset/dataset/images/train_coco.json', dataDir)
register_coco_instances('smartathon_val',{},'/content/dataset/dataset/images/val_coco.json', dataDir)

metadata = MetadataCatalog.get('smartathon_train')
train_ds = DatasetCatalog.get('smartathon_train')

Category ids in annotations are not in [1, #categories]! We'll apply a mapping for you.

[01/21 13:48:43 d2.data.datasets.coco]: Loaded 6692 images in COCO format from /content/dataset/dataset/images/train_coco.json


In [18]:
from detectron2.engine import DefaultTrainer
from detectron2.evaluation import COCOEvaluator

class CocoTrainer(DefaultTrainer):

  @classmethod
  def build_evaluator(cls, cfg, dataset_name, output_folder=None):

    if output_folder is None:
        os.makedirs("coco_eval", exist_ok=True)
        output_folder = "coco_eval"

    return COCOEvaluator(dataset_name, cfg, False, output_folder)

In [19]:
from detectron2.engine import DefaultTrainer
from detectron2.config import get_cfg
import os
cfg = get_cfg()
cfg.merge_from_file(model_zoo.get_config_file("COCO-Detection/faster_rcnn_X_101_32x8d_FPN_3x.yaml")) #Get the basic model configuration from the model zoo 

cfg.DATASETS.TRAIN = ("smartathon_train",)
cfg.DATASETS.TEST = ("smartathon_val",)
cfg.DATALOADER.NUM_WORKERS = 4
cfg.MODEL.WEIGHTS = model_zoo.get_checkpoint_url("COCO-Detection/faster_rcnn_X_101_32x8d_FPN_3x.yaml")  # Let training initialize from model zoo
# Number of per batch across all machines.
cfg.SOLVER.IMS_PER_BATCH = 4
cfg.SOLVER.BASE_LR = 0.01250  # pick a good LearningRate
cfg.SOLVER.MAX_ITER = 25000  #No. of iterations   
cfg.MODEL.ROI_HEADS.BATCH_SIZE_PER_IMAGE = 384 #256  
cfg.MODEL.ROI_HEADS.NUM_CLASSES = 11 # No. of classes = [HINDI, ENGLISH, OTHER]
cfg.TEST.EVAL_PERIOD = 250 # No. of iterations after which the Validation Set is evaluated. 
cfg.SOLVER.CHECKPOINT_PERIOD= 250
os.makedirs(cfg.OUTPUT_DIR, exist_ok=True)
trainer = CocoTrainer(cfg)

[01/21 13:48:50 d2.engine.defaults]: Model:
GeneralizedRCNN(
  (backbone): FPN(
    (fpn_lateral2): Conv2d(256, 256, kernel_size=(1, 1), stride=(1, 1))
    (fpn_output2): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (fpn_lateral3): Conv2d(512, 256, kernel_size=(1, 1), stride=(1, 1))
    (fpn_output3): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (fpn_lateral4): Conv2d(1024, 256, kernel_size=(1, 1), stride=(1, 1))
    (fpn_output4): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (fpn_lateral5): Conv2d(2048, 256, kernel_size=(1, 1), stride=(1, 1))
    (fpn_output5): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (top_block): LastLevelMaxPool()
    (bottom_up): ResNet(
      (stem): BasicStem(
        (conv1): Conv2d(
          3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False
          (norm): FrozenBatchNorm2d(num_features=64, eps=1e-05)
        )
      )
      (res

In [20]:
# from google.colab import drive
# drive.mount('/content/gdrive')


In [21]:
# #Load phase one trained model (trained earlier)
# #url='https://docs.google.com/uc?export=download&id=16ysSGK5kevNjW0CiToGhX5JJNchVkSjU&confirm=t'
# url='https://www.dropbox.com/s/mxg09qkxalxnpr2/phase2_detectron_model.pth?dl=0'
# output='phase2_detectron_model.pth'
# gdown.download(url, output, quiet=False) #, use_cookies=True)

!wget -O phase2_detectron_model.pth https://www.dropbox.com/s/mxg09qkxalxnpr2/phase2_detectron_model.pth?dl=0




--2023-01-21 13:48:53--  https://www.dropbox.com/s/mxg09qkxalxnpr2/phase2_detectron_model.pth?dl=0
Resolving www.dropbox.com (www.dropbox.com)... 162.125.67.18, 2620:100:6031:18::a27d:5112
Connecting to www.dropbox.com (www.dropbox.com)|162.125.67.18|:443... connected.
HTTP request sent, awaiting response... 302 Found
Location: /s/raw/mxg09qkxalxnpr2/phase2_detectron_model.pth [following]
--2023-01-21 13:48:54--  https://www.dropbox.com/s/raw/mxg09qkxalxnpr2/phase2_detectron_model.pth
Reusing existing connection to www.dropbox.com:443.
HTTP request sent, awaiting response... 302 Found
Location: https://uc6ca0b0793ac1a5d146a4e60b37.dl.dropboxusercontent.com/cd/0/inline/B08JU0wPGK2SqFNW3TGAPzBKdE9JvTZD-4LaWX9rovArbVg-8bZ0IWXtrOmcJIlSO8zfIj-3QyZTcRbi5gDo-cR5NBMPpZKSWcg86pRUZDYbVsjUM8vfjHAZZKMl2i67RPTRBc2_ckBS2rwyX79eTLb_L2Qo8Admj3EOZgbyx565OA/file# [following]
--2023-01-21 13:48:55--  https://uc6ca0b0793ac1a5d146a4e60b37.dl.dropboxusercontent.com/cd/0/inline/B08JU0wPGK2SqFNW3TGAPzBKdE9JvT

In [22]:
from detectron2.utils.visualizer import ColorMode
from tqdm import tqdm

smart_dict={'GRAFFITI' : 0.0 , 'FADED_SIGNAGE': 1.0 , 'POTHOLES': 2.0,
            'GARBAGE' : 3.0 , 'CONSTRUCTION_ROAD': 4.0 , 'BROKEN_SIGNAGE': 5.0,
            'BAD_STREETLIGHT' : 6.0 , 'BAD_BILLBOARD': 7.0 , 'SAND_ON_ROAD':8.0,
            'CLUTTER_SIDEWALK' : 9.0 , 'UNKEPT_FACADE': 10.0}


#Use the final weights generated after successful training for inference  
cfg.MODEL.WEIGHTS ="/content/phase2_detectron_model.pth" 



outs = []

cfg.MODEL.ROI_HEADS.SCORE_THRESH_TEST = 0.15 # set the testing threshold for this model
#Pass the validation dataset
cfg.DATASETS.TEST = ("smartathon_val", )

predictor = DefaultPredictor(cfg)

#dataset_dicts = get_board_dicts("Text_Detection_Dataset_COCO_Format/val")
dataset_dicts = DatasetCatalog.get('smartathon_val')

#for d in random.sample(dataset_dicts, 3): 
count_test = 0 
countt = 0
class_names = MetadataCatalog.get("smartathon_train").thing_classes


[01/21 13:49:48 d2.checkpoint.detection_checkpoint]: [DetectionCheckpointer] Loading from /content/phase2_detectron_model.pth ...
Category ids in annotations are not in [1, #categories]! We'll apply a mapping for you.

[01/21 13:49:49 d2.data.datasets.coco]: Loaded 1181 images in COCO format from /content/dataset/dataset/images/val_coco.json


In [23]:
import pandas as pd
test_df = pd.read_csv("/content/dataset/dataset/test.csv")
test_df

Unnamed: 0,image_path
0,953ab1447c46ecfef67ab14629cd70c7.jpg
1,e4ddbaa7970fca225a51288ce5f7d3f9.jpg
2,5b8120d69607a077b7583334be3ba18b.jpg
3,138b1dc82005b4c33e4886260649d313.jpg
4,0f91ec1533b845b13089f8cf4e0a36f7.jpg
...,...
2087,3e7dd159466f14474a0173e9b02f512a.jpg
2088,a464d4600589ddf541bed59942c0228d.jpg
2089,0a490958aac3c79bb815a0f1f6825284.jpg
2090,15afe8bce6c71deed2d53f56b78b90a8.jpg


In [24]:
for index, row in tqdm(test_df.iterrows()):  
  t_path = '/content/dataset/dataset/images/'+row["image_path"]
  im = cv2.imread(t_path)
  outputs = predictor(im)
  break
outputs

  return _VF.meshgrid(tensors, **kwargs)  # type: ignore[attr-defined]
0it [00:06, ?it/s]


{'instances': Instances(num_instances=3, image_height=1080, image_width=1920, fields=[pred_boxes: Boxes(tensor([[1269.1141,  364.7889, 1364.9874,  422.0097],
         [ 791.9026,  521.9858,  839.4863,  558.5504],
         [ 826.6548,  545.4191,  855.9062,  578.3278]], device='cuda:0')), scores: tensor([0.7010, 0.3422, 0.2316], device='cuda:0'), pred_classes: tensor([1, 6, 6], device='cuda:0')])}

In [25]:
# url='https://docs.google.com/uc?export=download&id=1--xn75VlBCaxx2rE4IIFiyHWRTGO2R5X&confirm=t'
# output='phase3_yolo5_trained.pt'
# gdown.download(url, output, quiet=False) #, use_cookies=True)

!wget -O phase3_yolo5_trained.pt https://www.dropbox.com/s/cr9snmzpktruc5z/yolo5x_trained_model_phase3.pt?dl=0

--2023-01-21 13:49:55--  https://www.dropbox.com/s/cr9snmzpktruc5z/yolo5x_trained_model_phase3.pt?dl=0
Resolving www.dropbox.com (www.dropbox.com)... 162.125.64.18, 2620:100:6031:18::a27d:5112
Connecting to www.dropbox.com (www.dropbox.com)|162.125.64.18|:443... connected.
HTTP request sent, awaiting response... 302 Found
Location: /s/raw/cr9snmzpktruc5z/yolo5x_trained_model_phase3.pt [following]
--2023-01-21 13:49:57--  https://www.dropbox.com/s/raw/cr9snmzpktruc5z/yolo5x_trained_model_phase3.pt
Reusing existing connection to www.dropbox.com:443.
HTTP request sent, awaiting response... 302 Found
Location: https://uc3767c9bba86890a82ed0c65dcb.dl.dropboxusercontent.com/cd/0/inline/B09hdw4dYBXC8rnFKa--zz-4ozWW7AZiO3Q7rLQv72qgVOHktOT0RRaDh0KREZkuTl2LOZ1pFcNJeMliLXbnqxrJTeQ8Pg_64OjrdehdqKDuvI9jdZQT-yp-U5h7Qzwbrv_o8ayW2g_jybWzAxDQex84aRZlCGkdl_aUVn97BewTTA/file# [following]
--2023-01-21 13:49:57--  https://uc3767c9bba86890a82ed0c65dcb.dl.dropboxusercontent.com/cd/0/inline/B09hdw4dYBXC8rnFKa

In [26]:
!git clone https://github.com/ultralytics/yolov5  # clone
%cd yolov5
%pip install -qr requirements.txt  # install

import torch
import utils
display = utils.notebook_init()  # checks

YOLOv5 🚀 v7.0-72-g064365d Python-3.8.10 torch-1.13.1+cu116 CUDA:0 (Tesla T4, 15110MiB)


Setup complete ✅ (8 CPUs, 51.0 GB RAM, 28.9/166.8 GB disk)


In [27]:
print(torch.__version__, torch.cuda.is_available())


1.13.1+cu116 True


In [28]:
#model_yolo = torch.hub.load('/content/yolov5', 'custom', path='/content/yolov5train1.pt', source='local') /content/gdrive/MyDrive/Smartathon/bbestxyolo5.pt
#model_yolo = torch.hub.load('/content/yolov5', 'custom', path='/content/gdrive/MyDrive/Smartathon/bbestxyolo5.pt', source='local')
model_yolo = torch.hub.load('/content/yolov5', 'custom', path='/content/phase3_yolo5_trained.pt', source='local')



YOLOv5 🚀 v7.0-72-g064365d Python-3.8.10 torch-1.13.1+cu116 CUDA:0 (Tesla T4, 15110MiB)

Fusing layers... 
Model summary: 322 layers, 86233975 parameters, 0 gradients, 203.9 GFLOPs
Adding AutoShape... 


In [29]:
from tqdm import tqdm

count_test = 0 

smart_dict={'GRAFFITI' : 0.0 , 'FADED_SIGNAGE': 1.0 , 'POTHOLES': 2.0,
            'GARBAGE' : 3.0 , 'CONSTRUCTION_ROAD': 4.0 , 'BROKEN_SIGNAGE': 5.0,
            'BAD_STREETLIGHT' : 6.0 , 'BAD_BILLBOARD': 7.0 , 'SAND_ON_ROAD':8.0,
            'CLUTTER_SIDEWALK' : 9.0 , 'UNKEPT_FACADE': 10.0}



sub_df = pd.DataFrame(columns=['class','image_path','name','xmax','xmin','ymax','ymin'])
for index, row in tqdm(test_df.iterrows()):  
  t_path = '/content/dataset/dataset/images/'+row["image_path"]
  outputs = model_yolo(t_path) #, save=False, save_txt=False) 
  pred_boxes = outputs.pandas().xyxy[0]#.iloc[0]
  break
print(pred_boxes)

0it [00:00, ?it/s]

          xmin        ymin         xmax        ymax  confidence  class  \
0  1212.592285  346.484222  1420.863770  440.897034    0.728607      0   
1   768.800659  498.822937   862.331787  572.478638    0.675482      5   

            name  
0  BAD_BILLBOARD  
1        GARBAGE  





In [30]:
!pip install ensemble-boxes

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Collecting ensemble-boxes
  Downloading ensemble_boxes-1.0.9-py3-none-any.whl (23 kB)
Installing collected packages: ensemble-boxes
Successfully installed ensemble-boxes-1.0.9


In [31]:
def yolobbox2bbox(x,y,w,h):
    x1, y1 = x-w/2, y-h/2
    x2, y2 = x+w/2, y+h/2
    return x1, y1, x2, y2

def unconvert(width, height, x, y, w, h):

    xmax = int((x*width) + (w * width)/2.0)
    xmin = int((x*width) - (w * width)/2.0)
    ymax = int((y*height) + (h * height)/2.0)
    ymin = int((y*height) - (h * height)/2.0)
    return (xmin, xmax, ymin, ymax)

In [32]:
# import csv
# import pandas as pd
# import datetime as dt
# from os import listdir
# import os
# import numpy as np
# from numpy import mean, sqrt, square, arange
# import matplotlib.pyplot as plt
# from sklearn import neighbors
# from sklearn.model_selection import train_test_split
# from mpl_toolkits.mplot3d import Axes3D

# import pandas as pd
# RawData = pd.read_csv("/content/train-with-gps.csv")
# #RawData

# X = RawData[['lat', 'long']]
# X = X.values.reshape((len(X),2))
# y = RawData[['class']]
# y = y.values.reshape((len(y),1))

# X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.05, random_state=42)

# #Create the model and predict
# nn = 17955

# weights = 'distance'
# knn = neighbors.KNeighborsRegressor(nn, weights=weights)
# y_pred = knn.fit(X_train, y_train).predict(X_test)
# y_pred
# err = y_pred - y_test
# rms = sqrt(mean(square(err)))

# print(rms)

# #demo
# X = [[24.6658197, 46.7609689]]
# X2 = np.array(X)

# #X =## X.reshape((len(X),2))
# res = knn.predict(X2)
# print(res[0][0])

In [33]:
# !pip install piexif==0.7.1
# !pip install ExifRead
# !pip install gpsphoto
# from GPSPhoto import gpsphoto

In [34]:

# metadata = gpsphoto.getGPSData("/content/dataset/dataset/images/138b1dc82005b4c33e4886260649d313.jpg")
# lat = metadata['Latitude']
# longg = metadata['Longitude']

# print(lat,longg)
# X = [[lat,longg]]
# X2 = np.array(X)
# res = knn.predict(X2)
# res = float(round(res[0][0]))
# print(res)



In [35]:
from ensemble_boxes import *

w = 1920
h = 1080
sub_df = pd.DataFrame(columns=['class','image_path','name','xmax','xmin','ymax','ymin'])

smart_dict={'GRAFFITI' : 0.0 , 'FADED_SIGNAGE': 1.0 , 'POTHOLES': 2.0,
            'GARBAGE' : 3.0 , 'CONSTRUCTION_ROAD': 4.0 , 'BROKEN_SIGNAGE': 5.0,
            'BAD_STREETLIGHT' : 6.0 , 'BAD_BILLBOARD': 7.0 , 'SAND_ON_ROAD':8.0,
            'CLUTTER_SIDEWALK' : 9.0 , 'UNKEPT_FACADE': 10.0}

smart_dict_inv={0.0: 'GRAFFITI' , 1.0: 'FADED_SIGNAGE' , 2.0: 'POTHOLES',
            3.0: 'GARBAGE' , 4.0: 'CONSTRUCTION_ROAD' , 5.0: 'BROKEN_SIGNAGE',
            6.0: 'BAD_STREETLIGHT' , 7.0: 'BAD_BILLBOARD' , 8.0: 'SAND_ON_ROAD',
            9.0: 'CLUTTER_SIDEWALK' , 10.0: 'UNKEPT_FACADE'}
countt = 0
for index, row in tqdm(test_df.iterrows()):  
  t_path = '/content/dataset/dataset/images/'+row["image_path"]
  im = cv2.imread(t_path)
  outputs_detectron = predictor(im)
  outputs_yolo = model_yolo(t_path) #, size=(960,540))

  pred_boxes_detectron = outputs_detectron['instances'].pred_boxes.tensor.tolist()
  pred_scores_detectron = outputs_detectron['instances'].scores.tolist()
  pred_classes_detectron = outputs_detectron['instances'].pred_classes
  
  pred_boxes_detectron_final = []
  for idx in range(len(pred_boxes_detectron)):
    xmin, ymin, xmax, ymax = pred_boxes_detectron[idx]
    xcenter = xmin+abs((xmax-xmin)/2)
    ycenter = ymin+abs((ymax-ymin)/2)
    xlength = abs((xmax - xcenter))
    ylength = abs((ymax - ycenter))
    xmin = xcenter - 2*xlength # - 10
    ymin = ycenter - 2*ylength #- 10
    xmax = xcenter + 2*xlength #- 10
    ymax = ycenter + 2*ylength #- 10
    
    xMin = max(0, xmin)/w
    yMin = max(0, ymin)/h
    xMax = min(w, xmax)/w
    yMax = min(h, ymax)/h
    
    pred_boxes_detectron_final.append([xMin, yMin, xMax, yMax])


    


  pred_boxes_yolo = outputs_yolo.pandas().xyxy[0]
  pred_scores_yolo = []
  pred_boxes_yolo_final = []
  pred_classes_yolo = [] 
  for idx in range(len(pred_boxes_yolo)):
    xmin, ymin, xmax, ymax,score,cls,name = pred_boxes_yolo.iloc[idx]
    xMin = max(0, xmin)/w
    yMin = max(0, ymin)/h
    xMax = min(w, xmax)/w
    yMax = min(h, ymax)/h



    pred_boxes_yolo_final.append([xMin, yMin, xMax, yMax])

    pred_scores_yolo.append(score)
    pred_classes_yolo.append(smart_dict[name])
    

  boxes_list = []
  scores_list = []
  labels_list = []


  pred_classes_detectron_final = []
  pred_classes_detectron_final = []
  for idx in range(len(pred_classes_detectron)):
    name = class_names[pred_classes_detectron[idx]]
    if name == 'None':
      continue
    pclass = smart_dict[name]
    pred_classes_detectron_final.append(pclass)


  if len(pred_boxes_detectron_final) == 0 and len(pred_boxes_yolo_final) > 0:
    boxes_list.append(pred_boxes_yolo_final)
    boxes_list.append(pred_boxes_yolo_final)
    scores_list.append(pred_scores_yolo)
    scores_list.append(pred_scores_yolo)
    labels_list.append(pred_classes_yolo)
    labels_list.append(pred_classes_yolo)
  elif len(pred_boxes_detectron_final) > 0 and len(pred_boxes_yolo_final) == 0:
    boxes_list.append(pred_boxes_detectron_final)
    boxes_list.append(pred_boxes_detectron_final)
    scores_list.append(pred_scores_detectron)
    scores_list.append(pred_scores_detectron)
    labels_list.append(pred_classes_detectron_final)
    labels_list.append(pred_classes_detectron_final)
  elif len(pred_boxes_detectron_final) > 0 and len(pred_boxes_yolo_final) > 0:
    boxes_list.append(pred_boxes_detectron_final)
    boxes_list.append(pred_boxes_yolo_final)
    scores_list.append(pred_scores_detectron)
    scores_list.append(pred_scores_yolo)
    labels_list.append(pred_classes_detectron_final)
    labels_list.append(pred_classes_yolo)
  else:
    # metadata = gpsphoto.getGPSData(t_path)
    # lat = metadata['Latitude']
    # longg = metadata['Longitude']
    # #print(lat,longg)
    # X = [[lat,longg]]
    # X2 = np.array(X)
    # res = knn.predict(X2)
    # possible_class = float(round(res[0][0]))
    # possible_name = smart_dict_inv[possible_class]
        #print(res)

    #sub_df = sub_df.append({'class':possible_class, 'image_path':row["image_path"], 'name':possible_name, 'xmax':1260, 'xmin':660, 'ymax':660, 'ymin':420}, ignore_index=True)
    countt+=1
    sub_df = sub_df.append({'class':3.0, 'image_path':row["image_path"], 'name':"GARBAGE", 'xmax':1260, 'xmin':660, 'ymax':660, 'ymin':420}, ignore_index=True)

    continue



  iou_thr = 0.39 
  skip_box_thr = 0.0001
  sigma = 0.1
  weights = [1,1]
  #print("-------")
  boxes, scores, labels = weighted_boxes_fusion(boxes_list, scores_list, labels_list, weights=weights, iou_thr=iou_thr)
  if len(boxes)>0:
    passit = True
    for idx in range(len(boxes)):
      if scores[idx]>=0.34:  
        nxmin, nymin, nxmax, nymax = boxes[idx]
        xmin = nxmin*w 
        ymin = nymin*h
        xmax = nxmax*w
        ymax = nymax*h
        xmin = xmin/2
        ymin = ymin/2
        xmax = xmax/2
        ymax = ymax/2

       


        pclass = labels[idx]
        name = smart_dict_inv[pclass]

        sub_df = sub_df.append({'class':pclass, 'image_path':row["image_path"], 'name':name, 'xmax':xmax, 'xmin':xmin, 'ymax':ymax, 'ymin':ymin}, ignore_index=True)
        passit = False
      
    if passit == True:
        # metadata = gpsphoto.getGPSData(t_path)
        # lat = metadata['Latitude']
        # longg = metadata['Longitude']
        # #print(lat,longg)
        # X = [[lat,longg]]
        # X2 = np.array(X)
        # res = knn.predict(X2)
        # possible_class = float(round(res[0][0]))
        # possible_name = smart_dict_inv[possible_class]
        #print(res)

        #sub_df = sub_df.append({'class':possible_class, 'image_path':row["image_path"], 'name':possible_name, 'xmax':1260, 'xmin':660, 'ymax':660, 'ymin':420}, ignore_index=True)
        sub_df = sub_df.append({'class':3.0, 'image_path':row["image_path"], 'name':"GARBAGE", 'xmax':1260, 'xmin':660, 'ymax':660, 'ymin':420}, ignore_index=True)

        countt+=1


  else:
    # metadata = gpsphoto.getGPSData(t_path)
    # lat = metadata['Latitude']
    # longg = metadata['Longitude']
    # #print(lat,longg)
    # X = [[lat,longg]]
    # X2 = np.array(X)
    # res = knn.predict(X2)
    # possible_class = float(round(res[0][0]))
    # possible_name = smart_dict_inv[possible_class]
    #sub_df = sub_df.append({'class':possible_class, 'image_path':row["image_path"], 'name':possible_name, 'xmax':1260, 'xmin':660, 'ymax':660, 'ymin':420}, ignore_index=True)
    sub_df = sub_df.append({'class':3.0, 'image_path':row["image_path"], 'name':"GARBAGE", 'xmax':1260, 'xmin':660, 'ymax':660, 'ymin':420}, ignore_index=True)

    countt+=1



2092it [14:56,  2.33it/s]


In [36]:
sub_df

Unnamed: 0,class,image_path,name,xmax,xmin,ymax,ymin
0,7.0,953ab1447c46ecfef67ab14629cd70c7.jpg,BAD_BILLBOARD,708.485298,608.400993,222.832265,170.715517
1,3.0,953ab1447c46ecfef67ab14629cd70c7.jpg,GARBAGE,431.324987,384.284363,286.971302,250.231964
2,3.0,e4ddbaa7970fca225a51288ce5f7d3f9.jpg,GARBAGE,641.811962,492.888393,420.974872,322.010050
3,2.0,5b8120d69607a077b7583334be3ba18b.jpg,POTHOLES,754.232140,524.461098,528.689210,336.412354
4,2.0,5b8120d69607a077b7583334be3ba18b.jpg,POTHOLES,657.323112,447.207241,394.490397,288.078518
...,...,...,...,...,...,...,...
5165,3.0,15afe8bce6c71deed2d53f56b78b90a8.jpg,GARBAGE,818.897095,769.021854,330.899287,289.461347
5166,2.0,15afe8bce6c71deed2d53f56b78b90a8.jpg,POTHOLES,703.571835,516.120186,504.094663,353.291377
5167,3.0,15afe8bce6c71deed2d53f56b78b90a8.jpg,GARBAGE,676.223946,598.567600,438.099028,374.612106
5168,9.0,984841724143c66506986b3faa50e4b9.jpg,CLUTTER_SIDEWALK,959.523239,598.872185,316.865712,198.981741


In [37]:
#sub_df.loc[sub_df['image_path'] == "d3842adfda2cefd5e3cdb588fc7d52b7.jpg"]

In [38]:
sub_df.to_csv("/content/sub_finalv1.csv",index=False)