Mount Google Drive in Google Colab to load the dataset.
The dataset contains 800 images and 800 annotations of wet and dry waste.


In [None]:
# Mount Google Drive at /content/drive so that files stored in Drive
# (the dataset, see above) are reachable from the notebook's filesystem.
from google.colab import drive 
drive.mount('/content/drive')

Installation instructions:

Run these commands:

git clone https://github.com/Tessellate-Imaging/Monk_Object_Detection.git

cd Monk_Object_Detection/5_pytorch_retinanet/installation

Select the requirements file that matches your environment and run:

cat requirements_cuda.txt | xargs -n 1 -L 1 pip install

In [None]:
# Clone the Monk Object Detection repository into the current working directory.
# NOTE(review): this runs before the later os.chdir into Drive, so the clone
# lands in the notebook's starting directory; later relative references to
# Monk_Object_Detection/ are resolved from the new working directory — confirm
# the repository is reachable from there.
! git clone https://github.com/Tessellate-Imaging/Monk_Object_Detection.git

In [None]:
# For Colab: install the Monk RetinaNet dependencies. The entries of
# requirements_colab.txt are fed to `pip install` one at a time via xargs.
! cd Monk_Object_Detection/5_pytorch_retinanet/installation && cat requirements_colab.txt | xargs -n 1 -L 1 pip install

# For local systems and cloud machines use requirements.txt instead:
# uncomment the line below and comment out the Colab line above.
#! cd Monk_Object_Detection/5_pytorch_retinanet/installation && cat requirements.txt | xargs -n 1 -L 1 pip install

In [None]:
import os
# Work from the root of the mounted Drive: every relative path used from here
# on (e.g. "Waste_Dataset/") is resolved against this directory.
os.chdir('/content/drive/My Drive')

Annotations are in Pascal VOC format.

They need to be converted to COCO format, using the Monk format as an intermediate step.

Step 1: Convert from VOC format to Monk format:

In [None]:
!pip install xmltodict

In [None]:
import os
import sys
import numpy as np
import pandas as pd

import xmltodict
import json
from tqdm.notebook import tqdm

from pycocotools.coco import COCO

In [None]:
# Dataset layout, relative to the current working directory.
root_dir = "Waste_Dataset/"         # dataset root
img_dir = "Images_merged/"          # sub-folder holding the images
anno_dir = "Annotations_merged/"    # sub-folder holding the XML annotations

In [None]:
# List the annotation files to convert.
# - os.listdir returns entries in arbitrary order, so sort them to make the
#   order of the generated rows reproducible across runs.
# - Every listed file is parsed as XML further down, so skip anything that is
#   not an .xml file (e.g. hidden files created by the OS).
files = sorted(
    name for name in os.listdir(root_dir + anno_dir)
    if name.lower().endswith(".xml")
)
print(len(files), "annotation files found")
# Preview only the first few names instead of dumping the whole listing.
files[:5]

In [None]:
# Convert every VOC annotation file into one Monk-format row:
#   [image file name, "x1 y1 x2 y2 label x1 y1 x2 y2 label ..."]
# Every annotation is expected to contain at least one <object>; a file
# without one raises KeyError.
combined = []
for anno_name in tqdm(files):
    anno_path = os.path.join(root_dir, anno_dir, anno_name)
    # The context manager closes the handle as soon as the file has been read.
    with open(anno_path, 'r') as f:
        anno = xmltodict.parse(f.read())["annotation"]

    # xmltodict yields a list when <object> repeats and a single mapping when
    # it occurs once; normalise to a list so one code path handles both.
    objects = anno["object"]
    if not isinstance(objects, list):
        objects = [objects]

    # Flat sequence of 5-token groups: xmin ymin xmax ymax name.
    fields = []
    for obj in objects:
        bbox = obj["bndbox"]
        fields.extend(
            [bbox["xmin"], bbox["ymin"], bbox["xmax"], bbox["ymax"], obj["name"]]
        )

    combined.append([anno["filename"], " ".join(fields)])

In [None]:
# Preview the first few converted rows rather than dumping every row.
combined[:5]


In [None]:
# Persist the Monk-format rows as a two-column CSV (image ID, label string)
# inside the dataset root.
df = pd.DataFrame(data=combined, columns=['ID', 'Label'])
df.to_csv(root_dir + "/train_labels.csv", index=False)

Step 2: Convert from Monk format to COCO format:

In [None]:
import os
import numpy as np 
import cv2
import dicttoxml
import xml.etree.ElementTree as ET
from xml.dom.minidom import parseString
from tqdm import tqdm
import shutil
import json
import pandas as pd

In [None]:
# Inputs of the Monk -> COCO conversion, relative to the working directory.
root = "Waste_Dataset/"            # dataset root
img_dir = "Images_merged/"         # sub-folder holding the images
anno_file = "train_labels.csv"     # Monk-format CSV written in Step 1

In [None]:
# Paths derived from the configuration above.
dataset_path = root
images_folder = f"{root}/{img_dir}"
annotations_path = f"{root}/annotations/"   # folder that will hold the COCO files

In [None]:
# Create the annotations folder if it does not exist yet. makedirs with
# exist_ok=True is safe to re-run and also creates missing parent folders.
os.makedirs(annotations_path, exist_ok=True)

# Locations the conversion reads from.
input_images_folder = images_folder
input_annotations_path = root + "/" + anno_file

In [None]:
# Locations the conversion writes to (the dataset is converted in place).
output_dataset_path = root
output_image_folder = input_images_folder
output_annotation_folder = annotations_path

# The COCO file is named after the image folder with its slashes removed.
tmp = img_dir.replace("/", "")
output_annotation_file = f"{output_annotation_folder}/instances_{tmp}.json"
output_classes_file = f"{output_annotation_folder}/classes.txt"

In [None]:
# Make sure the output annotation folder exists; safe to re-run, and missing
# parent folders are created as well.
os.makedirs(output_annotation_folder, exist_ok=True)

In [None]:
# Load the Monk-format CSV; columns[0] is the image name column and
# columns[1] the label-string column.
df = pd.read_csv(input_annotations_path)
columns = df.columns

In [None]:
# Separator between the tokens of a label string ("x1 y1 x2 y2 label ...").
delimiter = " "

In [None]:
# Collect the distinct class names. Each label string is a flat,
# delimiter-separated sequence of 5-token groups "x1 y1 x2 y2 label", so the
# class name is every 5th token starting at index 4.
label_names = set()
for labels in df[columns[1]]:
    if not isinstance(labels, str):
        # pandas reads an empty label cell back as NaN; nothing to collect.
        continue
    tokens = labels.split(delimiter)
    # The slice picks one name per complete group (len(tokens)//5 of them).
    label_names.update(tokens[4::5])

# Sort once, after all rows were scanned, so category ids are stable.
anno = sorted(label_names)

# COCO "categories" section: the id is the position in the sorted class list.
list_dict = [
    {"supercategory": "master", "id": idx, "name": name}
    for idx, name in enumerate(anno)
]

# One class name per line, in category-id order.
with open(output_classes_file, 'w') as anno_f:
    anno_f.writelines(name + "\n" for name in anno)

In [None]:
# Build the COCO structure: one "images" entry per CSV row and one
# "annotations" entry per 5-token group "x1 y1 x2 y2 label" of its label string.
coco_data = {
    "type": "instances",
    "images": [],
    "annotations": [],
    "categories": list_dict,
}
# Class name -> category id (position in the sorted class list `anno`).
category_ids = {name: idx for idx, name in enumerate(anno)}
image_id = 0
annotation_id = 0

for i in tqdm(range(len(df))):
    img_name = df[columns[0]][i]
    labels = df[columns[1]][i]
    # An empty label cell is read back as NaN: keep the image, add no boxes.
    tokens = labels.split(delimiter) if isinstance(labels, str) else []

    image_in_path = input_images_folder + "/" + img_name
    img = cv2.imread(image_in_path, 1)
    if img is None:
        # cv2.imread signals a missing/unreadable file by returning None.
        raise FileNotFoundError("Could not read image: " + image_in_path)
    h, w = img.shape[:2]

    coco_data["images"].append({
        "file_name": img_name,
        "height": h,
        "width": w,
        "id": image_id,
    })

    for j in range(len(tokens) // 5):
        x1, y1, x2, y2 = (int(value) for value in tokens[j*5:j*5 + 4])
        label = tokens[j*5 + 4]
        box_w = x2 - x1
        box_h = y2 - y1
        coco_data["annotations"].append({
            "id": annotation_id,
            "image_id": image_id,
            "segmentation": [],
            "ignore": 0,
            "area": box_w * box_h,
            "iscrowd": 0,
            # COCO boxes are [x, y, width, height]; the CSV stores corners.
            "bbox": [x1, y1, box_w, box_h],
            "category_id": category_ids[label],
        })
        annotation_id += 1
    image_id += 1

with open(output_annotation_file, 'w') as outfile:
    json.dump(coco_data, outfile, indent=4)

Use a pretrained model to train the detector:

In [None]:
import os
import sys

# Make the Monk RetinaNet library importable. sys.path.append returns None,
# so its result is not printed.
MONK_LIB = "Monk_Object_Detection/5_pytorch_retinanet/lib/"
sys.path.append(MONK_LIB)

# The relative entry above is resolved against the current working directory,
# which was changed to Drive after the repository was cloned. On Colab the
# notebook normally starts in /content, so also register that clone when it
# exists (no-op elsewhere).
_colab_clone = os.path.join("/content", MONK_LIB)
if os.path.isdir(_colab_clone) and _colab_clone not in sys.path:
    sys.path.append(_colab_clone)

In [None]:
# Import Monk's training wrapper (presumably found through the sys.path entry
# added earlier — confirm) and create the detector object used for training.
from train_detector import Detector
gtf = Detector();

In [None]:
# Dataset arguments for training. Note that root_dir and img_dir are rebound
# here and no longer hold the values used during the format conversion.
root_dir = "./"
coco_dir = "Waste_Dataset"
img_dir = "./"
set_dir = "Images_merged"

In [None]:
# Register the training set with the detector, using batches of 2 images and
# the GPU.
# NOTE(review): presumably this reads the COCO file
# <coco_dir>/annotations/instances_<set_dir>.json written in Step 2 —
# confirm against the Monk RetinaNet documentation.
gtf.Train_Dataset(root_dir,coco_dir, img_dir, set_dir, batch_size=2, use_gpu=True)

In [None]:
# Display the classes of the loaded training dataset as a sanity check.
gtf.system_dict["local"]["dataset_train"].classes

Model used: resnet50

In [None]:
# Select the model by name.
# NOTE(review): "resnet50" is presumably the RetinaNet backbone — confirm.
gtf.Model(model_name="resnet50");

In [None]:
# Learning rate 1e-4. NOTE(review): print_interval=20 presumably controls how
# often training progress is logged — confirm.
gtf.Set_Hyperparams(lr=0.0001, print_interval=20)

Train for 8 epochs:

In [None]:
# Train for 8 epochs. NOTE(review): output_model_name is presumably the file
# the trained model is saved to; "final_model.pt" is the file loaded for
# inference further down.
gtf.Train(num_epochs=8, output_model_name="final_model.pt");

In [None]:
import os
import sys

# Make the Monk RetinaNet library importable for inference, without adding
# the same entry to sys.path a second time.
MONK_LIB = "Monk_Object_Detection/5_pytorch_retinanet/lib/"
if MONK_LIB not in sys.path:
    sys.path.append(MONK_LIB)

# The relative entry is resolved against the current working directory, which
# was changed to Drive after the repository was cloned. On Colab the notebook
# normally starts in /content, so also register that clone when it exists
# (no-op elsewhere).
_colab_clone = os.path.join("/content", MONK_LIB)
if os.path.isdir(_colab_clone) and _colab_clone not in sys.path:
    sys.path.append(_colab_clone)

In [None]:
# Import Monk's inference wrapper; the class is named Infer (it is
# instantiated as Infer() in the next cell).
from infer_detector import Infer

In [None]:
# Create the inference object. This rebinds `gtf`, which previously held the
# training Detector.
gtf = Infer();

In [None]:
# Load the model from final_model.pt (the name passed to Train above).
# NOTE(review): the return value of Model() is printed here; it is unclear
# from this notebook what it returns — confirm it is worth printing.
print(gtf.Model(model_path="final_model.pt"))

In [None]:
# Read the class names written during the COCO conversion, one per line.
# Only the line terminator is stripped, so a final line without a trailing
# newline keeps its last character; the context manager closes the file.
with open("Waste_Dataset/annotations/classes.txt", 'r') as f:
    class_list = [line.rstrip("\n") for line in f]

In [None]:
# Number of class names read from classes.txt.
len(class_list)

Run detection on sample images:

In [None]:
from IPython.display import Image

# Run the detector on one sample image, then display output.jpg.
# NOTE(review): Predict presumably writes its visualisation to output.jpg and
# vis_threshold looks like the minimum score for drawn boxes — confirm.
img_path = "Waste_Dataset/Images_merged/plastic251.jpg"
scores, labels, boxes = gtf.Predict(img_path, class_list, vis_threshold=0.4)
Image(filename='output.jpg')

In [None]:
from IPython.display import Image

# Run the detector on one sample image, then display output.jpg.
# NOTE(review): Predict presumably writes its visualisation to output.jpg and
# vis_threshold looks like the minimum score for drawn boxes — confirm.
img_path = "Waste_Dataset/Images_merged/plastic258.jpg"
scores, labels, boxes = gtf.Predict(img_path, class_list, vis_threshold=0.4)
Image(filename='output.jpg')

In [None]:
from IPython.display import Image

# Run the detector on one sample image, then display output.jpg.
# NOTE(review): Predict presumably writes its visualisation to output.jpg and
# vis_threshold looks like the minimum score for drawn boxes — confirm.
img_path = "Waste_Dataset/Images_merged/plastic249.jpg"
scores, labels, boxes = gtf.Predict(img_path, class_list, vis_threshold=0.4)
Image(filename='output.jpg')

In [None]:
from IPython.display import Image

# Run the detector on one sample image, then display output.jpg.
# NOTE(review): Predict presumably writes its visualisation to output.jpg and
# vis_threshold looks like the minimum score for drawn boxes — confirm.
img_path = "Waste_Dataset/Images_merged/plastic236.jpg"
scores, labels, boxes = gtf.Predict(img_path, class_list, vis_threshold=0.4)
Image(filename='output.jpg')

In [None]:
from IPython.display import Image

# Run the detector on one sample image, then display output.jpg.
# NOTE(review): Predict presumably writes its visualisation to output.jpg and
# vis_threshold looks like the minimum score for drawn boxes — confirm.
img_path = "Waste_Dataset/Images_merged/O_41.jpg"
scores, labels, boxes = gtf.Predict(img_path, class_list, vis_threshold=0.4)
Image(filename='output.jpg')

In [None]:
from IPython.display import Image

# Run the detector on one sample image, then display output.jpg.
# NOTE(review): Predict presumably writes its visualisation to output.jpg and
# vis_threshold looks like the minimum score for drawn boxes — confirm.
img_path = "Waste_Dataset/Images_merged/O_37.jpg"
scores, labels, boxes = gtf.Predict(img_path, class_list, vis_threshold=0.4)
Image(filename='output.jpg')

In [None]:
from IPython.display import Image

# Run the detector on one sample image, then display output.jpg.
# NOTE(review): Predict presumably writes its visualisation to output.jpg and
# vis_threshold looks like the minimum score for drawn boxes — confirm.
img_path = "Waste_Dataset/Images_merged/image107.jpg"
scores, labels, boxes = gtf.Predict(img_path, class_list, vis_threshold=0.4)
Image(filename='output.jpg')