## Create data in coco annotation format

In [1]:
import numpy as np
from PIL import Image
from skimage import measure
from shapely.geometry import Polygon, MultiPolygon
import glob
import re
import os
import cv2
import json
from tqdm import tqdm_notebook, tqdm

In [2]:
def create_sub_masks(mask_image):
    width, height = mask_image.size
    sub_masks = {}
    for x in range(width):
        for y in range(height):
            pixel = mask_image.getpixel((x,y))
            
            if pixel!=0:
                pixel_str = str(pixel)
                sub_mask = sub_masks.get(pixel_str)
                if sub_mask is None:
                    sub_masks[pixel_str] = Image.new('1', (width+2, height+2))
                sub_masks[pixel_str].putpixel((x+1,y+1),1)
                
    return sub_masks
                    

In [3]:
def create_sub_mask_annotation(sub_mask, image_id, category_id, annotation_id, is_crowd):
    
    contours = measure.find_contours(sub_mask,0.5,positive_orientation='low')
    
    segmentations = []
    polygons = []
    for contour in contours:
        for i in range(len(contour)):
            row, col = contour[i]
            contour[i] = (col - 1, row - 1)
            
        poly = Polygon(contour)
        poly = poly.simplify(1.0, preserve_topology=False)
        polygons.append(poly)
        
        segmentation = np.array(poly.exterior.coords).ravel().tolist()
        segmentations.append(segmentation)
        
    multi_poly = MultiPolygon(polygons)
    x,y, max_x, max_y = multi_poly.bounds
    width = max_x - x
    height = max_y - y
    bbox = (x,y,width, height)
    area = multi_poly.area

    annotation = {
    'id':annotation_id,
    'image_id':image_id,
    'category_id':category_id,
    'bbox':bbox,
    'area':area,
    'segmentation':segmentations,
    'iscrowd':is_crowd,
    }
    return annotation

In [4]:

def get_categories_id(ann_img,categories_dict):
    img_dict = {}
    img_id = os.path.basename(ann_img).split('.')[0]
    i = Image.open(ann_img)

    pixels = i.load() # this is not a list, nor is it list()'able
    width, height = i.size

    all_pixels = []
    for x in range(width):
        for y in range(height):
            cpixel = pixels[x, y]
            all_pixels.append(cpixel)
    unique_pixels = list(set(all_pixels))
    all_pixel_dict = {}
    for i in unique_pixels:
        all_pixel_dict[str(i)] = categories_dict.get(str(i))
    img_dict[img_id] = all_pixel_dict
    return img_dict

In [5]:
categoryy = {}
all_category = {}
all_ann = glob.glob('/home/dipesh/Desktop/dev/Food-detection-main/Food_Segmentation/dataset/FoodSeg103/FoodSeg103/Images/ann_dir/test/*.png')
categories_dict_path = "/home/dipesh/Desktop/dev/Food-detection-main/Food_Segmentation/dataset/FoodSeg103/FoodSeg103/category_id.txt"

with open(categories_dict_path,'r') as f:
    contents = f.readlines()

for line in contents:
    rslt = re.split(r'\t+', line)
    categoryy[rslt[0]] = rslt[1].rstrip('\n')

for all_mask in tqdm_notebook(all_ann):
    rslt = get_categories_id(all_mask,categoryy)
    all_category.update(rslt)
    
for key, value in all_category.items():
    value.pop('0')
    

Please use `tqdm.notebook.tqdm` instead of `tqdm.tqdm_notebook`
  for all_mask in tqdm_notebook(all_ann):


  0%|          | 0/2135 [00:00<?, ?it/s]

In [7]:
mask_images = all_ann = glob.glob('/home/gvc/Desktop/dev/Food-detection-main/Food_Segmentation/dataset/FoodSeg103/FoodSeg103/Images/ann_dir/test/*.png')

category_ids = all_category
is_crowd = 0
annotation_id = 1
annotations = []
final_categories = []
final_images = []
for mask_image in tqdm(mask_images):
    img_id = os.path.basename(mask_image).split('.')[0]
    i = Image.open(mask_image)
    width, height = i.size
    sub_masks = create_sub_masks(i)
    for color, sub_mask in sub_masks.items():
        open_cv_image = np.array(sub_mask)       
        category_id = color
        try:
            annotation = create_sub_mask_annotation(open_cv_image,img_id,category_id, annotation_id, is_crowd)
        except:
            pass
        annotations.append(annotation)
        category_to_be_added = {"id":category_id,"name":category_ids.get(img_id).get(color),"name_readable":category_ids.get(img_id).get(color), "supercategory":"food"}
        image_to_be_added = {"id":img_id, "file_name": img_id+'.jpg', "width": width, "height": height}
        if not category_to_be_added in final_categories:
            final_categories.append(category_to_be_added)
        if not image_to_be_added in final_images:
            final_images.append(image_to_be_added)
        annotation_id +=1
    

100%|███████████████████████████████████████| 2135/2135 [09:44<00:00,  3.65it/s]


In [8]:
to_be_dumped = {
    "categories":final_categories,
    "images":final_images,
    "annotations":annotations,
    "info":{},
}

In [9]:
with open('new_annotation_val.json','w') as f:
    json.dump(to_be_dumped,f)

## Merge two annotations

In [10]:
with open("dataset/val/annotations.json") as fp:
    previous_annotation = json.load(fp)


In [11]:
print(len(previous_annotation["categories"]))
print(len(previous_annotation["annotations"]))
print(len(previous_annotation["images"]))

print(len(to_be_dumped["categories"]))
print(len(to_be_dumped["annotations"]))
print(len(to_be_dumped["images"]))

323
1708
946
103
7697
2135


In [12]:
for i, j in previous_annotation.items():
    if i == "categories":
        for k in to_be_dumped.get("categories"):
            j.append(k)
    if i == "annotations":
        for k in to_be_dumped.get("annotations"):
            j.append(k)
    if i == "images":
        for k in to_be_dumped.get("images"):
            j.append(k)

In [13]:
print(len(previous_annotation.get("categories")))
print(len(previous_annotation.get("images")))
print(len(previous_annotation.get("annotations")))

426
3081
9405


In [14]:
with open('new_annotation_val_2.json','w') as f:
    json.dump(previous_annotation,f)

In [15]:
import shutil
from loguru import logger
def make_dataset(annotations:list,data_type:str) -> str:
    labels = ["chicken","apple","banana","country_fries","pizza_with_ham_with_mushrooms_baked","rice","tomato","bread", "pizza","chicken duck"]
    all_category = [category.get("name") for category in annotations.get("categories")]
    required_category = [value for value in annotations.get("categories") if value.get("name") in labels]
    required_idx = [value.get("id") for value in required_category]
    required_annotations = [value for value in annotations.get("annotations") if value.get("category_id") in required_idx]
    required_images = []
    for ann in tqdm(required_annotations):
        img_idx = ann.get("image_id")
        original_img_idx = img_idx
        if len(str(img_idx)) == 4:
            img_idx = str(0)+str(0)+str(img_idx)
        elif len(str(img_idx)) == 5:
            img_idx = str(0)+str(img_idx)
        if data_type == "train":
            img_path = os.path.join("/home/gvc/Desktop/dev/Food-detection-main/Food_Segmentation/dataset/FoodSeg103/total_images/train",str(img_idx)+'.jpg')
            dest_path = os.path.join("/home/gvc/Desktop/dev/Food-detection-main/Food_Segmentation/dataset/FoodSeg103/new_2/train",str(img_idx)+'.jpg')
        else:
            img_path = os.path.join("/home/gvc/Desktop/dev/Food-detection-main/Food_Segmentation/dataset/FoodSeg103/total_images/val",str(img_idx)+'.jpg')
            dest_path = os.path.join("/home/gvc/Desktop/dev/Food-detection-main/Food_Segmentation/dataset/FoodSeg103/new_2/val",str(img_idx)+'.jpg')
        
        for i in annotations.get("images"):
            if i.get("id") == original_img_idx:
                required_images.append(i)
    annotations["categories"] = required_category
    annotations["annotations"] = required_annotations
    annotations["images"] = required_images
    logger.info(f"Category:{len(annotations['categories'])}")
    logger.info(f"Images:{len(annotations['images'])}")
    if data_type == "train":
        with open("/home/gvc/Desktop/dev/Food-detection-main/Food_Segmentation/dataset/FoodSeg103/new_2/train/annotations.json","w") as fp:
            json.dump(annotations,fp)
    else:
        with open("/home/gvc/Desktop/dev/Food-detection-main/Food_Segmentation/dataset/FoodSeg103/new_2/val/annotations.json","w") as fp:
            json.dump(annotations,fp)
    print("Success")

In [16]:
make_dataset(previous_annotation,"valid")

100%|█████████████████████████████████████| 1626/1626 [00:00<00:00, 7643.02it/s]
2022-04-27 14:02:09.412 | INFO     | __main__:make_dataset:33 - Category:15
2022-04-27 14:02:09.412 | INFO     | __main__:make_dataset:34 - Images:1626


Success


## Make category id same

In [28]:
# read val data
import json
with open("/home/gvc/Desktop/dev/Food-detection-main/Food_Segmentation/dataset/FoodSeg103/new_2/train/annotations.json","r") as f:
    cat_val = json.load(f)


In [18]:
cat_to_be_replaced = {
    "48":{"categories":"chicken", "id":101308},
    "73":{"categories":"tomato", "id":100089},
    "58":{"categories":"bread", "id":101243},
    "66":{"categories":"rice", "id":101197},
    "29":{"categories":"banana", "id":100133},
    "61":{"categories":"pizza_with_ham_with_mushrooms_baked", "id":101170},
    "25":{"categories":"apple", "id":100130},
}

In [19]:
for key, value in cat_val.items():
    if key == "categories":
        for item in value:
            id = item.get("id")
            item.update({"id":int(id)})
            

In [20]:
for key, value in cat_val.items():
    if key == "images":
        for item in value:
            id = item.get("id")
            item.update({"id":int(id)})

In [21]:
for key, value in cat_val.items():
    if key == "annotations":
        for item in value:
            image_id = item.get("image_id")
            item.update({"image_id":int(image_id)})

In [30]:
for key, value in cat_val.items():
    if key == "annotations":
        for j in value:
            if len(j.get("segmentation")) > 1:
                seg_val = j.get("segmentation")
                j.update({"segmentation":[[j for i in seg_val for j in i]]})

In [23]:
for key, value in cat_val.items():
    if key == "annotations":
        for i in value:
            category_id = i.get("category_id")
            if category_id in cat_to_be_replaced:
                i.update({"category_id":cat_to_be_replaced.get(category_id).get("id")})

In [34]:
for key,value in cat_val.items():
    if key == "images":
        seen = set()
        new_l = []
        for d in value:
            t = tuple(d.items())
            if t not in seen:
                seen.add(t)
                new_l.append(d)
cat_val.update({"images":new_l})

In [35]:
with open("/home/gvc/Desktop/dev/Food-detection-main/Food_Segmentation/dataset/FoodSeg103/new_2/train/annotations.json", "w") as f:
    json.dump(cat_val, f)