In [1]:
!pip install torch torchvision segmentation-models-pytorch opencv-python-headless


Collecting segmentation-models-pytorch
  Downloading segmentation_models_pytorch-0.3.4-py3-none-any.whl.metadata (30 kB)
Collecting efficientnet-pytorch==0.7.1 (from segmentation-models-pytorch)
  Downloading efficientnet_pytorch-0.7.1.tar.gz (21 kB)
  Preparing metadata (setup.py) ... [?25ldone
Collecting pretrainedmodels==0.7.4 (from segmentation-models-pytorch)
  Downloading pretrainedmodels-0.7.4.tar.gz (58 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m58.8/58.8 kB[0m [31m3.0 MB/s[0m eta [36m0:00:00[0m
[?25h  Preparing metadata (setup.py) ... [?25ldone
Collecting timm==0.9.7 (from segmentation-models-pytorch)
  Downloading timm-0.9.7-py3-none-any.whl.metadata (58 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m58.8/58.8 kB[0m [31m3.4 MB/s[0m eta [36m0:00:00[0m
Collecting munch (from pretrainedmodels==0.7.4->segmentation-models-pytorch)
  Downloading munch-4.0.0-py2.py3-none-any.whl.metadata (5.9 kB)
Downloading segmentation_mod

In [3]:
import os
import torch
from PIL import Image
import numpy as np
import torchvision.transforms as T
from tqdm import tqdm
from torchvision import models

class Label:
    def __init__(self, name, color):
        self.name = name
        self.color = color

# labels and colors
labels = [
    Label('road', (128, 64, 128)),
    Label('sidewalk', (244, 35, 232)),
    Label('building', (70, 70, 70)),
    Label('wall', (102, 102, 156)),
    Label('fence', (190, 153, 153)),
    Label('pole', (153, 153, 153)),
    Label('traffic light', (250, 170, 30)),
    Label('traffic sign', (220, 220, 0)),
    Label('vegetation', (107, 142, 35)),
    Label('terrain', (152, 251, 152)),
    Label('sky', (70, 130, 180)),
    Label('person', (220, 20, 60)),
    Label('rider', (255, 0, 0)),
    Label('car', (0, 0, 142)),
    Label('truck', (0, 0, 70)),
    Label('bus', (0, 60, 100)),
    Label('train', (0, 80, 100)),
    Label('motorcycle', (0, 0, 230)),
    Label('bicycle', (119, 11, 32))
]

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Loading HRnet
model = models.segmentation.deeplabv3_resnet101(pretrained=True).to(device)
model.eval()

# image transformation
transform = T.Compose([
    T.Resize((1024, 2048)), 
    T.ToTensor(),
    T.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
])

def decode_segmap(image, nc=21):
    label_colors = np.array([
        [0, 0, 0],       
        [128, 64, 128],  
        [244, 35, 232],  
        [70, 70, 70],    
        [102, 102, 156], 
        [190, 153, 153], 
        [153, 153, 153], 
        [250, 170, 30],  
        [220, 220, 0],   
        [107, 142, 35], 
        [152, 251, 152], 
        [70, 130, 180],  
        [220, 20, 60],   
        [255, 0, 0],     
        [0, 0, 142],     
        [0, 0, 70],      
        [0, 60, 100],    
        [0, 80, 100],    
        [0, 0, 230],     
        [119, 11, 32]   
    ])

    r = np.zeros_like(image).astype(np.uint8)
    g = np.zeros_like(image).astype(np.uint8)
    b = np.zeros_like(image).astype(np.uint8)

    for l in range(0, nc):
        idx = image == l
        r[idx] = label_colors[l, 0]
        g[idx] = label_colors[l, 1]
        b[idx] = label_colors[l, 2]

    rgb = np.stack([r, g, b], axis=2)
    return rgb

def segment_image(image_path):
    image = Image.open(image_path).convert('RGB')
    input_tensor = transform(image).unsqueeze(0).to(device)

    with torch.no_grad():
        output = model(input_tensor)['out']
    output_predictions = output.argmax(1).cpu().numpy()[0]

    decoded_image = decode_segmap(output_predictions, nc=len(labels))
    
    return decoded_image

def process_test_images(test_images, test_dir, output_dir):
    if not os.path.exists(output_dir):
        os.makedirs(output_dir)

    for img_file in tqdm(test_images):
        img_path = os.path.join(test_dir, img_file)
        segmented_image = segment_image(img_path)
        
        output_img = Image.fromarray(segmented_image)
        output_img.save(os.path.join(output_dir, img_file))  

test_dir = '/kaggle/input/iitg-ai-overnight-hackathon-2024/dataset/dataset/test'
output_dir = '/kaggle/working/high_quality_segment'

# List of test images 
test_images = [
    "frame0000_leftImg8bit.jpg", "frame0014_leftImg8bit.jpg", "frame0119_leftImg8bit.jpg",
    "frame0199_leftImg8bit.jpg", "frame0277_leftImg8bit.jpg", "frame0340_leftImg8bit.jpg",
    "frame0342_leftImg8bit.jpg", "frame0364_leftImg8bit.jpg", "frame0514_leftImg8bit.jpg",
    "frame0550_leftImg8bit.jpg", "frame0604_leftImg8bit.jpg", "frame0664_leftImg8bit.jpg",
    "frame0671_leftImg8bit.jpg", "frame0704_leftImg8bit.jpg", "frame0724_leftImg8bit.jpg",
    "frame0821_leftImg8bit.jpg", "frame0874_leftImg8bit.jpg", "frame0918_leftImg8bit.jpg",
    "frame0924_leftImg8bit.jpg", "frame0964_leftImg8bit.jpg", "frame0982_leftImg8bit.jpg",
    "frame1002_leftImg8bit.jpg", "frame1014_leftImg8bit.jpg", "frame10376_leftImg8bit.jpg",
    "frame1047_leftImg8bit.jpg", "frame10676_leftImg8bit.jpg", "frame10758_leftImg8bit.jpg",
    "frame1084_leftImg8bit.jpg", "frame11003_leftImg8bit.jpg", "frame11630_leftImg8bit.jpg",
    "frame1174_leftImg8bit.jpg", "frame12146_leftImg8bit.jpg", "frame1227_leftImg8bit.jpg",
    "frame1263_leftImg8bit.jpg", "frame1347_leftImg8bit.jpg", "frame1348_leftImg8bit.jpg",
    "frame1356_leftImg8bit.jpg", "frame1384_leftImg8bit.jpg", "frame1393_leftImg8bit.jpg",
    "frame1428_leftImg8bit.jpg", "frame1518_leftImg8bit.jpg", "frame1527_leftImg8bit.jpg",
    "frame1534_leftImg8bit.jpg", "frame1565_leftImg8bit.jpg", "frame1591_leftImg8bit.jpg",
    "frame1630_leftImg8bit.jpg", "frame1647_leftImg8bit.jpg", "frame1654_leftImg8bit.jpg",
    "frame1793_leftImg8bit.jpg", "frame1833_leftImg8bit.jpg", "frame1848_leftImg8bit.jpg",
    "frame1866_leftImg8bit.jpg", "frame1878_leftImg8bit.jpg", "frame1898_leftImg8bit.jpg",
    "frame1900_leftImg8bit.jpg", "frame1918_leftImg8bit.jpg", "frame2014_leftImg8bit.jpg",
    "frame2043_leftImg8bit.jpg", "frame2046_leftImg8bit.jpg", "frame2058_leftImg8bit.jpg",
    "frame2063_leftImg8bit.jpg", "frame2132_leftImg8bit.jpg", "frame2193_leftImg8bit.jpg",
    "frame2293_leftImg8bit.jpg", "frame2314_leftImg8bit.jpg", "frame2423_leftImg8bit.jpg",
    "frame2528_leftImg8bit.jpg", "frame2584_leftImg8bit.jpg", "frame2598_leftImg8bit.jpg",
    "frame2796_leftImg8bit.jpg", "frame2824_leftImg8bit.jpg", "frame2838_leftImg8bit.jpg",
    "frame2944_leftImg8bit.jpg", "frame2949_leftImg8bit.jpg", "frame2976_leftImg8bit.jpg",
    "frame2993_leftImg8bit.jpg", "frame3048_leftImg8bit.jpg", "frame3094_leftImg8bit.jpg",
    "frame3134_leftImg8bit.jpg", "frame3263_leftImg8bit.jpg", "frame3276_leftImg8bit.jpg",
    "frame3334_leftImg8bit.jpg", "frame3348_leftImg8bit.jpg", "frame3486_leftImg8bit.jpg",
    "frame3532_leftImg8bit.jpg", "frame3592_leftImg8bit.jpg", "frame3618_leftImg8bit.jpg",
    "frame3691_leftImg8bit.jpg", "frame3724_leftImg8bit.jpg", "frame3828_leftImg8bit.jpg",
    "frame3862_leftImg8bit.jpg", "frame3966_leftImg8bit.jpg", "frame3967_leftImg8bit.jpg",
    "frame4043_leftImg8bit.jpg", "frame4266_leftImg8bit.jpg", "frame4283_leftImg8bit.jpg",
    "frame4428_leftImg8bit.jpg", "frame4566_leftImg8bit.jpg", "frame4763_leftImg8bit.jpg",
    "frame4986_leftImg8bit.jpg", "frame5153_leftImg8bit.jpg", "frame5196_leftImg8bit.jpg",
    "frame5393_leftImg8bit.jpg", "frame5406_leftImg8bit.jpg", "frame5573_leftImg8bit.jpg",
    "frame5586_leftImg8bit.jpg", "frame5783_leftImg8bit.jpg", "frame6009_leftImg8bit.jpg",
    "frame6216_leftImg8bit.jpg", "frame6396_leftImg8bit.jpg", "frame6473_leftImg8bit.jpg",
    "frame6893_leftImg8bit.jpg", "frame6966_leftImg8bit.jpg", "frame7283_leftImg8bit.jpg",
    "frame7433_leftImg8bit.jpg", "frame7446_leftImg8bit.jpg", "frame8826_leftImg8bit.jpg",
    "frame9426_leftImg8bit.jpg"]

process_test_images(test_images, test_dir, output_dir)


100%|██████████| 118/118 [02:35<00:00,  1.31s/it]


In [4]:
import cv2
import numpy as np
import pandas as pd
import json
import os
import re  

class Label:
    def __init__(self, name, color):
        self.name = name
        self.color = color

labels = [
    Label('road', (128, 64, 128)),
    Label('sidewalk', (244, 35, 232)),
    Label('building', (70, 70, 70)),
    Label('wall', (102, 102, 156)),
    Label('fence', (190, 153, 153)),
    Label('pole', (153, 153, 153)),
    Label('traffic light', (250, 170, 30)),
    Label('traffic sign', (220, 220, 0)),
    Label('vegetation', (107, 142, 35)),
    Label('terrain', (152, 251, 152)),
    Label('sky', (70, 130, 180)),
    Label('person', (220, 20, 60)),
    Label('rider', (255, 0, 0)),
    Label('car', (0, 0, 142)),
    Label('truck', (0, 0, 70)),
    Label('bus', (0, 60, 100)),
    Label('train', (0, 80, 100)),
    Label('motorcycle', (0, 0, 230)),
    Label('bicycle', (119, 11, 32))
]

# extract polygons from a binary mask
def extract_polygons(mask):
    gray = cv2.cvtColor(mask, cv2.COLOR_BGR2GRAY) if len(mask.shape) == 3 else mask
    contours, _ = cv2.findContours(gray, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
    polygons = []
    for contour in contours:
        epsilon = 0.01 * cv2.arcLength(contour, True)
        approx = cv2.approxPolyDP(contour, epsilon, True)
        polygon = approx.reshape(-1, 2).tolist()
        polygons.append(polygon)
    return polygons

# map colors to labels and extract polygons
def convert_mask_to_objects(mask):
    objects = []
    for label in labels:
        mask_label = cv2.inRange(mask, np.array(label.color), np.array(label.color))
        polygons = extract_polygons(mask_label)
        for polygon in polygons:
            objects.append({"label": label.name, "polygon": polygon})
    return objects

# saving to csv
def process_masks(mask_dir, output_csv):
    data = []
    print(f"Processing directory: {mask_dir}")

    mask_files = [f for f in os.listdir(mask_dir) if f.endswith(".jpg")]
    mask_files.sort(key=lambda x: int(re.search(r'(\d+)', x).group()))  

    for mask_file in mask_files:
        mask_path = os.path.join(mask_dir, mask_file)
        print(f"Processing file: {mask_path}")
        mask = cv2.imread(mask_path)

        if mask is None:
            print(f"Error reading file: {mask_path}")
            continue 

        frame_id = os.path.splitext(mask_file)[0]
        objects = convert_mask_to_objects(mask)
        if objects:
            print(f"Objects found for frame {frame_id}: {objects}")
        else:
            print(f"No objects found for frame {frame_id}")
        data.append({"id": frame_id, "objects": json.dumps(objects)})

    if data:
        df = pd.DataFrame(data, columns=["id", "objects"])
        df.to_csv(output_csv, index=False)
        print(f"CSV file saved at: {output_csv}")
    else:
        print("No data to save to CSV.")


mask_dir = "/kaggle/working/high_quality_segment"
output_csv = "/kaggle/working/segmentation_output.csv"
process_masks(mask_dir, output_csv)

print("Processing completed.")


Processing directory: /kaggle/working/high_quality_segment
Processing file: /kaggle/working/high_quality_segment/frame0000_leftImg8bit.jpg
No objects found for frame frame0000_leftImg8bit
Processing file: /kaggle/working/high_quality_segment/frame0014_leftImg8bit.jpg
No objects found for frame frame0014_leftImg8bit
Processing file: /kaggle/working/high_quality_segment/frame0119_leftImg8bit.jpg
No objects found for frame frame0119_leftImg8bit
Processing file: /kaggle/working/high_quality_segment/frame0199_leftImg8bit.jpg
No objects found for frame frame0199_leftImg8bit
Processing file: /kaggle/working/high_quality_segment/frame0277_leftImg8bit.jpg
No objects found for frame frame0277_leftImg8bit
Processing file: /kaggle/working/high_quality_segment/frame0340_leftImg8bit.jpg
No objects found for frame frame0340_leftImg8bit
Processing file: /kaggle/working/high_quality_segment/frame0342_leftImg8bit.jpg
No objects found for frame frame0342_leftImg8bit
Processing file: /kaggle/working/high_