In [1]:
import pandas as pd
import os
import json

In [2]:
def get_directories(folder_path):
    """Return the names of the immediate sub-directories of ``folder_path``.

    Only entry names are returned (not full paths), in the order reported by
    ``os.listdir``; entries that are not directories are skipped.
    """
    subdir_names = []
    for entry in os.listdir(folder_path):
        candidate = os.path.join(folder_path, entry)
        if os.path.isdir(candidate):
            subdir_names.append(entry)
    return subdir_names

In [3]:
# Dataset root folder; get_labels_path also reads this notebook-level name.
folder_path = './fsoco_bounding_boxes_train'
# Names of the sub-directories found directly under the dataset root.
directories = get_directories(folder_path)

In [4]:
#print(directories)

In [5]:
def get_labels_path(directories, base_path=None):
    """Collect the paths of every ``.json`` annotation file under the given directories.

    Args:
        directories: Names of sub-directories of the dataset root; each one is
            expected to contain an ``ann`` folder.
        base_path: Dataset root that ``directories`` are relative to. When
            omitted, the notebook-level ``folder_path`` variable is used, so
            existing one-argument calls keep working.

    Returns:
        A flat list of ``<root>/<directory>/ann/<file>.json`` paths, in
        ``os.listdir`` order within each directory.

    Raises:
        OSError: Propagated from ``os.listdir`` (e.g. ``FileNotFoundError``)
            when a directory has no ``ann`` sub-folder.
    """
    # Only fall back to the notebook global when no explicit root is supplied.
    root = folder_path if base_path is None else base_path
    labels_path_list = []
    for dir_name in directories:  # `dir_name`, not `dir`: do not shadow the builtin
        labels_path = os.path.join(root, dir_name, "ann")
        labels_path_list.extend(
            os.path.join(labels_path, file_name)
            for file_name in os.listdir(labels_path)
            if file_name.endswith('.json')
        )
    return labels_path_list

In [6]:
# Paths of all annotation JSON files found in each directory's "ann" folder.
JSON_LABELS_PATH = get_labels_path(directories)
print("length", len(JSON_LABELS_PATH))

length 11572


In [7]:
def read_labels_data(json_files):
    """Load annotation files and tag every object with its source file and image size.

    Args:
        json_files: Iterable of paths to annotation ``.json`` files. Each file
            must hold a mapping with an ``"objects"`` list and, when that list
            is non-empty, a ``"size"`` mapping with ``"height"`` and ``"width"``.

    Returns:
        One list per input file, in input order. Each element is an object
        dict from the file, extended with ``"path_to_file"``,
        ``"img_height"`` and ``"img_width"``.

    Raises:
        KeyError: If a file lacks ``"objects"``, or lacks the ``"size"``
            fields while containing at least one object.
    """
    data = []
    for json_file in json_files:
        # JSON text is UTF-8; decode explicitly instead of relying on the
        # platform's default encoding.
        with open(json_file, 'r', encoding='utf-8') as f:
            json_data = json.load(f)
        # The file is closed here; the rest works on the parsed data only.
        objects_collected = json_data["objects"]
        for obj in objects_collected:
            # "size" is looked up per object so a file with no objects is
            # accepted even if it carries no size information.
            obj.update({
                "path_to_file": str(json_file),
                "img_height": json_data["size"]["height"],
                "img_width": json_data["size"]["width"],
            })
        data.append(objects_collected)
    return data

In [8]:
# One list of annotation objects per JSON file, in the same order as JSON_LABELS_PATH.
labels_json_objects = read_labels_data(JSON_LABELS_PATH)

In [9]:
# Inspect the objects loaded from the first annotation file.
labels_json_objects[0]

[{'id': 920614137,
  'classId': 9993511,
  'description': '',
  'geometryType': 'rectangle',
  'labelerLogin': 'vniclas',
  'createdAt': '2022-04-20T07:30:56.238Z',
  'updatedAt': '2022-04-20T07:30:56.238Z',
  'tags': [],
  'classTitle': 'blue_cone',
  'points': {'exterior': [[602, 1039], [728, 1189]], 'interior': []},
  'path_to_file': './fsoco_bounding_boxes_train/pwrrt/ann/pwrrt_00056.png.json',
  'img_height': 1360,
  'img_width': 2200},
 {'id': 920614136,
  'classId': 9993511,
  'description': '',
  'geometryType': 'rectangle',
  'labelerLogin': 'vniclas',
  'createdAt': '2022-04-20T07:30:56.238Z',
  'updatedAt': '2022-04-20T07:30:56.238Z',
  'tags': [],
  'classTitle': 'blue_cone',
  'points': {'exterior': [[728, 935], [800, 1029]], 'interior': []},
  'path_to_file': './fsoco_bounding_boxes_train/pwrrt/ann/pwrrt_00056.png.json',
  'img_height': 1360,
  'img_width': 2200},
 {'id': 920614135,
  'classId': 9993511,
  'description': '',
  'geometryType': 'rectangle',
  'labelerLogin'

In [10]:
def get_required_objects(data_list):
    """Flatten per-file annotation objects into one row per object.

    Each row is a list holding, in order: the object's ``classTitle``, the two
    coordinates of ``points.exterior[0]``, the two coordinates of
    ``points.exterior[1]``, then ``path_to_file``, ``img_height`` and
    ``img_width``.
    """
    rows = []
    for file_objects in data_list:
        for annotation in file_objects:
            label = annotation["classTitle"]
            exterior = annotation["points"]["exterior"]
            first_corner = exterior[0]
            second_corner = exterior[1]
            rows.append([
                label,
                first_corner[0],
                first_corner[1],
                second_corner[0],
                second_corner[1],
                annotation["path_to_file"],
                annotation["img_height"],
                annotation["img_width"],
            ])
    return rows

In [11]:
# Flatten to a list of rows, one per annotated object across all files.
data = get_required_objects(labels_json_objects)

In [12]:
def replace_ann_with_img(path):
    """Map an annotation file path to the path of the image it describes.

    ``<dir>/ann/<image>.json`` becomes ``<dir>/img/<image>``. Only the file's
    parent directory (when it is exactly ``ann``) and a trailing ``.json``
    suffix are changed, so other occurrences of "ann" or ".json" in
    directory or file names are left intact.

    Args:
        path: Path of an annotation file.

    Returns:
        The corresponding image path as a string.
    """
    directory, filename = os.path.split(path)
    parent, leaf = os.path.split(directory)
    if leaf == 'ann':
        # Swap only the annotation folder itself, not every "ann" substring.
        directory = os.path.join(parent, 'img')
    if filename.endswith('.json'):
        # Strip only the trailing suffix.
        filename = filename[:-len('.json')]
    return os.path.join(directory, filename)

# Data Frame

In [13]:
# Column names, in the same order as the values in each row built by get_required_objects.
columns = ['name', 'box_x1', 'box_y1', 'box_x2', 'box_y2', 'path', 'height', 'width']
df = pd.DataFrame(data, columns=columns)

In [14]:
# Quick look at the first two rows and the overall (rows, columns) shape.
print(df.head(2))
print("Shape:", df.shape)

        name  box_x1  box_y1  box_x2  box_y2  \
0  blue_cone     602    1039     728    1189   
1  blue_cone     728     935     800    1029   

                                                path  height  width  
0  ./fsoco_bounding_boxes_train/pwrrt/ann/pwrrt_0...    1360   2200  
1  ./fsoco_bounding_boxes_train/pwrrt/ann/pwrrt_0...    1360   2200  
Shape: (220862, 8)


In [15]:
# Distinct class names present in the 'name' column.
print(df.name.unique())

['blue_cone' 'large_orange_cone' 'yellow_cone' 'orange_cone'
 'unknown_cone']


### Transform

In [16]:
# Cast both text columns to pandas' "string" dtype in a single call.
df = df.astype({'name': "string", 'path': "string"})

In [17]:
# Show the dtype of every column.
df.dtypes

name      string[python]
box_x1             int64
box_y1             int64
box_x2             int64
box_y2             int64
path      string[python]
height             int64
width              int64
dtype: object

In [18]:
# Preview the first five rows.
df.head(5)

Unnamed: 0,name,box_x1,box_y1,box_x2,box_y2,path,height,width
0,blue_cone,602,1039,728,1189,./fsoco_bounding_boxes_train/pwrrt/ann/pwrrt_0...,1360,2200
1,blue_cone,728,935,800,1029,./fsoco_bounding_boxes_train/pwrrt/ann/pwrrt_0...,1360,2200
2,blue_cone,796,876,840,936,./fsoco_bounding_boxes_train/pwrrt/ann/pwrrt_0...,1360,2200
3,blue_cone,806,848,840,892,./fsoco_bounding_boxes_train/pwrrt/ann/pwrrt_0...,1360,2200
4,blue_cone,833,834,859,874,./fsoco_bounding_boxes_train/pwrrt/ann/pwrrt_0...,1360,2200


In [19]:
# Show the 'path' value at index label 0 in full.
df.path[0]

'./fsoco_bounding_boxes_train/pwrrt/ann/pwrrt_00056.png.json'

### yellow_cone

In [None]:
# Take the yellow_cone rows as an independent copy, so the column assignments
# below write to this frame only and do not raise SettingWithCopyWarning.
yellow_cone_df = df[df['name'] == 'yellow_cone'].copy()
yellow_cone_df["class_label"] = 4  # integer class id assigned to yellow_cone
# Image path derived from each annotation path via replace_ann_with_img.
yellow_cone_df['image_path'] = yellow_cone_df['path'].apply(replace_ann_with_img)

In [None]:
# Sanity check: class names present in the subset, then the "count" row of
# describe() (number of non-null values per column).
print(yellow_cone_df.name.unique())
print(yellow_cone_df.describe(include='all').loc["count"])

### blue_cone

In [None]:
# Take the blue_cone rows as an independent copy, so the column assignments
# below write to this frame only and do not raise SettingWithCopyWarning.
blue_cone_df = df[df['name'] == 'blue_cone'].copy()
blue_cone_df["class_label"] = 0  # integer class id assigned to blue_cone
# Image path derived from each annotation path via replace_ann_with_img.
blue_cone_df['image_path'] = blue_cone_df['path'].apply(replace_ann_with_img)

In [None]:
# Sanity check: class names present in the subset, then the "count" row of
# describe() (number of non-null values per column).
print(blue_cone_df.name.unique())
print(blue_cone_df.describe(include='all').loc["count"])

### large_orange_cone

In [None]:
# Take the large_orange_cone rows as an independent copy, so the column
# assignments below write to this frame only and do not raise
# SettingWithCopyWarning.
large_orange_cone_df = df[df['name'] == 'large_orange_cone'].copy()
large_orange_cone_df["class_label"] = 1  # integer class id assigned to large_orange_cone
# Image path derived from each annotation path via replace_ann_with_img.
large_orange_cone_df['image_path'] = large_orange_cone_df['path'].apply(replace_ann_with_img)

In [None]:
# Sanity check: class names present in the subset, then the "count" row of
# describe() (number of non-null values per column).
print(large_orange_cone_df.name.unique())
print(large_orange_cone_df.describe(include='all').loc["count"])

### orange_cone

In [None]:
# Take the orange_cone rows as an independent copy, so the column assignments
# below write to this frame only and do not raise SettingWithCopyWarning.
orange_cone_df = df[df['name'] == 'orange_cone'].copy()
orange_cone_df["class_label"] = 2  # integer class id assigned to orange_cone
# Image path derived from each annotation path via replace_ann_with_img.
orange_cone_df['image_path'] = orange_cone_df['path'].apply(replace_ann_with_img)

In [None]:
# Sanity check: class names present in the subset, then the "count" row of
# describe() (number of non-null values per column).
print(orange_cone_df.name.unique())
print(orange_cone_df.describe(include='all').loc["count"])

### unknown_cone

In [None]:
# Take the unknown_cone rows as an independent copy, so the column assignments
# below write to this frame only and do not raise SettingWithCopyWarning.
unknown_cone_df = df[df['name'] == 'unknown_cone'].copy()
unknown_cone_df["class_label"] = 3  # integer class id assigned to unknown_cone
# Image path derived from each annotation path via replace_ann_with_img.
unknown_cone_df['image_path'] = unknown_cone_df['path'].apply(replace_ann_with_img)

In [None]:
# Sanity check: class names present in the subset, then the "count" row of
# describe() (number of non-null values per column).
print(unknown_cone_df.name.unique())
print(unknown_cone_df.describe(include='all').loc["count"])

#### Combine multiple df

In [None]:
# Stack the per-class frames into one; ignore_index=True renumbers the rows from 0.
combined_df = pd.concat([blue_cone_df, large_orange_cone_df, orange_cone_df, unknown_cone_df, yellow_cone_df], ignore_index=True)

In [None]:
# Display the combined frame.
combined_df

In [None]:
# Write the combined table to disk; index=False leaves the row index out of the file.
combined_df.to_csv("combined.csv", index=False)