In [7]:
import pandas as pd
import os
from tqdm import tqdm

from PIL import Image
import matplotlib.pyplot as plt
import numpy as np


## load parts data

In [8]:
# Root folder for all input files read below, relative to the notebook's working directory
data_dir = "../data"

In [9]:
# Parts table from the catalog dump (gzip is inferred from the .gz extension)
parts_csv_path = os.path.join(data_dir, "catalog_db", "parts.csv.gz")
parts_meta = pd.read_csv(parts_csv_path)
parts_meta.head()

Unnamed: 0,part_num,name,part_cat_id,part_material
0,3381,Sticker Sheet for Set 663-1,58,Plastic
1,3383,"Sticker Sheet for Sets 618-1, 628-2",58,Plastic
2,3402,"Sticker Sheet for Sets 310-3, 311-1, 312-3",58,Plastic
3,3429,Sticker Sheet for Set 1550-1,58,Plastic
4,3432,"Sticker Sheet for Sets 357-1, 355-1, 940-1",58,Plastic


In [10]:
# Category lookup table (id -> name) from the same catalog dump
part_categories_csv_path = os.path.join(data_dir, "catalog_db", "part_categories.csv.gz")
part_categories = pd.read_csv(part_categories_csv_path)
part_categories.head(10)

Unnamed: 0,id,name
0,1,Baseplates
1,3,Bricks Sloped
2,4,"Duplo, Quatro and Primo"
3,5,Bricks Special
4,6,Bricks Wedged
5,7,Containers
6,8,Technic Bricks
7,9,Plates Special
8,11,Bricks
9,12,Technic Connectors


In [11]:
# Category lookup keyed by category id; `name` is renamed so it does not
# collide with the part's own `name` column during the join.
category_lookup = (
    part_categories
    .set_index('id')
    .rename(columns={'name': 'part_cat_name'})
)

# Attach the category name to every part, then index the table by part number.
# NOTE: this reassigns `parts_meta`, so the cell is meant to be run once per load.
parts_meta = (
    parts_meta
    .join(category_lookup, on='part_cat_id', how='left')
    .set_index('part_num')
)
parts_meta.head()

Unnamed: 0_level_0,name,part_cat_id,part_material,part_cat_name
part_num,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
3381,Sticker Sheet for Set 663-1,58,Plastic,Stickers
3383,"Sticker Sheet for Sets 618-1, 628-2",58,Plastic,Stickers
3402,"Sticker Sheet for Sets 310-3, 311-1, 312-3",58,Plastic,Stickers
3429,Sticker Sheet for Set 1550-1,58,Plastic,Stickers
3432,"Sticker Sheet for Sets 357-1, 355-1, 940-1",58,Plastic,Stickers


In [12]:
# Number of parts in each category, most frequent first
category_counts = parts_meta['part_cat_name'].value_counts()
category_counts

part_cat_name
Minifig Upper Body         6337
Duplo, Quatro and Primo    4758
Minifig Heads              4405
Non-LEGO                   3963
Stickers                   3881
                           ... 
Technic Axles                27
Technic Bricks               27
Tools                        22
Znap                         22
Technic Bushes                7
Name: count, Length: 66, dtype: int64

In [13]:
# Load the image file for every part as a NumPy array. Parts without a file get
# None so that the list stays aligned with parts_meta's index.
part_images = []

for part_num in tqdm(parts_meta.index.values):
    part_filename = os.path.join(data_dir, "ldraw_images", f"{part_num}.png")
    if os.path.exists(part_filename):
        # The context manager releases the file handle as soon as the pixels
        # have been copied into the array, instead of leaving it to garbage
        # collection while tens of thousands of files are being visited.
        with Image.open(part_filename) as part_image:
            part_images.append(np.array(part_image))
    else:
        part_images.append(None)

parts_meta['part_image'] = part_images
    

100%|██████████| 52771/52771 [00:40<00:00, 1297.29it/s]


In [14]:
def _shape_or_none(part_image):
    """Return the image array's shape, or None for a part without an image."""
    if part_image is None:
        return None
    return part_image.shape


parts_meta['part_image_shape'] = parts_meta['part_image'].map(_shape_or_none)

In [15]:
# Category breakdown restricted to the parts that have an image loaded
has_image = parts_meta['part_image'].notna()
parts_meta.loc[has_image, 'part_cat_name'].value_counts()

part_cat_name
Minifig Accessories                             663
Minifig Headwear                                339
Large Buildable Figures                         303
Plants and Animals                              253
Windows and Doors                               247
                                               ... 
HO Scale                                          3
Modulex                                           2
Non-Buildable Figures (Duplo, Fabuland, etc)      1
Minidoll Lower Body                               1
Minidoll Heads                                    1
Name: count, Length: 65, dtype: int64

In [16]:
# Distribution of image array shapes (parts without an image are dropped by value_counts)
shape_counts = parts_meta['part_image_shape'].value_counts()
shape_counts

part_image_shape
(500, 500, 3)    5410
(75, 100, 3)      156
(250, 250, 3)      31
(200, 200, 3)      12
(75, 100)           5
(187, 250, 3)       1
(500, 500)          1
Name: count, dtype: int64

### inspect part images

In [54]:
import numpy as np
import cv2
from PIL import Image, ImageDraw, ImageFont

In [55]:
# Function to add text to an image
def add_text_to_image(image, text, position, font_size, color):
    """Draw ``text`` onto ``image`` and return the same image object.

    Parameters
    ----------
    image : PIL.Image.Image
        Image to annotate. It is modified in place.
    text : str
        Text to draw.
    position : tuple
        Coordinates passed to ``ImageDraw.text`` as the text anchor.
    font_size : int
        Requested size of the default font. Honoured when the installed
        Pillow lets ``ImageFont.load_default`` take a size; otherwise the
        fixed-size built-in font is used.
    color
        Fill colour passed to ``ImageDraw.text``.

    Returns
    -------
    PIL.Image.Image
        The ``image`` argument, after drawing.
    """
    draw = ImageDraw.Draw(image)
    try:
        # Previously font_size was accepted but silently ignored.
        font = ImageFont.load_default(size=font_size)
    except (TypeError, ImportError, OSError):
        # Older Pillow (no `size` argument) or a build without FreeType:
        # fall back to the legacy fixed-size default font.
        font = ImageFont.load_default()
    draw.text(position, text, fill=color, font=font)
    return image

# Main loop: show each part image with its metadata overlaid, one at a time.
# Spacebar advances to the next part; any other key stops the viewer.
try:
    for index, row in parts_meta.iterrows():
        if row['part_image'] is None:
            continue

        # Some stored arrays are 2-D (single channel). Normalise to RGB so the
        # RGB->BGR conversion below always receives a 3-channel image.
        image = Image.fromarray(row['part_image']).convert('RGB')

        # Add text
        image = add_text_to_image(image, f"Part Num: {index}", (10, 10), 12, 'black')
        image = add_text_to_image(image, f"Name: {row['name']}", (10, 40), 12, 'black')
        image = add_text_to_image(image, f"Category: {row['part_cat_name']}", (10, 70), 12, 'black')

        # Convert to format suitable for OpenCV
        image = cv2.cvtColor(np.array(image), cv2.COLOR_RGB2BGR)

        # Display image
        cv2.imshow('Part Image', image)

        # Wait for user input; break the loop if the key is not spacebar.
        # Only the low byte is compared, since some backends return the key
        # code with extra high bits set.
        key = cv2.waitKey(0) & 0xFF
        if key != 32:  # 32 is the ASCII code for spacebar
            break
finally:
    # Always close the viewer window, even if an iteration raised.
    cv2.destroyAllWindows()