In [None]:
import os
import json
from tqdm import tqdm

def extract_valid_furniture_data(json_dir):
    """Collect category, size and jid of every valid furniture entry in a directory.

    Each ``*.json`` file directly inside ``json_dir`` (no recursion) is parsed and
    its top-level ``"furniture"`` list is scanned. An entry is kept only when its
    ``"valid"`` flag is truthy and it carries all of ``"category"``, ``"size"``
    and ``"jid"``. Files that cannot be read or parsed, or whose layout is not
    the expected one, are reported/skipped instead of aborting the whole scan.

    Args:
        json_dir: Path of the directory holding the JSON files.

    Returns:
        A list of ``{"category", "size", "jid"}`` dicts, ordered by file name and
        then by position inside each file.
    """
    result = []

    # os.listdir() yields entries in arbitrary order; sorting keeps the result
    # order reproducible between runs and machines.
    bar = tqdm(sorted(os.listdir(json_dir)), desc="Processing JSON files", unit="file")

    for filename in bar:
        if not filename.endswith(".json"):
            continue

        file_path = os.path.join(json_dir, filename)
        try:
            # Explicit encoding so parsing does not depend on the platform default.
            with open(file_path, 'r', encoding="utf-8") as f:
                data = json.load(f)
        except Exception as e:
            # Best effort: report the unreadable file and keep scanning the rest.
            print(f"Failed to load {filename}: {e}")
            continue

        # A JSON document may legally be a list/number/string; only objects have keys.
        if not isinstance(data, dict):
            print(f"Skipping {filename}: top-level JSON value is not an object")
            continue

        # `or []` also covers an explicit `"furniture": null`.
        furniture_list = data.get("furniture") or []
        if not isinstance(furniture_list, list):
            continue

        for item in furniture_list:
            if not isinstance(item, dict) or not item.get("valid", False):
                continue
            if not all(k in item for k in ("category", "size", "jid")):
                continue
            result.append({
                "category": item["category"],
                "size": item["size"],
                "jid": item["jid"]
            })

    return result

# Usage example
# NOTE(review): absolute, machine-specific path — point it at the local directory of JSON files before running.
json_directory = "/home/eden/Documents/JNU/2025-1/AI-System/AI-System-Project/3D-FRONT"
furniture_data = extract_valid_furniture_data(json_directory)


Processing JSON files:   1%|▏         | 97/6813 [00:03<02:59, 37.38file/s]Exception ignored in: <bound method IPythonKernel._clean_thread_parent_frames of <ipykernel.ipkernel.IPythonKernel object at 0x76ae22364050>>
Traceback (most recent call last):
  File "/home/eden/anaconda3/envs/aisystem/lib/python3.11/site-packages/ipykernel/ipkernel.py", line 775, in _clean_thread_parent_frames
    def _clean_thread_parent_frames(

KeyboardInterrupt: 
Processing JSON files:   8%|▊         | 565/6813 [00:14<02:01, 51.27file/s]

In [5]:
# Save furniture_data as pretty-printed JSON in the current working directory.
output_file = "furniture_data.json"
with open(output_file, 'w') as f:
    json.dump(furniture_data, f, indent=4)


In [1]:
import json
# Load furniture_data back from the JSON file in the current working directory.
output_file = "furniture_data.json"
with open(output_file, 'r') as f:
    furniture_data = json.load(f)

# Report how many records were loaded.
print(f"총 {len(furniture_data)}개의 유효한 가구 데이터가 추출되었습니다.")

# Print the first few records as a sample of the result.
for item in furniture_data[:5]:
    print(item)

총 5831개의 유효한 가구 데이터가 추출되었습니다.
{'category': 'Lighting', 'size': [0.4941799926757813, 0.3422480010986328, 1.2236100006103516], 'jid': '71ab8f57-0cf2-4efb-83bd-6d87258622a6'}
{'category': 'Cabinet/Shelf/Desk', 'size': [0.9, 0.6229999923706054, 0.5], 'jid': '02a64bc1-70d6-4da7-98c8-19038cfe95ec'}
{'category': 'Cabinet/Shelf/Desk', 'size': [0.9, 0.625, 2.291999969482422], 'jid': 'a7cccc53-423b-401c-883d-60b4ed424507'}
{'category': 'Lighting', 'size': [0.28629800796508786, 0.18640800476074218, 1.5544700622558594], 'jid': 'd214e531-662b-42c0-a180-846178536688'}
{'category': 'Lighting', 'size': [0.5009120178222656, 0.4108599853515625, 0.05069169998168945], 'jid': 'b09d1f89-8d93-4263-82d4-264c49bd7ce5'}


In [2]:
from collections import Counter

# Tally how many records fall into each category (keys keep first-seen order).
category_counts = Counter()
category_counts.update(entry["category"] for entry in furniture_data)

# Print one "category: count" line per category.
for category, count in category_counts.items():
    print(f"{category}: {count}")


Lighting: 1511
Cabinet/Shelf/Desk: 2020
Table: 926
Chair: 320
Sofa: 379
Bed: 439
Pier/Stool: 141
Others: 95


In [3]:
from collections import defaultdict

# Unique categories in sorted order, each mapped to its position in that order.
category_set = sorted({item["category"] for item in furniture_data})
category_to_idx = dict(zip(category_set, range(len(category_set))))

# Per-record class index and size, both in the same order as furniture_data.
class_indices = [category_to_idx[item["category"]] for item in furniture_data]
size_list = [item["size"] for item in furniture_data]


In [4]:
import numpy as np
# Pack the per-record sizes into one float32 array and report its global extremes.
size_np = np.array(size_list, dtype=np.float32)
size_min, size_max = size_np.min(), size_np.max()
print("max size:", size_max)
print("min size:", size_min)

# Min-max scale every component with the single global min/max.
normalized_size = (size_np - size_min) / (size_max - size_min)
# Convert the normalized sizes back to nested Python lists.
normalized_size_list = normalized_size.tolist()

max size: 5.52886
min size: 0.00321001


In [5]:
import os

# Three independent, initially empty accumulators that a later cell fills in parallel.
latent_vectors, valid_class_indices, valid_sizes = [], [], []

def make_img_path(jid, model_root="/home/eden/Data/JNU/AI-System/3D-FUTURE-model"):
    """Return the path of the ``image.jpg`` stored in the folder named after ``jid``.

    Args:
        jid: Identifier of the furniture model; used as the sub-folder name.
        model_root: Directory containing one sub-folder per ``jid``. Defaults to
            the location this notebook was originally run against, so existing
            single-argument calls are unaffected.

    Returns:
        ``<model_root>/<jid>/image.jpg`` as a string. The path is only built,
        not checked for existence.
    """
    return os.path.join(model_root, f"{jid}/image.jpg")

# One image path per furniture record, in the same order as furniture_data.
img_paths = list(map(make_img_path, (item["jid"] for item in furniture_data)))

# Cut the paths into consecutive chunks of batch_size; the last chunk may be shorter.
batch_size = 4
batch_img_paths = [
    img_paths[start:start + batch_size]
    for start in range(0, len(img_paths), batch_size)
]



In [6]:
import torch
# Load the cached latent vectors from disk.
# - map_location="cpu" keeps the data on the host whatever device it was saved
#   from; a later cell converts these vectors through NumPy, which needs CPU tensors.
# - weights_only=True limits unpickling to tensors and plain containers instead
#   of executing arbitrary pickled objects from the file.
# NOTE(review): absolute, machine-specific path — adjust before running elsewhere.
saved_latent_vectors = torch.load(
    "/home/eden/Data/JNU/AI-System/latent_vectors__.pt",
    map_location="cpu",
    weights_only=True,
)
print(f"Loaded latent vectors of shape: {saved_latent_vectors.shape}")

Loaded latent vectors of shape: torch.Size([1200, 1048576])


In [7]:
from tqdm import tqdm

# Pair every cached latent vector with the class index and size at the same position.
# NOTE(review): this assumes row i of saved_latent_vectors was computed from
# furniture_data[i] — TODO confirm against the code that produced the cache.
num_latents = len(saved_latent_vectors)
if num_latents > len(class_indices):
    raise ValueError(
        f"{num_latents} latent vectors but only {len(class_indices)} furniture records"
    )

# Rebuild the three lists from scratch rather than appending, so running this
# cell more than once cannot duplicate samples.
latent_vectors = [saved_latent_vectors[i] for i in range(num_latents)]
valid_class_indices = class_indices[:num_latents]
valid_sizes = size_list[:num_latents]


In [8]:
import torch.nn as nn

class FurnitureSizeRegressor(nn.Module):
    """MLP that regresses a size vector from a latent vector and a class index.

    The class index is looked up in a learned embedding table, concatenated to
    the latent vector along the last dimension, and passed through a three-layer
    MLP (hidden widths 256 and 64, ReLU activations).

    Args:
        latent_dim: Length of the latent vector fed to ``forward``.
        class_count: Number of distinct class indices (embedding table rows).
        output_dim: Number of regressed values per sample.
        embed_dim: Width of the class embedding. Kept as one parameter because
            the embedding table and the first linear layer must agree on it.
    """

    def __init__(self, latent_dim, class_count, output_dim=3, embed_dim=64):
        super().__init__()
        self.class_embed = nn.Embedding(class_count, embed_dim)
        self.mlp = nn.Sequential(
            nn.Linear(latent_dim + embed_dim, 256),
            nn.ReLU(),
            nn.Linear(256, 64),
            nn.ReLU(),
            # No output activation: the head emits unbounded real values.
            nn.Linear(64, output_dim),
        )

    def forward(self, latent_vec, class_idx):
        """Predict sizes for a batch.

        Args:
            latent_vec: Float tensor whose last dimension is ``latent_dim``.
            class_idx: Integer tensor of class indices; its shape must match
                ``latent_vec`` without the last dimension so the two can be
                concatenated.

        Returns:
            Tensor whose last dimension is ``output_dim``.
        """
        class_vec = self.class_embed(class_idx)
        x = torch.cat([latent_vec, class_vec], dim=-1)
        return self.mlp(x)


In [9]:
from torch.utils.data import Dataset, DataLoader
import numpy as np

class FurnitureDataset(Dataset):
    """In-memory dataset of (latent vector, class index, size) triples.

    Args:
        latent_vectors: Sequence of equally long latent vectors (nested lists,
            NumPy arrays or torch tensors). Stored as float32.
        class_indices: Sequence of integer class indices. Stored as int32.
        sizes: Sequence of equally long size vectors. Stored as float32.

    Raises:
        ValueError: If the three inputs do not contain the same number of samples.
    """

    def __init__(self, latent_vectors, class_indices, sizes):
        self.latents = self._to_tensor(latent_vectors).float()
        self.classes = self._to_tensor(class_indices).int()
        self.sizes = self._to_tensor(sizes).float()

        # Fail here rather than with an IndexError in the middle of an epoch.
        sample_count = len(self.latents)
        if len(self.classes) != sample_count or len(self.sizes) != sample_count:
            raise ValueError(
                "latent_vectors, class_indices and sizes must have the same length, got "
                f"{sample_count}, {len(self.classes)} and {len(self.sizes)}"
            )

    @staticmethod
    def _to_tensor(values):
        """Convert ``values`` to one tensor, stacking tensor rows directly."""
        if (
            isinstance(values, (list, tuple))
            and len(values) > 0
            and all(isinstance(v, torch.Tensor) for v in values)
        ):
            # A list of tensors is stacked in torch, avoiding the NumPy round-trip.
            return torch.stack([v.detach() for v in values])
        return torch.from_numpy(np.array(values))

    def __len__(self):
        return len(self.latents)

    def __getitem__(self, idx):
        return self.latents[idx], self.classes[idx], self.sizes[idx]

# Example: build the full dataset from the three parallel lists.
dataset = FurnitureDataset(
    latent_vectors=latent_vectors,
    class_indices=valid_class_indices,
    sizes=valid_sizes,
)

# Split the dataset positions into train and test parts (20% test, fixed seed).
from sklearn.model_selection import train_test_split
train_indices, test_indices = train_test_split(
    range(len(dataset)),
    test_size=0.2,
    random_state=42,
)
class FurnitureSubset(Dataset):
    """View over ``dataset`` restricted to the positions listed in ``indices``."""

    def __init__(self, dataset, indices):
        # Keep references only; nothing is copied from the wrapped dataset.
        self.dataset, self.indices = dataset, indices

    def __len__(self):
        # The subset is as long as its index list, not the wrapped dataset.
        return len(self.indices)

    def __getitem__(self, idx):
        # Translate the subset-local position into a position in the wrapped dataset.
        source_idx = self.indices[idx]
        return self.dataset[source_idx]
# Wrap each index split in a subset view over the shared dataset.
train_dataset, test_dataset = (
    FurnitureSubset(dataset, split_indices)
    for split_indices in (train_indices, test_indices)
)

print(f"Train dataset size: {len(train_dataset)}")
print(f"Test dataset size: {len(test_dataset)}")

# Only the training loader shuffles; the test loader keeps a fixed order.
train_dataloader = DataLoader(dataset=train_dataset, batch_size=32, shuffle=True)
test_dataloader = DataLoader(dataset=test_dataset, batch_size=32, shuffle=False)


Train dataset size: 960
Test dataset size: 240


In [10]:
# Derive the model's input sizes from the prepared data.
latent_dim = latent_vectors[0].shape[0]  # dimensionality of one latent vector
class_count = len(category_set)

print(f"Latent dimension: {latent_dim}, Class count: {class_count}")

Latent dimension: 1048576, Class count: 8


In [11]:
def train_model(model, train_dataloader, test_dataloader, epochs=10, lr=1e-3):
    """Train ``model`` with Adam on an MSE objective and evaluate after every epoch.

    The model is trained on whatever device its parameters already live on;
    each batch is moved to that device. Per-epoch losses are the mean of the
    per-batch mean losses and are printed as before.

    Args:
        model: Module called as ``model(latents, classes)``.
        train_dataloader: Sized iterable of ``(latents, classes, sizes)`` batches.
        test_dataloader: Sized iterable of ``(latents, classes, sizes)`` batches.
        epochs: Number of passes over ``train_dataloader``.
        lr: Adam learning rate.

    Returns:
        A list with one dict per epoch: ``{"epoch", "train_loss", "test_loss"}``.
        A loss is ``nan`` when the corresponding dataloader yields no batches.
    """
    device = next(model.parameters()).device
    optimizer = torch.optim.Adam(model.parameters(), lr=lr)
    criterion = nn.MSELoss()
    history = []

    for epoch in range(epochs):
        model.train()
        total_loss = 0.0
        train_bar = tqdm(train_dataloader, total=len(train_dataloader), desc=f"Epoch {epoch+1}/{epochs}", unit="batch")
        for latents, classes, sizes in train_bar:
            latents, classes, sizes = latents.to(device), classes.to(device), sizes.to(device)

            optimizer.zero_grad()
            outputs = model(latents, classes)
            loss = criterion(outputs, sizes)
            loss.backward()
            optimizer.step()

            # Read the scalar once; it feeds both the progress bar and the running sum.
            batch_loss = loss.item()
            train_bar.set_postfix(loss=batch_loss)
            total_loss += batch_loss

        # An empty dataloader has no mean loss; report nan instead of dividing by zero.
        train_batches = len(train_dataloader)
        avg_loss = total_loss / train_batches if train_batches else float("nan")
        print(f"Epoch {epoch+1}/{epochs}, Loss: {avg_loss:.4f}")

        model.eval()
        test_bar = tqdm(test_dataloader, total=len(test_dataloader), desc="Testing", unit="batch")
        with torch.no_grad():
            total_test_loss = 0.0
            for latents, classes, sizes in test_bar:
                latents, classes, sizes = latents.to(device), classes.to(device), sizes.to(device)
                outputs = model(latents, classes)
                loss = criterion(outputs, sizes)
                total_test_loss += loss.item()
            test_batches = len(test_dataloader)
            avg_test_loss = total_test_loss / test_batches if test_batches else float("nan")
            print(f"Test Loss: {avg_test_loss:.4f}")

        history.append({"epoch": epoch + 1, "train_loss": avg_loss, "test_loss": avg_test_loss})

    return history



# Initialize and train the model.
# NOTE(review): latent_dim and class_count are assigned identically in an earlier cell as well.
latent_dim = latent_vectors[0].shape[0]  # dimensionality of one latent vector
class_count = len(category_set)
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model = FurnitureSizeRegressor(latent_dim, class_count).to(device)
train_model(model, train_dataloader, test_dataloader, epochs=10, lr=1e-3)

# Save the trained weights (state_dict only) to the current working directory.
output_model_path = "furniture_size_regressor.pth"
torch.save(model.state_dict(), output_model_path)

Epoch 1/10: 100%|██████████| 30/30 [00:02<00:00, 11.28batch/s, loss=40]    


Epoch 1/10, Loss: 1748.2022


Testing: 100%|██████████| 8/8 [00:00<00:00, 35.03batch/s]


Test Loss: 43.3904


Epoch 2/10: 100%|██████████| 30/30 [00:02<00:00, 11.78batch/s, loss=8.53]


Epoch 2/10, Loss: 18.6746


Testing: 100%|██████████| 8/8 [00:00<00:00, 34.30batch/s]


Test Loss: 2.5804


Epoch 3/10: 100%|██████████| 30/30 [00:02<00:00, 11.76batch/s, loss=5.99]


Epoch 3/10, Loss: 4.7823


Testing: 100%|██████████| 8/8 [00:00<00:00, 34.20batch/s]


Test Loss: 1.5865


Epoch 4/10: 100%|██████████| 30/30 [00:02<00:00, 11.71batch/s, loss=1.56] 


Epoch 4/10, Loss: 2.0714


Testing: 100%|██████████| 8/8 [00:00<00:00, 34.61batch/s]


Test Loss: 0.9172


Epoch 5/10: 100%|██████████| 30/30 [00:02<00:00, 11.77batch/s, loss=0.878]


Epoch 5/10, Loss: 1.0626


Testing: 100%|██████████| 8/8 [00:00<00:00, 34.54batch/s]


Test Loss: 0.9196


Epoch 6/10: 100%|██████████| 30/30 [00:02<00:00, 11.72batch/s, loss=0.509]


Epoch 6/10, Loss: 0.7873


Testing: 100%|██████████| 8/8 [00:00<00:00, 34.66batch/s]


Test Loss: 0.7112


Epoch 7/10: 100%|██████████| 30/30 [00:02<00:00, 11.80batch/s, loss=0.63] 


Epoch 7/10, Loss: 0.5342


Testing: 100%|██████████| 8/8 [00:00<00:00, 34.31batch/s]


Test Loss: 0.8696


Epoch 8/10: 100%|██████████| 30/30 [00:02<00:00, 11.81batch/s, loss=0.301]


Epoch 8/10, Loss: 0.4324


Testing: 100%|██████████| 8/8 [00:00<00:00, 34.34batch/s]


Test Loss: 0.7196


Epoch 9/10: 100%|██████████| 30/30 [00:02<00:00, 11.77batch/s, loss=0.341]


Epoch 9/10, Loss: 0.3174


Testing: 100%|██████████| 8/8 [00:00<00:00, 34.26batch/s]


Test Loss: 0.7210


Epoch 10/10: 100%|██████████| 30/30 [00:02<00:00, 11.76batch/s, loss=0.298]


Epoch 10/10, Loss: 0.2759


Testing: 100%|██████████| 8/8 [00:00<00:00, 34.38batch/s]


Test Loss: 0.6844
