# In [None]:
# Full Implementation: Deep Learning + RL + Constraint Optimization

import numpy as np
import gym
from gym import spaces
import random
import matplotlib.pyplot as plt
import torch
import torch.nn as nn
import torch.optim as optim

# --- Load SYNBUILD-3D like data (simulated here) ---
def load_layout_data(num_samples=100):
    """Return ``num_samples`` simulated room layouts.

    Stand-in for a real dataset loader: every layout is a separate
    10x10 NumPy array filled with zeros.
    """
    layouts = []
    for _ in range(num_samples):
        layouts.append(np.zeros((10, 10)))
    return layouts

# Furniture catalogue keyed by integer item id. Each entry carries a display
# name and a 'size' tuple, which is_valid_placement unpacks as (h, w).
FURNITURE_ITEMS = {
    item_id: {'name': label, 'size': dims}
    for item_id, label, dims in (
        (1, 'bed', (2, 3)),
        (2, 'table', (2, 2)),
        (3, 'wardrobe', (1, 2)),
        (4, 'chair', (1, 1)),
    )
}

# --- Constraint-Based Validator ---
def is_valid_placement(layout, x, y, item):
    """Return True if ``item`` fits on ``layout`` with its top-left cell at (x, y).

    Args:
        layout: 2-D NumPy grid; a cell value of 0 means the cell is free.
        x: Column index of the item's top-left cell.
        y: Row index of the item's top-left cell.
        item: Mapping with a 'size' entry that unpacks as (h, w).

    Returns:
        bool: False when the footprint leaves the grid or overlaps any
        non-zero cell, True otherwise.
    """
    h, w = item['size']
    rows, cols = layout.shape[:2]

    # Negative coordinates would be interpreted by NumPy as "from the end",
    # giving an empty or wrapped slice that looks free; reject them outright.
    if x < 0 or y < 0:
        return False

    # Bounds come from the layout itself rather than a fixed 10x10 grid.
    if x + w > cols or y + h > rows:
        return False

    footprint = layout[y:y + h, x:x + w]
    return not np.any(footprint != 0)

# --- Deep Learning Policy Network ---
class PolicyNetwork(nn.Module):
    """Convolutional policy that scores every (furniture, position) action.

    Two 3x3 convolutions with padding 1 feed two fully connected layers.
    The first linear layer expects 64 * 10 * 10 features, so inputs are
    single-channel 10x10 grids. The output holds 400 logits
    (4 furniture * 100 positions).
    """

    def __init__(self):
        super().__init__()
        self.conv1 = nn.Conv2d(in_channels=1, out_channels=32, kernel_size=3, padding=1)
        self.conv2 = nn.Conv2d(in_channels=32, out_channels=64, kernel_size=3, padding=1)
        self.fc1 = nn.Linear(in_features=64 * 10 * 10, out_features=512)
        self.fc2 = nn.Linear(in_features=512, out_features=400)

    def forward(self, x):
        """Map a batch of grids to raw (unnormalised) action logits."""
        feature_maps = self.conv2(self.conv1(x).relu()).relu()
        # Collapse channels and spatial dims into one feature vector per sample.
        flattened = feature_maps.view(-1, self.fc1.in_features)
        hidden = self.fc1(flattened).relu()
        return self.fc2(hidden)

# --- Gym Env with Constraints ---
class HousingEnv(gym.Env):
    """Grid environment in which an agent places furniture into a room.

    An action is a single integer decoded as
    ``item_id = action // 100 + 1`` and ``position = action % 100``, with
    ``x = position % 10`` and ``y = position // 10``. A valid placement
    writes the item's id into the covered cells and is rewarded with the
    covered area; an invalid one is penalised with -3 and leaves the grid
    unchanged. The episode finishes once five items have been placed.

    Observations are float32 copies of the grid, so they match
    ``observation_space`` and callers cannot alias the internal state.
    """

    def __init__(self, room_layouts):
        """Store the candidate layouts, declare the spaces and start an episode.

        Args:
            room_layouts: Sequence of 2-D grids; one is picked at random on
                every ``reset``.
        """
        super(HousingEnv, self).__init__()
        self.room_layouts = room_layouts
        self.action_space = spaces.Discrete(len(FURNITURE_ITEMS) * 100)
        # Cells hold 0 (free) or a furniture id, so the upper bound has to be
        # the largest id rather than 1.
        self.observation_space = spaces.Box(
            low=0, high=max(FURNITURE_ITEMS), shape=(10, 10), dtype=np.float32
        )
        self.reset()

    def _observation(self):
        """Return a float32 copy of the current grid."""
        return np.array(self.layout, dtype=np.float32)

    def reset(self):
        """Start a new episode on a randomly chosen layout and return the observation."""
        # Copy so that placing furniture never modifies the shared layout pool.
        self.layout = random.choice(self.room_layouts).copy()
        self.placed_items = []
        self.done = False
        return self._observation()

    def step(self, action):
        """Try to place the item encoded by ``action``.

        Returns:
            tuple: ``(observation, reward, done, info)`` where ``info`` is an
            empty dict.
        """
        item_index, pos_index = divmod(action, 100)
        y, x = divmod(pos_index, 10)
        item_id = item_index + 1

        item = FURNITURE_ITEMS.get(item_id)
        if not item or not is_valid_placement(self.layout, x, y, item):
            # Unknown item or blocked/out-of-bounds footprint: penalise, keep going.
            return self._observation(), -3, False, {}

        h, w = item['size']
        self.layout[y:y+h, x:x+w] = item_id
        self.placed_items.append((item_id, (x, y)))

        reward = w * h
        self.done = len(self.placed_items) >= 5
        return self._observation(), reward, self.done, {}

    def render(self):
        """Show the current grid as a colour-coded image."""
        plt.imshow(self.layout, cmap='tab20')
        plt.title("Optimized Layout")
        plt.colorbar()
        plt.show()

# --- Training Loop (RL) ---
def train_policy(env, policy_net, optimizer, episodes=10):
    """Train ``policy_net`` on ``env`` with a per-step policy-gradient update.

    For every step an action is sampled from the softmax of the network's
    logits, the environment is advanced, and the loss
    ``-log pi(action) * reward`` is minimised immediately. After each
    episode the total reward is printed and the environment is rendered.

    Args:
        env: Object exposing ``reset()``, ``step(action)`` returning
            ``(state, reward, done, info)``, and ``render()``.
        policy_net: Module mapping a ``(1, 1, H, W)`` float tensor to a
            ``(1, num_actions)`` tensor of logits.
        optimizer: Optimizer over ``policy_net``'s parameters.
        episodes: Number of episodes to run.
    """
    for ep in range(episodes):
        state = env.reset()
        done = False
        total_reward = 0

        while not done:
            # torch.tensor always copies, so in-place changes the environment
            # makes to its grid during step() cannot alter the input that
            # autograd saved for backward().
            state_tensor = torch.tensor(state, dtype=torch.float32).unsqueeze(0).unsqueeze(0)
            logits = policy_net(state_tensor)

            # log_softmax is computed in a numerically stable way, unlike
            # taking log() of a softmax output that may be vanishingly small.
            log_probs = torch.log_softmax(logits, dim=1)

            # Sampling needs no gradient, so work on detached probabilities.
            sampling_probs = torch.softmax(logits.detach(), dim=1)
            action = torch.multinomial(sampling_probs, 1).item()

            next_state, reward, done, _ = env.step(action)
            loss = -log_probs[0, action] * reward

            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

            total_reward += reward
            state = next_state

        print(f"Episode {ep+1}, Reward: {total_reward}")
        env.render()

# --- Main Execution ---
# Build the agent and its optimizer, then the environment over the simulated
# layouts, and run a short training session.
policy_net = PolicyNetwork()
optimizer = optim.Adam(policy_net.parameters(), lr=1e-3)

layouts = load_layout_data()
env = HousingEnv(layouts)

train_policy(env=env, policy_net=policy_net, optimizer=optimizer, episodes=5)
