In [None]:
# Monter Google Drive pour sauvegarder les modèles
from google.colab import drive
drive.mount('/content/drive')

# Installer PyTorch et autres bibliothèques nécessaires
#!pip install torch torchvision numpy matplotlib
#!pip install trimesh
#!pip install tqdm


# Télécharger et extraire le dataset Pix3D

!wget http://pix3d.csail.mit.edu/data/pix3d.zip
!unzip pix3d.zip && rm pix3d.zip


[1;30;43mLe flux de sortie a été tronqué et ne contient que les 5000 dernières lignes.[0m
  inflating: mask/sofa/1654.png      
  inflating: mask/sofa/1592.png      
  inflating: mask/sofa/1235.png      
  inflating: mask/sofa/0807.png      
  inflating: mask/sofa/0475.png      
  inflating: mask/sofa/0303.png      
  inflating: mask/sofa/1931.png      
  inflating: mask/sofa/1543.png      
  inflating: mask/sofa/1122.png      
  inflating: mask/sofa/1685.png      
  inflating: mask/sofa/0762.png      
  inflating: mask/sofa/0989.png      
  inflating: mask/sofa/0958.png      
  inflating: mask/sofa/1653.png      
  inflating: mask/sofa/0013.png      
  inflating: mask/sofa/0472.png      
  inflating: mask/sofa/0800.png      
  inflating: mask/sofa/1232.png      
 extracting: mask/sofa/1595.png      
 extracting: mask/sofa/1544.png      
 extracting: mask/sofa/1936.png      
  inflating: mask/sofa/0304.png      
  inflating: mask/sofa/0765.png      
  inflating: mask/sofa/1682.png   

In [None]:
!pip install trimesh

Collecting trimesh
  Downloading trimesh-4.5.3-py3-none-any.whl.metadata (18 kB)
Downloading trimesh-4.5.3-py3-none-any.whl (704 kB)
[?25l   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/704.8 kB[0m [31m?[0m eta [36m-:--:--[0m[2K   [91m━━━━━━━━━━━━━━━━━[0m[90m╺[0m[90m━━━━━━━━━━━━━━━━━━━━━━[0m [32m307.2/704.8 kB[0m [31m8.9 MB/s[0m eta [36m0:00:01[0m[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m704.8/704.8 kB[0m [31m11.5 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: trimesh
Successfully installed trimesh-4.5.3


## **1. Importation des bibliothèques**


---

### Cette cellule importe les bibliothèques nécessaires et sélectionne l'appareil de calcul (GPU si disponible, sinon CPU)

In [None]:
import os
import json
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
from torchvision import transforms
from torch.utils.data import DataLoader, Dataset
import scipy.io as sio
from pathlib import Path
from PIL import Image
import matplotlib.pyplot as plt

# Pick the compute device: the CUDA GPU when one is available, otherwise the CPU
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")
print(f"Utilisation de l'appareil : {device}")

Utilisation de l'appareil : cpu


## **Héritage de Dataset**

---


#### Dataset : Classe de base de PyTorch pour les ensembles de données.
#### Permet d'utiliser ce dataset avec un `DataLoader` pour faciliter le traitement par lot.

In [None]:
class Pix3DDataset(Dataset):
    """Image/voxel pairs listed in a metadata JSON file, exposed as a PyTorch ``Dataset``.

    Each item is a dict with two keys:
        ``'img'``   -- the image converted to RGB, after the optional ``transform``.
        ``'voxel'`` -- the ``'voxel'`` array of the sample's .mat file as a float32 tensor.

    Args:
        dataset_path: Root directory of the extracted dataset.
        json_file: Metadata file name, relative to ``dataset_path``.
        transform: Optional callable applied to the RGB PIL image.
    """

    def __init__(self, dataset_path, json_file, transform=None):
        self.dataset_path = Path(dataset_path)
        self.json_file = self.dataset_path / json_file
        self.transform = transform
        # Metadata is read once at construction; files are only opened in __getitem__.
        self.data = self.load_metadata()

    def load_metadata(self):
        """Return the usable samples described by the metadata file.

        Entries whose image or voxel file is missing on disk are silently skipped.

        Returns:
            list[dict]: one ``{'img': Path, 'voxel': Path}`` dict per usable sample.
        """
        with open(self.json_file, 'r') as f:
            metadata = json.load(f)
        data = []
        for entry in metadata:
            img_path = self.dataset_path / entry['img']
            voxel_path = self.dataset_path / entry['voxel']
            if img_path.exists() and voxel_path.exists():
                data.append({
                    'img': img_path,
                    'voxel': voxel_path
                })
        return data

    def __len__(self):
        """Number of usable samples."""
        return len(self.data)

    def __getitem__(self, idx):
        """Load sample ``idx`` from disk and return ``{'img': ..., 'voxel': ...}``."""
        entry = self.data[idx]

        # Image.open is lazy and keeps the file handle open. convert() returns an
        # independent in-memory copy, so the handle can be closed right away instead of
        # leaking one descriptor per sample. Converting to RGB also guarantees a
        # consistent 3-channel layout (grayscale / RGBA / palette inputs included).
        with Image.open(entry['img']) as raw_img:
            img = raw_img.convert('RGB')

        if self.transform is not None:
            img = self.transform(img)

        voxel = sio.loadmat(entry['voxel'])['voxel']
        voxel = torch.tensor(voxel, dtype=torch.float32)
        return {'img': img, 'voxel': voxel}


# **Classe VAE**

---

### Ce code implémente un auto-encodeur variationnel (VAE) avec PyTorch : un encodeur convolutif 2D pour l'image et un décodeur convolutif 3D pour la grille de voxels

In [None]:
class VAE(nn.Module):
    """Variational auto-encoder mapping an RGB image to a voxel occupancy grid.

    The encoder is a stack of three stride-2 2D convolutions (each halves the spatial
    size), so the linear heads, which expect ``256 * 8 * 8`` features, require 64x64
    inputs. The decoder starts from a 4x4x4 volume and applies three stride-2 3D
    transposed convolutions (each doubles the size), yielding a 32x32x32 grid whose
    values lie in (0, 1) because of the final sigmoid.

    Args:
        latent_dim: Size of the latent vector. Defaults to 256, the value that was
            previously hard-coded, so existing checkpoints and callers are unaffected.
    """

    def __init__(self, latent_dim=256):
        super().__init__()
        self.latent_dim = latent_dim

        # Encoder: (N, 3, 64, 64) -> (N, 256, 8, 8)
        self.encoder = nn.Sequential(
            nn.Conv2d(3, 64, 4, stride=2, padding=1),
            nn.ReLU(),
            nn.Conv2d(64, 128, 4, stride=2, padding=1),
            nn.ReLU(),
            nn.Conv2d(128, 256, 4, stride=2, padding=1),
            nn.ReLU()
        )

        self.fc_mu = nn.Linear(256 * 8 * 8, latent_dim)  # latent mean
        self.fc_logvar = nn.Linear(256 * 8 * 8, latent_dim)  # latent log-variance

        # Decoder: latent vector -> (N, 256, 4, 4, 4) -> (N, 1, 32, 32, 32)
        self.fc_decode = nn.Linear(latent_dim, 256 * 4 * 4 * 4)
        self.decoder = nn.Sequential(
            nn.ConvTranspose3d(256, 128, 4, stride=2, padding=1),
            nn.ReLU(),
            nn.ConvTranspose3d(128, 64, 4, stride=2, padding=1),
            nn.ReLU(),
            nn.ConvTranspose3d(64, 1, 4, stride=2, padding=1),
            nn.Sigmoid()
        )

    def reparameterize(self, mu, logvar):
        """Sample ``z = mu + eps * std`` with ``eps ~ N(0, I)``.

        Writing the sample this way keeps it differentiable with respect to ``mu``
        and ``logvar``.
        """
        std = torch.exp(0.5 * logvar)  # logvar = log(sigma^2)  =>  sigma = exp(logvar / 2)
        eps = torch.randn_like(std)
        return mu + eps * std

    def forward(self, x):
        """Encode ``x``, sample a latent vector and decode it.

        Args:
            x: Image batch of shape (N, 3, 64, 64).

        Returns:
            tuple: ``(output, mu, logvar)`` where ``output`` has shape
            (N, 1, 32, 32, 32) and ``mu`` / ``logvar`` have shape (N, latent_dim).
            A latent vector is sampled on every call, including in eval mode.
        """
        # Encode
        encoded = self.encoder(x)
        encoded = encoded.view(encoded.size(0), -1)
        mu = self.fc_mu(encoded)
        logvar = self.fc_logvar(encoded)

        # Reparameterization
        z = self.reparameterize(mu, logvar)

        # Decode
        decoded = self.fc_decode(z)
        decoded = decoded.view(-1, 256, 4, 4, 4)
        output = self.decoder(decoded)
        return output, mu, logvar


def vae_loss(recon_x, x, mu, logvar):
    """Negative ELBO averaged over the batch: reconstruction BCE + KL divergence.

    Both terms are summed over all of their elements and then divided by the batch
    size, so they are on the same per-sample scale. (Averaging the BCE over every
    voxel while summing the KL term makes the KL term outweigh the reconstruction
    term by several orders of magnitude.)

    Args:
        recon_x: Predicted occupancy probabilities in [0, 1], same shape as ``x``.
        x: Target occupancies in [0, 1]; the first dimension is the batch.
        mu: Latent means, shape (N, latent_dim).
        logvar: Latent log-variances, shape (N, latent_dim).

    Returns:
        Scalar tensor: per-sample average of (BCE summed over voxels + KL divergence).
    """
    batch_size = x.size(0)
    recon_loss = nn.functional.binary_cross_entropy(recon_x, x, reduction='sum')
    # Closed-form KL( N(mu, sigma^2) || N(0, I) ), summed over batch and latent dims
    kld_loss = -0.5 * torch.sum(1 + logvar - mu.pow(2) - logvar.exp())
    return (recon_loss + kld_loss) / batch_size


## **Boucle d'entrainement**

---

###### Le modèle est entraîné pendant 50 époques sur les données d'entrée :
#### Dataset : convertit les images en tenseurs avec ToTensor, les redimensionne en 64×64, puis normalise les valeurs dans [-1, 1].
#### Initialisation du modèle VAE

In [None]:

# Load the dataset.
# ToTensor yields values in [0, 1]; Normalize with mean 0.5 / std 0.5 maps them to [-1, 1].
transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Resize((64, 64)),
    transforms.Normalize((0.5,), (0.5,))
])
dataset = Pix3DDataset('pix3d', 'pix3d.json', transform=transform)
dataloader = DataLoader(dataset, batch_size=32, shuffle=True)

# Model and optimizer
vae = VAE().to(device)
optimizer = optim.Adam(vae.parameters(), lr=0.0002)

# Checkpoint location, prepared before training so that a checkpoint can be written
# after every epoch: a crash or a disconnected session then loses at most one epoch.
output_dir = './vae_models'
os.makedirs(output_dir, exist_ok=True)
vae_path = os.path.join(output_dir, 'vae_model.pth')

# Training loop
num_epochs = 50
for epoch in range(num_epochs):
    vae.train()
    epoch_loss = 0.0
    for step, batch in enumerate(dataloader):
        imgs = batch['img'].to(device)
        # Add a channel dimension: (N, D, H, W) -> (N, 1, D, H, W)
        voxels = batch['voxel'].float().unsqueeze(1).to(device)

        # Resample the voxel grids so that they match the decoder output (32x32x32)
        voxels = torch.nn.functional.interpolate(voxels, size=(32, 32, 32), mode='trilinear', align_corners=False)

        optimizer.zero_grad()
        recon_voxels, mu, logvar = vae(imgs)
        loss = vae_loss(recon_voxels, voxels, mu, logvar)
        loss.backward()
        optimizer.step()

        epoch_loss += loss.item()

        # Report progress every 10 steps
        if step % 10 == 0:
            print(f"Epoch [{epoch + 1}/{num_epochs}], Step [{step}/{len(dataloader)}], Loss: {loss.item():.4f}")

    print(f"Epoch [{epoch + 1}/{num_epochs}], Loss: {epoch_loss / len(dataloader):.4f}")

    # Overwrite the checkpoint with the latest weights; after the last epoch the file
    # holds the final model, exactly as a single save after the loop would.
    torch.save(vae.state_dict(), vae_path)

print(f"Modèle VAE sauvegardé à : {vae_path}")

# Génération d'un modèle 3D
def generate_3d_model(image_path, vae, output_path):
    """Run one image through the VAE and export the generated voxel grid.

    Args:
        image_path: Path of the input image; it is converted to RGB before use.
        vae: Model called as ``vae(batch)`` and returning three values, the first
            being the batch of generated voxel grids. It is switched to eval mode.
        output_path: Destination passed to ``save_as_obj`` together with the first
            (and only) generated grid of the batch.
    """
    # Same preprocessing as for training: tensor conversion, 64x64 resize, [-1, 1] scaling
    preprocess = transforms.Compose([
        transforms.ToTensor(),
        transforms.Resize((64, 64)),
        transforms.Normalize((0.5,), (0.5,))
    ])
    rgb_image = Image.open(image_path).convert('RGB')
    # unsqueeze(0) adds the batch dimension
    batch = preprocess(rgb_image).unsqueeze(0).to(device)

    vae.eval()
    with torch.no_grad():
        voxel_batch, _mu, _logvar = vae(batch)
        save_as_obj(voxel_batch[0], output_path)






Epoch [1/50], Step [0/315], Loss: 5.2453
Epoch [1/50], Step [10/315], Loss: 0.7673
Epoch [1/50], Step [20/315], Loss: 0.5410
Epoch [1/50], Step [30/315], Loss: 0.3830
Epoch [1/50], Step [40/315], Loss: 0.3726




Epoch [1/50], Step [50/315], Loss: 0.3102
Epoch [1/50], Step [60/315], Loss: 0.3782
Epoch [1/50], Step [70/315], Loss: 0.3377
Epoch [1/50], Step [80/315], Loss: 0.3247
Epoch [1/50], Step [90/315], Loss: 0.3523
Epoch [1/50], Step [100/315], Loss: 0.3408
Epoch [1/50], Step [110/315], Loss: 0.3491
Epoch [1/50], Step [120/315], Loss: 0.3152
Epoch [1/50], Step [130/315], Loss: 0.2642
Epoch [1/50], Step [140/315], Loss: 0.3023
Epoch [1/50], Step [150/315], Loss: 0.2582
Epoch [1/50], Step [160/315], Loss: 0.2677
Epoch [1/50], Step [170/315], Loss: 0.3434
Epoch [1/50], Step [180/315], Loss: 0.2409
Epoch [1/50], Step [190/315], Loss: 0.2963
Epoch [1/50], Step [200/315], Loss: 0.2376
Epoch [1/50], Step [210/315], Loss: 0.2212
Epoch [1/50], Step [220/315], Loss: 0.3241
Epoch [1/50], Step [230/315], Loss: 0.3274
Epoch [1/50], Step [240/315], Loss: 0.2267
Epoch [1/50], Step [250/315], Loss: 0.2468
Epoch [1/50], Step [260/315], Loss: 0.1993
Epoch [1/50], Step [270/315], Loss: 0.2592
Epoch [1/50], St

ValueError: The truth value of an array with more than one element is ambiguous. Use a.any() or a.all()

## Sauvegarde des modèles 3D au format .obj


## Appel de la fonction de génération pour générer le modèle 3D à partir d'une image 2D et le sauvegarder

In [None]:
# Exemple de génération
generate_3d_model('pix3d/img/bed/0001.png', vae, './generated_vae_model1.obj')
print("Modèle 3D généré et sauvegardé.")

Modèle 3D généré et sauvegardé.
