<a href="https://colab.research.google.com/github/VISHNU193/ai-text-to-3d-room-furniture/blob/main/ai_textTo3dFurniture.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import numpy as np
import tensorflow as tf
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Input, Conv1D, MaxPooling1D, Embedding, GlobalMaxPooling1D, Dense
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.optimizers import Adam

In [None]:
# Text prompt that is later passed to the text-to-3D sampler.
prompt = "a bookshelf"

# Tiny furniture-themed corpus; it is used below to fit the tokenizer and to
# train the toy CNN classifier.
texts = [
    "The sofa is comfortable and stylish.",
    "The coffee table is made of wood.",
    "The dining chair has a modern design.",
    "I love the armchair with the soft cushions.",
    "The bed frame is sturdy and elegant.",
]

In [None]:
# Hyperparameters for the text CNN

# Every tokenized sentence is padded/truncated to this many tokens.
max_len = 20
# Width of each word-embedding vector.
embedding_dim = 100
# Number of convolution filters in the Conv1D layer.
num_filters = 64
# Number of consecutive tokens covered by one convolution window.
kernel_size = 3
# Window size of the local max-pooling layer.
pool_size = 2
# Number of units in the softmax output layer (two classes here).
num_classes = 2

In [None]:
# Step 1: Build a vocabulary from the corpus and encode each sentence
tokenizer = Tokenizer()
tokenizer.fit_on_texts(texts)

# Word -> integer id mapping learned from `texts`.
word_index = tokenizer.word_index
# Each sentence as a list of integer word ids.
sequences = tokenizer.texts_to_sequences(texts)

In [None]:
# Step 2: Bring every sequence to exactly `max_len` ids.
# 'pre' is the library default for both padding and truncation; it is spelled
# out here so the behaviour is visible at the call site.
X = pad_sequences(
    sequences,
    maxlen=max_len,
    padding='pre',
    truncating='pre',
)

In [None]:
# Step 3: Build the embedding matrix
# NOTE: no pre-trained vectors are actually loaded here. The matrix is a
# placeholder filled with uniform random values in [0, 1). A fixed seed keeps
# the (frozen) embeddings, and therefore the extracted features, identical
# across kernel restarts.
# Shape: one row per vocabulary entry plus one extra row for id 0
# (presumably the padding id, since Keras Tokenizer ids start at 1).
embedding_rng = np.random.default_rng(42)
embedding_matrix = embedding_rng.random((len(word_index) + 1, embedding_dim))

In [None]:
# Step 4: Define the CNN model
# The network consumes fixed-length sequences of integer token ids.
input_layer = Input(shape=(max_len,))

# Look up each token id in the frozen `embedding_matrix` (ids -> vectors).
# `input_length` is intentionally not passed: the sequence length is already
# fixed by `Input(shape=(max_len,))`, and the argument is deprecated in
# Keras 3, where it only triggers a warning.
embedding_layer = Embedding(
    input_dim=len(word_index) + 1,
    output_dim=embedding_dim,
    weights=[embedding_matrix],
    trainable=False,
)(input_layer)

# Convolution over windows of `kernel_size` consecutive tokens, followed by
# local max pooling over `pool_size` neighbouring positions.
conv_layer = Conv1D(filters=num_filters, kernel_size=kernel_size, activation='relu')(embedding_layer)
pooling_layer = MaxPooling1D(pool_size=pool_size)(conv_layer)
# Global max pooling collapses the sequence axis, leaving one value per filter.
global_pool_layer = GlobalMaxPooling1D()(pooling_layer)

# Dense head: a hidden layer followed by a softmax over `num_classes` classes.
dense_layer = Dense(128, activation='relu')(global_pool_layer)
output_layer = Dense(num_classes, activation='softmax')(dense_layer)

# Assemble and compile the classifier.
model = Model(inputs=input_layer, outputs=output_layer)
model.compile(optimizer=Adam(), loss='categorical_crossentropy', metrics=['accuracy'])

# Print the layer-by-layer architecture.
model.summary()



In [None]:
# Step 5: Fit the classifier on the toy corpus

# Class id of each sentence in `texts`, expanded to one-hot rows so the labels
# match the two-unit softmax output and the categorical cross-entropy loss.
class_ids = [0, 0, 1, 0, 1]
y_train = np.eye(2, dtype=int)[class_ids]

model.fit(X, y_train, epochs=5)

Epoch 1/5
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 2s/step - accuracy: 0.6000 - loss: 0.7212
Epoch 2/5
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 143ms/step - accuracy: 0.6000 - loss: 0.5982
Epoch 3/5
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 57ms/step - accuracy: 0.6000 - loss: 0.5216
Epoch 4/5
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 24ms/step - accuracy: 1.0000 - loss: 0.4605
Epoch 5/5
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 23ms/step - accuracy: 1.0000 - loss: 0.4047


<keras.src.callbacks.history.History at 0x7f072c157190>

In [None]:
# Step 6: Feature extraction
# Reuse the trained graph from the input up to the global max-pooling layer
# (i.e. before the dense layers), so each sentence is represented by one
# value per convolution filter.
feature_extractor = Model(inputs=input_layer, outputs=global_pool_layer)
features = feature_extractor.predict(X)

# Report the shape and a short preview instead of dumping the whole array,
# which floods the notebook output.
print("Extracted Features shape:", features.shape)
print("Extracted Features (first row, first 8 values):", features[0, :8])

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 103ms/step
Extracted Features: [[0.8579904  0.         0.84438014 0.24362385 0.9416987  0.
  0.58927816 1.2330464  0.570373   0.         0.8199944  0.0761911
  0.53566563 0.85701823 0.         1.338826   0.         0.
  0.58362424 0.         0.75690717 0.         0.12177208 0.
  0.43856707 0.         0.73598    0.23834656 0.         0.
  0.8073627  0.35936067 1.0743966  0.64308786 0.4568168  0.8663199
  0.820102   0.73997915 1.0766374  0.73505646 0.36141166 0.78407484
  0.5465213  0.         1.320176   0.817257   0.7958217  0.5546915
  0.3235797  1.1808324  1.07332    0.         0.         0.41424534
  0.20478374 0.04249731 0.         0.         0.5658072  0.5435004
  0.953198   0.83658123 0.7971029  1.6962273 ]
 [0.6841751  0.         1.0734409  0.28313476 0.67891484 0.
  0.63860434 1.0897424  0.6862547  0.         0.8958914  0.09105913
  0.20246251 0.7140173  0.         1.338826   0.         0.
  0.52808803 0.         1.11

In [None]:
!git clone https://github.com/openai/shap-e
%cd shap-e
!pip install -e .


Cloning into 'shap-e'...
remote: Enumerating objects: 336, done.[K
remote: Counting objects: 100% (260/260), done.[K
remote: Compressing objects: 100% (241/241), done.[K
remote: Total 336 (delta 41), reused 218 (delta 18), pack-reused 76 (from 1)[K
Receiving objects: 100% (336/336), 11.72 MiB | 26.92 MiB/s, done.
Resolving deltas: 100% (44/44), done.
/content/shap-e/shap-e
Obtaining file:///content/shap-e/shap-e
  Preparing metadata (setup.py) ... [?25l[?25hdone
Collecting clip@ git+https://github.com/openai/CLIP.git (from shap-e==0.0.0)
  Cloning https://github.com/openai/CLIP.git to /tmp/pip-install-5szb3tk3/clip_f51732249e7c4a509909f610ac33fa08
  Running command git clone --filter=blob:none --quiet https://github.com/openai/CLIP.git /tmp/pip-install-5szb3tk3/clip_f51732249e7c4a509909f610ac33fa08
  Resolved https://github.com/openai/CLIP.git to commit dcba3cb2e2827b402d2701e7e1c7d9fed8a20ef1
  Preparing metadata (setup.py) ... [?25l[?25hdone
Installing collected packages: sha

In [None]:
import torch

from shap_e.diffusion.sample import sample_latents
from shap_e.diffusion.gaussian_diffusion import diffusion_from_config
from shap_e.models.download import load_model, load_config
from shap_e.util.notebooks import create_pan_cameras, decode_latent_images, gif_widget

In [None]:
# Run on the GPU when CUDA is available, otherwise fall back to the CPU.
device_name = 'cuda' if torch.cuda.is_available() else 'cpu'
device = torch.device(device_name)

In [None]:
# Load the Shap-E components onto the selected device.

# `xm` is used further down to decode sampled latents into images and meshes.
xm = load_model('transmitter', device=device)

# NOTE: this rebinds `model`, which earlier cells used for the Keras text CNN;
# from here on `model` is the 'text300M' model handed to `sample_latents`.
model = load_model('text300M', device=device)

# Diffusion process built from the 'diffusion' configuration.
diffusion_config = load_config('diffusion')
diffusion = diffusion_from_config(diffusion_config)

  return torch.load(path, map_location=device)


In [None]:
# Sampling configuration.
batch_size = 4
guidance_scale = 15.0

# Every sample in the batch is conditioned on the same text prompt.
text_conditioning = {"texts": [prompt] * batch_size}

latents = sample_latents(
    # What to sample from, and what to condition on.
    model=model,
    diffusion=diffusion,
    model_kwargs=text_conditioning,
    batch_size=batch_size,
    guidance_scale=guidance_scale,
    # Sampler settings.
    use_karras=True,
    karras_steps=64,
    sigma_min=1e-3,
    sigma_max=160,
    s_churn=0,
    clip_denoised=True,
    use_fp16=True,
    progress=True,
)

  0%|          | 0/64 [00:00<?, ?it/s]

In [None]:
# Render settings.
size = 128            # size of the renders; higher values take longer
render_mode = 'nerf'  # can be switched to 'stf'

# One set of panning cameras is shared by all renders.
cameras = create_pan_cameras(size, device)

# Decode each sampled latent into a sequence of frames and show it as a GIF.
for i, latent in enumerate(latents):
    frames = decode_latent_images(xm, latent, cameras, rendering_mode=render_mode)
    display(gif_widget(frames))

HTML(value='<img src="data:image/gif;base64,R0lGODlhQABAAIcAAF83C142C102C102Cl41C101C1w1C1w1Cls1C1s1Cls0Clo0Cl…

HTML(value='<img src="data:image/gif;base64,R0lGODlhQABAAIYAAFU1A1Q0AVM1BFM1A1M1AlM1AVM1AFM0BFM0AlI1BFI1A1I1Al…

HTML(value='<img src="data:image/gif;base64,R0lGODlhQABAAIcAAF0/AF89AF49AF09AFs9AF88AF48AV48AF08AFw8AFs8AFo8AV…

HTML(value='<img src="data:image/gif;base64,R0lGODlhQABAAIcAAF45Il05Il04Il04IVw4IFw4H1s4H1w3IFw3H1s3IFs3H1s3Hl…

In [None]:

# Export every sampled latent as a triangle mesh, once as PLY and once as OBJ.
from shap_e.util.notebooks import decode_latent_mesh

for i, latent in enumerate(latents):
    mesh = decode_latent_mesh(xm, latent).tri_mesh()
    ply_path = f'example_mesh_{i}.ply'
    obj_path = f'example_mesh_{i}.obj'

    # The PLY file is opened in binary mode, the OBJ file in text mode.
    with open(ply_path, 'wb') as ply_file:
        mesh.write_ply(ply_file)
    with open(obj_path, 'w') as obj_file:
        mesh.write_obj(obj_file)



In [None]:

# Run the benchmark script in a separate Python process via the shell.
# NOTE(review): assumes benchmark.py exists in the current working directory
# (an earlier cell changes into the shap-e checkout) -- confirm. The recorded
# run was interrupted (^C) before it reported any results.
!python benchmark.py

Starting AI model benchmark...
Initializing model...
Loading dataset...
Running inference...
Calculating metrics...

Benchmark Progress:

^C
