# Example usage of [stable-dreamfusion](https://github.com/ashawkey/stable-dreamfusion)

### Check the machine

In [None]:
# Print the attached GPU, the driver / CUDA versions and the current GPU memory usage.
! nvidia-smi

Tue Feb 21 20:44:34 2023       
+-----------------------------------------------------------------------------+
| NVIDIA-SMI 510.47.03    Driver Version: 510.47.03    CUDA Version: 11.6     |
|-------------------------------+----------------------+----------------------+
| GPU  Name        Persistence-M| Bus-Id        Disp.A | Volatile Uncorr. ECC |
| Fan  Temp  Perf  Pwr:Usage/Cap|         Memory-Usage | GPU-Util  Compute M. |
|                               |                      |               MIG M. |
|   0  Tesla T4            Off  | 00000000:00:04.0 Off |                    0 |
| N/A   74C    P0    19W /  70W |      0MiB / 15360MiB |      0%      Default |
|                               |                      |                  N/A |
+-------------------------------+----------------------+----------------------+
                                                                               
+-----------------------------------------------------------------------------+
| Proces

### Setup

In [None]:
#@title install dependencies
! git clone https://github.com/ashawkey/stable-dreamfusion.git

%cd stable-dreamfusion

# install requirements
! pip install -r requirements.txt
! pip install git+https://github.com/NVlabs/nvdiffrast/

Cloning into 'stable-dreamfusion'...
remote: Enumerating objects: 496, done.[K
remote: Counting objects: 100% (256/256), done.[K
remote: Compressing objects: 100% (91/91), done.[K
remote: Total 496 (delta 193), reused 179 (delta 165), pack-reused 240[K
Receiving objects: 100% (496/496), 215.05 KiB | 3.84 MiB/s, done.
Resolving deltas: 100% (308/308), done.
/content/stable-dreamfusion
Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Collecting torch-ema
  Downloading torch_ema-0.3-py3-none-any.whl (5.5 kB)
Collecting ninja
  Downloading ninja-1.11.1-py2.py3-none-manylinux_2_12_x86_64.manylinux2010_x86_64.whl (145 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m146.0/146.0 KB[0m [31m2.9 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting trimesh
  Downloading trimesh-3.20.0-py3-none-any.whl (676 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m676.6/676.6 KB[0m [31m18.7 MB/s[0m eta [36m0:00:00[0m


### Training & Testing
* First time training will take some time to build the CUDA extensions.
* It takes about 0.7s per training step, so 5000 training steps take around 1 hour to finish (the `Training_iters = 2000` set below takes roughly 25 minutes). A larger `Training_iters` usually leads to better results.
* If you run into a CUDA out-of-memory (OOM) error, try decreasing `Max_steps` and `Training_nerf_resolution`.
* If the NeRF fails to learn anything (empty scene, only background), try decreasing `Lambda_entropy`, which regularizes the learned opacity.

In [None]:
#@markdown ####**Training Settings:**
import shlex

# Form values below are interpolated into the `%run main.py ...` command line of
# the training cell via IPython's {name} syntax.
Prompt_text = "a DSLR photo of a delicious hamburger" #@param {type: 'string'}
Training_iters = 2000 #@param {type: 'integer'}
Learning_rate = 1e-3 #@param {type: 'number'}
Training_nerf_resolution = 64  #@param {type: 'integer'}
# CUDA_ray = True #@param {type: 'boolean'}
# View_dependent_prompt = True #@param {type: 'boolean'}
# FP16 = True #@param {type: 'boolean'}
Seed = 0 #@param {type: 'integer'}
Lambda_entropy = 1e-4 #@param {type: 'number'}
# Max_steps is a step count, so it is declared as an integer field like the
# other count-valued settings.
Max_steps = 128 #@param {type: 'integer'}
Checkpoint = 'latest' #@param {type: 'string'}

#@markdown ---

#@markdown ####**Output Settings:**
Workspace = "trial" #@param {type: 'string'}
# Save_mesh = True #@param {type: 'boolean'}

# processings
# Quote the prompt so it reaches main.py as ONE --text argument. shlex.quote
# yields the same '...' wrapping as before for ordinary prompts, and also
# escapes prompts that contain an apostrophe, which plain concatenation with
# single quotes would break.
# NOTE(review): assumes %run splits its arguments POSIX-shell style, as on Colab/Linux.
Prompt_text = shlex.quote(Prompt_text)

In [None]:
#@title start training
import torch
# Ask PyTorch to release the GPU memory it is caching but not using before the
# training run starts.
torch.cuda.empty_cache()

# Run main.py inside this kernel. The {Name} placeholders are filled in by
# IPython from the variables defined in the Training Settings cell, so that cell
# must have been executed first. The same resolution is passed for both --w and
# --h, and --save_mesh is always enabled.
# (Keep comments off the %run line itself: anything on it is passed to main.py.)
%run main.py -O --text {Prompt_text} --workspace {Workspace} --iters {Training_iters} --lr {Learning_rate} --w {Training_nerf_resolution} --h {Training_nerf_resolution} --seed {Seed} --lambda_entropy {Lambda_entropy} --ckpt {Checkpoint} --save_mesh --max_steps {Max_steps}

Namespace(H=800, O=True, O2=False, W=800, albedo=False, albedo_iters=1000, angle_front=60, angle_overhead=30, backbone='grid', bg_radius=1.4, blob_density=10, blob_radius=0.5, bound=1, ckpt='latest', cuda_ray=True, decimate_target=100000.0, density_activation='softplus', density_thresh=0.1, dir_text=True, dt_gamma=0, eval_interval=1, fovy=60, fovy_range=[40, 70], fp16=True, gui=False, guidance='stable-diffusion', h=64, hf_key=None, iters=2000, jitter_pose=False, lambda_entropy=0.0001, lambda_opacity=0, lambda_orient=0.01, lambda_tv=1e-07, light_phi=0, light_theta=60, lr=0.001, max_ray_batch=4096, max_spp=1, max_steps=128, mcubes_resolution=256, min_lr=0.0001, min_near=0.1, negative='', num_steps=64, optim='adan', radius=3, radius_range=[1.0, 1.5], save_mesh=True, sd_version='2.1', seed=0, suppress_face=False, test=False, text='a DSLR photo of a a pink rabbit', uniform_sphere_rate=0.5, update_extra_interval=16, upsample_steps=32, w=64, warm_iters=500, workspace='trial')
NeRFNetwork(
  (

  0% 0/100 [01:30<?, ?it/s]


In [None]:
#@markdown ####**Testing Settings:**

# Workspace directory handed to main.py through --workspace by the testing cell.
Workspace_test = "trial" #@param{type: 'string'}
# Save_mesh = True #@param {type: 'boolean'}

In [None]:
#@title testing 
# Run main.py in test mode (--test) on the workspace named by `Workspace_test`,
# exporting a mesh as well (--save_mesh).
# NOTE(review): the recorded output of this cell shows
# workspace='{Workspace_test}', i.e. the placeholder reached main.py unexpanded
# -- presumably the cell was run before `Workspace_test` was defined. Execute
# the Testing Settings cell first.
%run main.py -O --test --workspace {Workspace_test} --save_mesh

Namespace(H=800, O=True, O2=False, W=800, albedo=False, albedo_iters=1000, angle_front=60, angle_overhead=30, backbone='grid', bg_radius=1.4, bound=1, ckpt='latest', cuda_ray=True, density_thresh=10, dir_text=True, dt_gamma=0, eval_interval=10, fovy=60, fovy_range=[40, 70], fp16=True, gui=False, guidance='stable-diffusion', h=64, iters=10000, jitter_pose=False, lambda_entropy=0.0001, lambda_opacity=0, lambda_orient=0.01, lambda_smooth=0, light_phi=0, light_theta=60, lr=0.001, max_ray_batch=4096, max_spp=1, max_steps=512, min_near=0.1, negative='', num_steps=64, radius=3, radius_range=[1.0, 1.5], save_mesh=True, sd_version='2.0', seed=0, suppress_face=False, test=True, text=None, uniform_sphere_rate=0.5, update_extra_interval=16, upsample_steps=32, w=64, workspace='{Workspace_test}')
NeRFNetwork(
  (encoder): GridEncoder: input_dim=3 num_levels=16 level_dim=2 resolution=16 -> 2048 per_level_scale=1.3819 params=(903480, 2) gridtype=tiled align_corners=False interpolation=linear
  (sigma_


  0% 0/100 [00:00<?, ?it/s][A
  2% 2/100 [00:00<00:06, 14.49it/s][A
  4% 4/100 [00:00<00:07, 12.36it/s][A
  6% 6/100 [00:00<00:09,  9.91it/s][A
  8% 8/100 [00:00<00:09, 10.08it/s][A
 10% 10/100 [00:00<00:07, 11.55it/s][A
 12% 12/100 [00:01<00:07, 12.24it/s][A
 14% 14/100 [00:01<00:06, 13.28it/s][A
 16% 16/100 [00:01<00:05, 14.28it/s][A
 18% 18/100 [00:01<00:05, 14.87it/s][A
 20% 20/100 [00:01<00:05, 15.62it/s][A
 22% 22/100 [00:01<00:04, 16.35it/s][A
 24% 24/100 [00:01<00:04, 16.35it/s][A
 26% 26/100 [00:01<00:04, 16.81it/s][A
 28% 28/100 [00:01<00:04, 17.26it/s][A
 30% 30/100 [00:02<00:04, 16.75it/s][A
 32% 32/100 [00:02<00:03, 17.07it/s][A
 34% 34/100 [00:02<00:03, 17.55it/s][A
 36% 36/100 [00:02<00:03, 16.96it/s][A
 38% 38/100 [00:02<00:03, 16.87it/s][A
 40% 40/100 [00:02<00:03, 17.17it/s][A
 42% 42/100 [00:02<00:03, 16.85it/s][A
 44% 44/100 [00:02<00:03, 17.12it/s][A
 46% 46/100 [00:03<00:03, 17.53it/s][A
 48% 48/100 [00:03<00:03, 17.13it/s][A
 50% 50/100 

100% 100/100 [00:12<00:00,  8.32it/s]


[INFO] running xatlas to unwrap UVs for mesh: v=(0, 3) f=(0, 3)


### Display results
* The RGB and depth videos are located at `{Workspace}/results/*.mp4`.
* The mesh is saved under `{Workspace}/mesh/`, where you will find three files named `mesh.obj`, `mesh.mtl`, and `albedo.png`.

In [None]:
#@title display RGB video
import os
import glob
from IPython.display import HTML
from base64 import b64encode

def get_latest_file(path):
  """Return the most recently modified file matching a glob pattern.

  Args:
    path: glob pattern to search, e.g. ``'trial/results/*_rgb.mp4'``.

  Returns:
    The matching path whose modification time is the newest.

  Raises:
    FileNotFoundError: if nothing matches ``path``. Previously this case
      surfaced as an uninformative ``IndexError`` from indexing an empty list.
  """
  matches = glob.glob(path)
  if not matches:
    raise FileNotFoundError(f"no file matches pattern: {path!r}")
  # A stable sort followed by taking the last element keeps the original
  # tie-breaking between files that share the same mtime.
  return sorted(matches, key=os.path.getmtime)[-1]

def show_video(video_path, video_width = 600):
  """Embed an MP4 file inline as a base64 data URL.

  Args:
    video_path: path of the video file to embed.
    video_width: value written into the ``width`` attribute of the
      ``<video>`` tag.

  Returns:
    An ``IPython.display.HTML`` object wrapping a ``<video>`` element whose
    source is the file's contents encoded as a ``data:video/mp4;base64`` URL.
  """
  # Read-only binary mode is sufficient (the original "r+b" also demanded write
  # permission), and the context manager closes the handle instead of leaking it.
  with open(video_path, "rb") as video_file:
    video_bytes = video_file.read()
  video_url = f"data:video/mp4;base64,{b64encode(video_bytes).decode()}"

  return HTML(f"""<video width={video_width} controls><source src="{video_url}"></video>""")
 
# Pick the newest "*_rgb.mp4" under the workspace's results folder and render it
# as the cell output (show_video must stay the last expression for that).
# NOTE(review): this reads `Workspace` from the training settings, not
# `Workspace_test` -- confirm that is intended when the two differ.
rgb_pattern = os.path.join(Workspace, 'results', '*_rgb.mp4')
rgb_video = get_latest_file(rgb_pattern)
show_video(rgb_video)
# a DSLR photo of a delicious hamburger