# Example usage of [stable-dreamfusion](https://github.com/ashawkey/stable-dreamfusion)
Pure-PyTorch version

### Check the machine

In [None]:
# Shell out to nvidia-smi to report the GPU, driver/CUDA version and memory usage of this runtime.
! nvidia-smi

Sun Jan 15 03:08:22 2023       
+-----------------------------------------------------------------------------+
| NVIDIA-SMI 460.32.03    Driver Version: 460.32.03    CUDA Version: 11.2     |
|-------------------------------+----------------------+----------------------+
| GPU  Name        Persistence-M| Bus-Id        Disp.A | Volatile Uncorr. ECC |
| Fan  Temp  Perf  Pwr:Usage/Cap|         Memory-Usage | GPU-Util  Compute M. |
|                               |                      |               MIG M. |
|   0  Tesla T4            Off  | 00000000:00:04.0 Off |                    0 |
| N/A   49C    P8     9W /  70W |      0MiB / 15109MiB |      0%      Default |
|                               |                      |                  N/A |
+-------------------------------+----------------------+----------------------+
                                                                               
+-----------------------------------------------------------------------------+
| Proces

### Setup

In [None]:
#@title install dependencies
! git clone https://github.com/ashawkey/stable-dreamfusion.git

%cd stable-dreamfusion

# install requirements
! pip install -r requirements.txt
! pip install git+https://github.com/NVlabs/nvdiffrast/

Cloning into 'stable-dreamfusion'...
remote: Enumerating objects: 379, done.[K
remote: Counting objects: 100% (53/53), done.[K
remote: Compressing objects: 100% (41/41), done.[K
remote: Total 379 (delta 31), reused 20 (delta 12), pack-reused 326[K
Receiving objects: 100% (379/379), 175.25 KiB | 8.76 MiB/s, done.
Resolving deltas: 100% (220/220), done.
/content/stable-dreamfusion
Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Collecting torch-ema
  Downloading torch_ema-0.3-py3-none-any.whl (5.5 kB)
Collecting ninja
  Downloading ninja-1.11.1-py2.py3-none-manylinux_2_12_x86_64.manylinux2010_x86_64.whl (145 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m146.0/146.0 KB[0m [31m10.4 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting trimesh
  Downloading trimesh-3.18.0-py3-none-any.whl (670 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m670.8/670.8 KB[0m [31m45.6 MB/s[0m eta [36m0:00:00[0m
Colle

### Training & Testing
* It takes about 0.7s per training step, so 5000 training steps take around 1 hour to finish (the default `Training_iters` of 3000 below takes roughly 35 minutes). A larger `Training_iters` usually leads to better results.
* If you hit a CUDA out-of-memory (OOM) error, try decreasing `Num_steps`, `Upsample_steps`, and `Training_nerf_resolution`.
* If the NeRF fails to learn anything (empty scene, only background), try decreasing `Lambda_entropy`, which regularizes the learned opacity.

In [None]:
#@markdown ####**Training Settings:**
# Each `#@param` annotation turns the assignment on its line into a Colab form field.
# The values are forwarded to main.py as command-line flags by the training cell.
Prompt_text = "an apple" #@param {type: 'string'}
Training_iters = 3000 #@param {type: 'integer'}
Learning_rate = 1e-3 #@param {type: 'number'}
Training_nerf_resolution = 64  #@param {type: 'integer'}
# CUDA_ray = True #@param {type: 'boolean'}
# View_dependent_prompt = True #@param {type: 'boolean'}
# FP16 = True #@param {type: 'boolean'}
Seed = 0 #@param {type: 'integer'}
Lambda_entropy = 1e-4 #@param {type: 'number'}
# Step counts are whole numbers, so expose them as integer fields like the other counts.
Num_steps = 64 #@param {type: 'integer'}
Upsample_steps = 32 #@param {type: 'integer'}
Checkpoint = 'latest' #@param {type: 'string'}

#@markdown ---

#@markdown ####**Output Settings:**
Workspace = "trial" #@param{type: 'string'}
# Save_mesh = True #@param {type: 'boolean'}

# processings
import shlex

# Quote the prompt so it reaches main.py as a single --text argument even when it
# contains spaces or apostrophes (plain '...' wrapping breaks on an embedded ').
Prompt_text = shlex.quote(Prompt_text)

In [None]:
#@title start training
import torch
# Release unused cached GPU memory held by PyTorch before launching the run.
torch.cuda.empty_cache()

# Run main.py inside this kernel with the values chosen in the training-settings form.
# IPython expands each {Name} to the value of that Python variable before parsing the line.
# The same resolution is passed for both --w and --h, and --save_mesh is always enabled.
%run main.py -O2 --albedo --text {Prompt_text} --workspace {Workspace} --iters {Training_iters} --lr {Learning_rate} --w {Training_nerf_resolution} --h {Training_nerf_resolution} --seed {Seed} --lambda_entropy {Lambda_entropy} --ckpt {Checkpoint} --save_mesh --num_steps {Num_steps} --upsample_steps {Upsample_steps}

In [None]:
#@markdown ####**Testing Settings:**

# Workspace directory handed to main.py via --workspace when running in --test mode.
Workspace_test = "trial" #@param{type: 'string'}
# Save_mesh = True #@param {type: 'boolean'}

In [None]:
#@title testing 
import torch
# Release unused cached GPU memory held by PyTorch before launching the test run.
torch.cuda.empty_cache()

# Run main.py in --test mode on the chosen workspace and export the mesh.
# NOTE(review): --max_ray_batch 1024 presumably limits rays per batch to save memory; confirm against main.py.
%run main.py -O2 --test --workspace {Workspace_test} --save_mesh --max_ray_batch 1024

Namespace(H=800, O=False, O2=True, W=800, albedo=False, albedo_iters=1000, angle_front=60, angle_overhead=30, backbone='vanilla', bg_radius=1.4, bound=1, ckpt='latest', cuda_ray=False, density_thresh=10, dir_text=True, dt_gamma=0, eval_interval=10, fovy=60, fovy_range=[40, 70], fp16=False, gui=False, guidance='stable-diffusion', h=64, iters=10000, jitter_pose=False, lambda_entropy=0.0001, lambda_opacity=0, lambda_orient=0.01, lambda_smooth=0, light_phi=0, light_theta=60, lr=0.001, max_ray_batch=1024, max_spp=1, max_steps=512, min_near=0.1, negative='', num_steps=64, radius=3, radius_range=[1.0, 1.5], save_mesh=True, seed=0, suppress_face=False, test=True, text=None, uniform_sphere_rate=0.5, update_extra_interval=16, upsample_steps=32, w=64, workspace='trial')
NeRFNetwork(
  (encoder): FreqEncoder_torch()
  (sigma_net): MLP(
    (net): ModuleList(
      (0): BasicBlock(
        (dense): Linear(in_features=39, out_features=96, bias=True)
        (activation): ReLU(inplace=True)
      )
 

100% 100/100 [17:55<00:00, 10.75s/it]

100% 100/100 [18:06<00:00, 10.87s/it]


[INFO] running xatlas to unwrap UVs for mesh: v=(75367, 3) f=(150348, 3)
[INFO] writing obj mesh to trial/mesh/mesh.obj
[INFO] writing vertices (75367, 3)
[INFO] writing vertices texture coords (115883, 2)
[INFO] writing faces (150348, 3)


### Display results
* RGB and depth videos are located at `{Workspace}/results/*.mp4`.
* The mesh is under `{Workspace}/mesh/`, where you should see three files named `mesh.obj`, `mesh.mtl`, and `albedo.png`.

In [None]:
#@title display RGB video
import os
import glob
from IPython.display import HTML
from base64 import b64encode

def get_latest_file(path):
  """Return the most recently modified file matching a glob pattern.

  Args:
    path: glob pattern (e.g. ``'trial/results/*_rgb.mp4'``) passed to ``glob.glob``.

  Returns:
    The matching path with the largest modification time.

  Raises:
    FileNotFoundError: if the pattern matches nothing, e.g. because no run has
      written results yet.
  """
  matches = glob.glob(path)
  if not matches:
    # Fail with a message that names the pattern instead of a bare IndexError.
    raise FileNotFoundError(f"no files match pattern: {path!r}")
  # Sort oldest-to-newest by mtime and take the last entry.
  return sorted(matches, key=os.path.getmtime)[-1]

def show_video(video_path, video_width = 600):
  """Build an inline HTML video player for a video file.

  The whole file is read into memory, base64-encoded and embedded as a
  ``data:video/mp4`` URL inside a ``<video>`` tag.

  Args:
    video_path: path of the video file to embed.
    video_width: value written to the ``width`` attribute of the ``<video>`` tag.

  Returns:
    An ``IPython.display.HTML`` object; displaying it renders the player.
  """
  # Read-only binary mode is enough here, and the context manager closes the handle.
  with open(video_path, "rb") as video_stream:
    video_file = video_stream.read()
  video_url = f"data:video/mp4;base64,{b64encode(video_file).decode()}"

  return HTML(f"""<video width={video_width} controls><source src="{video_url}"></video>""")
 
# Pick the newest "*_rgb.mp4" under <Workspace>/results and render it as the cell output.
rgb_video = get_latest_file(
  os.path.join(Workspace, 'results', '*_rgb.mp4')
)
show_video(rgb_video)