[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/camenduru/diffusion-e2e-ft-jupyter/blob/main/marigold_e2e_ft_depth_jupyter.ipynb)

In [None]:
# Environment setup cell: fetch the project sources and install dependencies.
# Work from /content, clone the repository, then change into the checkout so
# that later cells run with the repository root as the working directory
# (the `Marigold.marigold` import below presumably relies on this).
%cd /content
!git clone https://github.com/camenduru/marigold-e2e-ft-depth-hf
%cd /content/marigold-e2e-ft-depth-hf

# Quietly install diffusers, which the next cell imports.
# NOTE(review): `transformers` is also imported below but is not installed
# here; presumably the runtime ships with it preinstalled. TODO confirm.
!pip -q install diffusers

In [None]:
import numpy as np
import os
from PIL import Image
import torch

from Marigold.marigold import MarigoldPipeline
from diffusers import AutoencoderKL, DDIMScheduler, UNet2DConditionModel
from transformers import CLIPTextModel, CLIPTokenizer

# Identifier of the checkpoint every component below is loaded from.
checkpoint_path = "GonzaloMG/marigold-e2e-ft-depth"

# Precision and weight-file variant shared by the loaders below.
dtype = torch.float32
variant = None

# Prefer the GPU, but fall back to the CPU so this cell still runs (slowly)
# on a runtime without CUDA instead of raising inside `pipe.to(...)`.
device = "cuda" if torch.cuda.is_available() else "cpu"

# Each sub-model is loaded from its own subfolder of the checkpoint.
# `torch_dtype` is passed to every model loader so that changing `dtype` above
# actually takes effect on the weights.
# NOTE(review): components handed to the pipeline as ready-made objects are
# presumably used as-is (not re-cast) by `MarigoldPipeline.from_pretrained`;
# confirm against the pipeline implementation.
unet = UNet2DConditionModel.from_pretrained(
    checkpoint_path, subfolder="unet", torch_dtype=dtype
)
vae = AutoencoderKL.from_pretrained(
    checkpoint_path, subfolder="vae", torch_dtype=dtype
)
text_encoder = CLIPTextModel.from_pretrained(
    checkpoint_path, subfolder="text_encoder", torch_dtype=dtype
)
tokenizer = CLIPTokenizer.from_pretrained(checkpoint_path, subfolder="tokenizer")
# The scheduler config is loaded with `timestep_spacing` overridden to
# "trailing".
scheduler = DDIMScheduler.from_pretrained(
    checkpoint_path, timestep_spacing="trailing", subfolder="scheduler"
)

# Assemble the pipeline from the pre-loaded components.
pipe = MarigoldPipeline.from_pretrained(
    pretrained_model_name_or_path=checkpoint_path,
    unet=unet,
    vae=vae,
    scheduler=scheduler,
    text_encoder=text_encoder,
    tokenizer=tokenizer,
    variant=variant,
    torch_dtype=dtype,
)
pipe = pipe.to(device)
# Put the UNet in evaluation mode for inference.
pipe.unet.eval()
  
def predict_depth(image, processing_res_choice):
    """Run the module-level ``pipe`` once on ``image`` and return its depth outputs.

    The pipeline is called under ``torch.no_grad()`` with fixed settings: one
    denoising step, an ensemble of one, ``noise="zeros"``, normals disabled,
    and ``match_input_res=True``.

    Args:
        image: Input forwarded unchanged as the pipeline's first argument
            (the caller in this notebook passes a PIL image).
        processing_res_choice: Value forwarded as ``processing_res``.

    Returns:
        A ``(depth_np, depth_colored)`` tuple taken from the attributes of the
        same names on the pipeline output.
    """
    inference_kwargs = dict(
        denoising_steps=1,
        ensemble_size=1,
        noise="zeros",
        normals=False,
        processing_res=processing_res_choice,
        match_input_res=True,
    )
    # No gradients are needed for inference.
    with torch.no_grad():
        result = pipe(image, **inference_kwargs)
    return result.depth_np, result.depth_colored

# Value handed to the pipeline as `processing_res` by `predict_depth`.
processing_res_choice = 768

# Load the example picture shipped with the cloned repository, as RGB.
image = Image.open(
    "/content/marigold-e2e-ft-depth-hf/assets/examples/bottles.jpg"
).convert("RGB")

# Round-trip through a uint8 array to obtain a fresh PIL image for the pipeline.
image_array = np.array(image).astype("uint8")
pil_image = Image.fromarray(image_array)

depth_npy, depth_colored = predict_depth(
    image=pil_image,
    processing_res_choice=processing_res_choice,
)
# Bare trailing expression so the notebook displays the colored result.
depth_colored