In [1]:
#!git clone https://github.com/ibaiGorordo/Sapiens-Pytorch-Inference.git

Cloning into 'Sapiens-Pytorch-Inference'...
remote: Enumerating objects: 188, done.[K
remote: Counting objects: 100% (19/19), done.[K
remote: Compressing objects: 100% (17/17), done.[K
remote: Total 188 (delta 6), reused 9 (delta 2), pack-reused 169 (from 1)[K
Receiving objects: 100% (188/188), 48.73 MiB | 776.00 KiB/s, done.
Resolving deltas: 100% (112/112), done.


In [2]:
#!pip install -r requirements.txt -q

/Users/cyanos/Code/MPCS/Advanced_Data_Analytics/Sapiens-Pytorch-Inference
[31mERROR: pip's dependency resolver does not currently take into account all the packages that are installed. This behaviour is the source of the following dependency conflicts.
pymc3 3.11.5 requires cachetools>=4.2.1, which is not installed.
pymc3 3.11.5 requires numpy<1.22.2,>=1.15.0, but you have numpy 1.26.4 which is incompatible.
pymc3 3.11.5 requires scipy<1.8.0,>=1.7.3, but you have scipy 1.12.0 which is incompatible.[0m[31m
[0m

In [3]:
import os
import sys
import shutil
from typing import List
import requests
from tqdm import tqdm
from datetime import timedelta
 
from enum import Enum
import time
import cv2
import numpy as np
import matplotlib.pyplot as plt
from imread_from_url import imread_from_url
from scipy.fftpack import dct, idct, fft, ifft
 
from dataclasses import dataclass
from torchvision import transforms
import torch
import torch.nn.functional as F
 
from huggingface_hub import hf_hub_download, hf_hub_url

In [10]:
#NOTE!! THIS TAKES SEVERAL HOURS TO RUN, SO ONLY RUN IF DESIRED
# !wget -nc https://huggingface.co/facebook/sapiens-seg-1b-torchscript/resolve/main/sapiens_1b_goliath_best_goliath_mIoU_7994_epoch_151_torchscript.pt2 -O models/sapiens_1b_goliath_best_goliath_mIoU_7994_epoch_151_torchscript.pt2

File ‘models/sapiens_1b_goliath_best_goliath_mIoU_7994_epoch_151_torchscript.pt2’ already there; not retrieving.
File ‘models/sapiens_1b_goliath_best_Sapiens-1B-Depth-Estimation-Single-Person-Inference-Results.jpggoliath_AP_640_torchscript.pt2’ already there; not retrieving.


In [11]:
class TaskType(Enum):
    """Sapiens task identifiers.

    Each member's value is the task name that ``download_hf_model`` inserts
    into the Hugging Face checkpoint subdirectory
    (``sapiens_lite_host/torchscript/<value>/checkpoints/...``).
    """

    DEPTH = "depth"
    NORMAL = "normal"
    SEG = "seg"
    POSE = "pose"
 
 
def download(url: str, filename: str):
    """Stream ``url`` to ``filename`` while showing a tqdm progress bar.

    The payload is written to ``filename + ".part"`` and only moved onto
    ``filename`` once the whole body has been received, so a failed or
    interrupted download never leaves an empty/truncated file at the final
    path (where a later "already downloaded?" check would mistake it for a
    complete file).

    Args:
        url: HTTP(S) URL to fetch.
        filename: Destination path; replaced only after a successful download.

    Raises:
        requests.HTTPError: If the server answers with an error status.
        OSError: If the temporary or destination file cannot be written.
    """
    partial_path = filename + ".part"
    try:
        # The timeout bounds connect / per-read waits, not the total transfer
        # time, so large files still download but a dead connection cannot
        # hang the notebook forever.
        with requests.get(url, stream=True, timeout=60) as r:
            # Check the status before touching the filesystem.
            r.raise_for_status()
            total = int(r.headers.get("content-length", 0))

            tqdm_params = {
                "total": total,
                "miniters": 1,
                "unit": "B",
                "unit_scale": True,
                "unit_divisor": 1024,
            }
            with open(partial_path, "wb") as f, tqdm(**tqdm_params) as pb:
                for chunk in r.iter_content(chunk_size=8192):
                    f.write(chunk)
                    pb.update(len(chunk))

        # Publish the finished file in one step.
        os.replace(partial_path, filename)
    except BaseException:
        # Also covers KeyboardInterrupt: never leave a partial file behind.
        if os.path.exists(partial_path):
            os.remove(partial_path)
        raise
 
 
def download_hf_model(model_name: str, task_type: TaskType, model_dir: str = "models"):
    """Return the local path of a Sapiens checkpoint, downloading it if needed.

    Args:
        model_name: Checkpoint file name, e.g.
            ``sapiens_1b_goliath_best_goliath_mIoU_7994_epoch_151_torchscript.pt2``.
        task_type: Task whose ``value`` selects the checkpoint subdirectory on
            the Hugging Face Hub.
        model_dir: Local directory used as the checkpoint cache; created when
            missing.

    Returns:
        Path of the checkpoint file inside ``model_dir``.
    """
    # exist_ok makes this idempotent and avoids the check-then-create race.
    os.makedirs(model_dir, exist_ok=True)

    path = os.path.join(model_dir, model_name)
    # An empty file cannot be a valid checkpoint (e.g. a leftover from an
    # interrupted download), so only a non-empty regular file counts as cached.
    if os.path.isfile(path) and os.path.getsize(path) > 0:
        return path

    print(f"Model {model_name} not found, downloading from Hugging Face Hub...")

    # First two underscore-separated tokens of the file name, e.g. "sapiens_1b".
    model_version = "_".join(model_name.split("_")[:2])
    repo_id = "facebook/sapiens"
    subdirectory = (
        f"sapiens_lite_host/torchscript/{task_type.value}/checkpoints/{model_version}"
    )

    # hf_hub_url only builds the URL; the transfer itself goes through
    # download() so that progress is displayed.
    url = hf_hub_url(repo_id=repo_id, filename=model_name, subfolder=subdirectory)
    download(url, path)
    print("Model downloaded successfully to", path)

    return path

In [20]:
def create_preprocessor(input_size: tuple[int, int],
                        mean: List[float] = (0.485, 0.456, 0.406),
                        std: List[float] = (0.229, 0.224, 0.225)):
    """Build the torchvision pipeline that turns an image array into a model input.

    The returned transform applies, in order: ``ToPILImage``, ``Resize`` to
    ``input_size``, ``ToTensor``, ``Normalize`` with ``mean``/``std``, and
    finally ``unsqueeze(0)`` to prepend a dimension of size one.

    Args:
        input_size: Target size handed to ``transforms.Resize``.
        mean: Per-channel mean handed to ``transforms.Normalize``.
        std: Per-channel standard deviation handed to ``transforms.Normalize``.

    Returns:
        A ``transforms.Compose`` object.
    """
    steps = [
        transforms.ToPILImage(),
        transforms.Resize(input_size),
        transforms.ToTensor(),
        transforms.Normalize(mean=mean, std=std),
        # Prepend a leading dimension of size one to the normalised tensor.
        transforms.Lambda(lambda tensor: tensor.unsqueeze(0)),
    ]
    return transforms.Compose(steps)

In [21]:
class SapiensSegmentationType(Enum):
    """Selectable segmentation checkpoints.

    Each member's value is the checkpoint file name that
    ``SapiensSegmentation.__init__`` passes to ``download_hf_model``.
    """

    # Presumably the 1B-parameter variant, judging by the "1b" in the file
    # name - TODO confirm against the model card.
    SEGMENTATION_1B = (
        "sapiens_1b_goliath_best_goliath_mIoU_7994_epoch_151_torchscript.pt2"
    )
 
 
# Seeded generator so the colour table below is the same on every run.
random = np.random.RandomState(11)

# ------------------------------- 28 classes -------------------------------
# NOTE(review): the list position is presumably the class index produced by
# the model (index 2 is "Face Neck") - confirm against the model card.
classes = [
    "Background", "Apparel", "Face Neck", "Hair",
    "Left Foot", "Left Hand", "Left Lower Arm", "Left Lower Leg",
    "Left Shoe", "Left Sock", "Left Upper Arm", "Left Upper Leg",
    "Lower Clothing",
    "Right Foot", "Right Hand", "Right Lower Arm", "Right Lower Leg",
    "Right Shoe", "Right Sock", "Right Upper Arm", "Right Upper Leg",
    "Torso", "Upper Clothing",
    "Lower Lip", "Upper Lip", "Lower Teeth", "Upper Teeth", "Tongue",
]

# One row per class: a fixed grey row for "Background" followed by a random
# colour for every other class. The trailing [:, ::-1] reverses the channel
# order of each row.
colors = np.vstack(
    (
        np.array([128, 128, 128]),
        random.randint(0, 255, (len(classes) - 1, 3)),
    )
).astype(np.uint8)[:, ::-1]

In [22]:
class SapiensSegmentation:
    """Callable wrapper around a TorchScript Sapiens segmentation checkpoint.

    Construction fetches the checkpoint via ``download_hf_model`` (if it is
    not cached), loads it with ``torch.jit.load`` and moves it to the
    requested device and dtype. Calling the instance with an image runs the
    model and returns the result of ``postprocess_segmentation``.
    """

    def __init__(
        self,
        type: SapiensSegmentationType = SapiensSegmentationType.SEGMENTATION_1B,
        device: torch.device = torch.device(
            "cuda" if torch.cuda.is_available() else "cpu"
        ),
        dtype: torch.dtype = torch.float32,
    ):
        """Load the checkpoint and build the preprocessing pipeline.

        Args:
            type: Checkpoint to load; its value is the checkpoint file name.
            device: Device the model and input tensors are moved to. The
                default is evaluated once, when the class is defined.
            dtype: Dtype the model and input tensors are cast to.
        """
        self.device = device
        self.dtype = dtype

        checkpoint_path = download_hf_model(type.value, TaskType.SEG)
        scripted = torch.jit.load(checkpoint_path).eval()
        self.model = scripted.to(device).to(dtype)

        # Only these values seem to work well
        self.preprocessor = create_preprocessor(input_size=(1024, 768))

    def __call__(self, img: np.ndarray) -> np.ndarray:
        """Segment one image and print how long it took.

        Args:
            img: Image array; it is converted with ``cv2.COLOR_BGR2RGB``, so
                it is expected to be in OpenCV's BGR channel order.

        Returns:
            The map produced by ``postprocess_segmentation`` for the first
            two dimensions of ``img``.
        """
        started = time.perf_counter()

        # Swap the channel order from BGR to RGB before preprocessing.
        rgb = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
        batch = self.preprocessor(rgb)
        batch = batch.to(self.device).to(self.dtype)

        with torch.inference_mode():
            raw_output = self.model(batch)

        target_shape = img.shape[:2]
        segmentation_map = postprocess_segmentation(raw_output, target_shape)

        elapsed = time.perf_counter() - started
        print(f"Segmentation inference took: {elapsed:.4f} seconds")
        return segmentation_map

In [24]:
def postprocess_segmentation(results: torch.Tensor, img_shape: tuple[int, int]) -> np.ndarray:
    """Turn raw model output into a per-pixel class map at image resolution.

    Args:
        results: Model output; only ``results[0]`` is used. Assumed to be
            laid out as (batch, classes, height, width), which is what the
            4-D input required by bilinear interpolation implies.
        img_shape: ``(height, width)`` of the output map.

    Returns:
        ``float32`` NumPy array of shape ``img_shape`` holding, for each
        pixel, the index of the highest-scoring class.
    """
    # Keep the first batch item and bring it to the CPU.
    result = results[0].cpu()

    # Upsample the result to the original image size.
    logits = F.interpolate(result.unsqueeze(0), size=img_shape, mode="bilinear").squeeze(0)

    # Reduce over the class dimension only. A blanket ``squeeze()`` would
    # also drop a height or width of 1 and return a map of the wrong rank
    # for one-pixel-high or one-pixel-wide images.
    segmentation_map = logits.argmax(dim=0)

    # Convert to a NumPy array (float, as callers already receive).
    return segmentation_map.float().numpy()

In [29]:
# exist_ok keeps this cell re-runnable: a plain os.mkdir raises
# FileExistsError once the directory has been created by an earlier run.
os.makedirs('processed_data', exist_ok=True)

In [93]:
# Position of "Face Neck" in the `classes` list; only pixels carrying this
# label are kept in the output images.
FACE_NECK_CLASS_ID = 2

frames = ["frame" + str(x) for x in range(30)]

# Load the model once for the whole batch: re-creating it for every frame
# repeats an expensive torch.jit.load without changing any result.
model_type = SapiensSegmentationType.SEGMENTATION_1B
estimator = SapiensSegmentation(model_type)

try:
    for frame in frames:
        img_path = "data/" + frame + ".jpg"
        img = cv2.imread(img_path)
        if img is None:
            # cv2.imread signals a missing/unreadable file by returning None.
            raise FileNotFoundError(f"Could not read image: {img_path}")

        segs = estimator(img)

        # Replicate the boolean mask across the three colour channels so it
        # can zero out every pixel that is not labelled as face/neck.
        mask = np.repeat((segs == FACE_NECK_CLASS_ID)[:, :, np.newaxis], 3, axis=2)
        image_of_interest = img * mask

        out_path = "processed_data/" + frame + ".png"
        # cv2.imwrite reports failure through its return value rather than
        # by raising, so check it explicitly.
        if not cv2.imwrite(out_path, image_of_interest):
            raise OSError(f"Could not write image: {out_path}")
finally:
    # Free the GPU memory
    del estimator  # Delete the model
    torch.cuda.empty_cache()  # Clear the GPU cache

Segmentation inference took: 105.3728 seconds
Time taken: 105.3883 seconds
Segmentation inference took: 110.9610 seconds
Time taken: 110.9866 seconds
Segmentation inference took: 110.1950 seconds
Time taken: 110.2143 seconds
Segmentation inference took: 115.4239 seconds
Time taken: 115.4402 seconds
Segmentation inference took: 120.9569 seconds
Time taken: 120.9769 seconds
Segmentation inference took: 113.1436 seconds
Time taken: 113.1633 seconds
Segmentation inference took: 109.7770 seconds
Time taken: 109.7883 seconds
Segmentation inference took: 105.8808 seconds
Time taken: 105.8906 seconds
Segmentation inference took: 105.7118 seconds
Time taken: 105.7193 seconds
Segmentation inference took: 108.3890 seconds
Time taken: 108.4090 seconds
Segmentation inference took: 109.7243 seconds
Time taken: 109.7397 seconds
Segmentation inference took: 107.5786 seconds
Time taken: 107.5857 seconds
Segmentation inference took: 108.6059 seconds
Time taken: 108.6140 seconds
Segmentation inference to