In [1]:
# coding=utf-8
# Copyright 2024 The Google Research Authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import argparse
import glob
import json
import numpy as np
import os
from omegaconf import OmegaConf
from PIL import Image
from tqdm import tqdm

from transformers import pipeline
import matplotlib
import torchvision.transforms as T
import torch

import pdb


def filter_paths(paths, filter_file=None):
    """Optionally restrict ``paths`` to the basenames listed in a JSON file.

    Args:
        paths: Sequence of file paths.
        filter_file: Path to a JSON file containing the basenames to keep.
            When ``None`` no filtering is applied.

    Returns:
        ``paths`` itself when ``filter_file`` is ``None``; otherwise a new list
        holding, in their original order, the paths whose ``os.path.basename``
        appears in the JSON file.
    """
    if filter_file is None:
        return paths
    # Read through a context manager so the handle is closed deterministically
    # instead of being left to the garbage collector.
    with open(filter_file) as f:
        filter_names = set(json.load(f))
    return [image for image in paths if os.path.basename(image) in filter_names]

def make_folder(folder):
    """Create ``folder`` (including missing parents) unless the path already exists."""
    if os.path.exists(folder):
        # Something is already at this path; leave it untouched.
        return
    os.makedirs(folder, exist_ok=True)

def get_depth_tensor(pipe, image):
    """Predict depth for ``image`` and resize the prediction to the image size.

    Args:
        pipe: Callable whose result exposes a ``"predicted_depth"`` tensor.
        image: Object with a ``size`` attribute; it is reversed before being
            used as the interpolation target (presumably PIL's
            ``(width, height)`` -> ``(height, width)`` - confirm for other
            image types).

    Returns:
        The first batch element of the bilinearly resized prediction.
    """
    predicted = pipe(image)["predicted_depth"]
    target_size = image.size[::-1]
    # interpolate() needs an explicit channel axis, hence the unsqueeze.
    resized = torch.nn.functional.interpolate(
        predicted.unsqueeze(1),
        size=target_size,
        mode="bilinear",
        align_corners=False,
    )
    return resized[0]

# def main(args):


2024-08-28 22:06:16.896715: I tensorflow/core/util/port.cc:153] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.
2024-08-28 22:06:16.914724: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:485] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
2024-08-28 22:06:16.936569: E external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:8454] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
2024-08-28 22:06:16.943145: E external/local_xla/xla/stream_executor/cuda/cuda_blas.cc:1452] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
2024-08-28 22:06:16.960081: I tensorflow/core/platform/cpu_feature_guar

In [5]:
import torchvision.transforms.v2 as transforms
from utils.dataset import process_frames
# Target resolution handed to process_frames() by preprocess_image().
height, width = 512, 512
# Alternative validation sets (uncomment exactly one `dataset_path`):
# dataset_path = "/home/bml/storage/mnt/v-95c5b44cfcff4e6c/org/data_lxr/data/open-images/val/hed/val_dataset.json"
# dataset_path = "/home/bml/storage/mnt/v-95c5b44cfcff4e6c/org/data_lxr/data/open-images/Human_body/openpose/val_dataset.json"
dataset_path = "/home/bml/storage/mnt/v-95c5b44cfcff4e6c/org/data_lxr/data/PascalVOC/VOC2012/depth_anything_v2_gray/val_dataset.json"
# dataset_path = "/home/bml/storage/mnt/v-95c5b44cfcff4e6c/org/data_lxr/data/PascalVOC/VOC2012/hed/val_dataset.json"

# Read the manifest through a context manager so the file handle is closed
# right away instead of leaking from a bare open().
with open(dataset_path) as f:
    val_dataset = json.load(f)

# Each manifest entry provides the source image path ('original_image') and
# the file name of its generated counterpart ('file_name').
image_paths = [line['original_image'] for line in val_dataset]
real_images = [Image.open(path).convert("RGB") for path in image_paths]

# Depth tensors for the real images are stored next to the manifest.
real_image_path = os.path.dirname(dataset_path)
generate_image_path = "/home/bml/storage/mnt/v-95c5b44cfcff4e6c/org/data_lxr/svd-train/output_final/output_depth_lora_joint_gray_rank64_nta_convfuse/eval"
generate_image_paths = [os.path.join(generate_image_path, line['file_name']) for line in val_dataset]

In [3]:
def preprocess_image(image):
    """Run one image through ``process_frames`` and return the single result.

    The image is wrapped in a one-element list and processed at the
    notebook-level ``height`` x ``width`` with ``div=8`` and random cropping
    disabled.
    """
    frames = process_frames(
        [image],
        height,
        width,
        verbose=False,
        div=8,
        rand_crop=False,
    )
    return frames[0]

# Ground-truth images, passed through preprocess_image() one by one.
processed_real_images = list(map(preprocess_image, real_images))

# Generated images: load as RGB, then apply the same preprocessing.
fake_images = [Image.open(generated_path).convert("RGB") for generated_path in generate_image_paths]
processed_fake_images = list(map(preprocess_image, fake_images))

In [7]:
# Point both proxy variables at the local proxy on port 7890
# (set before the model download in the next cell).
os.environ.update({
    "http_proxy": "http://127.0.0.1:7890",
    "https_proxy": "http://127.0.0.1:7890",
})

In [8]:
# Init pipeline
# Depth-estimation pipeline used to derive depth maps for both image sets.
pipe = pipeline(
    task="depth-estimation",
    model="depth-anything/Depth-Anything-V2-Small-hf",
)
cmap = matplotlib.colormaps.get_cmap('Spectral_r')

# Per-image predictions are accumulated here by the inference loop.
real_depth_tensors, fake_depth_tensors = [], []

config.json:   0%|          | 0.00/950 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/99.2M [00:00<?, ?B/s]

preprocessor_config.json:   0%|          | 0.00/775 [00:00<?, ?B/s]

In [11]:
def depth_tensor_to_image(depth_tensor):
    """Min-max normalise a depth tensor to 0-255 and convert it to a PIL image.

    Args:
        depth_tensor: Depth prediction tensor (assumes a layout accepted by
            ``torchvision``'s ``ToPILImage``, e.g. ``(1, H, W)`` - confirm).

    Returns:
        The PIL image built from the uint8-quantised, normalised tensor.
    """
    depth_min = depth_tensor.min()
    depth_range = depth_tensor.max() - depth_min
    if depth_range == 0:
        # A constant map would otherwise compute 0/0 = NaN, and casting NaN to
        # uint8 is undefined; render it as a uniform black image instead.
        scaled = torch.zeros_like(depth_tensor)
    else:
        scaled = (depth_tensor - depth_min) / depth_range * 255.0
    gray_depth = T.ToPILImage()(scaled.to(torch.uint8))
    return gray_depth

In [16]:

# Resume support: images whose depth is already in the accumulator lists are
# skipped, so the cell can be re-run after an interruption.
_num_done = len(real_depth_tensors)
_pending_pairs = zip(
    tqdm(processed_real_images[_num_done:]),
    processed_fake_images[_num_done:],
)
for real_image, fake_image in _pending_pairs:
    # Predict both depth maps first, then record them together so the two
    # lists always stay the same length.
    real_depth_tensor = get_depth_tensor(pipe, real_image)
    fake_depth_tensor = get_depth_tensor(pipe, fake_image)
    real_depth_tensors.append(real_depth_tensor)
    fake_depth_tensors.append(fake_depth_tensor)

# Collapse the per-image lists into single batched tensors.
real_depth_tensors = torch.stack(real_depth_tensors)
fake_depth_tensors = torch.stack(fake_depth_tensors)

# Persist each batch next to the images it was computed from.
for _batch, _folder in (
    (real_depth_tensors, real_image_path),
    (fake_depth_tensors, generate_image_path),
):
    torch.save(_batch, os.path.join(_folder, "depth_tensor.pt"))




# if __name__ == "__main__":
#     # conda run -n control python3 annotate_spatial.py --config_path configs/annotate_spatial.yaml
# 	parser = argparse.ArgumentParser(description="")
# 	parser.add_argument("--config_path", type=str, default = None)
# 	args = parser.parse_args()
# 	main(args)

100%|██████████| 594/594 [07:49<00:00,  1.26it/s]


In [2]:
import torchvision.transforms.v2 as transforms
from utils.dataset import process_frames
# Target resolution handed to process_frames() by preprocess_image().
height, width = 512, 512
# Alternative validation sets (uncomment exactly one `dataset_path`):
# dataset_path = "/home/bml/storage/mnt/v-95c5b44cfcff4e6c/org/data_lxr/data/open-images/val/hed/val_dataset.json"
# dataset_path = "/home/bml/storage/mnt/v-95c5b44cfcff4e6c/org/data_lxr/data/open-images/Human_body/openpose/val_dataset.json"
dataset_path = "/home/bml/storage/mnt/v-95c5b44cfcff4e6c/org/data_lxr/data/PascalVOC/VOC2012/depth_anything_v2_gray/val_dataset.json"
# dataset_path = "/home/bml/storage/mnt/v-95c5b44cfcff4e6c/org/data_lxr/data/PascalVOC/VOC2012/hed/val_dataset.json"

# Read the manifest through a context manager so the file handle is closed
# right away instead of leaking from a bare open().
with open(dataset_path) as f:
    val_dataset = json.load(f)

# Depth tensors for the real images are stored next to the manifest.
real_image_path = os.path.dirname(dataset_path)
generate_image_path = "/home/bml/storage/mnt/v-95c5b44cfcff4e6c/org/data_lxr/readout_guidance/results/spatial/datv2_gray"
generate_image_paths = [os.path.join(generate_image_path, line['file_name']) for line in val_dataset]

def preprocess_image(image):
    """Run one image through ``process_frames`` and return the single result.

    The image is wrapped in a one-element list and processed at the
    notebook-level ``height`` x ``width`` with ``div=8`` and random cropping
    disabled.
    """
    frames = process_frames(
        [image],
        height,
        width,
        verbose=False,
        div=8,
        rand_crop=False,
    )
    return frames[0]

# Generated images: load as RGB, then pass each through preprocess_image().
fake_images = [Image.open(generated_path).convert("RGB") for generated_path in generate_image_paths]
processed_fake_images = list(map(preprocess_image, fake_images))


# Depth-estimation pipeline used to derive depth maps for the generated images.
pipe = pipeline(
    task="depth-estimation",
    model="depth-anything/Depth-Anything-V2-Small-hf",
)
cmap = matplotlib.colormaps.get_cmap('Spectral_r')
# Per-image predictions are accumulated here by the inference loop.
fake_depth_tensors = list()

In [3]:
# Resume support: skip the images that already have a prediction in the list.
_num_done = len(fake_depth_tensors)
_pending_images = processed_fake_images[_num_done:]
for fake_image in tqdm(_pending_images):
    fake_depth_tensor = get_depth_tensor(pipe, fake_image)
    fake_depth_tensors.append(fake_depth_tensor)

# Collapse the per-image list into one batched tensor.
fake_depth_tensors = torch.stack(fake_depth_tensors)

100%|██████████| 1000/1000 [06:39<00:00,  2.50it/s]


In [4]:
# Sanity check: display the dimensions of the stacked predictions.
fake_depth_tensors.shape

torch.Size([1000, 1, 512, 512])

In [5]:

# Only the generated-image depths are written in this session; the file for
# the real images is not re-saved here.
_fake_depth_file = os.path.join(generate_image_path, "depth_tensor.pt")
torch.save(fake_depth_tensors, _fake_depth_file)


In [8]:
# Reload the cached depth batches (real first, then generated).
# NOTE: torch.load unpickles the file - only use it on files you produced.
real_depth_tensors, fake_depth_tensors = (
    torch.load(os.path.join(_folder, "depth_tensor.pt"))
    for _folder in (real_image_path, generate_image_path)
)

In [11]:
import sys
# sys.path.append("data/deps/ControlNet")
sys.path.append("/home/bml/storage/mnt/v-95c5b44cfcff4e6c/org/data_lxr/Marigold")
from tabulate import tabulate
from src.util import metric
from src.util.alignment import (
    align_depth_least_square,
    depth2disparity,
    disparity2depth,
)

from src.util.metric import MetricTracker

# Names of the metric functions looked up on the imported `metric` module.
eval_metrics = [
    "abs_relative_difference",
    "squared_relative_difference",
    "rmse_linear",
    "rmse_log",
    "log10",
    "delta1_acc",
    "delta2_acc",
    "delta3_acc",
    "i_rmse",
    "silog_rmse",
]

metric_funcs = [getattr(metric, _met) for _met in eval_metrics]

# The tracker is keyed by the functions' own __name__, which is also what the
# evaluation loop passes to metric_tracker.update().
metric_tracker = MetricTracker(*[m.__name__ for m in metric_funcs])
metric_tracker.reset()

# Metric files are written next to the generated images.
output_dir = generate_image_path

per_sample_filename = os.path.join(output_dir, "per_sample_metrics.csv")
# Row labels: manifest file names up to their first dot.
file_names = [item["file_name"].split(".")[0] for item in val_dataset]

# Write the CSV header (this truncates any previous per-sample file).
with open(per_sample_filename, "w+") as f:
    f.write("filename,")
    f.write(",".join([m.__name__ for m in metric_funcs]))
    f.write("\n")

# Use the GPU when one is present, otherwise fall back to the CPU so the
# evaluation does not crash on machines without CUDA.
device = "cuda" if torch.cuda.is_available() else "cpu"

In [13]:
# Open the per-sample CSV once for the whole run; re-opening it for every
# sample is loop-invariant work.
with open(per_sample_filename, "a+") as f:
    for real_depth_tensor, fake_depth_tensor, pred_name in zip(tqdm(real_depth_tensors), fake_depth_tensors, file_names):
        # clone() keeps the in-place `+= 1` below from touching the cached
        # batch; detach().cpu() makes .numpy() valid even if the tensors were
        # saved from a GPU or still track gradients.
        depth_raw = real_depth_tensor.clone().detach().cpu().numpy()
        depth_pred = fake_depth_tensor.clone().detach().cpu().numpy()
        # Shift the reference by one (presumably to keep it strictly positive
        # for the relative/log metrics - confirm).
        depth_raw += 1
        # Every pixel counts as valid.
        valid_mask = np.ones_like(depth_raw).astype(bool)
        depth_pred, scale, shift = align_depth_least_square(
            gt_arr=depth_raw,
            pred_arr=depth_pred,
            valid_mask_arr=valid_mask,
            return_scale_shift=True,
            # max_resolution=alignment_max_res,
        )

        # clip to d > 0 for evaluation
        depth_pred = np.clip(depth_pred, a_min=1e-6, a_max=None)

        # Evaluate on `device` (defined in the metric setup cell)
        sample_metric = []
        depth_pred_ts = torch.from_numpy(depth_pred).to(device)
        depth_raw_ts = torch.from_numpy(depth_raw).to(device)
        valid_mask_ts = torch.from_numpy(valid_mask).to(device)

        for met_func in metric_funcs:
            _metric_name = met_func.__name__
            _metric = met_func(depth_pred_ts, depth_raw_ts, valid_mask_ts).item()
            sample_metric.append(str(_metric))
            metric_tracker.update(_metric_name, _metric)

        # Save per-sample metric
        f.write(pred_name + ",")
        f.write(",".join(sample_metric))
        f.write("\n")

100%|██████████| 1000/1000 [00:22<00:00, 45.22it/s]


In [14]:
# -------------------- Save metrics to file --------------------
# Header lines naming what was evaluated against what.
eval_text = (
    "Evaluation metrics:\n"
    f"of predictions: {generate_image_path}\n"
    f"on dataset: {real_image_path}\n"
)

# Two-row table: metric names on top, their tracked results underneath.
_name_row = metric_tracker.result().keys()
_value_row = metric_tracker.result().values()
eval_text += tabulate([_name_row, _value_row])

metrics_filename = "eval_metrics" + ".txt"

_save_to = os.path.join(output_dir, metrics_filename)
with open(_save_to, "w+") as f:
    f.write(eval_text)
    print(f"Evaluation metrics saved to {_save_to}")

Evaluation metrics saved to /home/bml/storage/mnt/v-95c5b44cfcff4e6c/org/data_lxr/readout_guidance/results/spatial/datv2_gray/eval_metrics.txt


In [14]:
# Single-sample version of the evaluation loop, run on the first manifest entry.
# NOTE(review): this cell still calls metric_tracker.update() and appends to
# per_sample_filename, so running it after the full loop counts sample 0 a
# second time in the tracker and adds a duplicate CSV row - confirm this is
# intended before keeping the cell.
pred_name = val_dataset[0]["file_name"].split(".")[0]
# clone() so the in-place `+= 1` does not modify the cached batch.
depth_raw = real_depth_tensors[0].clone().numpy()
depth_raw += 1
depth_pred = fake_depth_tensors[0].clone().numpy()
# Every pixel counts as valid.
valid_mask = np.ones_like(depth_raw).astype(bool)
depth_pred, scale, shift = align_depth_least_square(
    gt_arr=depth_raw,
    pred_arr=depth_pred,
    valid_mask_arr=valid_mask,
    return_scale_shift=True,
    # max_resolution=alignment_max_res,
)

# NOTE(review): the floor here is 1, whereas the full evaluation loop clips at
# 1e-6 - the two cells therefore do not compute the same metrics; confirm which
# threshold is intended.
depth_pred = np.clip(depth_pred, a_min=1, a_max=None)

# Evaluate on the GPU (the device is hard-coded to "cuda" in this cell)
sample_metric = []
device = "cuda"
depth_pred_ts = torch.from_numpy(depth_pred).to(device)
depth_raw_ts = torch.from_numpy(depth_raw).to(device)
valid_mask_ts = torch.from_numpy(valid_mask).to(device)
for met_func in metric_funcs:
    _metric_name = met_func.__name__
    _metric = met_func(depth_pred_ts, depth_raw_ts, valid_mask_ts).item()
    sample_metric.append(_metric.__str__())
    metric_tracker.update(_metric_name, _metric)

# Save per-sample metric
with open(per_sample_filename, "a+") as f:
    f.write(pred_name + ",")
    f.write(",".join(sample_metric))
    f.write("\n")

In [15]:
import json
# Read the annotation file through a context manager so the handle is closed
# right away instead of leaking from a bare open().
with open("/home/bml/storage/mnt/v-95c5b44cfcff4e6c/org/data_lxr/data/MSRVTT/MSR_VTT.json") as f:
    a = json.load(f)

# Group captions by their "image_id", keeping annotation order within each id.
dataset_dict = dict()
for ann in a['annotations']:
    dataset_dict.setdefault(ann["image_id"], []).append(ann["caption"])

# Order the ids numerically. str.strip("video") removes the characters
# v/i/d/e/o from both ends, which leaves the digits of ids such as "video12".
dataset_list = []
dataset_key_list = sorted(list(dataset_dict.keys()), key=lambda x: int(x.strip("video")))

for data in dataset_key_list:
    video_id = int(data.strip("video"))
    dataset_list.append({"video_id": video_id, "text": dataset_dict[data]})


# NOTE(review): `dataset_list` is built above but never used - the file written
# here is the id -> captions mapping (`dataset_dict`). Confirm which of the two
# structures the MSRVTT loader expects.
with open("/home/bml/storage/mnt/v-95c5b44cfcff4e6c/org/data_lxr/data/MSRVTT/captions.json", "w") as f:
    json.dump(dataset_dict, f)

from utils.dataset import MSRVTT

video_folder = "/home/bml/storage/mnt/v-95c5b44cfcff4e6c/org/data_lxr/data/MSRVTT/videos"
caption_file = "/home/bml/storage/mnt/v-95c5b44cfcff4e6c/org/data_lxr/data/MSRVTT/captions.json"
dataset = MSRVTT(video_folder, caption_file,)

In [None]:
import json
# Load the CelebA id-pair manifest into `dataset`.
_celeba_pairs_file = "/home/bml/storage/mnt/v-95c5b44cfcff4e6c/org/data_lxr/data/CelebA/dataset_celeba_wild_idpairs_fix.json"
with open(_celeba_pairs_file, "r") as f:
    dataset = json.load(f)


In [17]:
# Inspect the first record to see the manifest's fields.
dataset[0]

{'image0': '/home/bml/storage/mnt/v-95c5b44cfcff4e6c/org/data_lxr/data/CelebA/img_celeba/119614.jpg',
 'image1': '/home/bml/storage/mnt/v-95c5b44cfcff4e6c/org/data_lxr/data/CelebA/img_celeba/015280.jpg',
 'person_id': 5195,
 'text0': 'arafed woman in a purple dress and a tiara smiling',
 'text1': 'smiling woman with long brown hair and white shirt posing for a picture'}

In [12]:
#!/usr/bin/env python3

# Allow direct execution
import os
import sys

# sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))

import yt_dlp
import yt_dlp.options

# Keep a reference to the real factory so it can be restored after patching.
create_parser = yt_dlp.options.create_parser

def parse_patched_options(opts):
    """Parse ``opts`` with yt-dlp while a set of parser defaults is overridden.

    A parser is built with the original factory, the defaults listed below are
    replaced, and ``yt_dlp.options.create_parser`` is temporarily swapped for a
    function returning that parser so ``yt_dlp.parse_options`` picks it up.

    Args:
        opts: List of command-line style arguments.

    Returns:
        Whatever ``yt_dlp.parse_options(opts)`` returns.
    """
    patched_parser = create_parser()
    patched_parser.defaults.update({
        'ignoreerrors': False,
        'retries': 0,
        'fragment_retries': 0,
        'extract_flat': False,
        'concat_playlist': 'never',
    })
    # Monkeypatch the module-level factory for the duration of the parse only.
    yt_dlp.options.create_parser = lambda: patched_parser
    try:
        return yt_dlp.parse_options(opts)
    finally:
        # Always restore the original factory, even if parsing raised.
        yt_dlp.options.create_parser = create_parser


# Baseline: the options produced by the patched parser for an empty command line.
default_opts = parse_patched_options([]).ydl_opts


def cli_to_api(opts, cli_defaults=False):
    """Return the parsed options for ``opts`` that differ from ``default_opts``.

    Args:
        opts: List of command-line style arguments.
        cli_defaults: When true, parse with ``yt_dlp.parse_options`` directly;
            otherwise parse with ``parse_patched_options``.

    Returns:
        Dict of the option keys whose value differs from the baseline. For
        ``'postprocessors'`` only the entries absent from the baseline are kept.
    """
    if cli_defaults:
        parse = yt_dlp.parse_options
    else:
        parse = parse_patched_options
    parsed_opts = parse(opts).ydl_opts

    diff = {}
    for key, value in parsed_opts.items():
        if default_opts[key] != value:
            diff[key] = value

    if 'postprocessors' in diff:
        baseline_pps = default_opts['postprocessors']
        diff['postprocessors'] = [pp for pp in diff['postprocessors']
                                  if pp not in baseline_pps]
    return diff



from pprint import pprint
# --username oauth2 --password ''
#  --extractor-args "youtube:player_client=default,-web_creator"
args = ["--username", "oauth2", "--password", ""]

# Show the translation twice: against the patched baseline, then with the
# regular CLI defaults.
_patched_only = cli_to_api(args)
print('\nThe arguments passed translate to:\n')
pprint(_patched_only)

print('\nCombining these with the CLI defaults gives:\n')
_with_cli_defaults = cli_to_api(args, True)
pprint(_with_cli_defaults)


The arguments passed translate to:

{'password': '', 'username': 'oauth2'}

Combining these with the CLI defaults gives:

{'extract_flat': 'discard_in_playlist',
 'fragment_retries': 10,
 'ignoreerrors': 'only_download',
 'password': '',
 'postprocessors': [{'key': 'FFmpegConcat',
                     'only_multi_video': True,
                     'when': 'playlist'}],
 'retries': 10,
 'username': 'oauth2'}


In [9]:
from tabulate import tabulate
# Tiny experiment: render a dict as one (key, value) row per entry.
metrics = {"a": "a", "b": "b", "c": "c"}

_rows = list(metrics.items())
tabulate(_rows)

'-  -\na  a\nb  b\nc  c\n-  -'