In [None]:
import torch
import os
import numpy as np
import pandas as pd
import polars as pl
from torch.nn import functional as F
from byu.data.io import MultithreadOpencvTomogramLoader
from tqdm import tqdm
import cv2

# Loader instance shared by the histogram cell, which calls tomo_loader.load(<dir>).
# NOTE(review): the argument 8 is presumably the number of worker threads — confirm
# against byu.data.io.MultithreadOpencvTomogramLoader.
tomo_loader = MultithreadOpencvTomogramLoader(8)

In [None]:
# Build a 256-bin intensity histogram for every entry found under DATA_DIR.
# Results are stored in `ret`, keyed by tomogram id:
#   'ori_shape'    -> shape of the array returned by the loader
#   'spaced_shape' -> shape of the tensor the histogram is computed on
#                     (identical to 'ori_shape' here: nothing resizes the volume in between)
#   'hist'         -> list of 256 raw voxel counts, one per uint8 value
ret = {}
DATA_DIR = '/home/dangnh36/datasets/.comp/byu/external/tomogram/'

tomo_ids = sorted(os.listdir(DATA_DIR))
print('Number of tomos:', len(tomo_ids))
global_hist = None  # never updated in this cell; kept in case another cell reads it

for tomo_id in tqdm(tomo_ids):
    tomo_dir = os.path.join(DATA_DIR, tomo_id)
    tomo = tomo_loader.load(tomo_dir)
    ori_shape = tuple(tomo.shape)

    tomo = torch.from_numpy(tomo)
    # A 256-bin histogram only makes sense for 8-bit data, so fail loudly otherwise.
    assert tomo.dtype == torch.uint8
    spaced_shape = tuple(tomo.shape)
    print(tomo_id, ori_shape, '-->', spaced_shape, tomo.dtype)

    # reshape(-1) rather than view(-1): torch.from_numpy keeps the numpy strides, and
    # view() raises on a non-contiguous tensor whereas reshape() copies when it has to.
    hist = torch.bincount(tomo.reshape(-1), minlength=256)
    assert hist.shape[0] == 256
    # Counts are stored unnormalised.
    hist = hist.cpu().numpy()
    ret[tomo_id] = {
        'ori_shape': ori_shape,
        'spaced_shape': spaced_shape,
        'hist': hist.tolist()
    }

In [None]:
import json

# Reload per-tomogram statistics from disk into `ret`, replacing whatever an earlier
# cell stored under that name.
# NOTE(review): the path below ends in '/' and therefore names a directory, not a JSON
# file; open() on a directory raises IsADirectoryError on POSIX. The file name appears to
# have been dropped — TODO restore it. No cell visible here writes such a file either.
with open('/home/dangnh36/datasets/.comp/byu/processed/', 'r') as f:
    ret = json.load(f)

In [None]:
# Aggregate the per-tomogram histograms held in `ret`:
#   accu      -> histogram of all voxels pooled together, normalised to sum to 1
#   avg       -> mean of the individually normalised histograms
#   all_hists -> 2-D array, one normalised histogram per row, in ret's iteration order
accu = None
avg = None
all_hists = []

for data in ret.values():
    hist = np.array(data['hist'])
    norm_hist = hist / hist.sum()
    all_hists.append(norm_hist)

    if accu is None:
        # First histogram seeds both running sums (copies, so the in-place adds
        # below never touch the array stored in all_hists).
        accu, avg = hist.copy(), norm_hist.copy()
        continue

    accu += hist
    avg += norm_hist

all_hists = np.array(all_hists)

accu = accu / accu.sum()
avg = avg / len(ret)

# Sanity check: each of these sums should print as (approximately) 1.
print(accu.sum(), avg.sum())
print([row.sum() for row in all_hists])

In [None]:
# Number of tomograms that currently have statistics in `ret`.
len(ret)

In [None]:
# Hand-maintained list of tomogram ids; it is not referenced by any other visible cell.
# NOTE(review): presumably an earlier candidate set of problematic tomograms — confirm
# whether it is still needed.
wrong2 = [
    'aba2014-02-21-14', 'mba2011-02-16-1', 'mba2011-02-16-103', 'mba2011-02-16-106',
    'mba2011-02-16-108', 'mba2011-02-16-11', 'mba2011-02-16-111', 'mba2011-02-16-115',
    'mba2011-02-16-116', 'mba2011-02-16-12', 'mba2011-02-16-122', 'mba2011-02-16-123',
    'mba2011-02-16-129', 'mba2011-02-16-133', 'mba2011-02-16-139', 'mba2011-02-16-141',
    'mba2011-02-16-143', 'mba2011-02-16-145', 'mba2011-02-16-147', 'mba2011-02-16-15',
    'mba2011-02-16-150', 'mba2011-02-16-153', 'mba2011-02-16-155', 'mba2011-02-16-157',
    'mba2011-02-16-160', 'mba2011-02-16-162', 'mba2011-02-16-17', 'mba2011-02-16-170',
    'mba2011-02-16-173', 'mba2011-02-16-176', 'mba2011-02-16-19', 'mba2011-02-16-20',
    'mba2011-02-16-23', 'mba2011-02-16-26', 'mba2011-02-16-27', 'mba2011-02-16-28',
    'mba2011-02-16-29', 'mba2011-02-16-3', 'mba2011-02-16-30', 'mba2011-02-16-32',
    'mba2011-02-16-33', 'mba2011-02-16-34', 'mba2011-02-16-35', 'mba2011-02-16-37',
    'mba2011-02-16-40', 'mba2011-02-16-42', 'mba2011-02-16-46', 'mba2011-02-16-48',
    'mba2011-02-16-52', 'mba2011-02-16-53', 'mba2011-02-16-55', 'mba2011-02-16-60',
    'mba2011-02-16-64', 'mba2011-02-16-65', 'mba2011-02-16-67', 'mba2011-02-16-68',
    'mba2011-02-16-71', 'mba2011-02-16-75', 'mba2011-02-16-79', 'mba2011-02-16-88',
    'mba2011-02-16-90', 'mba2011-02-16-95',
]
len(wrong2)

In [None]:
# 99th percentile, across tomograms, of the ratio between the histogram mass in
# bins 0-49 and the mass in bins 50-255.
mass_below_50 = all_hists[:, :50].sum(axis=1)
mass_from_50 = all_hists[:, 50:].sum(axis=1)
np.percentile(mass_below_50 / mass_from_50, 99)

In [None]:
# Flag tomograms whose histogram has an unusually heavy low-intensity tail.
CUTOFF = 25              # number of lowest-intensity bins that make up the "tail"
RATIO_THRESHOLD = 0.04   # flag when tail mass / remaining mass exceeds this value

# Rows of all_hists were appended in ret's iteration order, so row i belongs to the
# i-th key. Build the key list once instead of once per flagged row.
tomo_id_by_row = list(ret.keys())

tail_mass = all_hists[:, :CUTOFF].sum(axis=1)
rest_mass = all_hists[:, CUTOFF:].sum(axis=1)
flagged_rows = np.where(tail_mass / rest_mass > RATIO_THRESHOLD)[0]

wrong_quantile_tomo_ids = sorted(tomo_id_by_row[idx] for idx in flagged_rows)
# High-intensity-tail variant of the same test (swap in to look at the other end):
# wrong_quantile_tomo_ids = sorted([list(ret.keys())[idx] for idx in np.where(all_hists[:, -CUTOFF:].sum(axis=1) / all_hists[:, :-CUTOFF].sum(axis = 1) > 0.04)[0]])
print(len(wrong_quantile_tomo_ids))
print(wrong_quantile_tomo_ids)

In [None]:
import matplotlib.pyplot as plt
import numpy as np
import cv2

# Visual check: for each listed tomogram, show its stored histogram next to slice 50.
PLOT_TOMO_IDS = [
    'mba2011-02-16-1', 'mba2011-02-16-106', 'mba2011-02-16-108', 'mba2011-02-16-11',
    'mba2011-02-16-116', 'mba2011-02-16-12', 'mba2011-02-16-123', 'mba2011-02-16-129',
    'mba2011-02-16-133', 'mba2011-02-16-139', 'mba2011-02-16-141', 'mba2011-02-16-145',
    'mba2011-02-16-153', 'mba2011-02-16-155', 'mba2011-02-16-157', 'mba2011-02-16-160',
    'mba2011-02-16-162', 'mba2011-02-16-17', 'mba2011-02-16-176', 'mba2011-02-16-19',
    'mba2011-02-16-26', 'mba2011-02-16-27', 'mba2011-02-16-28', 'mba2011-02-16-29',
    'mba2011-02-16-32', 'mba2011-02-16-33', 'mba2011-02-16-34', 'mba2011-02-16-40',
    'mba2011-02-16-42', 'mba2011-02-16-46', 'mba2011-02-16-48', 'mba2011-02-16-53',
    'mba2011-02-16-55', 'mba2011-02-16-60', 'mba2011-02-16-64', 'mba2011-02-16-65',
    'mba2011-02-16-67', 'mba2011-02-16-71', 'mba2011-02-16-75', 'mba2011-02-16-79',
    'mba2011-02-16-88', 'mba2011-02-16-90', 'mba2011-02-16-95',
]

for i, tomo_id in enumerate(PLOT_TOMO_IDS):
    print(i, tomo_id)

    hist = ret[tomo_id]['hist']
    img_path = f'/home/dangnh36/datasets/.comp/byu/processed/external/tomogram/{tomo_id}/slice_0050.jpg'
    img = cv2.imread(img_path)
    if img is None:
        # cv2.imread signals a missing/unreadable file by returning None; without this
        # check the failure only surfaces as an opaque error inside cvtColor.
        raise FileNotFoundError(f'Could not read slice image: {img_path}')
    img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)  # OpenCV loads BGR; matplotlib expects RGB

    # Left panel: histogram; right panel: the slice itself.
    fig, axs = plt.subplots(1, 2, figsize=(12, 4))

    axs[0].bar(np.arange(0, 256), hist)
    axs[0].set_title('Histogram')

    axs[1].imshow(img)
    axs[1].axis('off')
    axs[1].set_title('Slice Image')

    fig.suptitle(f'Tomogram ID: {tomo_id}')
    fig.tight_layout()
    plt.show()
    # Release the figure explicitly so dozens of iterations do not pile up open figures.
    plt.close(fig)

    print('-----------------------\n')


In [None]:
# Candidate ids for the "too light" group. Only the leading entries are kept: the last
# nine candidates (the non-'mba2011-02-16-*' ids) are dropped.
# NOTE(review): presumably the dropped ids were judged acceptable on inspection — confirm.
too_light_candidates = [
    'mba2011-02-16-106', 'mba2011-02-16-108', 'mba2011-02-16-12', 'mba2011-02-16-139',
    'mba2011-02-16-145', 'mba2011-02-16-153', 'mba2011-02-16-155', 'mba2011-02-16-162',
    'mba2011-02-16-17', 'mba2011-02-16-176', 'mba2011-02-16-19', 'mba2011-02-16-26',
    'mba2011-02-16-28', 'mba2011-02-16-33', 'mba2011-02-16-34', 'mba2011-02-16-42',
    'mba2011-02-16-46', 'mba2011-02-16-48', 'mba2011-02-16-53', 'mba2011-02-16-60',
    'mba2011-02-16-90',
    'mba2012-01-12-11', 'mba2012-01-12-21', 'mba2012-01-13-23', 'mba2012-01-13-26',
    'mba2012-04-24-20', 'mba2012-08-29-1', 'mba2012-08-29-2', 'mba2012-08-29-23',
    'mba2012-08-29-9',
]

too_light = too_light_candidates[:len(too_light_candidates) - 9]
print(too_light)

In [None]:
# Candidate ids for the "too dark" group. Only the first 39 are kept, which is exactly
# the run of 'mba2011-02-16-*' ids; the seven trailing candidates are dropped.
# NOTE(review): presumably the dropped ids were judged acceptable on inspection — confirm.
too_dark_candidates = [
    'mba2011-02-16-1', 'mba2011-02-16-108', 'mba2011-02-16-11', 'mba2011-02-16-116',
    'mba2011-02-16-12', 'mba2011-02-16-123', 'mba2011-02-16-129', 'mba2011-02-16-133',
    'mba2011-02-16-139', 'mba2011-02-16-141', 'mba2011-02-16-145', 'mba2011-02-16-153',
    'mba2011-02-16-155', 'mba2011-02-16-157', 'mba2011-02-16-160', 'mba2011-02-16-162',
    'mba2011-02-16-17', 'mba2011-02-16-176', 'mba2011-02-16-19', 'mba2011-02-16-26',
    'mba2011-02-16-27', 'mba2011-02-16-28', 'mba2011-02-16-29', 'mba2011-02-16-32',
    'mba2011-02-16-33', 'mba2011-02-16-34', 'mba2011-02-16-40', 'mba2011-02-16-53',
    'mba2011-02-16-55', 'mba2011-02-16-60', 'mba2011-02-16-64', 'mba2011-02-16-65',
    'mba2011-02-16-67', 'mba2011-02-16-71', 'mba2011-02-16-75', 'mba2011-02-16-79',
    'mba2011-02-16-88', 'mba2011-02-16-90', 'mba2011-02-16-95',
    'mba2011-03-24-6', 'mba2011-12-17-26', 'mba2012-01-13-12', 'mba2012-01-13-28',
    'mba2012-01-13-29', 'mba2012-04-22-6', 'mba2012-04-24-17',
]

too_dark = too_dark_candidates[0:39]
print(too_dark)

In [None]:
# Sorted, de-duplicated union of the two groups.
print(sorted(set(too_light) | set(too_dark)))

In [None]:
# Frozen list of tomogram ids treated as having a wrong intensity quantile.
# All entries are 'mba2011-02-16-*' ids, listed in lexicographic order.
WRONG_QUANTILE_TOMO_IDS = [
    'mba2011-02-16-1', 'mba2011-02-16-106', 'mba2011-02-16-108', 'mba2011-02-16-11',
    'mba2011-02-16-116', 'mba2011-02-16-12', 'mba2011-02-16-123', 'mba2011-02-16-129',
    'mba2011-02-16-133', 'mba2011-02-16-139', 'mba2011-02-16-141', 'mba2011-02-16-145',
    'mba2011-02-16-153', 'mba2011-02-16-155', 'mba2011-02-16-157', 'mba2011-02-16-160',
    'mba2011-02-16-162', 'mba2011-02-16-17', 'mba2011-02-16-176', 'mba2011-02-16-19',
    'mba2011-02-16-26', 'mba2011-02-16-27', 'mba2011-02-16-28', 'mba2011-02-16-29',
    'mba2011-02-16-32', 'mba2011-02-16-33', 'mba2011-02-16-34', 'mba2011-02-16-40',
    'mba2011-02-16-42', 'mba2011-02-16-46', 'mba2011-02-16-48', 'mba2011-02-16-53',
    'mba2011-02-16-55', 'mba2011-02-16-60', 'mba2011-02-16-64', 'mba2011-02-16-65',
    'mba2011-02-16-67', 'mba2011-02-16-71', 'mba2011-02-16-75', 'mba2011-02-16-79',
    'mba2011-02-16-88', 'mba2011-02-16-90', 'mba2011-02-16-95',
]
len(WRONG_QUANTILE_TOMO_IDS)

In [None]:
# Spot check: display the second id in the list.
WRONG_QUANTILE_TOMO_IDS[1]

In [None]:
# DISABLED one-off, destructive file operation — kept commented out on purpose.
# If re-enabled, for every id in WRONG_QUANTILE_TOMO_IDS it would:
#   1. delete the existing tomogram directory under DST_TOMO_DIR and move the one from
#      SRC_TOMO_DIR into its place;
#   2. delete the existing '<tomo_id>.json' under DST_META_DIR and move the one from
#      SRC_META_DIR into its place.
# The moves empty the source directories, so it is not safely re-runnable.

# SRC_TOMO_DIR = '/home/dangnh36/datasets/.comp/byu/processed/external_wrong_quantile/tomogram/'
# SRC_META_DIR = '/home/dangnh36/datasets/.comp/byu/processed/external_wrong_quantile/meta/'

# DST_TOMO_DIR = '/home/dangnh36/datasets/.comp/byu/processed/external/tomogram/'
# DST_META_DIR = '/home/dangnh36/datasets/.comp/byu/processed/external/meta/'

# import shutil

# for tomo_id in WRONG_QUANTILE_TOMO_IDS[:]:
#     src_tomo_dir = os.path.join(SRC_TOMO_DIR, tomo_id)
#     assert os.path.isdir(src_tomo_dir)
#     dst_tomo_dir = os.path.join(DST_TOMO_DIR, tomo_id)
#     assert os.path.isdir(dst_tomo_dir)
#     shutil.rmtree(dst_tomo_dir)
#     shutil.move(src_tomo_dir, DST_TOMO_DIR)

#     src_meta_path = os.path.join(SRC_META_DIR, f'{tomo_id}.json')
#     assert os.path.isfile(src_meta_path)
#     dst_meta_path = os.path.join(DST_META_DIR, f'{tomo_id}.json')
#     os.remove(dst_meta_path)
#     shutil.move(src_meta_path, DST_META_DIR)