In [None]:

import numpy as np
import matplotlib.pyplot as plt

import json
from tqdm.notebook import tqdm

from scipy import stats

import glob

from combra import data, angles
import os

import glob
import multiprocessing

import numpy as np
from pathlib import Path
from mpire import WorkerPool
from skimage import io, color, filters, morphology, util
import cv2
import re
import pyarrow as pa
import pyarrow.parquet as pq
from tqdm import tqdm

from combra import data, angles, mvee

In [None]:
# Input selection for the dataset. The folder path is set to None here, so
# only the HDF5 path is actually supplied to the dataset constructor.
# Alternative inputs kept for reference:
#   images_folder_path = './data/o_bc_left_4x_1536_1024x1024_N360'
#   image_hdf_path = './data/san_256x256_N100_000.h5'
images_folder_path = None
image_hdf_path = './data/san_512x512_N100_000.h5'

# NOTE(review): the meaning of max_images_num_per_class / hdf_images_threshold
# is defined by combra.data.PobeditDataset — confirm there before tuning.
dataset = data.PobeditDataset(
    images_folder_path=images_folder_path,
    image_hdf_path=image_hdf_path,
    max_images_num_per_class=10,
    hdf_images_threshold=11,
)

In [None]:


# Class name -> display label (labels are Russian grain-size descriptions;
# English gloss given on each line).
types_dict = dict(
    Ultra_Co11="средние зерна",        # medium grains
    Ultra_Co25="мелкие зерна",         # fine grains
    Ultra_Co8="средне-мелкие зерна",   # medium-fine grains
    Ultra_Co6_2="крупные зерна",       # coarse grains
    Ultra_Co15="средне-мелкие зерна",  # medium-fine grains
)

# Run angle generation on `dataset`, passing the class-label mapping, the
# list of step values, the angle tolerance and the worker count. Whatever
# generate_angles returns is kept in `out` and echoed for inspection.
out = dataset.generate_angles(
    types_dict=types_dict,
    save_path="san_512x512_N100_000",
    step=[1, 2, 3, 4, 5],
    angles_tol=3,
    workers=20,
)

print("Angles outputs:", out)


In [None]:
import pyarrow.parquet as pq
import pyarrow as pa
from pathlib import Path

# Placeholder class label found in parquet metadata -> dataset class name
# that should replace it.
NAME_MAP = dict(
    class_0="Ultra_Co25",
    class_1="Ultra_Co11",
    class_2="Ultra_Co6_2",
)

def update_meta_names(pq_path):
    """Rename placeholder class names stored in a parquet file's ``meta`` column.

    Every entry of the ``meta`` column is read as a mapping. When its
    ``"name"`` value is a key of the module-level ``NAME_MAP``, the value is
    replaced by the mapped name. The ``meta`` column is read on its own first
    so that files with nothing to rename are skipped without loading the
    remaining columns.

    The updated table is written to a sibling temporary file and then moved
    over the original, so a failed or interrupted write cannot leave the
    original file truncated.

    Args:
        pq_path: Path (``str`` or ``pathlib.Path``) of the parquet file to
            update in place.

    Returns:
        bool: ``True`` if the file was rewritten, ``False`` if no entry
        needed renaming (the file is then left untouched).
    """
    pq_path = Path(pq_path)

    # Cheap pass: load only the meta column to decide whether work is needed.
    meta_table = pq.read_table(pq_path, columns=["meta"])
    meta_list = meta_table["meta"].to_pylist()

    def _needs_rename(entry):
        # Null rows are returned as None by to_pylist(); they never match.
        return entry is not None and entry.get("name") in NAME_MAP

    if not any(_needs_rename(m) for m in meta_list):
        return False

    # At least one name has to change: load every column for the rewrite.
    table = pq.read_table(pq_path)

    new_meta = []
    for m in meta_list:
        if _needs_rename(m):
            m = dict(m)  # copy so the list read from the file is not mutated
            m["name"] = NAME_MAP[m["name"]]
        new_meta.append(m)

    # Rebuild the column with the original Arrow type and swap it in place.
    meta_array = pa.array(new_meta, type=table.schema.field("meta").type)
    col_idx = table.schema.get_field_index("meta")
    table = table.set_column(col_idx, "meta", meta_array)

    # The ".tmp" suffix keeps the temporary file out of "*.parquet" globs.
    tmp_path = pq_path.with_name(pq_path.name + ".tmp")
    try:
        pq.write_table(table, str(tmp_path))
        tmp_path.replace(pq_path)
    finally:
        # Only present here if the write or the move failed.
        if tmp_path.exists():
            tmp_path.unlink()
    return True

# Output folders whose parquet files get their meta names rewritten.
folders = [Path("./san_512x512_N100_000"), Path("./san_256x256_N100_000")]

for folder in folders:
    # Sorted so files are processed (and reported) in a stable order.
    parquets = sorted(folder.glob("*.parquet"))
    print(f"\nProcessing {len(parquets)} files in {folder}")

    for pq_path in parquets:
        updated = update_meta_names(pq_path)
        if updated:
            status = "Updated"
        else:
            status = "Skipped (no matching names)"
        print(f"  {status}: {pq_path.name}")

print("\nDone!")


In [None]:
%%time
# ---- Plot angle data straight from one parquet file ----
# Other inputs that can be swapped in:
#   in_path = "./san_512x512_N100_000/angles_n100_step5.parquet"
#   in_path = "./san_512x512_N100_000/angles_n1000_step5.parquet"
in_path = "./san_256x256_N100_000/angles_n10000.parquet"

# Reference image paths, each noted with a number (presumably the numeric
# class index — TODO confirm):
#   0 -> "Ultra_Co25/Ultra_Co25-001_angle_270.jpeg"
#   1 -> "Ultra_Co11/Ultra_Co11-100_angle_0.jpeg"
#   2 -> "Ultra_Co6_2/Ultra_Co6_2-001_angle_0.jpeg"

# Unused label mapping kept for reference (values are Russian grain-size labels):
# names_dict = {'Ultra_Co11': 'средние зерна',
#               'Ultra_Co25': 'мелкие зерна',
#               'Ultra_Co8': 'средне-мелкие зерна',
#               'Ultra_Co6_2': 'крупные зерна',
#               'Ultra_Co15': 'средне-мелкие зерна'}

# Here the plotting helper is given the parquet path itself rather than
# already-loaded rows.
angles.angles_plot_base(
    parquet_path=in_path,
    N=10,
    M=7,
    step=1,
    font_size=20,
    scatter_size=5,
)


In [None]:
%%time
from pathlib import Path
import pyarrow.parquet as pq
import matplotlib.pyplot as plt
from tqdm import tqdm

# Directories whose parquet files are rendered to PNG, one image per file.
in_dirs = [Path("./san_256x256_N100_000"), Path("./san_512x512_N100_000")]

for in_dir in in_dirs:
    # Images are written to a "plots" subfolder of each input directory.
    out_dir = in_dir / "plots"
    out_dir.mkdir(parents=True, exist_ok=True)

    parquets = sorted(in_dir.glob("*.parquet"))
    print(f"Found {len(parquets)} parquet files in {in_dir.resolve()}")

    for in_path in tqdm(parquets, desc=in_dir.name):
        # Load the whole file and convert it to a dict of column lists.
        table = pq.read_table(in_path)
        rows = table.to_pydict()

        # The file stem serves as plot name, title and PNG file name.
        save_name = in_path.stem
        out_path = out_dir / f"{save_name}.png"

        # save=False: the figure is written below by this cell instead.
        angles.angles_plot_base(
            rows, save_name=save_name, N=10, M=7, save=False,
            indices=None, font_size=20, scatter_size=20,
        )

        # Title the current axes with the file stem, then save and release
        # the figure so open figures do not pile up across iterations.
        plt.title(save_name)
        plt.tight_layout()
        plt.savefig(out_path, dpi=200, bbox_inches="tight")
        plt.close()

    print(f"Saved plots to: {out_dir.resolve()}")


In [None]:
# NOTE(review): as far as this notebook shows, `rows` is only assigned inside
# the batch-plotting loop, so this call uses the last parquet file read there
# and needs that cell to have run (and found at least one file) first.
angles.angles_plot_base(
    rows,
    save_name="biba",
    N=10,
    M=7,
    save=False,
    indices=None,
    font_size=20,
    scatter_size=20,
)

In [None]:
# NOTE(review): as far as this notebook shows, `rows` is only assigned inside
# the batch-plotting loop, so this call uses the last parquet file read there
# and needs that cell to have run (and found at least one file) first.
mvee.plot_beam_base(
    rows,
    save_name='biba',
    step=5,
    N=7,
    M=7,
    save=False,
    indices=None,
    font_size=20,
    scatter_size=20,
)