In [1]:
import os
import re
from dataclasses import dataclass
from glob import glob
from pathlib import Path
from typing import Iterable

import numpy as np
import xarray as xr
import matplotlib.pyplot as plt

In [2]:
# Root directory of the wavelet-extraction outputs, written as a relative path
# (so it is resolved against the notebook's current working directory).
basedir = Path("../../group/statconsult/wavelet_extraction/output/")
# Sub-directories of `basedir` to process. The export cell pairs these
# position-by-position with `data_labels`, so the order matters.
subdirs = ["RoughCut", "SmoothCut", "SinglePiece_Hanging"]

In [3]:
# Matches every run of consecutive digits. `name_parser` hands this to
# `get_nums` to pull the numeric fields out of a file name.
# Raw string: in a normal string literal `\d` is an invalid escape sequence.
pattern = re.compile(r"(\d+)")

In [4]:
# This was for local development after a session expired and I needed to
# keep the directory structure for experiments.
# with open("../filetree.txt", "r") as f:
#     dirs = f.readlines()
# vdir = {
#     dirs[0].strip(): [_.strip() for _ in dirs[1:151]],
#     dirs[151].strip(): [_.strip() for _ in dirs[152:302]],
#     dirs[302].strip(): [_.strip() for _ in dirs[302:]],
# }

In [5]:
@dataclass
class NameComponents:
    """Fields that `name_parser` recovers from a single data file name."""

    # Label for the cut condition; `name_parser` stores "none" when the caller
    # does not supply one.
    cut_state: str
    # First number found in the file name, or 0.0 when the name contains only
    # one number (see `get_nums`).
    torque: float
    # Last number found in the file name (see `get_nums`).
    volts: float
    # Part of the last underscore-separated piece that precedes the dot.
    component: str


def load_data(file_name: str, file_path: Path) -> np.ndarray:
    """Load one saved NumPy array from disk.

    Parameters
    ----------
    file_name
        Name of the ``.npy`` file, relative to ``file_path``.
    file_path
        Directory that contains the file. A plain ``str`` is accepted as well
        as a ``Path``.

    Returns
    -------
    np.ndarray
        The array stored in the file.

    Raises
    ------
    FileNotFoundError
        If ``file_path / file_name`` does not exist.
    """
    # Actually read the file: returning synthetic random values here would make
    # every array exported downstream meaningless.
    # `allow_pickle` is left at its default (False) so that a tampered file
    # cannot execute code while being loaded.
    return np.load(Path(file_path) / file_name)


def get_nums(file_name: str, pattern: re.Pattern) -> tuple[float, float]:
    """Extract the ``(torque, volts)`` pair encoded in a file name.

    Parameters
    ----------
    file_name
        File name to search.
    pattern
        Compiled regex whose ``findall`` yields one numeric string per match.

    Returns
    -------
    tuple[float, float]
        ``(torque, volts)``. When the name contains a single number it is taken
        as the volts value and torque defaults to ``0.0``.

    Raises
    ------
    ValueError
        If the name contains no numbers, or more than two.
    """
    matches = pattern.findall(file_name)
    if len(matches) == 1:
        # Only one number present: treat it as volts, torque is implicitly zero.
        return (0.0, float(matches[0]))
    if len(matches) == 2:
        return (float(matches[0]), float(matches[1]))
    # Reached for zero matches *and* for three or more, so report the real count
    # rather than claiming nothing was found.
    raise ValueError(
        f"Expected 1 or 2 numbers in {file_name!r}, found {len(matches)}: {matches}"
    )


def find_all_components(file_name: str, list_dir: list[str]) -> list[str]:
    """Return the entries of ``list_dir`` whose names start with ``file_name``.

    Parameters
    ----------
    file_name
        Shared leading part (stem) of the component files to collect.
    list_dir
        Candidate file names, e.g. a directory listing.

    Returns
    -------
    list[str]
        Matching entries, in the order they appear in ``list_dir``. Empty when
        nothing matches.
    """
    # Prefix match rather than substring match: with `in`, the stem "5V_" would
    # also pick up "15V_x.npy" or "10Nm_5V_x.npy", mixing two measurements.
    return [entry for entry in list_dir if entry.startswith(file_name)]


def read_component_files(files: list[str], file_path: Path) -> np.ndarray:
    """Load each file in ``files`` and join the arrays along a new first axis.

    Every file is read with ``load_data(file, file_path)``. The result has one
    entry along axis 0 per input file, in the order given.
    """
    layers = []
    for component_file in files:
        loaded = load_data(component_file, file_path)
        # Prepend a length-1 axis so the arrays can be concatenated along it.
        layers.append(loaded.reshape((1,) + loaded.shape))
    return np.concatenate(layers, axis=0)


def name_parser(
    file_name: str, cut_state: str | None = None
) -> tuple[str, NameComponents]:
    """Split a data file name into its stem and parsed fields.

    Parameters
    ----------
    file_name
        File name whose last underscore-separated piece is
        ``<component>.<extension>``.
    cut_state
        Label to store on the result; ``None`` is recorded as ``"none"``.

    Returns
    -------
    tuple[str, NameComponents]
        ``(name_stem, components)``. ``name_stem`` is everything up to and
        including the final underscore, so all component files of one
        measurement share the same stem.

    Raises
    ------
    ValueError
        If the last piece does not contain exactly one dot, or if ``get_nums``
        cannot find one or two numbers in the name.
    """
    label = "none" if cut_state is None else cut_state
    # Derive the stem from the same underscore split that yields the component.
    # A fixed `[:-5]` slice is only right for one-character components with a
    # four-character extension.
    head, separator, comp_ext = file_name.rpartition("_")
    name_stem = head + separator
    component, _ = comp_ext.split(".")
    # Uses the module-level `pattern` to locate the numeric fields.
    torque, volts = get_nums(file_name, pattern)
    return name_stem, NameComponents(
        cut_state=label, torque=torque, volts=volts, component=component
    )


def get_torque_and_volt_vals(data_dir: Path) -> tuple[list, list]:
    """Collect the distinct torque and volts values present in ``data_dir``.

    Every ``*.npy`` file name in the directory is run through ``name_parser``.
    Returns ``(torques, volts)``, each a list of unique values in ascending
    order.
    """
    parsed = [name_parser(entry)[1] for entry in glob("*.npy", root_dir=data_dir)]
    unique_torques = {item.torque for item in parsed}
    unique_volts = {item.volts for item in parsed}
    return sorted(unique_torques), sorted(unique_volts)


def extract_data(data_dir: Path, n_components: int = 3) -> np.ndarray:
    """Assemble every ``.npy`` file in ``data_dir`` into a single array.

    Files are grouped by the stem returned by ``name_parser``; each group is
    placed at the (torque, volts) position parsed from its file names.

    Parameters
    ----------
    data_dir
        Directory containing the ``.npy`` files.
    n_components
        Number of component files expected per group.

    Returns
    -------
    np.ndarray
        Array of shape ``(n_torques, n_volts, n_components, *file_shape)``,
        where ``file_shape`` is the shape of the first file loaded. Positions
        for which no files exist are filled with NaN.

    Raises
    ------
    FileNotFoundError
        If ``data_dir`` contains no ``.npy`` files.
    ValueError
        If a group does not contain exactly ``n_components`` files.
    """
    # glob returns names in arbitrary order. Sorting keeps the component axis
    # in the same order for every measurement and on every run.
    file_names = sorted(glob("*.npy", root_dir=data_dir))
    if not file_names:
        raise FileNotFoundError(f"No .npy files found in {data_dir}")

    torques, volts = get_torque_and_volt_vals(data_dir)
    data_shape = load_data(file_names[0], data_dir).shape

    # NaN-filled rather than np.empty, so a missing (torque, volts) combination
    # is visibly missing instead of holding uninitialised memory.
    data = np.full((len(torques), len(volts), n_components, *data_shape), np.nan)

    # Group by exact stem in one pass; insertion order keeps each group sorted.
    groups: dict[str, list[str]] = {}
    for file_name in file_names:
        stem, _ = name_parser(file_name)
        groups.setdefault(stem, []).append(file_name)

    for stem, files in groups.items():
        # Checked explicitly because a single-file group would otherwise be
        # broadcast silently across all component slots.
        if len(files) != n_components:
            raise ValueError(
                f"Expected {n_components} component files for {stem!r}, "
                f"found {len(files)}: {files}"
            )
        _, name = name_parser(files[0])
        torque_idx = torques.index(name.torque)
        volts_idx = volts.index(name.volts)
        data[torque_idx, volts_idx] = read_component_files(files, data_dir)

    return data

In [6]:
# Short labels used in the saved file names, in the same order as `subdirs`.
data_labels = ["rough", "smooth", "uncut"]

# strict=True fails loudly if the two lists ever get out of step, instead of
# silently skipping or mislabelling a directory.
for subdir, label in zip(subdirs, data_labels, strict=True):
    # Build a Path (not a str) to match extract_data's annotated parameter.
    data_dir = basedir / subdir
    data = extract_data(data_dir)
    # Written to the current working directory as "<label>_array.npy".
    np.save(f"{label}_array.npy", data)


In [7]:
# Copy the exported arrays from the working directory to the shared output
# directory.
# NOTE(review): the recorded output of this cell shows all three copies failing
# with "Permission denied" — confirm write access to the target directory.
!cp rough_array.npy /scratch/group/statconsult/wavelet_extraction/output/
!cp smooth_array.npy /scratch/group/statconsult/wavelet_extraction/output/
!cp uncut_array.npy /scratch/group/statconsult/wavelet_extraction/output/

cp: cannot create regular file '/scratch/group/statconsult/wavelet_extraction/output/rough_array.npy': Permission denied
cp: cannot create regular file '/scratch/group/statconsult/wavelet_extraction/output/smooth_array.npy': Permission denied
cp: cannot create regular file '/scratch/group/statconsult/wavelet_extraction/output/uncut_array.npy': Permission denied
