In [1]:
import pandas as pd
import numpy as np

In [None]:
def load_group_arrays(filename: str, group_name: str, grp_list: list[str], index_col: str = "DATE_V", encoding: str = "ISO-8859-1") -> dict[str, np.ndarray]:
    """Read selected CSV columns and return them as prefixed NumPy arrays.

    Args:
        filename: Path of the CSV file to read.
        group_name: Prefix placed in front of every key of the result.
        grp_list: Names of the data columns to extract.
        index_col: Column read alongside the data and used as the frame index.
            It is not included in the returned dictionary.
        encoding: Text encoding passed to ``pd.read_csv``.

    Returns:
        A dictionary mapping ``"<group_name>_<column>"`` to that column's
        values as a NumPy array, in the order given by ``grp_list``.
    """
    # Only the index column and the requested data columns are read.
    wanted = [index_col] + grp_list
    frame = pd.read_csv(filename, usecols=wanted, encoding=encoding).set_index(index_col)

    # One entry per requested column, keyed with the group prefix.
    arrays = {}
    for col in grp_list:
        arrays[f"{group_name}_{col}"] = frame[col].to_numpy()
    return arrays

In [None]:
# Load the two test columns from the master CSV into prefixed NumPy arrays.
filename = "xl_dtv_all_snm_tst.csv"
group_name = "grp_tst"
grp_list = ["Eliquist", "Magtien"]

array_dict = load_group_arrays(
    filename,
    group_name,
    grp_list,
    index_col="DATE_V",
    encoding="ISO-8859-1",
)


In [None]:

# There is no "index_col" key: keys are built as f"{group_name}_{column}", and the
# load_group_arrays version defined above does not store the index at all.
# List the keys that are actually available instead of raising KeyError.
list(array_dict)

In [None]:
# Display the array loaded from the "Eliquist" column (key = group prefix + "_" + column name).
array_dict["grp_tst_Eliquist"]

In [None]:
# The arrays live in array_dict, not in standalone variables; look the array up by its key.
array_dict["grp_tst_Magtien"]

In [None]:
# No variable named grp_tst_Magtien exists; the array is stored in array_dict under that key.
array_dict["grp_tst_Magtien"]


# The following adds the index

In [2]:
def load_group_arrays(filename: str, group_name: str, grp_list: list[str], index_col: str = "DATE_V", encoding: str = "ISO-8859-1") -> dict[str, np.ndarray]:
    """Load selected CSV columns plus the index column as prefixed NumPy arrays.

    Args:
        filename: Path of the CSV file to read.
        group_name: Prefix placed in front of every key of the result.
        grp_list: Names of the data columns to extract.
        index_col: Column used as the frame index; its values are returned
            under the key ``"<group_name>_<index_col>"``.
        encoding: Text encoding passed to ``pd.read_csv``.

    Returns:
        A dictionary with one ``"<group_name>_<column>"`` entry per name in
        ``grp_list`` followed by one entry holding the index values.
    """
    usecols = [index_col] + grp_list
    table = pd.read_csv(filename, usecols=usecols, encoding=encoding)
    table = table.set_index(index_col)

    # Data columns first, then the index, so key order matches the request order.
    named_columns = ((f"{group_name}_{col}", table[col].to_numpy()) for col in grp_list)
    result = dict(named_columns)
    result[f"{group_name}_{index_col}"] = table.index.to_numpy()
    return result




In [3]:
# Reload the same test columns; the load_group_arrays defined just above
# also stores the DATE_V index in the returned dictionary.
filename = "xl_dtv_all_snm_tst.csv"
group_name = "grp_tst"
grp_list = ["Eliquist", "Magtien"]

array_dict = load_group_arrays(
    filename,
    group_name,
    grp_list,
    index_col="DATE_V",
    encoding="ISO-8859-1",
)


***array_dict*** is a working dictionary in which ***"dtv"*** (the `DATE_V` column) is the index and the selected data columns (dat_cols) are assembled as NumPy arrays.
1. They all have the same length and are stored in a ***master CSV file array*** whose first column is ***"dtv"***.
2. ***Time Alignment:*** It ensures all arrays—whether supplements, motion, or biochemical—are synchronized to the same timeline. That’s essential for cross-correlation, lag analysis, and time-shifting.
3. ***Plotting & Visualization:*** You’ll need DATE_V as the x-axis for any time-series plots. Without it, your arrays are just floating sequences.
4. ***Diagnostics & Gaps:*** You can detect missing data, irregular sampling, or temporal drift by comparing ***"dtv"*** across groups.
5. ***Batch Operations:*** If you’re merging arrays from different domains (e.g., Motion, Supplements, Sleep), having a shared ***"dtv"*** lets you align them cleanly—even if some arrays are sparse.
The index is stored alongside the data columns with:

```python
array_dict[f"{group_name}_DATE_V"] = df.index.to_numpy()
```

The resulting dictionary has the form:

```python
{
    "grp_tst_DATE_V": np.array([...]),
    "grp_tst_Eliquist": np.array([...]),
    "grp_tst_Magtien": np.array([...]),
    "grp_tst_GPLC": np.array([...])
}
```

***Let’s scaffold a clean, modular TimeSeriesGroup class that fits your architecture and handles:***

✅ Selective column loading from a master CSV

✅ Index alignment via DATE_V

✅ Dictionary-style access to NumPy arrays

✅ Metadata logging for diagnostics and traceability

In [None]:
import pandas as pd
import numpy as np

class TimeSeriesGroup:
    """A named group of CSV columns held as NumPy arrays next to their index.

    On construction the index column and the requested data columns are read
    from ``filename``. Every array is stored in ``array_dict`` under the key
    ``"<group_name>_<column>"``; the index values are stored under
    ``"<group_name>_<index_col>"``.
    """

    def __init__(self, filename: str, group_name: str, columns: list[str], index_col: str = "DATE_V", encoding: str = "ISO-8859-1"):
        """Store the settings and load the arrays immediately.

        Args:
            filename: Path of the CSV file to read.
            group_name: Prefix used for every key in ``array_dict``.
            columns: Names of the data columns to load.
            index_col: Column used as the index.
            encoding: Text encoding passed to ``pd.read_csv``.
        """
        self.group_name = group_name
        self.index_col = index_col
        self.filename = filename
        self.encoding = encoding
        # Copy so that later changes to the caller's list cannot alter this group.
        self.columns = list(columns)
        self.array_dict = {}

        self._load_data()

    def _load_data(self):
        """Read the CSV and fill ``array_dict`` with the index and column arrays."""
        cols_to_load = [self.index_col] + self.columns
        df = pd.read_csv(self.filename, usecols=cols_to_load, encoding=self.encoding)
        df = df.set_index(self.index_col)

        # Store index as array
        self.array_dict[f"{self.group_name}_{self.index_col}"] = df.index.to_numpy()

        # Store each column as array
        for col in self.columns:
            self.array_dict[f"{self.group_name}_{col}"] = df[col].to_numpy()

    def get_array(self, name: str) -> "np.ndarray | None":
        """Return the array stored under ``name``, or ``None`` if there is no such key.

        ``name`` is the full key, including the group prefix.
        """
        return self.array_dict.get(name)

    def summary(self):
        """Print the shape, dtype and number of missing values of every stored array."""
        print(f"📦 Group: {self.group_name}")
        for key, arr in self.array_dict.items():
            # pd.isna handles object/string arrays (e.g. a textual index),
            # where np.isnan would raise TypeError.
            null_count = pd.isna(arr).sum()
            print(f"  {key}: shape={arr.shape}, dtype={arr.dtype}, nulls={null_count}")
