In [8]:
import pandas as pd
import numpy as np

# Create array_dict from an array in a .csv file

## Read the file with protection from oddball encodings.

In [25]:
def safe_csv_read(filename, usecols=None, index_col=None):
    """Read a CSV into a DataFrame, retrying as ISO-8859-1 when decoding fails.

    Args:
        filename: CSV source, passed straight to ``pd.read_csv``.
        usecols: Optional column subset, forwarded to ``pd.read_csv``.
        index_col: Optional index column, forwarded to ``pd.read_csv``.

    Returns:
        The DataFrame returned by ``pd.read_csv``.
    """
    read_options = {"usecols": usecols, "index_col": index_col}
    try:
        frame = pd.read_csv(filename, **read_options)
    except UnicodeDecodeError:
        # The default decode failed; read again with an explicit ISO-8859-1 encoding.
        frame = pd.read_csv(filename, encoding="ISO-8859-1", **read_options)
    return frame


## *Load the array using* ***safe_csv_read()***, *set the index, convert the data columns to np.arrays & build* ***array_dict***

In [39]:
def load_group_arrays(filename: str, group_name: str, grp_list: list[str], index_col: str = "dtv") -> dict[str, np.ndarray]:
    """Load selected CSV columns and return them as a dictionary of NumPy arrays.

    Args:
        filename: CSV file handed to ``safe_csv_read``.
        group_name: Prefix used for every key of the returned dictionary.
        grp_list: Names of the data columns to load.
        index_col: Column that becomes the DataFrame index (default ``"dtv"``).

    Returns:
        A dict with one ``f"{group_name}_{col}"`` entry per column in
        ``grp_list`` followed by a ``f"{group_name}_{index_col}"`` entry that
        holds the index values.
    """
    wanted_columns = [index_col] + grp_list
    frame = safe_csv_read(filename, usecols=wanted_columns).set_index(index_col)

    arrays = {}
    for column in grp_list:
        arrays[f"{group_name}_{column}"] = frame[column].to_numpy()

    # The index is stored last, under the same naming scheme as the data columns.
    arrays[f"{group_name}_{index_col}"] = frame.index.to_numpy()
    return arrays

## *Set* ***filename, group_name, grp_list*** then *call* ***load_group_arrays()*** *to compute np.arrays & build* ***array_dict***

In [40]:
# Choose the source file, the key prefix and the data columns, then build array_dict.
filename = "xl_dtv_all_snm_tst.csv"
group_name = "grp_tst"
grp_list = ["Eliquist", "Magtien"]

# An earlier revision passed index_col="DATE_V" and encoding="ISO-8859-1" here;
# the loader now goes through safe_csv_read() and takes no encoding argument.
array_dict = load_group_arrays(
    filename=filename,
    group_name=group_name,
    grp_list=grp_list,
    index_col="dtv",
)

In [48]:
array_dict

{'grp_tst_Eliquist': array([10,  9,  8,  7,  6,  5,  4,  3,  2,  1]),
 'grp_tst_Magtien': array([ 1,  2,  3,  4,  5,  6,  7,  8,  9, 10]),
 'grp_tst_dtv': array([45849, 45850, 45851, 45852, 45853, 45854, 45855, 45856, 45857,
        45858])}

In [50]:
array_dict['grp_tst_dtv'][5]

np.int64(45854)

In [54]:
array_dict['grp_tst_Magtien'][9]

np.int64(10)

In [58]:
# Locate one dtv value inside the group's index array.
dtv_array = array_dict["grp_tst_dtv"]
grp_tst_dtv = 45852

# Positions at which the index array equals the requested value (empty when absent).
row_index = np.nonzero(dtv_array == grp_tst_dtv)[0]

if row_index.size == 0:
    print(f"⚠️ Date {grp_tst_dtv} not found in 'grp_tst_dtv'")
else:
    print(f"✅ Row index for {grp_tst_dtv}: {row_index[0]}")


✅ Row index for 45852: 3


In [59]:
array_dict['grp_tst_Magtien'][row_index]

array([4])

# Description of ***array_dict***

***array_dict*** is a working dictionary in which dtv is the index and the selected dat_cols are stored as np.arrays.
1. dtv is a col of integers representing the number of days since jan 1 1900 and ends with today.
2. dtv is used as the index for all dat_cols
3. all dat_cols have cls attributes common to all the values in that dat-col
5. all dat_cols have cls methods that compute certain statistics of the dat_col values.
6. dat_col attributes and methods are accessed with a dot after the dat_col, i.e. dat_col.method or dat_col.attribute. ***see*** *https://copilot.microsoft.com/shares/pages/MZyaioewJp2wR4mSwfShP* for examples.
7. all dat_cols have the same length as the ***dtv col*** and are stored in an ***xl csv file array*** with ***"dtv"*** as the first col and str headers as the first row.
8. ***Time Alignment*** It ensures all arrays—whether supplements, motion, or biochemical—are synchronized to the same timeline. That’s essential for cross-correlation, lag analysis, and time-shifting.
9. ***Plotting & Visualization*** You’ll need ***"dtv"*** as the x-axis for any time-series plots. Without it, your arrays are just floating sequences.
10. ***Diagnostics & Gaps*** You can detect missing data, irregular sampling, or temporal drift by comparing ***"dtv"*** across groups.
11. ***Batch Operations*** If you’re merging arrays from different domains (e.g., Motion, Supplements, Sleep), having a shared ***"dtv"*** lets you align them cleanly—even if some arrays are sparse.
Example: the index is stored with

```python
array_dict[f"{group_name}_DATE_V"] = df.index.to_numpy()
```

which gives a dictionary of this shape:

```python
{
    "grp_tst_DATE_V": np.array([...]),
    "grp_tst_Eliquist": np.array([...]),
    "grp_tst_Magtien": np.array([...]),
    "grp_tst_GPLC": np.array([...])
}
```

---

***Let’s scaffold a clean, modular TimeSeriesGroup class that fits your architecture and handles:***

✅ Selective column loading from a master CSV

✅ Index alignment via DATE_V

✅ Dictionary-style access to NumPy arrays

✅ Metadata logging for diagnostics and traceability

In [None]:
import pandas as pd
import numpy as np

class TimeSeriesGroup:
    """Load selected columns of one CSV file into a dictionary of NumPy arrays.

    Each array is stored in ``array_dict`` under ``f"{group_name}_{column}"``.
    The index column is stored the same way, so every array of the group can
    be aligned against it.
    """

    def __init__(self, filename: str, group_name: str, columns: list[str], index_col: str = "DATE_V", encoding: str = "ISO-8859-1"):
        """Store the load settings and read the file immediately.

        Args:
            filename: CSV file to read.
            group_name: Prefix for every key in ``array_dict``.
            columns: Names of the data columns to load.
            index_col: Column used as the index (default ``"DATE_V"``).
            encoding: Text encoding passed to ``pd.read_csv``.
        """
        self.group_name = group_name
        self.index_col = index_col
        self.filename = filename
        self.encoding = encoding
        # Keep a private list: caller-side mutation cannot affect the group,
        # and any iterable of names (e.g. a tuple) is accepted.
        self.columns = list(columns)
        self.array_dict = {}

        self._load_data()

    def _load_data(self):
        """Read the CSV and fill ``array_dict``: index array first, then one array per column."""
        cols_to_load = [self.index_col, *self.columns]
        df = pd.read_csv(
            self.filename, usecols=cols_to_load, encoding=self.encoding
        ).set_index(self.index_col)

        # Store index as array
        self.array_dict[f"{self.group_name}_{self.index_col}"] = df.index.to_numpy()

        # Store each column as array
        for col in self.columns:
            self.array_dict[f"{self.group_name}_{col}"] = df[col].to_numpy()

    def get_array(self, name: str) -> "np.ndarray | None":
        """Return the array stored under ``name``, or ``None`` when no such key exists."""
        return self.array_dict.get(name)

    def summary(self):
        """Print shape, dtype and missing-value count of every stored array."""
        print(f"📦 Group: {self.group_name}")
        for key, arr in self.array_dict.items():
            # pd.isna also works on object/string arrays (e.g. a date-string
            # index); np.isnan raises TypeError for those dtypes.
            null_count = int(pd.isna(arr).sum())
            print(f"  {key}: shape={arr.shape}, dtype={arr.dtype}, nulls={null_count}")


# Installing an input system to load data manually

## *Define one day's values, align them to the master* ***dtv*** *timeline & store them in* ***array_dict***

In [None]:
import pandas as pd
import numpy as np

# Define the new day and the values entered for it
new_day = "2025-08-30"
new_values = {
    "Eliquist": 5.0,
    "Magtien": 3.2,
    "GPLC": 1.8
}

# The loaded dtv index holds integer day numbers (e.g. 45849), so an ISO date
# string used as a row label can never match it; convert the date first.
# NOTE(review): assumes dtv is an Excel-style day serial (1899-12-30 = day 0,
# which makes 45849 = 2025-07-11) — confirm against the master CSV.
new_dtv = (pd.Timestamp(new_day) - pd.Timestamp("1899-12-30")).days

# Create a one-row DataFrame with the new day's dtv as index
df_new = pd.DataFrame(new_values, index=[new_dtv])
df_new.index.name = "dtv"

# Reindex to match the master dtv timeline; every other day becomes NaN.
# array_dict only holds keys prefixed with group_name, so the timeline is
# read through that prefix.
master_dtv = array_dict[f"{group_name}_dtv"]
df_new = df_new.reindex(master_dtv)

if new_dtv in master_dtv:
    for col in new_values:
        key = f"{group_name}_{col}"
        merged = df_new[col].to_numpy()
        if key in array_dict:
            # Keep the values already loaded for all other days. The result
            # is float because the untouched rows pass through a NaN mask.
            merged = np.where(pd.isna(merged), array_dict[key], merged)
        array_dict[key] = merged
else:
    # Leave array_dict untouched rather than storing all-NaN columns.
    print(f"⚠️ Date {new_day} (dtv {new_dtv}) not found in '{group_name}_dtv'")
