In [4]:
import scipy.io
import torch
from thoi.measures.gaussian_copula import multi_order_measures, nplets_measures
from thoi.heuristics import simulated_annealing, greedy, simulated_annealing_multi_order
import seaborn as sns
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from functools import partial
import time
# Run on the GPU when CUDA is available; otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = "cuda"
else:
    device = "cpu"

Helper Functions

In [5]:
def preprocess(array):
    """
    Stack the per-subject matrices in ``array[0]`` and z-score them along axis 1.

    The entries of ``array[0]`` are stacked on a new leading axis, the last two
    axes are swapped, and every series along axis 1 of the result is centred
    on its mean and divided by its (population) standard deviation.

    Args:
        array: Indexable whose first element is a sequence of equally shaped
            2-D arrays (presumably the (1, n_subjects) object array that
            ``scipy.io.loadmat`` returns for a MATLAB cell array - confirm
            against the data file).

    Returns:
        np.ndarray: Array of shape (n_subjects, d2, d1), where each input
        matrix has shape (d1, d2), normalized along axis 1. Series with zero
        standard deviation are returned as zeros instead of NaN/inf.
    """
    stacked = np.stack(array[0], axis=0)
    stacked = stacked.transpose(0, 2, 1)
    mean = np.mean(stacked, axis=1, keepdims=True)
    std = np.std(stacked, axis=1, keepdims=True)
    # A constant series has std == 0; dividing by it would inject NaN/inf into
    # every downstream measure. Dividing by 1 leaves the centred zeros intact.
    safe_std = np.where(std == 0, 1.0, std)
    return (stacked - mean) / safe_std

def print_time(t_i, t_f):
    """Print the time elapsed between ``t_i`` and ``t_f`` (in seconds) as HH:MM:SS."""
    elapsed = t_f - t_i
    # Whole hours, whole minutes left within the hour, whole seconds left within the minute.
    parts = (elapsed // 3600, (elapsed % 3600) // 60, elapsed % 60)
    hh, mm, ss = (int(part) for part in parts)
    print("Elapsed time: {:02d}:{:02d}:{:02d}".format(hh, mm, ss))

Data

In [9]:
path_anesthesia = "data_ts_Anesthesia_Cleaned.mat"
data_anesthesia = scipy.io.loadmat(path_anesthesia)

# Load and normalize each condition in one pass; the variable names match the
# keys stored in the .mat file.
ts_aw, ts_keta, ts_lpp, ts_dpp, ts_selv2, ts_selv4 = (
    preprocess(data_anesthesia[key])
    for key in ("ts_aw", "ts_keta", "ts_lpp", "ts_dpp", "ts_selv2", "ts_selv4")
)

# One shape per line, in the order: aw, selv2, selv4, lpp, dpp, keta.
condition_shapes = [
    ts.shape for ts in (ts_aw, ts_selv2, ts_selv4, ts_lpp, ts_dpp, ts_keta)
]
print(*condition_shapes, sep=" \n ")

(24, 500, 82) 
 (18, 500, 82) 
 (11, 500, 82) 
 (21, 500, 82) 
 (23, 500, 82) 
 (22, 500, 82)


This is the objective function optimized (both minimized and maximized) with simulated annealing. We pass it as the metric: *simulated_annealing(metric=weighted_sum)*.

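It takes the measures (TC, DTC, O and S) obtained for each n-plet in the batch and, using the O-information, calculates Cohen's d between the awake state and each of the other states. These Cohen's d values are then combined into a single score as a sum weighted by the number of subjects in each state.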

In [7]:

def weighted_sum(batched_measures, group_sizes=(24, 22, 21, 23, 18, 11)):
    """
    Weighted sum of Cohen's d values across different altered states.

    Subjects along dimension 1 must be laid out as contiguous groups whose
    sizes are given by ``group_sizes``. The first group is the reference
    (awake) state; Cohen's d of the O-information is computed between every
    other group and the reference, and the values are summed with weights
    proportional to each non-reference group's size.

    Args:
        batched_measures (torch.Tensor): Shape (batch_size, S, 4), where:
            - batch_size: Number of n-plets in the batch.
            - S: Number of subjects.
            - 4: Measures (TC, DTC, O, S); only index 2 (O) is used.
        group_sizes (sequence of int): Number of subjects in each group, in
            the order the groups are stacked along dimension 1. The default
            matches the notebook's stacking order
            (ts_aw, ts_keta, ts_lpp, ts_dpp, ts_selv2, ts_selv4).

    Returns:
        torch.Tensor: Weighted sum of Cohen's d values (batch_size,).

    Raises:
        ValueError: If fewer than two groups are given, or if the group sizes
            do not add up to the number of subjects in ``batched_measures``.
    """
    sizes = [int(n) for n in group_sizes]
    if len(sizes) < 2:
        raise ValueError("group_sizes needs a reference group and at least one other group")
    n_subjects = batched_measures.shape[1]
    if sum(sizes) != n_subjects:
        # Without this check a mismatch would silently slice the wrong subjects.
        raise ValueError(
            f"group_sizes sum to {sum(sizes)} but batched_measures has {n_subjects} subjects"
        )

    device = batched_measures.device
    n_ref = sizes[0]

    # Weights of the non-reference groups: more subjects -> higher weight.
    other_sizes = torch.tensor(sizes[1:], dtype=torch.float32, device=device)
    weights = other_sizes / other_sizes.sum()

    # Reference (awake) group: O-information of the first n_ref subjects.
    o_Awake = batched_measures[:, :n_ref, 2]
    mean_Awake = o_Awake.mean(dim=1)
    std_Awake = o_Awake.std(dim=1, unbiased=True)

    F = torch.zeros(batched_measures.shape[0], device=device)

    # Slice boundaries are kept as Python ints so that no tensor has to be
    # converted to an index on every iteration.
    start = n_ref
    for w, n_group in zip(weights, sizes[1:]):
        end = start + n_group
        o_Drug = batched_measures[:, start:end, 2]
        mean_Drug = o_Drug.mean(dim=1)
        std_Drug = o_Drug.std(dim=1, unbiased=True)
        # Pooled standard deviation of the two groups (unbiased variances).
        pooled_std = torch.sqrt(
            ((n_ref - 1) * std_Awake**2 + (n_group - 1) * std_Drug**2)
            / (n_ref + n_group - 2)
        )
        cohen_d = (mean_Drug - mean_Awake) / pooled_std
        F += w * cohen_d
        start = end
    return F

In [None]:
n_repeats = 2 # ideally closer to 100
batch_size = 1000
# Stack every subject of every state along the first axis, awake first.
X = torch.tensor(np.vstack([ts_aw, ts_keta, ts_lpp, ts_dpp, ts_selv2, ts_selv4])) # we feed the states of interest
# One entry per stacked subject, each equal to X.shape[1].
T = [X.shape[1]] * X.shape[0]

# Bind the arguments shared by every run; only `order` and `largest` vary.
anneal = partial(
    simulated_annealing,
    X=X,
    device=device,
    T=T,
    metric=weighted_sum, # Pass the custom metric
    repeat=n_repeats,
    batch_size=batch_size,
)

cols = ["task", "order", "best_nplet", "best_score"]
cohen_list = []
t_i_general = time.time()
for order in range(2, 10):
    # --- Minimization: keep the n-plet with the lowest score ---
    t_i = time.time()
    print("order", order, "batch_size:", batch_size)
    min_nplet, min_scores = anneal(order=order, largest=False)
    t_f = time.time()
    print(f"Min done, order {order}")
    print_time(t_i, t_f)
    best_idx = min_scores.argmin().item()
    best_nplet = sorted(min_nplet[best_idx].detach().cpu().tolist())
    best_score = min_scores[best_idx].detach().cpu().tolist()
    cohen_list.append(["min", order, best_nplet, best_score])
    torch.cuda.empty_cache()

    # --- Maximization: keep the n-plet with the highest score ---
    t_i = time.time()
    max_nplet, max_scores = anneal(order=order, largest=True)
    t_f = time.time()
    print(f"Max done, order: {order}")
    print_time(t_i, t_f)
    best_idx = max_scores.argmax().item()
    best_nplet = sorted(max_nplet[best_idx].detach().cpu().tolist())
    best_score = max_scores[best_idx].detach().cpu().tolist()
    cohen_list.append(["max", order, best_nplet, best_score])
    torch.cuda.empty_cache()

    print("Total elapsed time:")
    print_time(t_i_general, t_f)

    # Rewrite the CSV after every order so partial results are saved as the loop progresses.
    cohen_df = pd.DataFrame(cohen_list, columns=cols)
    cohen_df.to_csv("weighted_sum_df.csv", index=False)

Example results

In [11]:
# Preview the first ten rows of the results table (best n-plet and score per task and order).
cohen_df.head(10)

Unnamed: 0,task,order,best_nplet,best_score
0,min,2,"[2, 81]",-0.708376
1,max,2,"[14, 70]",0.743011
2,min,3,"[39, 72, 80]",-2.092881
3,max,3,"[8, 22, 29]",1.378799
4,min,4,"[26, 27, 29, 80]",-2.658715
5,max,4,"[16, 23, 27, 75]",1.16048
6,min,5,"[4, 27, 29, 35, 68]",-2.643903
7,max,5,"[1, 23, 34, 48, 80]",0.920489
8,min,6,"[7, 27, 33, 59, 69, 70]",-3.103645
9,max,6,"[9, 18, 34, 36, 44, 45]",1.031921
