# REST-ASMR: Statistical Analysis & Technical Validation

> **PREREQUISITE:** This notebook provides the visual and statistical validation for the REST-ASMR dataset. Before running any of the cells below, execute the backend orchestrator by running `!python main.py`.
> 
> The orchestrator processes the raw signals, extracts the deep learning features, and populates the `./data/features/` and `./data/` directories with the necessary artifacts that this notebook analyzes.

---

### Behavioral Inter-Subject Agreement (ISA)

**Methodology:**
To verify that the ASMR tingling sensations were stimulus-locked and shared across participants, a Leave-One-Out Inter-Subject Agreement (LOO-ISA) was calculated: each participant's rating time series was correlated (Pearson's $r$) with the mean of all other participants, the coefficients were Fisher Z-transformed, and bootstrap resampling was used to test whether the group agreement was significantly greater than zero.

This code outputs the 95% Confidence Intervals (CIs) and $p$-values reported in the manuscript.

In [None]:
import numpy as np
import glob
import os
import scipy.stats
import warnings

# NOTE(review): this silences every warning category for the rest of the kernel
# session, including NumPy/SciPy numerical warnings; consider narrowing the filter.
warnings.filterwarnings("ignore")

# Directory searched for the per-subject label arrays (files matching "y_*.npy").
LABEL_FOLDER = "./data/features"

def calculate_bootstrapped_isa(label_folder=None, n_iterations=10000, seed=42):
    """Print and return the bootstrapped leave-one-out inter-subject agreement per video.

    For every video, each subject's label time series is Pearson-correlated with
    the mean time series of all other subjects. The correlations are Fisher
    Z-transformed, the mean Z is bootstrapped over subjects, and the results are
    transformed back to r for reporting.

    Parameters
    ----------
    label_folder : str, optional
        Directory containing ``y_<subject>_<video>.npy`` files. Defaults to the
        module-level ``LABEL_FOLDER``.
    n_iterations : int, optional
        Number of bootstrap resamples per video.
    seed : int, optional
        Seed of the private random generator used for each video.

    Returns
    -------
    dict
        ``{video: {"mean_r", "ci_lower_r", "ci_upper_r", "p_value", "n_subjects"}}``
        for every video that produced at least one valid correlation. Empty when
        no label files are found.
    """
    if n_iterations < 1:
        raise ValueError("n_iterations must be a positive integer")
    if label_folder is None:
        label_folder = LABEL_FOLDER

    print(" BOOTSTRAPPED BEHAVIORAL INTER-SUBJECT AGREEMENT (ISA) ")

    # Sorted so the subject order (and therefore the seeded bootstrap draws) does
    # not depend on the filesystem's directory listing order.
    label_files = sorted(glob.glob(os.path.join(label_folder, "y_*.npy")))
    if not label_files:
        print("No label files found.")
        return {}

    conditions = [
        ("ASMR (Tingle)", ["vid1", "vid2", "vid3", "vid4"]),
        ("Nature (Pleasantness)", ["vid5", "vid6", "vid7", "vid8"]),
    ]
    video_labels = {vid: [] for _, vids in conditions for vid in vids}

    for f in label_files:
        filename = os.path.basename(f)
        parts = os.path.splitext(filename)[0].split("_")
        # Expected pattern is y_<subject>_<video>; anything else is skipped
        # instead of aborting the whole analysis with an IndexError/KeyError.
        if len(parts) < 3 or parts[2] not in video_labels:
            print(f"Skipping unrecognized label file: {filename}")
            continue
        video_labels[parts[2]].append(np.ravel(np.load(f)))

    # arctanh(+-1) is infinite; keep perfectly (anti-)correlated subjects finite.
    r_limit = 1.0 - 1e-7
    results = {}

    for condition, vids in conditions:
        print(f"\n  {condition}   ")

        for vid in vids:
            arrays = video_labels[vid]
            if len(arrays) < 2:
                print(f"  > {vid.upper()}: fewer than 2 subjects with labels; skipping ISA.")
                continue

            # Recordings can differ slightly in length; truncate to the shortest.
            min_len = min(len(arr) for arr in arrays)
            stacked_arrays = np.vstack([arr[:min_len] for arr in arrays])

            num_subjects = stacked_arrays.shape[0]
            subject_z_scores = []

            for i in range(num_subjects):
                target_sub = stacked_arrays[i]
                mean_others = np.mean(np.delete(stacked_arrays, i, axis=0), axis=0)

                # Pearson's r is undefined for constant series.
                if np.std(target_sub) > 0 and np.std(mean_others) > 0:
                    r, _ = scipy.stats.pearsonr(target_sub, mean_others)
                    if not np.isnan(r):
                        subject_z_scores.append(np.arctanh(np.clip(r, -r_limit, r_limit)))

            if not subject_z_scores:
                print(f"  > {vid.upper()}: Insufficient variance for ISA.")
                continue

            z_array = np.array(subject_z_scores)

            # A private legacy generator yields the same stream as
            # np.random.seed(seed) + np.random.choice, without reseeding the
            # kernel-wide global generator as a side effect.
            rng = np.random.RandomState(seed)
            bootstrap_means = np.array([
                np.mean(rng.choice(z_array, size=len(z_array), replace=True))
                for _ in range(n_iterations)
            ])

            mean_z = np.mean(bootstrap_means)
            mean_r = np.tanh(mean_z)

            ci_lower_z, ci_upper_z = np.percentile(bootstrap_means, [2.5, 97.5])
            ci_lower_r = np.tanh(ci_lower_z)
            ci_upper_r = np.tanh(ci_upper_z)

            # One-sided p-value: share of bootstrap means at or below zero.
            # It can be exactly 0, which should be reported as p < 1/n_iterations.
            p_val = np.sum(bootstrap_means <= 0) / n_iterations

            sig_star = "***" if p_val < 0.001 else "**" if p_val < 0.01 else "*" if p_val < 0.05 else "ns"

            print(f"  > {vid.upper()} Mean ISA (r) : {mean_r:.4f} {sig_star}")
            print(f"      95% CI       : [{ci_lower_r:.4f}, {ci_upper_r:.4f}]")
            print(f"      p-value      : {p_val:.4f} (Bootstrapped)")

            results[vid] = {
                "mean_r": float(mean_r),
                "ci_lower_r": float(ci_lower_r),
                "ci_upper_r": float(ci_upper_r),
                "p_value": float(p_val),
                "n_subjects": int(len(z_array)),
            }

    return results


# Run the ISA analysis when this cell is executed as the main module
# (inside a notebook kernel `__name__` is "__main__"); skipped on import.
if __name__ == "__main__":
    calculate_bootstrapped_isa()

### Physiological Signal Integrity & Cardiovascular Response


**Methodology:**
This section validates the biological efficacy of the stimuli by assessing the subject-wise parasympathetic cardiovascular deceleration associated with the ASMR state. It uses a 5-s sliding window with a 1-s step, computes the Spearman rank correlation between windowed heart rate and subjective rating for each subject, and applies a paired-samples t-test to statistically quantify the difference between ASMR- and Nature-induced relaxation.

This code outputs the statistical significance ($p$-value) and generates the manuscript **Figure**.

In [None]:
import numpy as np
import pandas as pd
import glob
import os
import scipy.stats
import scipy.signal
import matplotlib.pyplot as plt
import seaborn as sns
from tqdm import tqdm

# Folder holding the raw PPG exports, read as "<subject id zero-padded to 3 digits>_csv.txt".
RAW_PPG_FOLDER = "./data/ppg"
# Folder holding the label arrays, read as "y_<subject>_<video>.npy".
LABEL_FOLDER = "./data/features" 
# Folder scanned for "*.log" files that provide the video onset times.
LOG_FOLDER = "./data/log"
# Sliding-window length and hop between consecutive windows, both in seconds.
WINDOW_SEC = 5.0   
STEP_SEC = 1.0     

def load_raw_ppg(filepath):
    """Load a raw PPG export into a numeric DataFrame.

    The file is read as tab-separated text after skipping the first 8 lines,
    trying several encodings in turn. The first column is taken as time in
    minutes and the third column as the signal value.

    Parameters
    ----------
    filepath : str
        Path of the tab-separated PPG export.

    Returns
    -------
    pandas.DataFrame or None
        Columns ``Time_Min``, ``Val`` (both float) and ``Time_Sec``
        (``Time_Min * 60``). Rows where either the time or the value is not
        numeric are dropped. ``None`` if the file cannot be opened or cannot be
        parsed into at least three columns under any of the encodings.
    """
    encodings = ['shift_jis', 'utf-8', 'cp1252']
    for enc in encodings:
        try:
            raw = pd.read_csv(filepath, sep='\t', skiprows=8, encoding=enc, on_bad_lines='skip')
        except OSError as exc:
            # Missing/unreadable file: another encoding cannot help.
            print(f"Could not open PPG file {filepath}: {exc}")
            return None
        except ValueError as exc:
            # Covers decode errors and pandas parser/empty-data errors
            # (all ValueError subclasses); try the next encoding.
            print(f"Could not parse {filepath} with encoding '{enc}': {exc}")
            continue

        if raw.shape[1] < 3:
            print(f"Unexpected shape {raw.shape} for {filepath} with encoding '{enc}'")
            continue

        df = raw.iloc[:, [0, 2]].copy()
        df.columns = ['Time_Min', 'Val']
        # Coerce BOTH columns: a single junk row makes pandas read a column as
        # strings, which previously left 'Val' non-numeric for the filters.
        df = df.apply(pd.to_numeric, errors='coerce').dropna()
        return df.assign(Time_Sec=df['Time_Min'] * 60.0)
    return None

def get_window_features(chunk, fs=2000.0):
    """Estimate heart rate and RMSSD from one window of raw PPG samples.

    The window is band-pass filtered (2nd-order Butterworth, 0.5-4.0 Hz,
    zero-phase), peaks are detected at least 0.35 s apart with a prominence of
    at least 0.5, and inter-peak intervals outside 300-1300 ms are discarded.

    Parameters
    ----------
    chunk : array-like
        PPG samples of a single window.
    fs : float, optional
        Sampling rate of ``chunk`` in Hz.

    Returns
    -------
    tuple of float
        ``(hr, rmssd)``: heart rate in beats per minute and RMSSD in ms.
        ``(nan, nan)`` when the window is too short, contains non-finite
        samples, has fewer than 4 peaks, or fewer than 3 valid intervals.
    """
    samples = np.asarray(chunk, dtype=float).ravel()
    min_peak_distance = max(1, int(fs * 0.35))

    # Four peaks spaced >= min_peak_distance apart need at least this many
    # samples, so shorter windows can never yield a result. NaN/inf samples
    # would contaminate the whole filtered output.
    if samples.size < 3 * min_peak_distance + 1 or not np.all(np.isfinite(samples)):
        return np.nan, np.nan

    sos = scipy.signal.butter(2, [0.5, 4.0], btype='bandpass', fs=fs, output='sos')
    try:
        filtered = scipy.signal.sosfiltfilt(sos, samples)
    except ValueError:
        # Raised when the window is shorter than the filter's edge padding.
        return np.nan, np.nan

    peaks, _ = scipy.signal.find_peaks(filtered, distance=min_peak_distance, prominence=0.5)
    if len(peaks) < 4:
        return np.nan, np.nan

    # Inter-peak intervals in milliseconds.
    rr_intervals = np.diff(peaks) / fs * 1000.0

    # Keep only intervals in the 300-1300 ms range.
    valid_rr = rr_intervals[(rr_intervals > 300) & (rr_intervals < 1300)]
    if len(valid_rr) < 3:
        return np.nan, np.nan

    hr = 60000.0 / np.mean(valid_rr)
    # NOTE(review): successive differences are taken after rejected intervals
    # are removed, so a difference may span a gap between non-adjacent beats.
    rmssd = np.sqrt(np.mean(np.diff(valid_rr) ** 2))

    return hr, rmssd

def _build_timeline_map(log_folder):
    """Parse every "*.log" file into {(subject_id, video_code): (onset_sec, duration_sec)}."""
    timeline_map = {}
    for log_f in sorted(glob.glob(os.path.join(log_folder, "*.log"))):
        try:
            # Subject id is the integer before the first '-' of the file name.
            sub_id = int(os.path.basename(log_f).split('-')[0])
            with open(log_f, 'r', encoding='utf-8', errors='ignore') as f:
                lines = f.readlines()
            # The tabular part starts at the first line beginning with "Subject".
            header_idx = next(i for i, l in enumerate(lines) if l.startswith("Subject"))
            ldf = pd.read_csv(log_f, sep='\t', skiprows=header_idx)
            vids = ldf[ldf['Event Type'] == 'Video'].reset_index(drop=True)
            for i, row in vids.iterrows():
                # The first video event of a session is given 55.7 s, all others 60 s.
                dur = 55.7 if (i == 0) else 60.0
                # 'Time' is divided by 10000 to obtain seconds.
                timeline_map[(sub_id, row['Code'])] = (row['Time'] / 10000.0, dur)
        except Exception as exc:
            # Best effort: one unreadable log must not abort the analysis, but
            # say which file failed and why.
            print(f"Skipping log {log_f}: {type(exc).__name__}: {exc}")
            continue
    return timeline_map


def compare_asmr_nature_physiology():
    """Compare the HR-vs-rating coupling between ASMR and Nature videos.

    For every subject, raw PPG is cut into sliding windows (``WINDOW_SEC`` long,
    ``STEP_SEC`` apart) inside each video, heart rate and RMSSD are extracted
    per window and paired with the mean label over the same window. Per subject
    and condition, windowed HR (and HRV) are Spearman-correlated with the
    ratings. The HR correlations are shown as a box/strip plot and compared
    between conditions with a paired t-test, on raw and on Fisher-z values.

    Reads ``LOG_FOLDER``, ``RAW_PPG_FOLDER`` and ``LABEL_FOLDER``; prints the
    statistics and shows the figure. Returns ``None``.
    """
    print(" STARTING SLIDING WINDOW COMPARISON (ASMR vs NATURE) ")

    label_rate_hz = 10          # labels are sampled at 10 Hz
    onset_skip_sec = 5.0        # windows start 5 s after the logged video onset;
                                # label index 0 is aligned to that point
    min_chunk_samples = 1000    # windows with fewer PPG samples are ignored
    all_videos = ["vid1", "vid2", "vid3", "vid4", "vid5", "vid6", "vid7", "vid8"]
    nature_videos = {"vid5", "vid6", "vid7", "vid8"}

    timeline_map = _build_timeline_map(LOG_FOLDER)

    results = []
    subjects = sorted({key[0] for key in timeline_map})

    for sub in tqdm(subjects, desc="Analyzing Subjects"):
        raw_path = os.path.join(RAW_PPG_FOLDER, f"{str(sub).zfill(3)}_csv.txt")
        if not os.path.exists(raw_path):
            print(f"Subject {sub}: PPG file not found ({raw_path}); skipping.")
            continue

        df_ppg = load_raw_ppg(raw_path)
        if df_ppg is None:
            print(f"Subject {sub}: PPG file could not be loaded ({raw_path}); skipping.")
            continue

        # Pull the columns out once; they are sliced for every window below.
        ppg_time = df_ppg['Time_Sec'].values
        ppg_val = df_ppg['Val'].values

        data_buckets = {
            'ASMR': {'hr': [], 'hrv': [], 'rating': []},
            'Nature': {'hr': [], 'hrv': [], 'rating': []}
        }

        for vid in all_videos:
            if (sub, vid) not in timeline_map:
                continue

            category = 'Nature' if vid in nature_videos else 'ASMR'
            start_sec, dur_sec = timeline_map[(sub, vid)]

            y_path = os.path.join(LABEL_FOLDER, f"y_{sub}_{vid}.npy")
            if not os.path.exists(y_path):
                continue
            y_data = np.load(y_path)

            first_window_start = start_sec + onset_skip_sec
            end_time = start_sec + dur_sec

            # Step with an integer counter: accumulating `curr_time += STEP_SEC`
            # and subtracting the start again can give e.g. 2.9999999, which
            # int() would truncate to the wrong label index.
            step_idx = 0
            while True:
                rel_start = step_idx * STEP_SEC
                curr_time = first_window_start + rel_start
                if not (curr_time + WINDOW_SEC < end_time):
                    break

                mask = (ppg_time >= curr_time) & (ppg_time < curr_time + WINDOW_SEC)
                chunk = ppg_val[mask]

                idx_start = int(round(rel_start * label_rate_hz))
                idx_end = int(round((rel_start + WINDOW_SEC) * label_rate_hz))

                if idx_end <= len(y_data) and len(chunk) > min_chunk_samples:
                    hr, hrv = get_window_features(chunk)
                    avg_rating = np.mean(y_data[idx_start:idx_end])

                    if not np.isnan(hr) and not np.isnan(hrv):
                        data_buckets[category]['hr'].append(hr)
                        data_buckets[category]['hrv'].append(hrv)
                        data_buckets[category]['rating'].append(avg_rating)

                step_idx += 1

        for cat in ['ASMR', 'Nature']:
            hrs = data_buckets[cat]['hr']
            hrvs = data_buckets[cat]['hrv']
            rats = data_buckets[cat]['rating']

            # Need more than 10 windows and a non-constant rating for a rank correlation.
            if len(hrs) > 10 and np.std(rats) > 0:
                r_hr, p_hr = scipy.stats.spearmanr(hrs, rats)
                r_hrv, p_hrv = scipy.stats.spearmanr(hrvs, rats)

                results.append({
                    'Subject': sub,
                    'Condition': cat,
                    'r_HR': r_hr,
                    'r_HRV': r_hrv,
                    'p_HR': p_hr
                })

    if not results:
        # An empty frame has no 'Condition' column and would crash the plot.
        print("No subject produced a usable correlation; nothing to plot or test.")
        return

    df_res = pd.DataFrame(results)

    fig, ax = plt.subplots(figsize=(8, 6))

    sns.boxplot(x='Condition', y='r_HR', data=df_res, ax=ax, palette="Set2")
    sns.stripplot(x='Condition', y='r_HR', data=df_res, color='black', alpha=0.5, ax=ax)

    ax.set_title("Correlation: HR vs. Rating (ASMR vs Nature)", fontweight='bold')
    ax.set_ylabel("Spearman Correlation (r)")
    ax.axhline(0, color='red', linestyle='--')

    plt.tight_layout()
    #plt.savefig("asmr_hr_stat_test.svg")
    plt.show()

    asmr_hr = df_res[df_res['Condition'] == 'ASMR'].set_index('Subject')['r_HR']
    nat_hr = df_res[df_res['Condition'] == 'Nature'].set_index('Subject')['r_HR']

    # Keep only subjects with a defined correlation in BOTH conditions;
    # a single NaN would otherwise turn the whole t-test into NaN.
    common = pd.concat([asmr_hr, nat_hr], axis=1, join='inner')
    common.columns = ['ASMR', 'Nature']
    common = common.dropna()

    if len(common) < 2:
        print("Fewer than 2 subjects have correlations in both conditions; paired t-test skipped.")
        return

    t_stat, p_val = scipy.stats.ttest_rel(common['ASMR'], common['Nature'])

    print("\n STATISTICAL COMPARISON (Paired T-Test) ")
    print(f"Mean HR Correlation (ASMR):   {common['ASMR'].mean():.4f}")
    print(f"Mean HR Correlation (Nature): {common['Nature'].mean():.4f}")
    print(f"Difference P-Value: {p_val:.4f}, t-stat: {t_stat} ")

    # Same test on Fisher-z transformed correlations.
    asmr_z = np.arctanh(common['ASMR'])
    nat_z = np.arctanh(common['Nature'])

    t_stat2, p_val2 = scipy.stats.ttest_rel(asmr_z, nat_z)
    print(f"Corrected Difference P-Value: {p_val2:.4f}, t-stat: {t_stat2} ")

    # NOTE(review): the verdict below is based on the raw-correlation p-value,
    # not on the Fisher-z ("Corrected") one printed just above -- confirm which
    # of the two is the intended decision criterion.
    if p_val < 0.05:
        print("SIGNIFICANT DIFFERENCE found between physiological response to ASMR vs Nature.")
    else:
        print("No significant global difference in correlation magnitude.")

# Run the ASMR-vs-Nature physiology comparison when this cell is executed as
# the main module (inside a notebook kernel `__name__` is "__main__").
if __name__ == "__main__":
    compare_asmr_nature_physiology()

### Group-Level Temporal Agreement

**Visualization Methodology:**
This visualizes the temporal consensus of the ASMR tingling sensation across all active participants for the two highest-performing stimuli (Video 2 and Video 3). The figure displays **Mean Signal** with a solid line and the **Standard Error of Mean** using the shaded region.


This code outputs the manuscript Figure.

In [None]:
import numpy as np
import glob
import os
import matplotlib.pyplot as plt
import seaborn as sns
import warnings

# NOTE(review): this silences every warning category for the rest of the kernel
# session, including NumPy/Matplotlib warnings; consider narrowing the filter.
warnings.filterwarnings("ignore")

# Directory searched for the label arrays (files matching "y_*_<video>.npy").
LABEL_FOLDER = "./data/features"

def get_mean_timeseries(vid_target, label_folder=None, fs=10.0):
    """Compute the across-subject mean and standard error of one video's labels.

    Only subjects whose label array contains at least one value above zero
    contribute. All contributing arrays are truncated to the shortest one.

    Parameters
    ----------
    vid_target : str
        Video code used in the file pattern ``y_*_<vid_target>.npy``.
    label_folder : str, optional
        Directory with the label files. Defaults to the module-level
        ``LABEL_FOLDER``.
    fs : float, optional
        Label sampling rate in Hz, used to build the time axis.

    Returns
    -------
    tuple
        ``(time_axis, mean_signal, sem_signal)`` as 1-D arrays of equal length,
        or ``(None, None, None)`` when no file matches or no subject
        contributes.
    """
    if label_folder is None:
        label_folder = LABEL_FOLDER

    label_files = glob.glob(os.path.join(label_folder, f"y_*_{vid_target}.npy"))
    if not label_files:
        return None, None, None

    subject_arrays = []
    for f in label_files:
        y_data = np.load(f)
        # An empty array has no maximum; skip it instead of raising.
        if y_data.size > 0 and np.max(y_data) > 0:
            subject_arrays.append(y_data)

    if not subject_arrays:
        return None, None, None

    min_len = min(len(arr) for arr in subject_arrays)
    stacked_data = np.vstack([arr[:min_len] for arr in subject_arrays])
    n_subjects = stacked_data.shape[0]

    mean_signal = np.mean(stacked_data, axis=0)
    # The standard error of the mean uses the sample standard deviation
    # (ddof=1); the population SD (ddof=0) underestimates it. With a single
    # subject the spread is undefined, so fall back to ddof=0 (SEM of 0).
    ddof = 1 if n_subjects > 1 else 0
    sem_signal = np.std(stacked_data, axis=0, ddof=ddof) / np.sqrt(n_subjects)
    time_axis = np.arange(min_len) / fs

    return time_axis, mean_signal, sem_signal

def plot_dual_asmr_timeseries():
    """Plot the group mean +/- SEM rating time course for Video 2 and Video 3.

    Draws two side-by-side panels sharing the y-axis: the mean as a solid line
    and mean +/- standard error as a shaded band whose lower edge is clipped at
    0. Prints "Data missing" and returns without plotting when either video
    has no usable data. Returns ``None``.
    """
    # (video code, panel title, line colour, band colour)
    panels = [
        ("vid2", "A. Group-Level Temporal Agreement: Video 2", 'firebrick', 'lightcoral'),
        ("vid3", "B. Group-Level Temporal Agreement: Video 3", 'darkred', 'indianred'),
    ]

    series = [get_mean_timeseries(vid) for vid, _, _, _ in panels]
    if any(time_axis is None for time_axis, _, _ in series):
        print("Data missing")
        return

    # The theme must be set BEFORE the figure is created: axes take their style
    # at creation time, so setting it afterwards left the first run unstyled
    # while re-runs of the cell looked different.
    sns.set_theme(style="whitegrid")
    fig, axes = plt.subplots(1, 2, figsize=(14, 5), sharey=True)

    for ax, (_, title, line_color, band_color), (time_axis, mean_sig, sem_sig) in zip(axes, panels, series):
        ax.plot(time_axis, mean_sig, color=line_color, linewidth=2.5, label='Mean Intensity')
        ax.fill_between(time_axis, np.maximum(0, mean_sig - sem_sig), mean_sig + sem_sig,
                        color=band_color, alpha=0.4, label='Standard Error')
        ax.set_title(title, fontweight='bold')
        ax.set_xlabel("Trial Time (Seconds)", fontweight='bold')
        ax.legend(loc="upper left")

    # The y-axis is shared, so label and limits are set once on the left panel.
    axes[0].set_ylabel("Mean Subjective Rating (0-3)", fontweight='bold')
    axes[0].set_ylim(0, 3.0)

    plt.tight_layout()
    #plt.savefig("Figure1_DualASMR_TimeSeries.svg", dpi=300)
    plt.show()

# Draw the two-panel figure when this cell is executed as the main module
# (inside a notebook kernel `__name__` is "__main__").
if __name__ == "__main__":
    plot_dual_asmr_timeseries()