# In [2]:
import scipy.io
import numpy as np
import pandas as pd

def process_scatters(file_path: str, scatter_type: str) -> "pd.DataFrame | None":
    """Load a scatter struct from a .mat file and correlate x with each y series.

    The variable named ``scatter_type`` is read from ``file_path`` and
    unpacked as a 1x1 MATLAB struct with the fields ``x``, ``ys`` and
    ``target_corr_values``. For every (correlation level, experiment) pair,
    the Pearson correlation between ``x`` and ``ys[level, experiment]`` is
    computed with ``np.corrcoef``.

    Args:
        file_path: Path of the .mat file to read.
        scatter_type: Name of the struct variable inside the file. Only its
            first three characters are stored in the ``scatter_type`` column.

    Returns:
        A DataFrame with one row per (level, experiment) pair and the columns
        ``scatter_type``, ``corr_target``, ``corr_computed``, ``exp`` and
        ``corr`` (the last two are zero-based indices), or ``None`` when the
        file holds no ndarray variable called ``scatter_type``.
    """
    # Fixing the column order keeps the schema stable even when no rows are
    # produced, so a later concat / CSV export still has the expected header.
    columns = ["scatter_type", "corr_target", "corr_computed", "exp", "corr"]

    mat_contents = scipy.io.loadmat(file_path)
    scatters = mat_contents.get(scatter_type)
    if not isinstance(scatters, np.ndarray):
        # Also covers the "variable not present" case (``get`` returned None).
        return None

    # loadmat exposes a MATLAB struct as a (1, 1) structured array.
    struct = scatters[0, 0]

    # NOTE(review): the original author's notes give x as (20,), ys as
    # (10, 10, 20) = [corr_levels, experiments, points] and
    # target_corr_values as (10,) -- confirm against the actual files.
    x = struct["x"].flatten()
    ys = struct["ys"]
    target_corr_values = struct["target_corr_values"].flatten()

    label = scatter_type[:3]
    rows = [
        {
            "scatter_type": label,
            "corr_target": target,
            # Off-diagonal entry of the 2x2 correlation matrix of (x, y).
            "corr_computed": np.corrcoef(x, ys[level, exp].flatten())[0, 1],
            "exp": exp,
            "corr": level,
        }
        for level, target in enumerate(target_corr_values)
        for exp in range(ys.shape[1])
    ]
    return pd.DataFrame(rows, columns=columns)

# Build one table per sign; each .mat file stores its struct under a
# variable named after the file.
pos_df = process_scatters("pos_scatters.mat", scatter_type="pos_scatters")
neg_df = process_scatters("neg_scatters.mat", scatter_type="neg_scatters")

# Stack the two tables with a fresh 0..n-1 index.
# NOTE(review): process_scatters returns None when the struct is missing and
# pd.concat drops None entries silently -- check the row count if in doubt.
combined_df = pd.concat((pos_df, neg_df), ignore_index=True)

# Write the merged table without the index column.
combined_df.to_csv("scatters.csv", index=False)