In [1]:
dataPath = './data/'

In [4]:
import os
import numpy as np
from scipy.io.wavfile import write
from scipy.interpolate import interp1d
import pandas as pd

In [5]:

def export_columns_to_wav(df,
                          columns,
                          duration_seconds=10,
                          sample_rate=100,
                          output_dir="output_wavs"):
    """
    Export selected DataFrame columns to WAV files for use in modulation.

    Args:
        df (pd.DataFrame): Must contain 'Age' column and other numeric values.
        columns (list): Column names to export as WAV.
        duration_seconds (int): Total WAV duration in seconds.
        sample_rate (int): Sampling rate in Hz.
        output_dir (str): Directory to save WAV files.
    """
    os.makedirs(output_dir, exist_ok=True)

    if 'Age' not in df.columns:
        raise ValueError("DataFrame must contain an 'Age' column.")

    df = df.sort_values('Age')

    num_samples = duration_seconds * sample_rate
    # Reverse time: from max Age (earliest) to min Age (latest, e.g. 0)
    time_axis = np.linspace(df['Age'].max(), df['Age'].min(), num=num_samples)

    for col in columns:
        if col not in df.columns:
            print(f"Skipping missing column: {col}")
            continue

        series = df[['Age', col]].dropna()
        interp_fn = interp1d(series['Age'], series[col], kind='linear', bounds_error=False, fill_value="extrapolate")
        resampled = interp_fn(time_axis)

        # Normalize to [-1, 1]
        resampled -= np.mean(resampled)
        max_val = np.max(np.abs(resampled))
        if max_val > 0:
            resampled /= max_val

        # Save as float32 WAV
        audio_data = np.clip(resampled, -1.0, 1.0).astype(np.float32)
        output_path = os.path.join(output_dir, f"{col.replace('/', '_')}.wav")
        write(output_path, sample_rate, audio_data)
        print(f"Saved: {output_path}")


In [7]:
df = pd.read_csv(dataPath + 'filtered_data3.csv')
df

Unnamed: 0,Age,BIO_ExtinctionIntensity (%),BIO_OriginationIntensity(%),BIO_Difference_Cubic,SEA_Modern land sea level (C = 176.6 106km2/km),TEM_GAT,TEM_dT,CO2_pCO2 (ppm),O2_Mid O2%,O2_Range_Normalized,...,SR_Range_Normalized,LIP_LIP_PDF,MAG_INT_mean,MAG_INT_std,MAG_POL_FREQUENCY,MAG_POL_FREQUENCY.3,ZIR_Count_PDF_d18O,ZIR_Interpolated_mean_d18O,ZIR_Count_PDF_Hf,ZIR_Interpolated_mean_Hf
0,252.00,1.000000,0.000000,0.506887,0.263456,0.805210,0.032628,1.000000,1.000000,0.930485,...,0.969434,0.600,0.008044,0.000039,0.090909,0.1000,0.017316,0.499154,0.752941,0.660253
1,251.95,1.000000,0.000000,0.506988,0.263456,0.805657,0.032042,1.000000,0.998872,0.931869,...,0.934624,0.610,0.008299,0.000033,0.095455,0.1020,0.017316,0.499154,0.694118,0.670752
2,251.90,1.000000,0.000000,0.507089,0.263456,0.806103,0.031457,1.000000,0.997744,0.933254,...,0.899814,0.620,0.008553,0.000026,0.100000,0.1040,0.017316,0.499154,0.694118,0.670752
3,251.85,1.000000,0.000000,0.507190,0.263456,0.806549,0.030872,1.000000,0.996615,0.934638,...,0.868058,0.630,0.008828,0.000025,0.104545,0.1060,0.017316,0.499154,0.694118,0.670752
4,251.80,1.000000,0.000000,0.507291,0.263456,0.806995,0.030287,1.000000,0.995487,0.936023,...,0.836303,0.640,0.009102,0.000025,0.109091,0.1080,0.017316,0.499154,0.694118,0.670752
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
5036,0.20,0.002184,0.141072,0.912523,0.012354,0.022142,0.963248,0.058102,0.003722,0.152117,...,0.040715,0.016,0.586923,0.117646,0.154545,0.3720,0.333333,0.662915,0.082353,0.826446
5037,0.15,0.002184,0.141072,0.910671,0.012417,0.023525,0.960951,0.058095,0.002792,0.151117,...,0.044493,0.012,0.531968,0.073930,0.150000,0.3715,0.350649,0.653437,0.082353,0.826446
5038,0.10,0.002184,0.141072,0.908819,0.012480,0.024909,0.958654,0.058088,0.001861,0.150116,...,0.048271,0.008,0.477013,0.030214,0.145455,0.3710,0.372294,0.638354,0.082353,0.826446
5039,0.05,0.002184,0.141072,0.906966,0.012543,0.026293,0.956357,0.058081,0.000931,0.149115,...,0.040902,0.004,0.377562,0.030358,0.140909,0.3705,0.372294,0.638354,0.082353,0.826446


In [None]:
export_columns_to_wav(df,
                        ['BIO_ExtinctionIntensity (%)'],
                        duration_seconds=4844,
                        sample_rate=100,
                        output_dir='./data/')

TypeError: 'float' object cannot be interpreted as an integer