In [1]:
import os 
import re
import sys

import pandas as pd
import numpy as np

import parselmouth
from parselmouth.praat import call

In [2]:
wav_dir = '/Users/james/Desktop/SNL_vocal_analysis/Data/wav_files'
output_path= '/Users/james/Desktop/SNL_vocal_analysis/repo/pipeline_csv'

# Set parameters
f0min = 75 # 75 for males, 100 for females
f0max = 500 # 300 for males, 500 for females
time_step = .01

In [3]:
"""
Variables:
----------
wav_dir : str
    A path to the directory containing the audio files.
    
chopped_files : list
    A list of tuples containing the ID and file path of each chopped file found in `wav_dir`.
    
full_wav_files : list
    A list of tuples containing the ID, disclosure, and file path of each full wav file found in `wav_dir`.
"""


# Differentiate the chopped files from the full files
chopped_files = []
full_wav_files = []

# Walk through the directory: wav_dir
for root, subdirs, files in os.walk(wav_dir):
    # Remove DS_Store file from directory (hidden file created by macOS, stores metadata on folder)
    if '.DS_Store' in files:
        ds_path = os.path.join(root, '.DS_Store')
        os.remove(ds_path)
        
    for file in files:
        # Get file_path and ID using index slicing
            file_path = os.path.join(root, file)
            regex = re.compile('/')
            ID = regex.split(file_path)[7][1:] 

            # select neutral data and store in chopped_files with ID
            if 'chopped' in file_path:
                chopped_files.append([ID, file_path])
        
            # select full wav files and store in full_wav_files with ID and disclosure
            elif 'chopped' not in file_path:
                # index slicing to pull disclosure from file_path 
                disclosure = file_path[-8:-4]
                full_wav_files.append([ID, disclosure, file_path])

In [4]:
"""
This function pads an array of intensity values to match the length of an array of pitch values.

Functions:
----------
padding_fun(pitch_values, intensity_values)
    Pads the intensity values array to match the length of the pitch values array.

Parameters:
----------
pitch_values : array_like
    An array of pitch values.
    
intensity_values : array_like
    An array of intensity values.

Returns:
----------
int_padded : array_like
    The intensity values array padded to match the length of the pitch values array.
"""

def padding_fun(pitch_values, intensity_values):
    # Get the difference between pitch and len_array (probably dont have to check for larger int_array)
    array_dif= len(pitch_values) - len(intensity_values)
           
    # Find the length of padding by floor division
    pad_len = array_dif//2
         
    # Check if array_dif is odd, append 0 to end of intensity array
    if array_dif % 2 == 1:
        intensity_values = np.concatenate((intensity_values, [0]))
            
    # Pad front and back of intensity array  
    int_padded = np.pad(intensity_values, pad_width=pad_len, mode= 'constant', constant_values=0)

    return int_padded

In [6]:
"""
This function computes the logarithm of non-zero pitch values of a given sound object, along with their indices and the indices of zero pitch values. 

Functions:
----------
get_neu_pitch(sound, timestep, f0min, f0max)
    Computes the logarithm of non-zero pitch values of a given sound object, along with their indices and the indices of zero pitch values.

Parameters:
----------
sound : parselmouth.Sound object
    The sound object to analyze.
    
timestep : float
    The time step to use for the pitch analysis.
    
f0min : float
    The minimum fundamental frequency to detect.
    
f0max : float
    The maximum fundamental frequency to detect.

Returns:
----------
pitch_model : array_like
    The logarithm of non-zero pitch values.
    
y0_pitch_ind : array_like
    An array of indices where the pitch value is zero.
    
pitch_values : array_like
    The pitch values of the sound object.
"""

def get_neu_pitch(sound, timestep, f0min, f0max):    
    pitch = sound.to_pitch(time_step, f0min, f0max)
    pitch_values = pitch.selected_array['frequency']
    
    # remove zero_pitches: use enumerate to get index of where pitch is zero
    
    #y0= Zero Pitch 
    #n0= Pitch W/ non-zero value

    # y0_pitch_ind is an array of indexs where pitch == 0, for the given sound object
    
    # these 2 lines should probably be just 1 loop fix later
    y0_pitch_ind = [ind for ind, x in enumerate(pitch_values) if x == 0]
    pitch_w_nans= [np.nan if val == 0 else val for val in pitch_values]

    n0_pitch = [x for x in pitch_w_nans if not np.isnan(x)]
    
    pitch_model = np.log(n0_pitch)
    
    return [pitch_model, y0_pitch_ind, pitch_values]


In [300]:
    """
    Returns the intensity model for a given neutral sound object using log values of non-zero intensity values.
    This function takes in a Sound object, f0min (minimum f0 value), time_step, y0_pitch_ind (list of indices where 
    pitch is zero) and pitch_values (list of pitch values for the given sound object).
    
    Parameters:
    sound (Sound): A Sound object representing the neutral speech signal.
    f0min (int): The minimum frequency value to detect the pitch in the sound.
    time_step (float): The time step to use for analyzing the sound.
    y0_pitch_ind (list): A list of indices where pitch is zero.
    pitch_values (list): A list of pitch values for the given sound object.
    
    Returns:
    int_model (ndarray): A numpy array containing log values of non-zero intensity values for the given sound object.
    
    """
def get_neu_intensity(sound, f0min, time_step, y0_pitch_ind, pitch_values):
    intensity= sound.to_intensity(f0min, time_step)
    intensity_values = intensity.values[0]

    # padding function to uses np.pad to pad zeros to intensity values. The goal is to have pitch and intensity values equal lengths so I can remove intensity values using pitch indexs
    padded_int_values = padding_fun(pitch_values, intensity_values)

    # convert 0_pitch_index to nan
    first_conversion = [np.nan if i in y0_pitch_ind else x for i,x in enumerate(padded_int_values)]
    # convert intensity 0 to nans
    nan_ints = [np.nan if value == 0 else value for value in first_conversion]
    
    
    n0_ints = [x for x in nan_ints if not np.isnan(x)]
    int_model = np.log(n0_ints)
    
    return int_model
#get_neu_intensity(sound, f0min, time_step, y0_pitch_ind, pitch_values)

In [301]:
"""
get_neu_hf500(sound, y0_pitch_ind, frame_duration=0.025, frame_shift=0.01)

Extracts the LTAS score from the high-frequency portion of the sound signal.

Parameters:
- sound (parselmouth.Sound): The sound signal to process.
- y0_pitch_ind (list): The list of indices where the pitch is 0 in the pitch array.
- frame_duration (float): The duration of the frames in seconds (default: 0.025).
- frame_shift (float): The time shift between adjacent frames in seconds (default: 0.01).

Returns:
- ltas_return (list): The LTAS scores for each window in the signal that contains nonzero pitch.
"""

def get_neu_hf500(sound, y0_pitch_ind, frame_duration=.025, frame_shift=.01):
    # Get the sample rate and the signal data using NumPy

    sample_rate = sound.sampling_frequency
    signal = np.array(sound.values[0])

    # 25ms window length
    # 10ms step
    frame_len = int(sample_rate * frame_duration)
    frame_step = int(sample_rate* frame_shift)

    # buffer the audio signal a la Bone
    frames = []
    for i in range(0, len(signal), frame_step):
        if i + frame_len < len(signal):
            frame = signal[i:i+frame_len]
            frames.append(frame)

    # window is 100 buffers or 1 second of aggregated buffers
    window_sz = 100
    windows = []

    # create 1s windows with a step of 10ms (one new buffer)
    for i in range(0, len(frames)):
        if i < (len(frames) + window_sz):
            windows.append(frames[i:i+window_sz])

    #windows is an array of frames extacted from sound object
    
    # convert y0_pitch to nans
    n0_hf500 = [np.nan if i in y0_pitch_ind else x for i,x in enumerate(windows)]

    # I am unsure about the indexing being done in the matlab code... chatGPT says everyting is working as intended but who knows

    ltas_scores = []
    # chatgpt_converted time domain averaging methodology
    for window in n0_hf500:
        if np.isnan(window).all() == 0:
            # kept some of the bone variables to follow along prob will change them
            linfbe = np.abs(np.fft.fft(window))
            nFFT = linfbe.shape[0]
            n500 = int(np.ceil(nFFT/sample_rate*500) + 1)
            linfbe = [np.mean(x) for x in linfbe]
            n80 = int(np.ceil(nFFT/sample_rate*80))


            hi = np.sum(linfbe[n500+1:int(np.ceil(nFFT/2))+1])
            low = np.sum(linfbe[n80:n500])
            
            sys.stderr = open(os.devnull, 'w')
            try:
                ratio = np.log(hi/low)
                ltas_scores.append(ratio)
            except ZeroDivisionError:
                pass
                sys.stderr = sys.__stderr__
    ltas_return = [x for x in ltas_scores if x != -np.inf]

    
            # if low == 0:
            #         ratio = np.nan    # divide by zero
            # else:
            #     ratio = np.log(hi/low)
    return ltas_return

In [302]:
###########################################
## old neu_500 function
# def get_neu_hf500(sound, y0_pitch_ind, frame_duration=.025, frame_shift=.010):
   
    ## part 1 break sound object into 25ms windows
    frames = []
    
    start = 0
    while start < sound.xmax-frame_duration:
        end = start + frame_duration
        frame = sound.extract_part(
            # start, end, windowing function
            from_time=round(start,3), 
            to_time=round(end,3),
            window_shape=parselmouth.WindowShape.HANNING
            )

        # appending start and stop time? why make a fame countner
        frames.append([round(start+.01, 3), frame])
        start += frame_shift

    # standard overlap is 50% of window size BONE uses 60% (15ms)
    # during ltas extraction, acoustic is buffered into 25ms with 15ms overlap
    # then the time is converted to 1 second
    
    # ltas is returned on a 1 second timescale? pitch_time * 100
    # ele_in_window = int(samplerate * window_size)
    # num_ele_overlap= int(samplerate*overlap)
    
    # # BONE buffers the wav data into 25ms window with 15ms overlap (matlab adds zeros if no overlap values available)
    # # input my buffer function:
    # buffer(sound, ele_in_window, num_ele_overlap)
    
    
    # convert y0_pitch_inds to miliseconds to remove y0_pitch_values
    # python indexing starts at zero
    y0_ms = [round(x/100+.01, 3) for x in y0_pitch_ind]

    n0_frames = []
    for frame in frames:
        # only convert n0_pitches
        if frame[0] not in y0_ms:
            
            spect = frame[1].to_spectrum()
            # get Nyquist frequency
            top = call(spect, "Get highest frequency")
            
            # get above/below 500 band energy

            lo= call(spect, "Get band energy", 80, 500)
            
            hi = call(spect, "Get band energy", 500, top)
            
            # Find the ratio
            if lo == 0:
                ratio = np.nan    # divide by zero
            else:
                ratio = hi/lo
            n0_frames.append(ratio)

    # only use n0 values for neutral model
    n0_ltas= [x for x in n0_frames if not np.isnan(x)]
    ltas_return = np.log(n0_ltas)
    
    return(ltas_return)

#get_neu_hf500(sound, y0_pitch_ind, frame_duration=.025, frame_shift=.010)


IndentationError: unexpected indent (2400475302.py, line 6)

In [None]:
# This creates the neutral model df by taking in each feature stream and making the length equal to the longest feature stream ()

    """
    Creates a pandas DataFrame from the given ID, pitch, intensity, and hf500 arrays. 

    Args:
    ID (str): the identifier string
    pitch (array-like): array containing pitch values
    intensity (array-like): array containing intensity values
    hf500 (array-like): array containing hf500 values

    Returns:
    pandas.DataFrame: a DataFrame containing the input data with NaN values appended 
    to match the length of the longest array.
    """


def create_neu_df(ID, pitch, intensity, hf500):
    length = max(len(pitch), len(intensity), len(hf500))
    pitch = np.concatenate((pitch, [np.nan] * (length - len(pitch))))
    intensity = np.concatenate((intensity, [np.nan] * (length - len(intensity))))
    hf500 = np.concatenate((hf500, [np.nan] * (length - len(hf500))))
    df = pd.DataFrame({
        'ID': [ID] * length,
        'pitch': pitch,
        'intensity': intensity,
        'hf500': hf500
    })
    return df


In [None]:
chopped_data = []
for item in chopped_files:
    # open wav file from saved file_path and create a sound object
    sound = parselmouth.Sound(item[1])
    
    # get pitch information: [med_log_pitch, ind of non_zero pitch, pitch_values_with0]
    pitch_results = get_neu_pitch(sound, time_step, f0min, f0max)
    # extra line but makes it easier to read :)
    pitch_model, y0_pitch_ind, pitch_values = pitch_results
    
    ## get median_log_intensity
    int_model = get_neu_intensity(sound, f0min, time_step, y0_pitch_ind, pitch_values)
    
    hf500_model = get_neu_hf500(sound, y0_pitch_ind, frame_duration=.025, frame_shift=.010)
    
    chopped_data.append([item[0], pitch_model, int_model, hf500_model])

neu_model_list = []
for ele in chopped_data:
    participant_neu_df = create_neu_df(ele[0], ele[1], ele[2], ele[3])
    neu_model_list.append(participant_neu_df)
neu_model_df= pd.concat(neu_model_list)

neu_model_df.head()

Unnamed: 0,ID,pitch,intensity,hf500
0,136,4.774348,3.879275,2.895045
1,136,4.803909,3.891917,2.793474
2,136,4.8095,3.921083,2.569832
3,136,4.797723,3.90574,2.664221
4,136,4.771462,3.883262,2.90227


In [None]:
print(len(neu_model_df))

52859


In [None]:
neu_model_df.tail(10)

Unnamed: 0,ID,pitch,intensity,hf500
300,172,5.408676,4.30964,1.078176
301,172,5.407422,4.300821,0.867584
302,172,5.405218,4.288087,0.804154
303,172,5.404681,4.274145,0.542225
304,172,5.403655,4.263244,0.383101
305,172,5.402449,,-0.06247
306,172,5.400044,,-0.285312
307,172,5.397343,,
308,172,5.3935,,
309,172,5.389975,,


In [42]:
def mat_buffer(x, n, p):
    """Emulate the buffer function in Python.
    x: input vector
    n: frame size
    p: overlap/underlap amount
    """
    # Create an empty matrix to store the frames
    buffers = []
    for i in range(0, len(x)-n+1, n-p):
        frame = x[i:i+n]
        buffers.append(frame)
        
    return buffers

In [43]:
"""
    Given a sound object, time_step, f0min and f0max parameters, returns the median log pitch of each 1 second 
    window (with a step of 10ms) for the sound object, along with the index of the pitch buffers that have only NaN values, 
    pitch_values and corresponding time values.
    
    Parameters:
    sound (parselmouth.Sound): A sound object
    time_step (float): The step size for pitch analysis in seconds.
    f0min (int): Minimum F0 to be analyzed in Hz.
    f0max (int): Maximum F0 to be analyzed in Hz.
    
    Returns:
    list: Returns a list containing median log pitch of each 1 second window for the sound object.
    list: Returns a list containing the index of the pitch buffers that have only NaN values.
    list: Returns a list containing all the pitch values for the sound object.
    list: Returns a list containing the corresponding time values for the pitch values.
    """
def get_pitch(sound, time_step, f0min, f0max):
    
    # get pitch_values from sound object
    pitch = sound.to_pitch(time_step, f0min, f0max)
    pitch_values = pitch.selected_array['frequency']

    pitch_w_nans= [np.nan if x == 0 else x for x in pitch_values]
    #y0_pitch_inds = [i for i, x in enumerate(pitch_values) if x == 0]
    # Following Bone, I converted zeros to NaN and now I am aggregating pitch_values into 1 second windows step 10ms and finding the log median of each
    
    # create buffers (one second windows of pitch_values)
  
    # smoothing feature stream (loses 1 second of data?)
    pitch_buffers = mat_buffer(pitch_w_nans, 100, 99)
    
    med_pitch_logs = []
    p_time = []
    y0_pbuffs = []
    for index, buff in enumerate(pitch_buffers):
        if np.isnan(buff).all() == 1:
            y0_pbuffs.append(index)
        
        med_pitch_logs.append(np.log(np.nanmedian(buff)))
        p_time.append(round(index/100 + .01, 3))
        
        
            
            # return this instead of y0_pitch_inds?
        
    return [med_pitch_logs, y0_pbuffs, pitch_values, p_time]

In [44]:
    """
    This function computes the intensity values of a given sound object and pads the intensity values to equal the pitch values. It then removes the unvoiced frames using y0_pbuffs (which is only done in neutral modeling) and calculates the median log intensity for each second.

    Parameters:
    sound (Sound): A parselmouth Sound object
    time_step (float): The time step used to calculate pitch values
    f0min (int): The minimum pitch frequency
    y0_pbuffs (list): A list of indexes of pitch buffers that contain only zeros
    pitch_values (list): A list of pitch values

    Returns:
    list: A list containing two lists. The first list contains the median log intensity for each second of the sound. The second list contains the padded intensity values.

    """

def get_intensity(sound, time_step, f0min, y0_pbuffs, pitch_values):
 # get intensity_values
    intensity = sound.to_intensity(f0min, time_step)
    intensity_values = intensity.values[0]
    
    # pad intensities to equal pitch_values
    padded_int_values = padding_fun(pitch_values, intensity_values)
    
    # remove unvoiced frames using y0_pitch [this is only done in neutral modeling]
    # I need to remove y0_pitch_buffers for full data
    ints_w_nans = [np.nan if x== 0 else x for x in padded_int_values]
    
    int_buffers = mat_buffer(ints_w_nans, 100, 99)
  
    med_int_log = []
    
    for buff in int_buffers:
        med_int_log.append(np.log(np.nanmedian(buff)))
    
    final_ints = [np.nan if i in y0_pbuffs else value for i, value in enumerate(med_int_log)]
    
    # returning padded_int_values in case additional analysis is wanted
    return [final_ints, padded_int_values]

In [53]:
    """
    Computes the HF/LF ratio for a given sound object.

    Args:
        sound (Sound): The sound object containing the audio signal.
        y0_pbuffs (list): The list of indices where the pitch values are 0.
        med_pitch_logs (list): The list of the log median pitch values for each 1s window.
        frame_duration (float): The duration of each frame (in seconds).
        frame_shift (float): The amount of time to shift each frame by (in seconds).

    Returns:
        list: The list of the HF/LF ratio for each frame in the sound object.
    """

## my iteration of bone time series
def get_hf500(sound, y0_pbuffs, med_pitch_logs, frame_duration= .025, frame_shift= .01):
    # writing a loop to append frames to 1s windows
    # Get the sample rate and the signal data using NumPy

    sample_rate = sound.sampling_frequency
    signal = np.array(sound.values[0])

    # 25ms window length
    # 10ms step
    frame_len = int(sample_rate * frame_duration)
    frame_step = int(sample_rate* frame_shift)

    # buffer the audio signal a la Bone
    frames = []
    for i in range(0, len(signal), frame_step):
        if i + frame_len < len(signal):
            frame = signal[i:i+frame_len]
            frames.append(frame)

    # window is 100 buffers or 1 second of aggregated buffers
    window_sz = 100
    windows = []
    
    # create 1s windows with a step of 10ms (one new buffer)
    for i in range(0, len(frames)):
        if i < (len(frames) + window_sz):
            windows.append(frames[i:i+window_sz])

    # lining up to pitch values; caused by different smoothing
    
    smoothing_dif = len(windows) - len(med_pitch_logs)
    windows= windows[:-smoothing_dif]
    
    


    #windows is an array of frames extacted from sound object
    

    # convert y0_pitch to nans
    n0_hf500 = [np.nan if i in y0_pbuffs else x for i,x in enumerate(windows)]

    # I am unsure about the indexing being done in the matlab code... chatGPT says everyting is working as intended but who knows

    ltas_scores = []
    # chatgpt_converted time domain averaging methodology
    for window in n0_hf500:
        if np.isnan(window).all() == 0:
            # kept some of the bone variables to follow along 
            linfbe = np.abs(np.fft.fft(window))
            nFFT = linfbe.shape[0]
            n500 = int(np.ceil(nFFT/sample_rate*500) + 1)
            linfbe = [np.mean(x) for x in linfbe]
            n80 = int(np.ceil(nFFT/sample_rate*80))


            hi = np.sum(linfbe[n500+1:int(np.ceil(nFFT/2))+1])
            low = np.sum(linfbe[n80:n500])
            
            # nobody wants to see a div_by_zero printed
            sys.stderr = open(os.devnull, 'w')
            try:
                ratio = np.log(hi/low)
                
            except ZeroDivisionError:
                ratio= np.nan
            sys.stderr = sys.__stderr__
                
            ltas_scores.append(ratio)
            
        else: 
            ltas_scores.append(np.nan)
    return ltas_scores


In [39]:
# ###########################################
# ## this is the old way I did it 
# # get_hf500

# def get_hf500(sound, y0_pitch_ind, frame_duration=.025, frame_shift=.01):
   
#     ## part 1 break sound object into 25ms windows
#     frames = []
    
#     start = 0
#     while start < sound.xmax-frame_duration:
        
#         # rounding for floating point addition
#         start = round(start, 3)
#         end = round(start + frame_duration, 3)
        
       
#         frame = sound.extract_part(
#             # start, end, windowing function
#             from_time= start, 
#             to_time= end,
#             window_shape=parselmouth.WindowShape.HANNING
#             )
        
#         frames.append([round((start+.01), 3), frame])
#         start += frame_shift
        
        
        
#     # standard overlap is 50% of window size BONE uses 60% (15ms)
#     # during ltas extraction, acoustic is buffered into 25ms with 15ms overlap
#     # then the time is converted to 1 second
    
#     # ltas is returned on a 1 second timescale? pitch_time * 100
#     # ele_in_window = int(samplerate * window_size)
#     # num_ele_overlap= int(samplerate*overlap)
    
#     # # BONE buffers the wav data into 25ms window with 15ms overlap (matlab adds zeros if no overlap values available)
#     # # input my buffer function:
#     # buffer(sound, ele_in_window, num_ele_overlap)
    
#     y0_ms = [round(x/100+.01, 3) for x in y0_pitch_ind]

#     #ltas_time = []
#     ltas_scores = []
    
#     # #convert sound to spectrum object
#     for frame in frames:
#         #ltas_time.append(frame[0])
#         if frame[0] not in y0_ms:
#             spect = frame[1].to_spectrum()

#             # get Nyquist frequency
#             top = call(spect, "Get highest frequency")
            
#             # get above/below 500 band energy

#             lo= call(spect, "Get band energy", 80, 500)
           
#             hi = call(spect, "Get band energy", 500, top)
            
#             # Find the ratio
#             if lo == 0:
#                 ratio = np.nan    # divide by zero
#             else:
#                 ratio = hi/lo
            
#             ltas_scores.append(ratio)
#         else:
#             ltas_scores.append(np.nan)
#     return(ltas_scores)
#     #return([ltas_time, ltas_score])


In [40]:
"""
The create_df function creates a Pandas DataFrame with columns for ID, disclosure, time, pitch, intensity, and hf500. The input parameters are as follows:

ID: a string representing the ID of the subject or participant
disclosure: a string representing the disclosure condition (e.g., neutral, negative, positive)
time: a list of time values for each data point in the DataFrame
pitch: a list of pitch values for each data point in the DataFrame
intensity: a list of intensity values for each data point in the DataFrame
hf500: a list of ratio of high frequency energy to low frequency energy values for each data point in the DataFrame
"""

def create_df(ID, disclosure, time, pitch, intensity, hf500):
    length = len(pitch)
    
    df = pd.DataFrame({
        'ID': [ID] * length,
        'disclosure' : [disclosure] * length,
        'time' : time,
        'pitch': pitch,
        'intensity': intensity,
        'hf500': hf500
    })
    return df

In [73]:
## have each file return a time value, merge dataframes on time

files_completed=0
## should probably change the structure, feature stream values should be returned as dataframes so i can easily merge everything on time
wav_data = []

for info_lst in full_wav_files:
    
    # # locate the neutral data
    # part_neu_data = neu_model_df.loc[neu_model_df['ID'] == info_lst[0]]
    # part_neu_pitch = part_neu_data.loc[:,'pitch'].values
    # part_neu_int = part_neu_data.loc[:,'intensity'].values
    # part_neu_hf500 = part_neu_data.loc[:,'hf500'].values
    # open wav file and create a sound object
    sound = parselmouth.Sound(info_lst[2])
    
    # get pitch information: [med_log_pitch, ind of non_zero pitch, pitch_values_with0]
    pitch_results = get_pitch(sound, time_step, f0min, f0max)
    med_pitch_logs, y0_pbuffs, pitch_values, time_pitch = pitch_results

    # score each med_pitch value against the neu model

    neu_pitch = [x for x in med_pitch_logs if not np.isnan(x)]
    
    # pitch_scores = []
    # for p_item in med_pitch_logs:
    #     if np.isnan(p_item) == 0:
    #         pitch_scores.append(np.mean([1 if p_item > x else -1 for x in neu_pitch]))
            
    #     else:
    #         pitch_scores.append(np.nan)
    # this test says the 2 methods return equal arrays (the second one is way faster changing it to not2)
    # print(np.testing.assert_array_almost_equal(pitch_scores, pitch_scores2, err_msg='meh'))
    
    """
    The code works by first creating a boolean array of the same shape as med_pitch_logs by comparing each element in med_pitch_logs to neu_pitch. 
    If the value in med_pitch_logs is greater than neu_pitch, the corresponding element in the boolean array is set to True, otherwise it is set to False. 
    The boolean array is then converted to a numeric array by replacing True values with 1 and False values with -1. The np.where function is used to perform this operation. 
    The np.mean function is then used to compute the mean of this numeric array along the second axis (i.e., the mean of each row). 
    This gives a single pitch score for each window. Finally, the code replaces any scores corresponding to missing pitch values (represented as NaN in med_pitch_logs) with NaN values.
    """
    
    pitch_scores = np.mean(np.where(np.array(med_pitch_logs)[:, np.newaxis] > neu_pitch, 1, -1), axis=1)
    pitch_scores[np.isnan(med_pitch_logs)] = np.nan
    
    # ## get median_log_intensity
    med_int_logs, padded_int_values = get_intensity(sound, time_step, f0min, y0_pbuffs, pitch_values)
    
    neu_int = [x for x in med_int_logs if not np.isnan(x)]

    # int_scores = []
    # for i_item in med_int_logs:
    #     if np.isnan(i_item).all() == 0:
    #         int_scores.append(np.mean([1 if i_item > x else -1 for x in neu_int]))
            
    #     else:
    #         int_scores.append(np.nan)
    
    int_scores = np.mean(np.where(np.array(med_int_logs)[:, np.newaxis] > neu_int, 1, -1), axis=1)
    int_scores[np.isnan(med_int_logs)] = np.nan


    #-------------------------------------------------------------------
    # everything below this line is incomplete

    # ltas extraction takes 1min everything else takes less than 10 seconds together
    
    hf500= get_hf500(sound, y0_pbuffs, med_pitch_logs, frame_duration=.025, frame_shift=.01)

    hf500_scores= []

    # following bone methodology by not using first 100 frames of HF500 to align to pitch_values (I dont like it though, logic doesnt seem "correct")
    # still confused as to if we drop the first 100 or last 100
    # acoustic(:,1:featuretbmed(1)*100-1)=[]

    neu_hf = [x for x in hf500 if not np.isnan(x)]

    # for h_item in hf500:
    #     if np.isnan(h_item) == 0:
    #         hf500_scores.append(np.mean([1 if h_item > x else -1 for x in neu_hf]))
    #     else:
    #         hf500_scores.append(np.nan)
    
    
    hf500_scores = np.mean(np.where(np.array(hf500)[:, np.newaxis] > neu_hf, 1, -1), axis=1)
    hf500_scores[np.isnan(hf500)] = np.nan
    

    # append [item[0], item[1], med_pitch_logs, med_int_logs, HF500] to data

    file_df = create_df(info_lst[0], info_lst[1], time_pitch, pitch_scores, int_scores, hf500_scores)
    wav_data.append(file_df)
    
    files_completed += 1
    print('files left = ' , len(full_wav_files) - files_completed)

all_dfs= pd.concat(wav_data)



files left =  905
files left =  904
files left =  903
files left =  902
files left =  901
files left =  900
files left =  899
files left =  898
files left =  897
files left =  896
files left =  895
files left =  894
files left =  893
files left =  892
files left =  891
files left =  890
files left =  889
files left =  888
files left =  887
files left =  886
files left =  885
files left =  884
files left =  883
files left =  882
files left =  881
files left =  880
files left =  879
files left =  878
files left =  877
files left =  876
files left =  875
files left =  874
files left =  873
files left =  872
files left =  871
files left =  870
files left =  869
files left =  868
files left =  867
files left =  866
files left =  865
files left =  864
files left =  863
files left =  862
files left =  861
files left =  860
files left =  859
files left =  858
files left =  857
files left =  856
files left =  855
files left =  854
files left =  853
files left =  852
files left =  851
files left

In [74]:
all_dfs.head(10)
# I need to change the logic, I should not be altering individaul indicies where pitch is 0 I should be altering where pitch_buffer is nan

Unnamed: 0,ID,disclosure,time,pitch,intensity,hf500
0,136,pos2,0.01,,,
1,136,pos2,0.02,,,
2,136,pos2,0.03,,,
3,136,pos2,0.04,,,
4,136,pos2,0.05,,,
5,136,pos2,0.06,,,
6,136,pos2,0.07,,,
7,136,pos2,0.08,-0.587219,-0.956896,-0.151095
8,136,pos2,0.09,-0.608251,-0.956896,-0.071127
9,136,pos2,0.1,-0.587219,-0.956896,-0.026868


In [None]:
all_dfs.tail()

Unnamed: 0,ID,disclosure,time,pitch,intensity,hf500
17989,172,neu2,179.9,0.264516,-0.819355,0.206452
17990,172,neu2,179.91,0.264516,-0.819355,0.36129
17991,172,neu2,179.92,0.264516,-0.819355,0.432258
17992,172,neu2,179.93,0.264516,-0.819355,0.522581
17993,172,neu2,179.94,0.258065,-0.825806,0.748387


In [3]:
# making a csv just in case
#all_dfs.to_csv('12_hours.csv', index=False) 
all_dfs = pd.read_csv('12_hours.csv')

In [1]:
## older version use the one below
# import scipy.stats as stats

# all_arousal_dfs = []

# window_len = 11 # 100ms smoothing (taking the median value of an even amount == no bueno)

# # Select unique combinations of ID and Condition
# unique_df = all_dfs.drop_duplicates(subset=['ID', 'disclosure'])
# unq_id_dis= unique_df[['ID', 'disclosure']]

# for i, row in unq_id_dis.iterrows():
    
#     unq_id = row['ID']
#     unq_dis = row['disclosure']
#     # selecting files by unqiue (ID, disclosure)
#     id_dis_df = all_dfs.loc[(all_dfs['ID']== unq_id) & (all_dfs['disclosure']== unq_dis)]

#     # smooths each column using pd.rolling (this smooths to 1 second)
#     smooth = id_dis_df[['pitch', 'intensity', 'hf500']].rolling(window=window_len, center=True).median()
    
#     means = smooth.mean(axis=1).values

#     p_weight, _ = stats.spearmanr(smooth[['pitch']], means, nan_policy='omit')
#     i_weight, _ = stats.spearmanr(smooth[['intensity']], means, nan_policy='omit')
#     h_weight, _ = stats.spearmanr(smooth[['hf500']], means, nan_policy='omit')

#     weighted_scores = ((smooth[['pitch']].values * p_weight) + (smooth[['intensity']].values * i_weight) + (smooth[['hf500']].values * h_weight)) / (abs(p_weight) + abs(i_weight) + abs(h_weight))

#     id_dis_df['arousal'] = weighted_scores
#     # 2 second smoothing

#     id_dis_df['arousal_smooth'] = id_dis_df[['arousal']].rolling(window=201, center= True).median()
    
#     # aggregate to 1 second
#     second = 1
#     arousal_scores = []
#     for index in range(0, len(id_dis_df), 100):
#         start = index
        
#         temp = id_dis_df.iloc[start: start+99]
        
#         arousal_score_1s = temp['arousal_smooth'].mean()
#         arousal_scores.append([second, arousal_score_1s])
#         second+=1
    
#     arousal_df_1s = pd.DataFrame(arousal_scores, columns=['time', 'arousal_score'])
#     arousal_df_1s.insert(0 ,'ID', [unq_id]*len(arousal_df_1s))
#     arousal_df_1s.insert(1 ,'disclosure', [unq_dis]*len(arousal_df_1s))

#     all_arousal_dfs.append(arousal_df_1s)
    
# final_df = pd.concat(all_arousal_dfs)
# print(final_df.head())
# print(final_df.tail())


In [2]:
def smooth_with_nan(s):
    center_idx = (len(s) - 1) // 2  # calculate index of center value
    center_val = s.iloc[center_idx]

    if pd.isna(center_val):  # check if center value is nan
        return np.nan
    else:
        return np.median(s)

In [36]:
import scipy.stats as stats

all_arousal_dfs = []

unique_df = all_dfs.drop_duplicates(subset=['ID', 'disclosure'])
unq_id_dis= unique_df[['ID', 'disclosure']]

for i, row in unq_id_dis.iterrows():
    
    unq_id = row['ID']
    unq_dis = row['disclosure']
    # selecting files by unqiue (ID, disclosure)
    id_dis_df = all_dfs.loc[(all_dfs['ID']== unq_id) & (all_dfs['disclosure']== unq_dis)]

    # smooths each column using pd.rolling (this smooths to 1 second)
    smooth = id_dis_df[['pitch', 'intensity', 'hf500']].rolling(window=101, min_periods= 1, center=True).median()
    
    means = smooth.mean(axis=1).values

    p_weight, _ = stats.spearmanr(smooth[['pitch']], means, nan_policy='omit')
    i_weight, _ = stats.spearmanr(smooth[['intensity']], means, nan_policy='omit')
    h_weight, _ = stats.spearmanr(smooth[['hf500']], means, nan_policy='omit')

    weighted_scores = ((smooth[['pitch']].values * p_weight) + (smooth[['intensity']].values * i_weight) + (smooth[['hf500']].values * h_weight)) / (abs(p_weight) + abs(i_weight) + abs(h_weight))

    id_dis_df['arousal'] = weighted_scores
    # 2 second smoothing

    id_dis_df['arousal_smooth'] = id_dis_df['arousal'].rolling(window= 201, min_periods=1, center=True).apply(smooth_with_nan)
    

    # aggregating to 1 second
    second = 1
    arousal_scores = []
    for index in range(0, len(id_dis_df), 100):
        start = index
        
        temp = id_dis_df.iloc[start: start+99]
        
        arousal_score_1s = temp['arousal_smooth'].mean()
        arousal_scores.append([second, arousal_score_1s])
        second+=1
    
    arousal_df_1s = pd.DataFrame(arousal_scores, columns=['time', 'arousal_score'])
    arousal_df_1s.insert(0 ,'ID', [unq_id]*len(arousal_df_1s))
    arousal_df_1s.insert(1 ,'disclosure', [unq_dis]*len(arousal_df_1s))

    all_arousal_dfs.append(arousal_df_1s)
    
final_df = pd.concat(all_arousal_dfs)
print(final_df.head())
print(final_df.tail())


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  id_dis_df['arousal'] = weighted_scores
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  id_dis_df['arousal_smooth'] = id_dis_df['arousal'].rolling(window= 201, min_periods=1, center=True).apply(smooth_with_nan)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  id_dis_df['arousal'] = weighted_scores
A va

    ID disclosure  time  arousal_score
0  136       pos2     1      -0.501698
1  136       pos2     2            NaN
2  136       pos2     3            NaN
3  136       pos2     4       0.038202
4  136       pos2     5       0.132063
      ID disclosure  time  arousal_score
175  172       neu2   176       0.040761
176  172       neu2   177       0.001345
177  172       neu2   178       0.100481
178  172       neu2   179       0.156744
179  172       neu2   180      -0.011615


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  id_dis_df['arousal_smooth'] = id_dis_df['arousal'].rolling(window= 201, min_periods=1, center=True).apply(smooth_with_nan)


In [37]:
print(final_df[0:25])

     ID disclosure  time  arousal_score
0   136       pos2     1      -0.501698
1   136       pos2     2            NaN
2   136       pos2     3            NaN
3   136       pos2     4       0.038202
4   136       pos2     5       0.132063
5   136       pos2     6       0.172669
6   136       pos2     7       0.176735
7   136       pos2     8       0.175705
8   136       pos2     9       0.353846
9   136       pos2    10       0.500581
10  136       pos2    11       0.194197
11  136       pos2    12      -0.156276
12  136       pos2    13      -0.110111
13  136       pos2    14       0.154069
14  136       pos2    15       0.261251
15  136       pos2    16       0.319608
16  136       pos2    17       0.587070
17  136       pos2    18       0.460876
18  136       pos2    19       0.354113
19  136       pos2    20       0.191118
20  136       pos2    21      -0.351221
21  136       pos2    22      -0.174463
22  136       pos2    23      -0.254317
23  136       pos2    24      -0.166285


In [38]:
final_df.to_csv('arousal_scores_1s_continuous_new.csv')

In [40]:
n0_final_df= final_df[~final_df.isnull().any(axis=1)]
n0_final_df.to_csv('arousal_scores_1s_n0.csv')