In [None]:
import librosa
import librosa.display
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import os

In [None]:
filepath_csv = r"C:\Users\Alvin\Desktop\UT\fall2019\research project\dep final proj\CSV files"
filepath_audio = r"C:\Users\Alvin\Desktop\UT\fall2019\research project\dep final proj\Audio files"

# os.listdir() returns entries in arbitrary order. The two lists are paired by
# position further down, so sort both listings to make the pairing deterministic.
# NOTE(review): this still assumes matching CSV/audio files sort into the same
# position (e.g. they share a participant-id naming scheme) - confirm.
csv = sorted(os.listdir(filepath_csv))
audio = sorted(os.listdir(filepath_audio))

# Full paths of every annotation CSV and every audio file.
csvpath = [os.path.join(filepath_csv, csv_name) for csv_name in csv]
audiopath = [os.path.join(filepath_audio, audio_name) for audio_name in audio]
    
# Pair each annotation CSV path with the audio path at the same list index.
# csvpath must not be longer than audiopath, otherwise combo() raises IndexError.

def combo(csvpath, audiopath):
    """Pair the two path lists index by index.

    Returns a list of ``[csv_path, audio_path]`` two-element lists, one per
    entry of ``csvpath``. ``audiopath`` is indexed with the positions of
    ``csvpath``, so it must be at least as long (IndexError otherwise); any
    extra audio paths are ignored.
    """
    return [[csvpath[idx], audiopath[idx]] for idx in range(len(csvpath))]

# List of [csv_path, audio_path] pairs, matched by position in the two lists.
filepath = combo(csvpath, audiopath)
# CSV read by format_features(), which uses its "Available" and "Record ID" columns.
labelpath = r"C:\Users\Alvin\Desktop\UT\fall2019\research project\dep final proj\dep_QIDS.csv"

In [None]:
# convert original annotated csv files into the format we want 
def convert_csv(csvpath):
    """Reduce an annotated interview CSV to the interviewee's speaking episodes.

    Parameters
    ----------
    csvpath : str or file-like
        Header-less annotation CSV, passed straight to ``pandas.read_csv``.
        It must hold the seven columns start, start_sec, end, end_sec,
        duration, duration_sec, interviewer; files with 8 or 9 columns have
        their 1 or 2 leading extra columns dropped.

    Returns
    -------
    pandas.DataFrame
        One row per row labelled "Interviewee", with columns ``start`` and
        ``end`` (the *_sec values), ``duration`` (end - start), ``cumulative``
        (running sum of duration), ``start_trans`` / ``end_trans`` (start / end
        divided by the ``end_sec`` of the last "Interviewer" row) and the
        constant ``participant`` = "interviewee_speaking".

    Raises
    ------
    ValueError
        If there are interviewee episodes but no "Interviewer" row to
        normalise against.
    """
    from itertools import accumulate

    df = pd.read_csv(csvpath, header=None)
    # Some exports carry one or two leading extra columns; strip them.
    if df.shape[1] == 8:
        df = df.iloc[:, 1:]
    elif df.shape[1] == 9:
        df = df.iloc[:, 2:]
    df.columns = ["start", "start_sec", "end", "end_sec", "duration", "duration_sec", "interviewer"]

    interviewee_rows = df[df.interviewer == "Interviewee"]
    interviewer_rows = df[df.interviewer == "Interviewer"]

    start = interviewee_rows.start_sec.tolist()
    end = interviewee_rows.end_sec.tolist()
    duration = [stop - begin for begin, stop in zip(start, end)]
    # Running total in a single pass (was a quadratic sum over prefixes).
    cumulative = list(accumulate(duration))

    start_trans, end_trans = [], []
    if start:
        if interviewer_rows.empty:
            raise ValueError(
                "annotation file has no 'Interviewer' row to normalise episode times against"
            )
        # Normalisation constant: end time of the last interviewer episode.
        # NOTE(review): presumably stands in for the recording length, which only
        # holds if the interviewer speaks last - confirm.
        total_sec = float(interviewer_rows.end_sec.iloc[-1])
        start_trans = [begin / total_sec for begin in start]
        end_trans = [stop / total_sec for stop in end]

    # put new columns in new data frame
    df1 = pd.DataFrame({"start": start, "end": end, "duration": duration, "cumulative": cumulative,
                        "start_trans": start_trans, "end_trans": end_trans,
                        "participant": "interviewee_speaking"})
    return df1

In [None]:
# map annotated csv episodes to audio input for mapped array and sampling rate
def mapped_array_sr(dataframe, audiopath):
    """Cut the annotated episodes out of an audio file and join them.

    Parameters
    ----------
    dataframe : pandas.DataFrame
        Must provide ``start_trans`` and ``end_trans`` columns holding episode
        boundaries as fractions of the recording (see ``convert_csv``).
    audiopath : str
        Audio file loaded with ``librosa.load(audiopath, sr=None)``, i.e. at
        its native sampling rate.

    Returns
    -------
    (numpy.ndarray, sr)
        The float64 concatenation of all episode samples, in row order, and
        the sampling rate reported by librosa. The array is empty when the
        frame has no rows.
    """
    y, sr = librosa.load(audiopath, sr=None)
    n_samples = len(y)

    segments = []
    for start_frac, end_frac in zip(dataframe.start_trans, dataframe.end_trans):
        # Clamp to the valid sample range. The previous slice was shifted by -1,
        # so an episode starting at sample 0 wrapped around to y[-1:...] and was
        # silently lost.
        first = min(max(int(start_frac * n_samples), 0), n_samples)
        last = min(max(int(end_frac * n_samples), first), n_samples)
        segments.append(y[first:last])

    if not segments:
        return np.array([]), sr
    # Concatenate once instead of np.append in a loop (which copies every time),
    # and keep every sample: the old `emp[1:]` discarded the first real sample.
    # float64 matches the dtype the np.append-based version returned.
    new_array = np.concatenate(segments).astype(np.float64)
    return new_array, sr

In [None]:
# functions to extract acoustic features
def mfccs_(y, sr, n_mfcc, hop_size, window_size):
    """Compute MFCCs with one row per analysis frame.

    ``hop_size`` and ``window_size`` are multiplied by ``sr`` and truncated to
    int to obtain ``hop_length`` and ``n_fft``.

    Returns
    -------
    (numpy.ndarray, list of str)
        The transposed ``librosa.feature.mfcc`` output (frames along axis 0,
        coefficients along axis 1) and the column names "mfcc1", "mfcc2", ...
        - one per coefficient.
    """
    # y and sr are passed by keyword: librosa >= 0.10 rejects them positionally.
    mfccs = librosa.feature.mfcc(y=y, sr=sr, n_mfcc=n_mfcc,
                                 hop_length=int(hop_size * sr), n_fft=int(window_size * sr))
    mfcc_name = ["mfcc" + str(i + 1) for i in range(mfccs.shape[0])]
    return mfccs.transpose(), mfcc_name

def picthes_magnitudes(y, sr, hop_size, window_size):
    """Average the ``librosa.piptrack`` pitch and magnitude matrices per frame.

    ``hop_size`` and ``window_size`` are multiplied by ``sr`` and truncated to
    int to obtain ``hop_length`` and ``n_fft``.

    Returns
    -------
    (list, list)
        ``pitch`` and ``magnitude``: for every frame, the mean over all
        frequency bins of the corresponding piptrack output.
        NOTE(review): the mean runs over every bin, including bins where
        piptrack reports no peak - confirm this is the intended feature.
    """
    # y and sr are passed by keyword: librosa >= 0.10 rejects them positionally.
    pitches, magnitudes = librosa.piptrack(y=y, sr=sr, hop_length=int(hop_size * sr),
                                           n_fft=int(window_size * sr))
    # After transposing, each row holds all frequency bins of one frame.
    pitch = [frame.mean() for frame in pitches.transpose()]
    magnitude = [frame.mean() for frame in magnitudes.transpose()]
    return pitch, magnitude

def chroma_energy_mean(y, sr):
    """Return the mean of all values of ``librosa.feature.chroma_cens(y, sr)``.

    The result is a single scalar averaged over the whole chroma matrix.
    """
    # y and sr are passed by keyword: librosa >= 0.10 rejects them positionally.
    chroma = librosa.feature.chroma_cens(y=y, sr=sr)
    return chroma.mean()

def zero_cross(y, sr, hop_size):
    """Return the transposed zero-crossing-rate matrix of ``y``.

    The rate is computed by ``librosa.feature.zero_crossing_rate`` with a fixed
    ``frame_length`` of 2048, ``center=True`` and a hop of
    ``int(hop_size * sr)``; the result is transposed before it is returned.
    """
    hop_length = int(hop_size * sr)
    rates = librosa.feature.zero_crossing_rate(
        y, frame_length=2048, hop_length=hop_length, center=True
    )
    return rates.transpose()

def mel_freq_spec(y, sr, hop_size, window_size, n_mels):
    """Compute a dB-scaled mel spectrogram with one row per frame.

    ``hop_size`` and ``window_size`` are multiplied by ``sr`` and truncated to
    int to obtain ``hop_length`` and ``n_fft``. The power spectrogram is
    converted with ``librosa.power_to_db(..., ref=np.max)`` and transposed.

    Returns the transposed matrix together with the column names
    "mel_freq1", "mel_freq2", ... - one per column of that matrix.
    """
    power = librosa.feature.melspectrogram(
        y=y,
        sr=sr,
        hop_length=int(hop_size * sr),
        n_fft=int(window_size * sr),
        n_mels=n_mels,
    )
    mel_freq = librosa.power_to_db(power, ref=np.max).transpose()
    mel_name = ["mel_freq" + str(band) for band in range(1, mel_freq.shape[1] + 1)]
    return mel_freq, mel_name

In [None]:
# get structured features and label 
def format_features(filepath, labelpath):
    """Build the frame-level feature table for all recordings.

    Parameters
    ----------
    filepath : list of [csv_path, audio_path]
        Pairs as produced by ``combo``. The Record ID of a recording is parsed
        from characters ``[-8:-4]`` of the audio path, i.e. the four characters
        in front of a three-letter extension such as ".wav".
    labelpath : str
        CSV with the columns "Available" and "Record ID". Only the Record IDs
        of rows whose "Available" is not missing are used.

    Returns
    -------
    pandas.DataFrame
        One row per analysis frame with 13 MFCCs, 8 mel bands, pitch,
        magnitude, chroma_ener_avg and zero_cross_rate, a ``<feature>_delta``
        column for each of them, and the id in a trailing "Record Id" column.
        Only recordings whose Record ID appears in the label file are kept
        (inner merge); no other column of the label file is carried over.
    """
    # Record IDs of the participants marked as available in the label file.
    df_label = pd.read_csv(labelpath)
    df_label = df_label.loc[df_label["Available"].notna(), "Record ID"]

    features = ["mfcc1", "mfcc2", "mfcc3", "mfcc4", "mfcc5", "mfcc6", "mfcc7", "mfcc8", "mfcc9", "mfcc10", "mfcc11",
                "mfcc12", "mfcc13", "mel_freq1", "mel_freq2", "mel_freq3", "mel_freq4", "mel_freq5", "mel_freq6",
                "mel_freq7", "mel_freq8",
                "Record ID", "pitch", "magnitude", "chroma_ener_avg", "zero_cross_rate"]

    # One frame-level table per recording; concatenated once after the loop
    # instead of growing a DataFrame with concat on every iteration.
    per_recording = []
    for name in filepath:
        frame = convert_csv(name[0])
        new_array, sr = mapped_array_sr(frame, name[1])
        mfcc, mfcc_name = mfccs_(new_array, sr, 13, 0.03, 0.06)
        mel_freq, mel_name = mel_freq_spec(new_array, sr, 0.03, 0.06, 8)
        pitch, magnitude = picthes_magnitudes(new_array, sr, 0.03, 0.06)
        chroma_ener_mean = chroma_energy_mean(new_array, sr)
        zero_crs = zero_cross(new_array, sr, 0.03)

        df = pd.concat([pd.DataFrame(mfcc, columns=mfcc_name),
                        pd.DataFrame(mel_freq, columns=mel_name)], axis=1)
        df["Record ID"] = int(name[1][-8:-4])
        df["pitch"] = pitch
        df["magnitude"] = magnitude
        df["chroma_ener_avg"] = chroma_ener_mean  # scalar, broadcast to every frame
        df["zero_cross_rate"] = np.ravel(zero_crs)  # (n_frames, 1) -> (n_frames,)
        per_recording.append(df)

    if per_recording:
        df_emp = pd.concat(per_recording, axis=0, ignore_index=True)
    else:
        df_emp = pd.DataFrame(columns=features)
    df_emp["Record ID"] = df_emp["Record ID"].astype("int64")

    # Frame-to-frame deltas, vectorised. The previous element-wise loop used
    # chained assignment (df_emp[name][j] = ...), which pandas does not guarantee
    # to write back and which is a no-op under Copy-on-Write.
    feature_cols = [col for col in df_emp.columns if "Record ID" not in col]
    deltas = df_emp[feature_cols].astype(float).diff()
    # The first frame of every recording has no predecessor within that
    # recording, so its delta is 0 rather than a difference against the last
    # frame of the previous recording.
    record_id = df_emp["Record ID"]
    starts_recording = (record_id != record_id.shift()).to_numpy()
    deltas.loc[starts_recording, :] = 0.0
    deltas.columns = [col + "_delta" for col in feature_cols]
    df_emp = pd.concat([df_emp, deltas], axis=1)

    # Keep only recordings listed in the label file, then move the id to a
    # trailing "Record Id" column (the spelling of the output column is kept).
    df_emp = df_emp.merge(df_label, on="Record ID")
    df_emp["Record Id"] = df_emp["Record ID"]
    df_emp = df_emp.drop(columns=["Record ID"])
    return df_emp

In [None]:
# Extract the features for every [csv_path, audio_path] pair in `filepath`.
df = format_features(filepath, labelpath)

In [None]:
# Preview the first rows only instead of rendering the whole feature table.
df.head()

In [None]:
# Save the feature table (including the "Record Id" column) to a CSV file,
# without writing the DataFrame index.
df.to_csv(r"C:\Users\Alvin\Desktop\UT\fall2019\research project\dep final proj\CBT_LLLT_features.csv", index=False)