### Classifier

In [21]:
import glob
import re
import parselmouth
from parselmouth.praat import call
import numpy as np
from sklearn.model_selection import cross_validate
from sklearn import metrics
from sklearn.naive_bayes import GaussianNB
from sklearn.svm import LinearSVC
from sklearn.neural_network import MLPClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.linear_model import LogisticRegression

In [None]:
# Features per recording: mean pitch, mean intensity, duration, mean value of
# formant 2 at vowel midpoints, and mean jitter, shimmer and HNR over vowels

def divError(n, d):
    """Divide ``n`` by ``d``, falling back to 0 for a falsy denominator.

    Guards the per-file averages against a ZeroDivisionError when nothing
    was counted.
    """
    if not d:
        return 0
    return n / d

def textgrid_path_for(wav_file):
    """Return the TextGrid path that sits next to ``wav_file``.

    Only the trailing ``.wav`` extension is swapped, so a directory or file
    name that happens to contain "wav" is left untouched.
    """
    return re.sub(r"\.wav$", ".TextGrid", wav_file)


def is_vowel_label(label):
    """Return True when a phone label begins with A, E, I, O or U (any case)."""
    return re.match('[AEIOU]', str(label).upper()) is not None


def extract_features(wav_file, label):
    """Build the feature row for one recording.

    Args:
        wav_file: Path to a ``.wav`` file. A TextGrid with the same base name
            is read from the same directory; its tier 2 is scanned for
            vowel-labelled intervals.
        label: Class label stored as the last element of the row.

    Returns:
        ``[mean pitch, mean intensity, duration, mean formant-2 value at
        vowel midpoints, mean jitter, mean shimmer, mean HNR, label]``.
        Each per-vowel average is 0 when no vowel contributed a value.
    """
    sound = parselmouth.Sound(wav_file)

    # Whole-recording measures.
    pitch = call(sound, "To Pitch", 0, 75, 600)
    meanpitch = call(pitch, "Get mean", 0, 0, "Hertz")
    intensity = call(sound, "To Intensity", 75, 0, "yes")
    meanintensity = call(intensity, "Get mean", 0, 0, "energy")
    duration = call(sound, "Get total duration")

    # Analysis objects that are queried once per vowel below.
    formant = call(sound, "To Formant (burg)", 0, 5, 5500, 0.025, 50)
    pointProcess = call(sound, "To PointProcess (periodic, cc)", 75, 600)
    harmonicity = call(sound, "To Harmonicity (cc)", 0.01, 75, 0.1, 1.0)

    # The sound and TextGrid are loaded once, from the globbed path, so the
    # result does not depend on the current working directory.
    textgrid = call("Read from file", textgrid_path_for(wav_file))
    intv = int(call(textgrid, "Get number of intervals", 2))

    vowels = 0
    f_zero = 0.0
    count = 0
    jitter = 0.0
    shimmer = 0.0
    hnr = 0.0

    # Praat numbers intervals 1..intv inclusive, so the upper bound is intv + 1.
    for i in range(1, intv + 1):
        if not is_vowel_label(call(textgrid, "Get label of interval", 2, i)):
            continue

        vowel_onset = call(textgrid, "Get starting point", 2, i)
        vowel_offset = call(textgrid, "Get end point", 2, i)

        # Formant 2 sampled at the vowel midpoint. A NaN result is skipped so
        # that one undefined measurement does not turn the whole average
        # into NaN.
        midpoint = vowel_onset + ((vowel_offset - vowel_onset) / 2)
        value = call(formant, "Get value at time", 2, midpoint, "Hertz", "Linear")
        if not np.isnan(value):
            f_zero += value
            vowels += 1

        j = call(pointProcess, "Get jitter (local)", vowel_onset, vowel_offset, 0.0001, 0.03, 1.3)
        s = call([sound, pointProcess], "Get shimmer (local)", vowel_onset, vowel_offset, 0.0001, 0.02, 1.3, 1.6)
        h = call(harmonicity, "Get mean", vowel_onset, vowel_offset)
        # NaN fails every comparison, so this also drops undefined values;
        # it additionally drops vowels whose mean harmonicity is negative.
        if j >= 0 and s >= 0 and h >= 0:
            count += 1
            jitter += j
            shimmer += s
            hnr += h

    return [
        meanpitch,
        meanintensity,
        duration,
        divError(f_zero, vowels),
        divError(jitter, count),
        divError(shimmer, count),
        divError(hnr, count),
        label,
    ]


# One row per recording; the last element of each row is the class label
# (0 = singing, 1 = speaking).
singing = [
    extract_features(wav_file, 0)
    for wav_file in glob.glob("/Users/adrizhao/Documents/NLP/nlp-final-project/singing/*.wav")
]
speaking = [
    extract_features(wav_file, 1)
    for wav_file in glob.glob("/Users/adrizhao/Documents/NLP/nlp-final-project/speaking/*.wav")
]


In [14]:
### PUT EVERYTHING IN NUMPY ARRAYS

# Stack the singing rows followed by the speaking rows into one 2-D array.
rows = np.array(singing + speaking, dtype=float)
if rows.ndim != 2:
    raise ValueError("no feature rows were extracted; check the wav/TextGrid paths")

# Each row ends with its own class label (0 or 1). That column is left out of
# the feature matrix: keeping it would hand every classifier the answer and
# make the cross-validation scores meaningless.
npdata = rows[:, :-1]

# create the class labels: singing = 0, speaking = 1
singing_labels = np.zeros(len(singing), dtype=int)
speaking_labels = np.ones(len(speaking), dtype=int)
nptarget = np.concatenate([singing_labels, speaking_labels])

In [15]:
# Classifiers imports
import numpy as np
from sklearn.naive_bayes import GaussianNB
from sklearn.model_selection import cross_validate
from sklearn import metrics
import matplotlib.pyplot as plt
from sklearn.discriminant_analysis import QuadraticDiscriminantAnalysis

In [16]:
# This cell runs as-is: Gaussian Naive Bayes scored with 5-fold
# cross-validation on every column of npdata.
gnb = GaussianNB()

# Metrics computed on each fold.
scoring_metrics = ['accuracy', 'precision', 'recall', 'f1']

scoresformore = cross_validate(gnb, npdata, nptarget, cv=5, scoring=scoring_metrics)

# Full dump: every entry returned by cross_validate, with its per-fold
# values and their mean.
for metric, per_fold in scoresformore.items():
    print(metric, per_fold, np.mean(per_fold))

# Summary: mean of each test metric only, rounded to four decimals.
for metric in scoresformore:
    if "test" not in metric:
        continue
    fold_mean = np.mean(scoresformore[metric])
    print(metric, "\t", np.round(fold_mean, 4))

In [17]:
# Quadratic discriminant analysis, scored with the same 5-fold setup.
qda = QuadraticDiscriminantAnalysis()
scoring_metrics = ['accuracy', 'precision', 'recall', 'f1']

scoresqda = cross_validate(qda, npdata, nptarget, cv=5, scoring=scoring_metrics)

# Mean of each test metric across the folds, rounded to four decimals.
for metric in scoresqda:
    if "test" not in metric:
        continue
    fold_mean = np.mean(scoresqda[metric])
    print(metric, "\t", np.round(fold_mean, 4))

NameError: name 'npD' is not defined

In [None]:
# Hold out a random subset of the data (size = 10) as a test set and train a
# Gaussian Naive Bayes model on the remaining rows.
test_size = 10
n_rows = npdata.shape[0]
if n_rows <= test_size:
    # Drawing 10 distinct rows is impossible with fewer rows, and with exactly
    # 10 nothing would be left to train on.
    raise ValueError(
        "need more than %d rows to hold out a test set, got %d" % (test_size, n_rows)
    )

# Distinct row indices, drawn without replacement.
testid = np.random.choice(n_rows, size=test_size, replace=False)

# NOTE(review): npmoredata and npfilename are not defined in the visible part
# of this file -- confirm they exist and share row order with npdata/nptarget.

# Get your testing data
testset = npmoredata[testid, :]
testtarget = nptarget[testid]

# Get your training data: every row that was not held out
trainset = np.delete(npmoredata, testid, 0)
traintarget = np.delete(nptarget, testid, 0)

# Build model using fit()
model = GaussianNB()
model.fit(trainset, traintarget)

# Apply model to test set using predict()
expected = testtarget
predicted = model.predict(testset)

# One "filename , predicted label" line per held-out row.
for index, label in zip(testid, predicted):
    print(npfilename[index], ",", label)

# Print a classification report
print(metrics.classification_report(expected, predicted))

# Print a confusion matrix (true positive, false positives, etc.)
print(metrics.confusion_matrix(expected, predicted))

In [None]:
# Print "filename , predicted label" for every held-out row; predicted is in
# the same order as testid, so the two are walked together.
for index, label in zip(testid, predicted):
    print(npfilename[index], ",", label)
    
#singing = 0, speaking = 1