In [1]:
import pandas as pd
df = pd.read_csv('../input/sep28k/SEP-28k_labels.csv')
df.head()

Unnamed: 0,Show,EpId,ClipId,Start,Stop,Unsure,PoorAudioQuality,Prolongation,Block,SoundRep,WordRep,DifficultToUnderstand,Interjection,NoStutteredWords,NaturalPause,Music,NoSpeech
0,HeStutters,0,0,31900320,31948320,0,0,0,0,0,0,0,0,3,1,0,0
1,HeStutters,0,1,31977120,32025120,0,0,0,0,0,0,0,0,3,1,0,0
2,HeStutters,0,2,34809760,34857760,0,0,0,0,0,0,0,0,3,0,0,0
3,HeStutters,0,3,35200640,35248640,0,0,1,0,0,0,0,0,2,0,0,0
4,HeStutters,0,4,35721920,35769920,0,0,0,0,0,0,0,0,3,0,0,0


In [2]:
df.shape

(28177, 17)

In [3]:
df.columns

Index(['Show', 'EpId', 'ClipId', 'Start', 'Stop', 'Unsure', 'PoorAudioQuality',
       'Prolongation', 'Block', 'SoundRep', 'WordRep', 'DifficultToUnderstand',
       'Interjection', 'NoStutteredWords', 'NaturalPause', 'Music',
       'NoSpeech'],
      dtype='object')

## Adding Name Column

In [4]:
df['Name'] = df[df.columns[0:3]].apply(
    lambda x: '_'.join(x.dropna().astype(str)),
    axis=1
)
df.head()

Unnamed: 0,Show,EpId,ClipId,Start,Stop,Unsure,PoorAudioQuality,Prolongation,Block,SoundRep,WordRep,DifficultToUnderstand,Interjection,NoStutteredWords,NaturalPause,Music,NoSpeech,Name
0,HeStutters,0,0,31900320,31948320,0,0,0,0,0,0,0,0,3,1,0,0,HeStutters_0_0
1,HeStutters,0,1,31977120,32025120,0,0,0,0,0,0,0,0,3,1,0,0,HeStutters_0_1
2,HeStutters,0,2,34809760,34857760,0,0,0,0,0,0,0,0,3,0,0,0,HeStutters_0_2
3,HeStutters,0,3,35200640,35248640,0,0,1,0,0,0,0,0,2,0,0,0,HeStutters_0_3
4,HeStutters,0,4,35721920,35769920,0,0,0,0,0,0,0,0,3,0,0,0,HeStutters_0_4


In [5]:
df['Name']

0                HeStutters_0_0
1                HeStutters_0_1
2                HeStutters_0_2
3                HeStutters_0_3
4                HeStutters_0_4
                  ...          
28172    WomenWhoStutter_109_35
28173    WomenWhoStutter_109_36
28174    WomenWhoStutter_109_37
28175    WomenWhoStutter_109_38
28176    WomenWhoStutter_109_39
Name: Name, Length: 28177, dtype: object

## Removing Empty Audios and there Dataset Entries

In [6]:
import os
os.stat("../input/sep28k/clips/stuttering-clips/clips/HeStutters_0_9.wav").st_size

# empty files have st_size of 44

44

In [7]:
os.stat("../input/sep28k/clips/stuttering-clips/clips/HeStutters_1_1.wav").st_size

# non empty file

96044

## Put empty filenames in a list and ignore them while feature extraction and training

In [8]:
import os

CLIPS_DIR = "../input/sep28k/clips/stuttering-clips/clips/"
ignore_list = []
for filename in os.listdir(CLIPS_DIR):
    file_path = CLIPS_DIR + filename
    if 'FluencyBank' not in filename:
        if os.stat(file_path).st_size == 44:
            ignore_list.append(filename)
            filename = filename[:-4]

print(len(ignore_list))


255


In [9]:
df.shape

# 255 rows removed from df as well

(28177, 18)

## MFCC Feature Extraction

In [10]:
import os
import librosa
import numpy as np
from tqdm.notebook import tqdm

CLIPS_DIR = "../input/sep28k/clips/stuttering-clips/clips/"  # Define CLIPS_DIR

features = {}

for filename in tqdm(os.listdir(CLIPS_DIR)):
    filename = filename[:-4]
    if 'FluencyBank' not in filename and filename + '.wav' not in ignore_list:
        audio, sample_rate = librosa.load(CLIPS_DIR + filename + '.wav', res_type='kaiser_fast', sr=None)
        mfccs = np.mean(librosa.feature.mfcc(y=audio, sr=sample_rate, n_mfcc=13).T, axis=0)
        features[filename] = mfccs

print(len(features))
print(len(features.get('HeStutters_1_1', [])))


  0%|          | 0/32321 [00:00<?, ?it/s]

27922
13


In [11]:
print(len(features))

27922


In [12]:
len(features['HeStutters_1_1'])

13

## Making Dataset from Features

In [13]:
import pandas as pd  # Import pandas

df_features = pd.DataFrame.from_dict(features)
df_features = df_features.transpose()
df_features = df_features.reset_index()

df_features


Unnamed: 0,index,0,1,2,3,4,5,6,7,8,9,10,11,12
0,HeStutters_0_17,-383.235901,49.986271,2.692035,14.354122,1.777423,-2.420321,-12.342792,1.071969,-9.256073,-6.607180,-0.808944,-9.527816,-5.364853
1,HeStutters_0_18,-360.241272,66.061157,-1.453862,11.259468,-4.462043,-0.253932,-5.415998,-6.300482,-7.769227,-6.499992,-2.834178,-6.035948,-7.483030
2,HeStutters_0_19,-293.655060,73.833069,1.900996,-2.408337,-4.000989,1.076604,-3.904765,-3.917793,-7.314900,-5.230086,1.173894,-10.094394,-5.409696
3,HeStutters_0_20,-300.604645,52.587791,-1.105800,22.788187,-2.626715,-5.491357,-18.113766,0.016458,-17.751234,-10.870637,-3.549484,-9.037821,-6.599053
4,HeStutters_0_26,-322.675629,39.411560,-26.036314,31.284817,-20.774286,-5.054771,-8.849819,-4.670222,-6.783212,-8.442830,-9.905901,-7.322884,-6.587358
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
27917,WomenWhoStutter_9_5,-331.060883,8.423903,-32.912952,25.618410,-23.380974,2.367110,-14.927206,-5.307973,0.028713,-7.770761,-5.659303,-3.837937,-1.988657
27918,WomenWhoStutter_9_6,-287.171539,24.612038,11.593970,-2.373056,-19.041912,1.381283,-8.410913,-6.094383,-21.232750,1.682820,-18.440485,5.257690,-14.749624
27919,WomenWhoStutter_9_7,-284.419800,69.935951,4.447363,25.512445,-23.518826,-3.872554,2.403313,-14.279770,-22.613018,5.954080,-23.500149,-1.513232,-22.199068
27920,WomenWhoStutter_9_8,-359.677979,48.367874,14.129634,3.370608,-19.779858,1.081613,-1.617015,-4.654689,-21.594164,-1.051908,-16.132729,8.522468,-16.694765


In [14]:
df_features = df_features.reset_index()

In [15]:
df_features

Unnamed: 0,level_0,index,0,1,2,3,4,5,6,7,8,9,10,11,12
0,0,HeStutters_0_17,-383.235901,49.986271,2.692035,14.354122,1.777423,-2.420321,-12.342792,1.071969,-9.256073,-6.607180,-0.808944,-9.527816,-5.364853
1,1,HeStutters_0_18,-360.241272,66.061157,-1.453862,11.259468,-4.462043,-0.253932,-5.415998,-6.300482,-7.769227,-6.499992,-2.834178,-6.035948,-7.483030
2,2,HeStutters_0_19,-293.655060,73.833069,1.900996,-2.408337,-4.000989,1.076604,-3.904765,-3.917793,-7.314900,-5.230086,1.173894,-10.094394,-5.409696
3,3,HeStutters_0_20,-300.604645,52.587791,-1.105800,22.788187,-2.626715,-5.491357,-18.113766,0.016458,-17.751234,-10.870637,-3.549484,-9.037821,-6.599053
4,4,HeStutters_0_26,-322.675629,39.411560,-26.036314,31.284817,-20.774286,-5.054771,-8.849819,-4.670222,-6.783212,-8.442830,-9.905901,-7.322884,-6.587358
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
27917,27917,WomenWhoStutter_9_5,-331.060883,8.423903,-32.912952,25.618410,-23.380974,2.367110,-14.927206,-5.307973,0.028713,-7.770761,-5.659303,-3.837937,-1.988657
27918,27918,WomenWhoStutter_9_6,-287.171539,24.612038,11.593970,-2.373056,-19.041912,1.381283,-8.410913,-6.094383,-21.232750,1.682820,-18.440485,5.257690,-14.749624
27919,27919,WomenWhoStutter_9_7,-284.419800,69.935951,4.447363,25.512445,-23.518826,-3.872554,2.403313,-14.279770,-22.613018,5.954080,-23.500149,-1.513232,-22.199068
27920,27920,WomenWhoStutter_9_8,-359.677979,48.367874,14.129634,3.370608,-19.779858,1.081613,-1.617015,-4.654689,-21.594164,-1.051908,-16.132729,8.522468,-16.694765


In [16]:
df_features = df_features.sort_values(by='index')
df_features

Unnamed: 0,level_0,index,0,1,2,3,4,5,6,7,8,9,10,11,12
3568,3568,HVSA_0_0,-672.897095,42.572784,-9.060439,17.092854,5.601506,11.969574,5.087627,1.287422,-1.221630,5.906602,0.396098,2.618646,2.006739
3569,3569,HVSA_0_1,-463.573730,69.182426,-18.914116,22.859167,0.735867,-7.253482,-7.202471,-5.319007,-5.854909,4.434800,-2.941299,0.067581,-10.047693
3570,3570,HVSA_0_10,-452.379944,58.461529,-26.529568,10.806607,1.040504,-11.684420,-15.975951,-17.110933,-8.533495,3.804466,-0.904714,1.546399,-5.672768
3571,3571,HVSA_0_100,-454.134888,85.565834,-16.623932,23.212677,-7.808691,-7.204673,-8.445450,-17.406025,-16.752821,-5.885376,0.342237,1.163814,-5.125270
3572,3572,HVSA_0_101,-419.757965,73.650093,-20.263416,30.691795,-5.919299,-15.747583,-17.799961,-13.380462,-10.132166,-2.647573,1.943859,-0.861621,-5.781013
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
27917,27917,WomenWhoStutter_9_5,-331.060883,8.423903,-32.912952,25.618410,-23.380974,2.367110,-14.927206,-5.307973,0.028713,-7.770761,-5.659303,-3.837937,-1.988657
27918,27918,WomenWhoStutter_9_6,-287.171539,24.612038,11.593970,-2.373056,-19.041912,1.381283,-8.410913,-6.094383,-21.232750,1.682820,-18.440485,5.257690,-14.749624
27919,27919,WomenWhoStutter_9_7,-284.419800,69.935951,4.447363,25.512445,-23.518826,-3.872554,2.403313,-14.279770,-22.613018,5.954080,-23.500149,-1.513232,-22.199068
27920,27920,WomenWhoStutter_9_8,-359.677979,48.367874,14.129634,3.370608,-19.779858,1.081613,-1.617015,-4.654689,-21.594164,-1.051908,-16.132729,8.522468,-16.694765


In [17]:
df_features['index']

3568                HVSA_0_0
3569                HVSA_0_1
3570               HVSA_0_10
3571              HVSA_0_100
3572              HVSA_0_101
                ...         
27917    WomenWhoStutter_9_5
27918    WomenWhoStutter_9_6
27919    WomenWhoStutter_9_7
27920    WomenWhoStutter_9_8
27921    WomenWhoStutter_9_9
Name: index, Length: 27922, dtype: object

In [18]:
df['Name']

0                HeStutters_0_0
1                HeStutters_0_1
2                HeStutters_0_2
3                HeStutters_0_3
4                HeStutters_0_4
                  ...          
28172    WomenWhoStutter_109_35
28173    WomenWhoStutter_109_36
28174    WomenWhoStutter_109_37
28175    WomenWhoStutter_109_38
28176    WomenWhoStutter_109_39
Name: Name, Length: 28177, dtype: object

## Applying Inner Join on the dataframes

In [19]:
df_features.rename(columns = {'index':'Name'}, inplace = True)
df_features

Unnamed: 0,level_0,Name,0,1,2,3,4,5,6,7,8,9,10,11,12
3568,3568,HVSA_0_0,-672.897095,42.572784,-9.060439,17.092854,5.601506,11.969574,5.087627,1.287422,-1.221630,5.906602,0.396098,2.618646,2.006739
3569,3569,HVSA_0_1,-463.573730,69.182426,-18.914116,22.859167,0.735867,-7.253482,-7.202471,-5.319007,-5.854909,4.434800,-2.941299,0.067581,-10.047693
3570,3570,HVSA_0_10,-452.379944,58.461529,-26.529568,10.806607,1.040504,-11.684420,-15.975951,-17.110933,-8.533495,3.804466,-0.904714,1.546399,-5.672768
3571,3571,HVSA_0_100,-454.134888,85.565834,-16.623932,23.212677,-7.808691,-7.204673,-8.445450,-17.406025,-16.752821,-5.885376,0.342237,1.163814,-5.125270
3572,3572,HVSA_0_101,-419.757965,73.650093,-20.263416,30.691795,-5.919299,-15.747583,-17.799961,-13.380462,-10.132166,-2.647573,1.943859,-0.861621,-5.781013
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
27917,27917,WomenWhoStutter_9_5,-331.060883,8.423903,-32.912952,25.618410,-23.380974,2.367110,-14.927206,-5.307973,0.028713,-7.770761,-5.659303,-3.837937,-1.988657
27918,27918,WomenWhoStutter_9_6,-287.171539,24.612038,11.593970,-2.373056,-19.041912,1.381283,-8.410913,-6.094383,-21.232750,1.682820,-18.440485,5.257690,-14.749624
27919,27919,WomenWhoStutter_9_7,-284.419800,69.935951,4.447363,25.512445,-23.518826,-3.872554,2.403313,-14.279770,-22.613018,5.954080,-23.500149,-1.513232,-22.199068
27920,27920,WomenWhoStutter_9_8,-359.677979,48.367874,14.129634,3.370608,-19.779858,1.081613,-1.617015,-4.654689,-21.594164,-1.051908,-16.132729,8.522468,-16.694765


In [20]:
df_final = pd.merge(df, df_features, how='inner', on='Name')
df_final

Unnamed: 0,Show,EpId,ClipId,Start,Stop,Unsure,PoorAudioQuality,Prolongation,Block,SoundRep,...,3,4,5,6,7,8,9,10,11,12
0,HeStutters,0,17,6970880,7018880,0,0,0,0,0,...,14.354122,1.777423,-2.420321,-12.342792,1.071969,-9.256073,-6.607180,-0.808944,-9.527816,-5.364853
1,HeStutters,0,18,8215200,8263200,0,0,0,1,0,...,11.259468,-4.462043,-0.253932,-5.415998,-6.300482,-7.769227,-6.499992,-2.834178,-6.035948,-7.483030
2,HeStutters,0,19,9985280,10033280,0,0,1,0,0,...,-2.408337,-4.000989,1.076604,-3.904765,-3.917793,-7.314900,-5.230086,1.173894,-10.094394,-5.409696
3,HeStutters,0,20,11197600,11245600,0,0,0,0,0,...,22.788187,-2.626715,-5.491357,-18.113766,0.016458,-17.751234,-10.870637,-3.549484,-9.037821,-6.599053
4,HeStutters,0,26,3562240,3610240,0,0,0,0,0,...,31.284817,-20.774286,-5.054771,-8.849819,-4.670222,-6.783212,-8.442830,-9.905901,-7.322884,-6.587358
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
27917,WomenWhoStutter,109,35,25793600,25841600,0,0,0,0,0,...,20.143105,-26.160034,3.709442,-15.719243,2.436149,-1.615795,-5.105469,-10.272826,-3.074089,-6.375842
27918,WomenWhoStutter,109,36,26168480,26216480,0,0,1,1,3,...,29.588446,-10.896355,-14.222120,-9.167952,-1.676773,-8.374957,-6.297094,1.265485,-4.014944,-0.196149
27919,WomenWhoStutter,109,37,3569440,3617440,0,0,0,0,0,...,9.234311,-27.815411,-5.439772,-17.093634,-8.942756,-1.638183,-3.736795,-0.968334,-7.699897,0.135829
27920,WomenWhoStutter,109,38,28479840,28527840,0,0,1,0,0,...,26.443159,-24.452452,-10.057508,-10.132120,-4.451964,-1.437817,-11.729813,-6.692399,0.137548,-3.664661


In [21]:
df_final.head()

Unnamed: 0,Show,EpId,ClipId,Start,Stop,Unsure,PoorAudioQuality,Prolongation,Block,SoundRep,...,3,4,5,6,7,8,9,10,11,12
0,HeStutters,0,17,6970880,7018880,0,0,0,0,0,...,14.354122,1.777423,-2.420321,-12.342792,1.071969,-9.256073,-6.60718,-0.808944,-9.527816,-5.364853
1,HeStutters,0,18,8215200,8263200,0,0,0,1,0,...,11.259468,-4.462043,-0.253932,-5.415998,-6.300482,-7.769227,-6.499992,-2.834178,-6.035948,-7.48303
2,HeStutters,0,19,9985280,10033280,0,0,1,0,0,...,-2.408337,-4.000989,1.076604,-3.904765,-3.917793,-7.3149,-5.230086,1.173894,-10.094394,-5.409696
3,HeStutters,0,20,11197600,11245600,0,0,0,0,0,...,22.788187,-2.626715,-5.491357,-18.113766,0.016458,-17.751234,-10.870637,-3.549484,-9.037821,-6.599053
4,HeStutters,0,26,3562240,3610240,0,0,0,0,0,...,31.284817,-20.774286,-5.054771,-8.849819,-4.670222,-6.783212,-8.44283,-9.905901,-7.322884,-6.587358


In [22]:
df_final.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 27922 entries, 0 to 27921
Data columns (total 32 columns):
 #   Column                 Non-Null Count  Dtype  
---  ------                 --------------  -----  
 0   Show                   27922 non-null  object 
 1   EpId                   27922 non-null  int64  
 2   ClipId                 27922 non-null  int64  
 3   Start                  27922 non-null  int64  
 4   Stop                   27922 non-null  int64  
 5   Unsure                 27922 non-null  int64  
 6   PoorAudioQuality       27922 non-null  int64  
 7   Prolongation           27922 non-null  int64  
 8   Block                  27922 non-null  int64  
 9   SoundRep               27922 non-null  int64  
 10  WordRep                27922 non-null  int64  
 11  DifficultToUnderstand  27922 non-null  int64  
 12  Interjection           27922 non-null  int64  
 13  NoStutteredWords       27922 non-null  int64  
 14  NaturalPause           27922 non-null  int64  
 15  Mu

In [23]:
# df_final['Stutter'] = df['WordRep'] + df['SoundRep'] + df['Prolongation'] + df['Interjection']
# df_final

In [24]:
# df_final['Stutter'].value_counts()

In [25]:
# df_final = df_final[df_final.Stutter != 0]
# Remove Non-Stuttered Clips

# df_final.loc[df_final['Stutter'] >= 1.0, 'Stutter'] = 1.0
# df_final['Stutter'].value_counts()

In [26]:
df_final = df_final[df_final.PoorAudioQuality == 0]
df_final

Unnamed: 0,Show,EpId,ClipId,Start,Stop,Unsure,PoorAudioQuality,Prolongation,Block,SoundRep,...,3,4,5,6,7,8,9,10,11,12
0,HeStutters,0,17,6970880,7018880,0,0,0,0,0,...,14.354122,1.777423,-2.420321,-12.342792,1.071969,-9.256073,-6.607180,-0.808944,-9.527816,-5.364853
1,HeStutters,0,18,8215200,8263200,0,0,0,1,0,...,11.259468,-4.462043,-0.253932,-5.415998,-6.300482,-7.769227,-6.499992,-2.834178,-6.035948,-7.483030
2,HeStutters,0,19,9985280,10033280,0,0,1,0,0,...,-2.408337,-4.000989,1.076604,-3.904765,-3.917793,-7.314900,-5.230086,1.173894,-10.094394,-5.409696
3,HeStutters,0,20,11197600,11245600,0,0,0,0,0,...,22.788187,-2.626715,-5.491357,-18.113766,0.016458,-17.751234,-10.870637,-3.549484,-9.037821,-6.599053
4,HeStutters,0,26,3562240,3610240,0,0,0,0,0,...,31.284817,-20.774286,-5.054771,-8.849819,-4.670222,-6.783212,-8.442830,-9.905901,-7.322884,-6.587358
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
27917,WomenWhoStutter,109,35,25793600,25841600,0,0,0,0,0,...,20.143105,-26.160034,3.709442,-15.719243,2.436149,-1.615795,-5.105469,-10.272826,-3.074089,-6.375842
27918,WomenWhoStutter,109,36,26168480,26216480,0,0,1,1,3,...,29.588446,-10.896355,-14.222120,-9.167952,-1.676773,-8.374957,-6.297094,1.265485,-4.014944,-0.196149
27919,WomenWhoStutter,109,37,3569440,3617440,0,0,0,0,0,...,9.234311,-27.815411,-5.439772,-17.093634,-8.942756,-1.638183,-3.736795,-0.968334,-7.699897,0.135829
27920,WomenWhoStutter,109,38,28479840,28527840,0,0,1,0,0,...,26.443159,-24.452452,-10.057508,-10.132120,-4.451964,-1.437817,-11.729813,-6.692399,0.137548,-3.664661


In [27]:
df_final = df_final[df_final.DifficultToUnderstand == 0]
df_final

Unnamed: 0,Show,EpId,ClipId,Start,Stop,Unsure,PoorAudioQuality,Prolongation,Block,SoundRep,...,3,4,5,6,7,8,9,10,11,12
0,HeStutters,0,17,6970880,7018880,0,0,0,0,0,...,14.354122,1.777423,-2.420321,-12.342792,1.071969,-9.256073,-6.607180,-0.808944,-9.527816,-5.364853
1,HeStutters,0,18,8215200,8263200,0,0,0,1,0,...,11.259468,-4.462043,-0.253932,-5.415998,-6.300482,-7.769227,-6.499992,-2.834178,-6.035948,-7.483030
2,HeStutters,0,19,9985280,10033280,0,0,1,0,0,...,-2.408337,-4.000989,1.076604,-3.904765,-3.917793,-7.314900,-5.230086,1.173894,-10.094394,-5.409696
3,HeStutters,0,20,11197600,11245600,0,0,0,0,0,...,22.788187,-2.626715,-5.491357,-18.113766,0.016458,-17.751234,-10.870637,-3.549484,-9.037821,-6.599053
4,HeStutters,0,26,3562240,3610240,0,0,0,0,0,...,31.284817,-20.774286,-5.054771,-8.849819,-4.670222,-6.783212,-8.442830,-9.905901,-7.322884,-6.587358
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
27916,WomenWhoStutter,109,34,24781440,24829440,0,0,0,0,0,...,5.831603,-24.096716,-9.137781,-8.294982,-17.235239,-3.540516,-5.074908,-3.828185,-5.524163,-5.444092
27917,WomenWhoStutter,109,35,25793600,25841600,0,0,0,0,0,...,20.143105,-26.160034,3.709442,-15.719243,2.436149,-1.615795,-5.105469,-10.272826,-3.074089,-6.375842
27919,WomenWhoStutter,109,37,3569440,3617440,0,0,0,0,0,...,9.234311,-27.815411,-5.439772,-17.093634,-8.942756,-1.638183,-3.736795,-0.968334,-7.699897,0.135829
27920,WomenWhoStutter,109,38,28479840,28527840,0,0,1,0,0,...,26.443159,-24.452452,-10.057508,-10.132120,-4.451964,-1.437817,-11.729813,-6.692399,0.137548,-3.664661


In [28]:
df_final = df_final[df_final.Music == 0]
df_final

Unnamed: 0,Show,EpId,ClipId,Start,Stop,Unsure,PoorAudioQuality,Prolongation,Block,SoundRep,...,3,4,5,6,7,8,9,10,11,12
0,HeStutters,0,17,6970880,7018880,0,0,0,0,0,...,14.354122,1.777423,-2.420321,-12.342792,1.071969,-9.256073,-6.607180,-0.808944,-9.527816,-5.364853
1,HeStutters,0,18,8215200,8263200,0,0,0,1,0,...,11.259468,-4.462043,-0.253932,-5.415998,-6.300482,-7.769227,-6.499992,-2.834178,-6.035948,-7.483030
2,HeStutters,0,19,9985280,10033280,0,0,1,0,0,...,-2.408337,-4.000989,1.076604,-3.904765,-3.917793,-7.314900,-5.230086,1.173894,-10.094394,-5.409696
3,HeStutters,0,20,11197600,11245600,0,0,0,0,0,...,22.788187,-2.626715,-5.491357,-18.113766,0.016458,-17.751234,-10.870637,-3.549484,-9.037821,-6.599053
4,HeStutters,0,26,3562240,3610240,0,0,0,0,0,...,31.284817,-20.774286,-5.054771,-8.849819,-4.670222,-6.783212,-8.442830,-9.905901,-7.322884,-6.587358
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
27916,WomenWhoStutter,109,34,24781440,24829440,0,0,0,0,0,...,5.831603,-24.096716,-9.137781,-8.294982,-17.235239,-3.540516,-5.074908,-3.828185,-5.524163,-5.444092
27917,WomenWhoStutter,109,35,25793600,25841600,0,0,0,0,0,...,20.143105,-26.160034,3.709442,-15.719243,2.436149,-1.615795,-5.105469,-10.272826,-3.074089,-6.375842
27919,WomenWhoStutter,109,37,3569440,3617440,0,0,0,0,0,...,9.234311,-27.815411,-5.439772,-17.093634,-8.942756,-1.638183,-3.736795,-0.968334,-7.699897,0.135829
27920,WomenWhoStutter,109,38,28479840,28527840,0,0,1,0,0,...,26.443159,-24.452452,-10.057508,-10.132120,-4.451964,-1.437817,-11.729813,-6.692399,0.137548,-3.664661


In [29]:
df_final = df_final[df_final.NoSpeech == 0]
df_final

Unnamed: 0,Show,EpId,ClipId,Start,Stop,Unsure,PoorAudioQuality,Prolongation,Block,SoundRep,...,3,4,5,6,7,8,9,10,11,12
0,HeStutters,0,17,6970880,7018880,0,0,0,0,0,...,14.354122,1.777423,-2.420321,-12.342792,1.071969,-9.256073,-6.607180,-0.808944,-9.527816,-5.364853
1,HeStutters,0,18,8215200,8263200,0,0,0,1,0,...,11.259468,-4.462043,-0.253932,-5.415998,-6.300482,-7.769227,-6.499992,-2.834178,-6.035948,-7.483030
2,HeStutters,0,19,9985280,10033280,0,0,1,0,0,...,-2.408337,-4.000989,1.076604,-3.904765,-3.917793,-7.314900,-5.230086,1.173894,-10.094394,-5.409696
3,HeStutters,0,20,11197600,11245600,0,0,0,0,0,...,22.788187,-2.626715,-5.491357,-18.113766,0.016458,-17.751234,-10.870637,-3.549484,-9.037821,-6.599053
4,HeStutters,0,26,3562240,3610240,0,0,0,0,0,...,31.284817,-20.774286,-5.054771,-8.849819,-4.670222,-6.783212,-8.442830,-9.905901,-7.322884,-6.587358
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
27916,WomenWhoStutter,109,34,24781440,24829440,0,0,0,0,0,...,5.831603,-24.096716,-9.137781,-8.294982,-17.235239,-3.540516,-5.074908,-3.828185,-5.524163,-5.444092
27917,WomenWhoStutter,109,35,25793600,25841600,0,0,0,0,0,...,20.143105,-26.160034,3.709442,-15.719243,2.436149,-1.615795,-5.105469,-10.272826,-3.074089,-6.375842
27919,WomenWhoStutter,109,37,3569440,3617440,0,0,0,0,0,...,9.234311,-27.815411,-5.439772,-17.093634,-8.942756,-1.638183,-3.736795,-0.968334,-7.699897,0.135829
27920,WomenWhoStutter,109,38,28479840,28527840,0,0,1,0,0,...,26.443159,-24.452452,-10.057508,-10.132120,-4.451964,-1.437817,-11.729813,-6.692399,0.137548,-3.664661


In [30]:
df_final.to_csv("sep28k-mfcc.csv",index=False)

## Model Creation & Training for SoundRep Classification

In [31]:
import pandas as pd
df = pd.read_csv("./sep28k-mfcc.csv")

In [32]:
df = df[df['NoStutteredWords'] != 0]
df = df[df['NaturalPause'] == 0]
df = df[df['Interjection'] == 0]
df = df[df['Prolongation'] == 0]
df = df[df['WordRep'] == 0]
df = df[df['Block'] == 0]
df

Unnamed: 0,Show,EpId,ClipId,Start,Stop,Unsure,PoorAudioQuality,Prolongation,Block,SoundRep,...,3,4,5,6,7,8,9,10,11,12
3,HeStutters,0,20,11197600,11245600,0,0,0,0,0,...,22.788187,-2.626715,-5.491357,-18.113766,0.016458,-17.751234,-10.870637,-3.549485,-9.037821,-6.599053
4,HeStutters,0,26,3562240,3610240,0,0,0,0,0,...,31.284817,-20.774286,-5.054771,-8.849819,-4.670222,-6.783212,-8.442830,-9.905901,-7.322884,-6.587358
8,HeStutters,1,4,28351360,28399360,0,0,0,0,0,...,26.878878,-11.660662,3.515312,-27.021893,-11.834179,-7.582844,-7.805756,-12.518568,-1.051659,-7.828529
18,HeStutters,1,105,6492320,6540320,0,0,0,0,2,...,9.991777,6.028349,-7.291534,-25.489399,-14.340752,-14.337446,1.463035,-16.014809,-0.031667,0.534200
19,HeStutters,1,107,6638560,6686560,0,0,0,0,0,...,29.173650,7.216802,5.993933,-13.808914,-6.984493,-11.859511,-2.531782,-16.870176,-5.008951,-4.917668
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
20853,WomenWhoStutter,109,24,16179040,16227040,0,0,0,0,0,...,26.002850,-16.124636,-14.775825,-19.643902,-7.232812,-7.771849,-8.517389,1.568446,-6.449149,1.097682
20854,WomenWhoStutter,109,25,16224160,16272160,0,0,0,0,0,...,8.484956,-29.523212,-2.754524,-15.226074,-7.238212,1.549138,-8.778445,-10.499596,-2.756171,-6.887868
20860,WomenWhoStutter,109,31,20476320,20524320,0,0,0,0,0,...,16.823687,-23.059584,-6.954722,-10.113501,-13.114476,0.588607,-15.267632,-10.101979,-0.440983,0.197351
20862,WomenWhoStutter,109,33,24747360,24795360,0,0,0,0,0,...,18.849451,-23.086061,2.158868,-8.471072,1.332367,2.440633,-1.968005,-7.462197,0.142006,-4.275124


In [33]:
df.loc[df['SoundRep'] >= 1.0, 'SoundRep'] = 1.0
df['SoundRep'].value_counts()

SoundRep
0    2471
1     156
Name: count, dtype: int64

In [34]:
X = df.iloc[: , -13:]
X

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,10,11,12
3,-300.60464,52.587790,-1.105800,22.788187,-2.626715,-5.491357,-18.113766,0.016458,-17.751234,-10.870637,-3.549485,-9.037821,-6.599053
4,-322.67563,39.411560,-26.036314,31.284817,-20.774286,-5.054771,-8.849819,-4.670222,-6.783212,-8.442830,-9.905901,-7.322884,-6.587358
8,-418.44520,103.886830,-30.266285,26.878878,-11.660662,3.515312,-27.021893,-11.834179,-7.582844,-7.805756,-12.518568,-1.051659,-7.828529
18,-399.46472,92.754060,-31.619010,9.991777,6.028349,-7.291534,-25.489399,-14.340752,-14.337446,1.463035,-16.014809,-0.031667,0.534200
19,-475.87134,93.217606,3.080706,29.173650,7.216802,5.993933,-13.808914,-6.984493,-11.859511,-2.531782,-16.870176,-5.008951,-4.917668
...,...,...,...,...,...,...,...,...,...,...,...,...,...
20853,-396.81604,47.699257,-28.230510,26.002850,-16.124636,-14.775825,-19.643902,-7.232812,-7.771849,-8.517389,1.568446,-6.449149,1.097682
20854,-366.09630,59.089510,-23.645912,8.484956,-29.523212,-2.754524,-15.226074,-7.238212,1.549138,-8.778445,-10.499596,-2.756171,-6.887868
20860,-316.00027,42.245804,-40.056350,16.823687,-23.059584,-6.954722,-10.113501,-13.114476,0.588607,-15.267632,-10.101979,-0.440983,0.197351
20862,-350.42902,27.453860,-35.155910,18.849451,-23.086061,2.158868,-8.471072,1.332367,2.440633,-1.968005,-7.462197,0.142006,-4.275124


In [35]:
y = df['SoundRep']
y

3        0
4        0
8        0
18       1
19       0
        ..
20853    0
20854    0
20860    0
20862    0
20867    0
Name: SoundRep, Length: 2627, dtype: int64

In [36]:
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test =  train_test_split(X, y, test_size=0.4, random_state=42)

In [37]:
from collections import Counter
Counter(y_train)

Counter({0: 1480, 1: 96})

In [38]:
from sklearn.tree import DecisionTreeClassifier
clf = DecisionTreeClassifier(criterion='entropy',random_state=5)
clf.fit(X_train, y_train)
clf.score(X_test,y_test) * 100

89.5337773549001

In [39]:
import numpy as np
from collections import Counter
from sklearn.tree import DecisionTreeClassifier

# Assuming X_train, X_test, y_train, y_test are already defined
clf = DecisionTreeClassifier(criterion='entropy', random_state=5)
clf.fit(X_train, y_train)
print(clf.score(X_test, y_test) * 100)

y_pred = np.array(clf.predict(X_test))
y_actual = np.array(y_test)
print(Counter(y_pred))
print(Counter(y_actual))


89.5337773549001
Counter({0: 981, 1: 70})
Counter({0: 991, 1: 60})


## Model Creation & Training for WordRep Classification

In [40]:
import pandas as pd
df = pd.read_csv('./sep28k-mfcc.csv')

In [41]:
df.columns

Index(['Show', 'EpId', 'ClipId', 'Start', 'Stop', 'Unsure', 'PoorAudioQuality',
       'Prolongation', 'Block', 'SoundRep', 'WordRep', 'DifficultToUnderstand',
       'Interjection', 'NoStutteredWords', 'NaturalPause', 'Music', 'NoSpeech',
       'Name', 'level_0', '0', '1', '2', '3', '4', '5', '6', '7', '8', '9',
       '10', '11', '12'],
      dtype='object')

In [42]:
df.shape

(20868, 32)

In [43]:
df = df[df['NoStutteredWords'] != 0]
df = df[df['NaturalPause'] == 0]
df = df[df['Interjection'] == 0]
df = df[df['Prolongation'] == 0]
df = df[df['SoundRep'] == 0]
df = df[df['Block'] == 0]
df

Unnamed: 0,Show,EpId,ClipId,Start,Stop,Unsure,PoorAudioQuality,Prolongation,Block,SoundRep,...,3,4,5,6,7,8,9,10,11,12
3,HeStutters,0,20,11197600,11245600,0,0,0,0,0,...,22.788187,-2.626715,-5.491357,-18.113766,0.016458,-17.751234,-10.870637,-3.549485,-9.037821,-6.599053
4,HeStutters,0,26,3562240,3610240,0,0,0,0,0,...,31.284817,-20.774286,-5.054771,-8.849819,-4.670222,-6.783212,-8.442830,-9.905901,-7.322884,-6.587358
8,HeStutters,1,4,28351360,28399360,0,0,0,0,0,...,26.878878,-11.660662,3.515312,-27.021893,-11.834179,-7.582844,-7.805756,-12.518568,-1.051659,-7.828529
11,HeStutters,1,8,28539840,28587840,0,0,0,0,0,...,13.250941,4.572205,0.690116,-12.168562,-10.612210,-17.491869,-2.735056,-9.947231,-9.058744,-8.354891
19,HeStutters,1,107,6638560,6686560,0,0,0,0,0,...,29.173650,7.216802,5.993933,-13.808914,-6.984493,-11.859511,-2.531782,-16.870176,-5.008951,-4.917668
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
20853,WomenWhoStutter,109,24,16179040,16227040,0,0,0,0,0,...,26.002850,-16.124636,-14.775825,-19.643902,-7.232812,-7.771849,-8.517389,1.568446,-6.449149,1.097682
20854,WomenWhoStutter,109,25,16224160,16272160,0,0,0,0,0,...,8.484956,-29.523212,-2.754524,-15.226074,-7.238212,1.549138,-8.778445,-10.499596,-2.756171,-6.887868
20860,WomenWhoStutter,109,31,20476320,20524320,0,0,0,0,0,...,16.823687,-23.059584,-6.954722,-10.113501,-13.114476,0.588607,-15.267632,-10.101979,-0.440983,0.197351
20862,WomenWhoStutter,109,33,24747360,24795360,0,0,0,0,0,...,18.849451,-23.086061,2.158868,-8.471072,1.332367,2.440633,-1.968005,-7.462197,0.142006,-4.275124


In [44]:
df.loc[df['WordRep'] >= 1.0, 'WordRep'] = 1.0
df['WordRep'].value_counts()

WordRep
0    2471
1     192
Name: count, dtype: int64

In [45]:
X = df.iloc[: , -13:]
X

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,10,11,12
3,-300.60464,52.587790,-1.105800,22.788187,-2.626715,-5.491357,-18.113766,0.016458,-17.751234,-10.870637,-3.549485,-9.037821,-6.599053
4,-322.67563,39.411560,-26.036314,31.284817,-20.774286,-5.054771,-8.849819,-4.670222,-6.783212,-8.442830,-9.905901,-7.322884,-6.587358
8,-418.44520,103.886830,-30.266285,26.878878,-11.660662,3.515312,-27.021893,-11.834179,-7.582844,-7.805756,-12.518568,-1.051659,-7.828529
11,-494.81573,84.972070,1.017862,13.250941,4.572205,0.690116,-12.168562,-10.612210,-17.491869,-2.735056,-9.947231,-9.058744,-8.354891
19,-475.87134,93.217606,3.080706,29.173650,7.216802,5.993933,-13.808914,-6.984493,-11.859511,-2.531782,-16.870176,-5.008951,-4.917668
...,...,...,...,...,...,...,...,...,...,...,...,...,...
20853,-396.81604,47.699257,-28.230510,26.002850,-16.124636,-14.775825,-19.643902,-7.232812,-7.771849,-8.517389,1.568446,-6.449149,1.097682
20854,-366.09630,59.089510,-23.645912,8.484956,-29.523212,-2.754524,-15.226074,-7.238212,1.549138,-8.778445,-10.499596,-2.756171,-6.887868
20860,-316.00027,42.245804,-40.056350,16.823687,-23.059584,-6.954722,-10.113501,-13.114476,0.588607,-15.267632,-10.101979,-0.440983,0.197351
20862,-350.42902,27.453860,-35.155910,18.849451,-23.086061,2.158868,-8.471072,1.332367,2.440633,-1.968005,-7.462197,0.142006,-4.275124


In [46]:
y = df['WordRep']
y

3        0
4        0
8        0
11       1
19       0
        ..
20853    0
20854    0
20860    0
20862    0
20867    0
Name: WordRep, Length: 2663, dtype: int64

In [47]:
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test =  train_test_split(X, y, test_size=0.4, random_state=42)

from collections import Counter
Counter(y_train)

Counter({0: 1476, 1: 121})

In [48]:
from sklearn.tree import DecisionTreeClassifier
clf = DecisionTreeClassifier(criterion='gini',random_state=5)
# clf.fit(X_over, y_over)
# clf.fit(X_smote, y_smote)
# clf.fit(X_pipe, y_pipe)
clf.fit(X_train, y_train)
clf.score(X_test,y_test) * 100

86.11632270168855

In [49]:
y_pred = np.array(clf.predict(X_test))
y_actual = np.array(y_test)
print(Counter(y_pred))
print(Counter(y_actual))

Counter({0: 977, 1: 89})
Counter({0: 995, 1: 71})


## Model Creation & Training for Prolongation Classification

In [50]:
import pandas as pd
df = pd.read_csv('./sep28k-mfcc.csv')

In [51]:
df.columns

Index(['Show', 'EpId', 'ClipId', 'Start', 'Stop', 'Unsure', 'PoorAudioQuality',
       'Prolongation', 'Block', 'SoundRep', 'WordRep', 'DifficultToUnderstand',
       'Interjection', 'NoStutteredWords', 'NaturalPause', 'Music', 'NoSpeech',
       'Name', 'level_0', '0', '1', '2', '3', '4', '5', '6', '7', '8', '9',
       '10', '11', '12'],
      dtype='object')

In [52]:
df.shape

(20868, 32)

In [53]:
df = df[df['NoStutteredWords'] != 0]
df = df[df['NaturalPause'] == 0]
df = df[df['Interjection'] == 0]
df = df[df['WordRep'] == 0]
df = df[df['SoundRep'] == 0]
df = df[df['Block'] == 0]
df

Unnamed: 0,Show,EpId,ClipId,Start,Stop,Unsure,PoorAudioQuality,Prolongation,Block,SoundRep,...,3,4,5,6,7,8,9,10,11,12
2,HeStutters,0,19,9985280,10033280,0,0,1,0,0,...,-2.408337,-4.000989,1.076604,-3.904765,-3.917793,-7.314900,-5.230086,1.173894,-10.094394,-5.409696
3,HeStutters,0,20,11197600,11245600,0,0,0,0,0,...,22.788187,-2.626715,-5.491357,-18.113766,0.016458,-17.751234,-10.870637,-3.549485,-9.037821,-6.599053
4,HeStutters,0,26,3562240,3610240,0,0,0,0,0,...,31.284817,-20.774286,-5.054771,-8.849819,-4.670222,-6.783212,-8.442830,-9.905901,-7.322884,-6.587358
8,HeStutters,1,4,28351360,28399360,0,0,0,0,0,...,26.878878,-11.660662,3.515312,-27.021893,-11.834179,-7.582844,-7.805756,-12.518568,-1.051659,-7.828529
19,HeStutters,1,107,6638560,6686560,0,0,0,0,0,...,29.173650,7.216802,5.993933,-13.808914,-6.984493,-11.859511,-2.531782,-16.870176,-5.008951,-4.917668
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
20853,WomenWhoStutter,109,24,16179040,16227040,0,0,0,0,0,...,26.002850,-16.124636,-14.775825,-19.643902,-7.232812,-7.771849,-8.517389,1.568446,-6.449149,1.097682
20854,WomenWhoStutter,109,25,16224160,16272160,0,0,0,0,0,...,8.484956,-29.523212,-2.754524,-15.226074,-7.238212,1.549138,-8.778445,-10.499596,-2.756171,-6.887868
20860,WomenWhoStutter,109,31,20476320,20524320,0,0,0,0,0,...,16.823687,-23.059584,-6.954722,-10.113501,-13.114476,0.588607,-15.267632,-10.101979,-0.440983,0.197351
20862,WomenWhoStutter,109,33,24747360,24795360,0,0,0,0,0,...,18.849451,-23.086061,2.158868,-8.471072,1.332367,2.440633,-1.968005,-7.462197,0.142006,-4.275124


In [54]:
df.loc[df['Prolongation'] >= 1.0, 'Prolongation'] = 1.0
df['Prolongation'].value_counts()

Prolongation
0    2471
1     914
Name: count, dtype: int64

In [55]:
X = df.iloc[: , -13:]
X

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,10,11,12
2,-293.65506,73.833070,1.900996,-2.408337,-4.000989,1.076604,-3.904765,-3.917793,-7.314900,-5.230086,1.173894,-10.094394,-5.409696
3,-300.60464,52.587790,-1.105800,22.788187,-2.626715,-5.491357,-18.113766,0.016458,-17.751234,-10.870637,-3.549485,-9.037821,-6.599053
4,-322.67563,39.411560,-26.036314,31.284817,-20.774286,-5.054771,-8.849819,-4.670222,-6.783212,-8.442830,-9.905901,-7.322884,-6.587358
8,-418.44520,103.886830,-30.266285,26.878878,-11.660662,3.515312,-27.021893,-11.834179,-7.582844,-7.805756,-12.518568,-1.051659,-7.828529
19,-475.87134,93.217606,3.080706,29.173650,7.216802,5.993933,-13.808914,-6.984493,-11.859511,-2.531782,-16.870176,-5.008951,-4.917668
...,...,...,...,...,...,...,...,...,...,...,...,...,...
20853,-396.81604,47.699257,-28.230510,26.002850,-16.124636,-14.775825,-19.643902,-7.232812,-7.771849,-8.517389,1.568446,-6.449149,1.097682
20854,-366.09630,59.089510,-23.645912,8.484956,-29.523212,-2.754524,-15.226074,-7.238212,1.549138,-8.778445,-10.499596,-2.756171,-6.887868
20860,-316.00027,42.245804,-40.056350,16.823687,-23.059584,-6.954722,-10.113501,-13.114476,0.588607,-15.267632,-10.101979,-0.440983,0.197351
20862,-350.42902,27.453860,-35.155910,18.849451,-23.086061,2.158868,-8.471072,1.332367,2.440633,-1.968005,-7.462197,0.142006,-4.275124


In [56]:
y = df['Prolongation']
y

2        1
3        0
4        0
8        0
19       0
        ..
20853    0
20854    0
20860    0
20862    0
20867    0
Name: Prolongation, Length: 3385, dtype: int64

In [57]:
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test =  train_test_split(X, y, test_size=0.4, random_state=42)

from collections import Counter
Counter(y_train)

Counter({0: 1493, 1: 538})

In [58]:
from sklearn.neighbors import KNeighborsClassifier
knn = KNeighborsClassifier(n_neighbors=3)
knn.fit(X_train, y_train)
knn.score(X_test, y_test) * 100

66.83899556868538

In [59]:
import numpy as np
y_pred = np.array(knn.predict(X_test))
y_actual = np.array(y_test)
print(Counter(y_pred))
print(Counter(y_actual))

Counter({0: 1119, 1: 235})
Counter({0: 978, 1: 376})


In [60]:
import IPython.display as ipd

In [90]:
ipd.Audio('../input/sep28k/clips/stuttering-clips/clips/StutterTalk_64_78.wav')

## Train and Save Models

In [70]:
import pandas as pd
from sklearn.tree import DecisionTreeClassifier
from sklearn.model_selection import train_test_split
from sklearn.neighbors import KNeighborsClassifier
import pickle

# Assuming your DataFrame and feature extraction process is done correctly
df = pd.read_csv('./sep28k-mfcc.csv')
X = df.iloc[:, -13:]

# Train Sound Repetition model
y_soundrep = df['SoundRep']
X_train_sr, X_test_sr, y_train_sr, y_test_sr = train_test_split(X, y_soundrep, test_size=0.4, random_state=42)
clf_soundrep = DecisionTreeClassifier(criterion='entropy', random_state=5)
clf_soundrep.fit(X_train_sr, y_train_sr)
with open('soundrep_model.pkl', 'wb') as f:
    pickle.dump(clf_soundrep, f)

# Train Word Repetition model
y_wordrep = df['WordRep']
X_train_wr, X_test_wr, y_train_wr, y_test_wr = train_test_split(X, y_wordrep, test_size=0.4, random_state=42)
clf_wordrep = DecisionTreeClassifier(criterion='gini', random_state=5)
clf_wordrep.fit(X_train_wr, y_train_wr)
with open('wordrep_model.pkl', 'wb') as f:
    pickle.dump(clf_wordrep, f)

# Train Prolongation model
y_prolongation = df['Prolongation']
X_train_pr, X_test_pr, y_train_pr, y_test_pr = train_test_split(X, y_prolongation, test_size=0.4, random_state=42)
knn_prolongation = KNeighborsClassifier(n_neighbors=3)
knn_prolongation.fit(X_train_pr, y_train_pr)
with open('prolongation_model.pkl', 'wb') as f:
    pickle.dump(knn_prolongation, f)

print("Models saved successfully.")


Models saved successfully.


## Verify File Existence

In [71]:
import os

model_files = ['soundrep_model.pkl', 'wordrep_model.pkl', 'prolongation_model.pkl']

for model_file in model_files:
    if os.path.isfile(model_file):
        print(f"{model_file} found.")
    else:
        print(f"{model_file} not found.")


soundrep_model.pkl found.
wordrep_model.pkl found.
prolongation_model.pkl found.


## Load Models and Predict

In [99]:
import librosa
import numpy as np
import pickle
import os
from pydub import AudioSegment
import requests

def download_file(url, destination):
    response = requests.get(url)
    with open(destination, 'wb') as f:
        f.write(response.content)

def convert_to_wav(input_path, output_path):
    audio = AudioSegment.from_file(input_path)
    audio.export(output_path, format='wav')

def extract_mfcc(file_path, max_pad_len=130):
    audio, sample_rate = librosa.load(file_path, res_type='kaiser_fast', sr=None)
    mfccs = librosa.feature.mfcc(y=audio, sr=sample_rate, n_mfcc=13)

    # Padding or trimming to ensure consistent length
    if mfccs.shape[1] < max_pad_len:
        pad_width = max_pad_len - mfccs.shape[1]
        mfccs = np.pad(mfccs, pad_width=((0, 0), (0, pad_width)), mode='constant')
    else:
        mfccs = mfccs[:, :max_pad_len]

    return np.mean(mfccs.T, axis=0)

# Download MP3 file from URL
external_audio_url = 'https://firebasestorage.googleapis.com/v0/b/fluencyapp-684d1.appspot.com/o/voiceRecordings%2FmCMTtAF4KPN09QaPiY89ybpE3Vr1%2F11.mp3?alt=media&token=0144317f-34c1-4a9d-8a46-0685a64faa2c'
temp_mp3_path = 'temp_audio.mp3'
temp_wav_path = 'temp_audio.wav'

download_file(external_audio_url, temp_mp3_path)

# Convert downloaded MP3 to WAV
convert_to_wav(temp_mp3_path, temp_wav_path)

# Load the trained models
model_files = {
    'soundrep_model.pkl': 'soundrep_model',
    'wordrep_model.pkl': 'wordrep_model',
    'prolongation_model.pkl': 'prolongation_model'
}

models = {}

for file, name in model_files.items():
    try:
        with open(file, 'rb') as f:
            models[name] = pickle.load(f)
            print(f"{file} loaded successfully.")
    except FileNotFoundError:
        print(f"{file} not found.")

# Load the original DataFrame to get column names
df = pd.read_csv('./sep28k-mfcc.csv')
column_names = df.columns[-13:]

# Ensure the MFCC features are in the correct shape (1, 13) and convert to DataFrame
mfcc_features = extract_mfcc(temp_wav_path)
mfcc_features = mfcc_features.reshape(1, -1)
mfcc_df = pd.DataFrame(mfcc_features, columns=column_names)

# Predict stuttering types
soundrep_prediction = models['soundrep_model'].predict(mfcc_df) if 'soundrep_model' in models else [0]
wordrep_prediction = models['wordrep_model'].predict(mfcc_df) if 'wordrep_model' in models else [0]
prolongation_prediction = models['prolongation_model'].predict(mfcc_df) if 'prolongation_model' in models else [0]

# Determine if the person has stuttering and its type
stuttering_types = []
if soundrep_prediction[0] == 1:
    stuttering_types.append('Sound Repetition')
if wordrep_prediction[0] == 1:
    stuttering_types.append('Word Repetition')
if prolongation_prediction[0] == 1:
    stuttering_types.append('Prolongation')

if stuttering_types:
    print(f'The person has stuttering. Types: {", ".join(stuttering_types)}')
else:
    print('The person does not have stuttering.')

# Clean up temporary files
os.remove(temp_mp3_path)
os.remove(temp_wav_path)


soundrep_model.pkl loaded successfully.
wordrep_model.pkl loaded successfully.
prolongation_model.pkl loaded successfully.
The person has stuttering. Types: Sound Repetition, Word Repetition


In [100]:
#!pip install requests
