# __Project - Speech-to-Text & Accent Classification__

In [1]:
import pandas as pd
from pathlib import Path
import librosa
import numpy as np

from sklearn.preprocessing import StandardScaler
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import classification_report, accuracy_score

### __Dataframes & Cleaning__

In [2]:
validated = pd.read_csv('./cv-corpus-10.0-delta-2022-07-04/en/validated.tsv', delimiter='\t')
print(f'Rows: {validated.shape[0]}, Columns: {validated.shape[1]}')
#validated.head()

Rows: 9590, Columns: 10


In [3]:
# NaNs: age (2583), gender (2603), accents (1083)
val = validated.drop(labels=['segment'], axis=1)
print(f'num of NaN rows count: {val.isnull().any(axis=1).sum()}')

val_clean = val.dropna()
print(f'Rows: {val_clean.shape[0]}, Columns: {val_clean.shape[1]}')
val_clean.head()

num of NaN rows count: 3010
Rows: 6580, Columns: 9


Unnamed: 0,client_id,path,sentence,up_votes,down_votes,age,gender,accents,locale
4,06c53aec09bca54d5587147ee1bfcc0a9e2b4162fb75da...,common_voice_en_32825553.mp3,Finally he returns home to rebuild and rehabil...,2,0,fourties,male,Irish English,en
5,0716c31f93060f09df88184c88e347d6e191286ef6d3b6...,common_voice_en_32661886.mp3,It has also been identified in Soviet involvem...,2,0,fourties,male,England English,en
8,07f310f0cc039ba964ea9c79e060ca7dee1abf0483339d...,common_voice_en_32681548.mp3,Eleven of the losses were by six points are fe...,2,0,fourties,male,United States English,en
13,09f6eb49ac59d66919189af0aa0c173b8fbd9996c736a4...,common_voice_en_32269930.mp3,He was the son of Admiral Henrik Bielke.,4,0,fifties,male,England English,en
35,13846ce67d4be59aa0ed0d4afe1b1c28e708c61fc83c85...,common_voice_en_32813159.mp3,The monastery is constructed with stone and mu...,2,0,thirties,male,United States English,en


In [4]:
invalidated = pd.read_csv('./cv-corpus-10.0-delta-2022-07-04/en/invalidated.tsv', delimiter='\t')
print(f'Rows: {invalidated.shape[0]}, Columns: {invalidated.shape[1]}')
#invalidated.head()

Rows: 1529, Columns: 10


In [5]:
# NaNs: age (420), gender (422), accents (206)
inval_clean = invalidated.drop(labels=['segment'], axis=1)
print(f'num of NaN rows count: {inval_clean.isnull().any(axis=1).sum()}')

inval_clean.dropna(inplace=True)
print(f'Rows: {inval_clean.shape[0]}, Columns: {inval_clean.shape[1]}')
inval_clean.head()

num of NaN rows count: 475
Rows: 1054, Columns: 9


Unnamed: 0,client_id,path,sentence,up_votes,down_votes,age,gender,accents,locale
1,a96bca870789b6b102052c7fd5af48aecb4e67b3cf6fb7...,common_voice_en_31834425.mp3,The sculpture honors Jalisco women.,0,2,twenties,male,United States English,en
2,a96bca870789b6b102052c7fd5af48aecb4e67b3cf6fb7...,common_voice_en_31834540.mp3,The pictures had already been published before...,0,2,twenties,male,United States English,en
10,372293e65cdab88771e028a4351651ab2eff64438ddafc...,common_voice_en_31844342.mp3,It was by then owned by brewer Poul Alexander ...,0,4,fifties,male,"German English,Non native speaker",en
11,372293e65cdab88771e028a4351651ab2eff64438ddafc...,common_voice_en_31844404.mp3,The latter location got the name from the give...,2,2,fifties,male,"German English,Non native speaker",en
12,372293e65cdab88771e028a4351651ab2eff64438ddafc...,common_voice_en_31844442.mp3,More books together are in development.,0,2,fifties,male,"German English,Non native speaker",en


In [6]:
# InvaLidated: Use at end for outputs

In [7]:
other = pd.read_csv('./cv-corpus-10.0-delta-2022-07-04/en/other.tsv', delimiter='\t')
print(f'Rows: {other.shape[0]}, Columns: {other.shape[1]}')
#other.head()

Rows: 52820, Columns: 10


In [8]:
# age (26497), gender (26576), accents (23665)
other_clean = other.drop(labels=['segment'], axis=1)
print(f'num of NaN rows count: {other_clean.isnull().any(axis=1).sum()}')

other_clean.dropna(inplace=True)
print(f'Rows: {other_clean.shape[0]}, Columns: {other_clean.shape[1]}')
other_clean.head()

num of NaN rows count: 31458
Rows: 21362, Columns: 9


Unnamed: 0,client_id,path,sentence,up_votes,down_votes,age,gender,accents,locale
132,a2576ccf04ad8c0dbe2b29f3843c1ed984d8bd91480766...,common_voice_en_31833623.mp3,It contains panels of filigree and stamped sil...,1,0,fifties,male,Canadian English,en
133,a2576ccf04ad8c0dbe2b29f3843c1ed984d8bd91480766...,common_voice_en_31833624.mp3,Shulgin performed seminal work into the descri...,0,0,fifties,male,Canadian English,en
134,a2576ccf04ad8c0dbe2b29f3843c1ed984d8bd91480766...,common_voice_en_31833625.mp3,Fetter was born to Frank Whitson Fetter and El...,1,0,fifties,male,Canadian English,en
135,a2576ccf04ad8c0dbe2b29f3843c1ed984d8bd91480766...,common_voice_en_31833626.mp3,It was the thief.,1,0,fifties,male,Canadian English,en
136,a2576ccf04ad8c0dbe2b29f3843c1ed984d8bd91480766...,common_voice_en_31833628.mp3,At the bottom of this base is an oak sprig.,1,0,fifties,male,Canadian English,en


In [9]:
# Same as invalidated

In [10]:
reported = pd.read_csv('./cv-corpus-10.0-delta-2022-07-04/en/reported.tsv', delimiter='\t')
print(f'Rows: {reported.shape[0]}, Columns: {reported.shape[1]}')
#reported.head()

Rows: 4137, Columns: 4


In [11]:
# sentence_id (29), locale (29), reason (43)
print(f'num of NaN rows count: {reported.isnull().any(axis=1).sum()}')

rep_clean = reported.dropna()
print(f'Rows: {rep_clean.shape[0]}, Columns: {rep_clean.shape[1]}')
rep_clean.head()

num of NaN rows count: 43
Rows: 4094, Columns: 4


Unnamed: 0,sentence,sentence_id,locale,reason
0,It should not be confused with the Novodevichy...,0e047d99c6c75b076aa34cdd37238670b0a9340fb78737...,en,grammar-or-spelling
1,It should not be confused with the Novodevichy...,0e047d99c6c75b076aa34cdd37238670b0a9340fb78737...,en,offensive-language
2,It should not be confused with the Novodevichy...,0e047d99c6c75b076aa34cdd37238670b0a9340fb78737...,en,different-language
3,It should not be confused with the Novodevichy...,0e047d99c6c75b076aa34cdd37238670b0a9340fb78737...,en,difficult-pronounce
4,It should not be confused with the Novodevichy...,0e047d99c6c75b076aa34cdd37238670b0a9340fb78737...,en,hh


In [12]:
# dunno what to do with this one honestly

In [13]:
dev = pd.read_csv('./cv-corpus-10.0-delta-2022-07-04/en/dev.tsv', delimiter='\t')
print(f'Rows: {dev.shape[0]}, Columns: {dev.shape[1]}')
#dev.head()

Rows: 2898, Columns: 10


In [14]:
# age (378), gender (430), accents (212)
dev_clean = dev.drop(labels=['segment'], axis=1)
print(f'num of NaN rows count: {dev_clean.isnull().any(axis=1).sum()}')

dev_clean.dropna(inplace=True)
print(f'Rows: {dev_clean.shape[0]}, Columns: {dev_clean.shape[1]}')
dev_clean.head()

num of NaN rows count: 575
Rows: 2323, Columns: 9


Unnamed: 0,client_id,path,sentence,up_votes,down_votes,age,gender,accents,locale
4,5506a0ac6b862d5ba2cb1b1f0bb4a388444ef7b1d87221...,common_voice_en_32569347.mp3,He wrote many books inspired by his life.,4,2,twenties,male,East African Khoja,en
5,5506a0ac6b862d5ba2cb1b1f0bb4a388444ef7b1d87221...,common_voice_en_32569370.mp3,He was admired in Turkey for his opposition to...,2,0,twenties,male,East African Khoja,en
6,5506a0ac6b862d5ba2cb1b1f0bb4a388444ef7b1d87221...,common_voice_en_32569385.mp3,One of the most renowned Latgalian ceramicists.,2,0,twenties,male,East African Khoja,en
7,5506a0ac6b862d5ba2cb1b1f0bb4a388444ef7b1d87221...,common_voice_en_32569427.mp3,A ring of ten tubular bells are hung in the to...,2,0,twenties,male,East African Khoja,en
8,5506a0ac6b862d5ba2cb1b1f0bb4a388444ef7b1d87221...,common_voice_en_32569430.mp3,He taught at Vanderbilt University for many ye...,10,0,twenties,male,East African Khoja,en


In [15]:
train = pd.read_csv('./cv-corpus-10.0-delta-2022-07-04/en/train.tsv', delimiter='\t')
print(f'Rows: {train.shape[0]}, Columns: {train.shape[1]}')
#train.head()

Rows: 3722, Columns: 10


In [16]:
# age (951), gender (951)
train_clean = train.drop(labels=['segment'], axis=1)
print(f'num of NaN rows count: {train_clean.isnull().any(axis=1).sum()}')

train_clean.dropna(inplace=True)
print(f'Rows: {train_clean.shape[0]}, Columns: {train_clean.shape[1]}')
train_clean.head()

num of NaN rows count: 951
Rows: 2771, Columns: 9


Unnamed: 0,client_id,path,sentence,up_votes,down_votes,age,gender,accents,locale
28,1744cff9d33b70420c7e5517881e81ec4bab1ac475e3b8...,common_voice_en_32233439.mp3,All tracks are produced by Dan Auerbach.,2,0,twenties,male,United States English,en
29,1744cff9d33b70420c7e5517881e81ec4bab1ac475e3b8...,common_voice_en_32233441.mp3,Goswami hails from Bandel in Hooghly district.,2,0,twenties,male,United States English,en
30,1744cff9d33b70420c7e5517881e81ec4bab1ac475e3b8...,common_voice_en_32233442.mp3,The competition was the first to feature separ...,2,0,twenties,male,United States English,en
31,1744cff9d33b70420c7e5517881e81ec4bab1ac475e3b8...,common_voice_en_32233444.mp3,Sources differ on the total number of floors.,2,0,twenties,male,United States English,en
32,1744cff9d33b70420c7e5517881e81ec4bab1ac475e3b8...,common_voice_en_32233472.mp3,He was raised in New Jersey and attended Juill...,2,0,twenties,male,United States English,en


In [17]:
test = pd.read_csv('./cv-corpus-10.0-delta-2022-07-04/en/test.tsv', delimiter='\t')
print(f'Rows: {test.shape[0]}, Columns: {test.shape[1]}')
#test.head()

Rows: 2945, Columns: 10


In [18]:
# age (1249), gender (1216), accents (870)
test_clean = test.drop(labels=['segment'], axis=1)
print(f'num of NaN rows count: {test_clean.isnull().any(axis=1).sum()}')

test_clean.dropna(inplace=True)
print(f'Rows: {test_clean.shape[0]}, Columns: {test_clean.shape[1]}')
test_clean.head()

# print(test['segment'].isnull().sum())

num of NaN rows count: 1478
Rows: 1467, Columns: 9


Unnamed: 0,client_id,path,sentence,up_votes,down_votes,age,gender,accents,locale
4,06c53aec09bca54d5587147ee1bfcc0a9e2b4162fb75da...,common_voice_en_32825553.mp3,Finally he returns home to rebuild and rehabil...,2,0,fourties,male,Irish English,en
5,0716c31f93060f09df88184c88e347d6e191286ef6d3b6...,common_voice_en_32661886.mp3,It has also been identified in Soviet involvem...,2,0,fourties,male,England English,en
8,07f310f0cc039ba964ea9c79e060ca7dee1abf0483339d...,common_voice_en_32681548.mp3,Eleven of the losses were by six points are fe...,2,0,fourties,male,United States English,en
13,09f6eb49ac59d66919189af0aa0c173b8fbd9996c736a4...,common_voice_en_32269930.mp3,He was the son of Admiral Henrik Bielke.,4,0,fifties,male,England English,en
35,13846ce67d4be59aa0ed0d4afe1b1c28e708c61fc83c85...,common_voice_en_32813159.mp3,The monastery is constructed with stone and mu...,2,0,thirties,male,United States English,en


### __Feature Engineering__

In [19]:
# Validated: Use just client_id and path mapping (split into 2 dfs with all of the non-NaN clips and NaN clips)
# Step 1: Create two DataFrames: one with NaNs, one without
df_with_nans = val[val.isna().any(axis=1)]
df_without_nans = val[val.notna().all(axis=1)]

# Step 2: Reduce to `client_id` and `path` columns
val_df_with_nans = df_with_nans[['client_id', 'path']]
val_df_without_nans = df_without_nans[['client_id', 'path']]

In [20]:
val_df_with_nans

Unnamed: 0,client_id,path
0,00d443b51ccb99c6c13ca3427678080ece4fc5aefe9cb6...,common_voice_en_32371106.mp3
1,020f00302b130a7f0094fcf65c580e502b2584e68b0bb5...,common_voice_en_32646599.mp3
2,024294f8be5085910eb128966779b03f2ea3669ed55bf5...,common_voice_en_32703291.mp3
3,05bf98ddeee61ab852955ee2bae7296140b7f7da622b66...,common_voice_en_32542436.mp3
6,07babef9cc79f2c23190f9824e59e0e770ee5a5cde8b84...,common_voice_en_32289174.mp3
...,...,...
8473,b5c9bb77e34f76e3130ba662c4ca78975345e97102b27b...,common_voice_en_31865209.mp3
8474,b5c9bb77e34f76e3130ba662c4ca78975345e97102b27b...,common_voice_en_31865211.mp3
8475,b5c9bb77e34f76e3130ba662c4ca78975345e97102b27b...,common_voice_en_31865212.mp3
8476,b5c9bb77e34f76e3130ba662c4ca78975345e97102b27b...,common_voice_en_31865213.mp3


In [21]:
val_df_without_nans

Unnamed: 0,client_id,path
4,06c53aec09bca54d5587147ee1bfcc0a9e2b4162fb75da...,common_voice_en_32825553.mp3
5,0716c31f93060f09df88184c88e347d6e191286ef6d3b6...,common_voice_en_32661886.mp3
8,07f310f0cc039ba964ea9c79e060ca7dee1abf0483339d...,common_voice_en_32681548.mp3
13,09f6eb49ac59d66919189af0aa0c173b8fbd9996c736a4...,common_voice_en_32269930.mp3
35,13846ce67d4be59aa0ed0d4afe1b1c28e708c61fc83c85...,common_voice_en_32813159.mp3
...,...,...
9585,b5c9bb77e34f76e3130ba662c4ca78975345e97102b27b...,common_voice_en_32071341.mp3
9586,b5c9bb77e34f76e3130ba662c4ca78975345e97102b27b...,common_voice_en_32071342.mp3
9587,b5c9bb77e34f76e3130ba662c4ca78975345e97102b27b...,common_voice_en_32071343.mp3
9588,b5c9bb77e34f76e3130ba662c4ca78975345e97102b27b...,common_voice_en_32071344.mp3


In [22]:
# Combine dev and train
clean_dev_train = pd.concat([train_clean, dev_clean], ignore_index=True)
clean_dev_train

Unnamed: 0,client_id,path,sentence,up_votes,down_votes,age,gender,accents,locale
0,1744cff9d33b70420c7e5517881e81ec4bab1ac475e3b8...,common_voice_en_32233439.mp3,All tracks are produced by Dan Auerbach.,2,0,twenties,male,United States English,en
1,1744cff9d33b70420c7e5517881e81ec4bab1ac475e3b8...,common_voice_en_32233441.mp3,Goswami hails from Bandel in Hooghly district.,2,0,twenties,male,United States English,en
2,1744cff9d33b70420c7e5517881e81ec4bab1ac475e3b8...,common_voice_en_32233442.mp3,The competition was the first to feature separ...,2,0,twenties,male,United States English,en
3,1744cff9d33b70420c7e5517881e81ec4bab1ac475e3b8...,common_voice_en_32233444.mp3,Sources differ on the total number of floors.,2,0,twenties,male,United States English,en
4,1744cff9d33b70420c7e5517881e81ec4bab1ac475e3b8...,common_voice_en_32233472.mp3,He was raised in New Jersey and attended Juill...,2,0,twenties,male,United States English,en
...,...,...,...,...,...,...,...,...,...
5089,6fe2ee910553691f58721e526b2c255075a174ef8ce2ab...,common_voice_en_32427644.mp3,The Brellochs method uses formaldehyde to inse...,2,0,twenties,male,United States English,en
5090,6fe2ee910553691f58721e526b2c255075a174ef8ce2ab...,common_voice_en_32427649.mp3,Some historians such as have interpreted it as...,2,0,twenties,male,United States English,en
5091,6fe2ee910553691f58721e526b2c255075a174ef8ce2ab...,common_voice_en_32427653.mp3,A nearby house was damaged.,2,0,twenties,male,United States English,en
5092,6fe2ee910553691f58721e526b2c255075a174ef8ce2ab...,common_voice_en_32427654.mp3,After passing through several owners it became...,2,0,twenties,male,United States English,en


In [23]:
# Combine sentence/accents for targets -> NEVERMIND, TREAT THEM AS SEPARATE TARGETS

In [24]:
# up_votes - down_votes
test_clean_votes = test_clean.copy()
test_clean_votes['votes_diff'] = test_clean_votes['up_votes'] - test_clean_votes['down_votes']
test_clean_votes = test_clean_votes.drop(columns=['up_votes', 'down_votes', 'locale'])
test_clean_votes

Unnamed: 0,client_id,path,sentence,age,gender,accents,votes_diff
4,06c53aec09bca54d5587147ee1bfcc0a9e2b4162fb75da...,common_voice_en_32825553.mp3,Finally he returns home to rebuild and rehabil...,fourties,male,Irish English,2
5,0716c31f93060f09df88184c88e347d6e191286ef6d3b6...,common_voice_en_32661886.mp3,It has also been identified in Soviet involvem...,fourties,male,England English,2
8,07f310f0cc039ba964ea9c79e060ca7dee1abf0483339d...,common_voice_en_32681548.mp3,Eleven of the losses were by six points are fe...,fourties,male,United States English,2
13,09f6eb49ac59d66919189af0aa0c173b8fbd9996c736a4...,common_voice_en_32269930.mp3,He was the son of Admiral Henrik Bielke.,fifties,male,England English,4
35,13846ce67d4be59aa0ed0d4afe1b1c28e708c61fc83c85...,common_voice_en_32813159.mp3,The monastery is constructed with stone and mu...,thirties,male,United States English,2
...,...,...,...,...,...,...,...
2911,6eadb22dab2ab811ab54ba036b25da8bc184677b2695ef...,common_voice_en_31896028.mp3,See references in table above.,twenties,male,"India and South Asia (India, Pakistan, Sri Lanka)",2
2912,6eadb22dab2ab811ab54ba036b25da8bc184677b2695ef...,common_voice_en_31896029.mp3,These routes were to take residents to the new...,twenties,male,"India and South Asia (India, Pakistan, Sri Lanka)",2
2913,6eadb22dab2ab811ab54ba036b25da8bc184677b2695ef...,common_voice_en_31896071.mp3,Trace activity of herbivorous invertebrates al...,twenties,male,"India and South Asia (India, Pakistan, Sri Lanka)",4
2914,6eadb22dab2ab811ab54ba036b25da8bc184677b2695ef...,common_voice_en_31896073.mp3,Vigils and protests of thousands of participan...,twenties,male,"India and South Asia (India, Pakistan, Sri Lanka)",4


In [25]:
clean_dev_train_votes = clean_dev_train.copy()
clean_dev_train_votes['votes_diff'] = clean_dev_train_votes['up_votes'] - clean_dev_train_votes['down_votes']
clean_dev_train_votes = clean_dev_train_votes.drop(columns=['up_votes', 'down_votes', 'locale'])
clean_dev_train_votes

Unnamed: 0,client_id,path,sentence,age,gender,accents,votes_diff
0,1744cff9d33b70420c7e5517881e81ec4bab1ac475e3b8...,common_voice_en_32233439.mp3,All tracks are produced by Dan Auerbach.,twenties,male,United States English,2
1,1744cff9d33b70420c7e5517881e81ec4bab1ac475e3b8...,common_voice_en_32233441.mp3,Goswami hails from Bandel in Hooghly district.,twenties,male,United States English,2
2,1744cff9d33b70420c7e5517881e81ec4bab1ac475e3b8...,common_voice_en_32233442.mp3,The competition was the first to feature separ...,twenties,male,United States English,2
3,1744cff9d33b70420c7e5517881e81ec4bab1ac475e3b8...,common_voice_en_32233444.mp3,Sources differ on the total number of floors.,twenties,male,United States English,2
4,1744cff9d33b70420c7e5517881e81ec4bab1ac475e3b8...,common_voice_en_32233472.mp3,He was raised in New Jersey and attended Juill...,twenties,male,United States English,2
...,...,...,...,...,...,...,...
5089,6fe2ee910553691f58721e526b2c255075a174ef8ce2ab...,common_voice_en_32427644.mp3,The Brellochs method uses formaldehyde to inse...,twenties,male,United States English,2
5090,6fe2ee910553691f58721e526b2c255075a174ef8ce2ab...,common_voice_en_32427649.mp3,Some historians such as have interpreted it as...,twenties,male,United States English,2
5091,6fe2ee910553691f58721e526b2c255075a174ef8ce2ab...,common_voice_en_32427653.mp3,A nearby house was damaged.,twenties,male,United States English,2
5092,6fe2ee910553691f58721e526b2c255075a174ef8ce2ab...,common_voice_en_32427654.mp3,After passing through several owners it became...,twenties,male,United States English,2


In [26]:
# either combine locale with accents or remove locale entirely -> all of locale was en, so removed.
# Now we check accents/sentences overlap between the test and train sets.
unique_test = test_clean_votes['accents'].unique()
unique_train_dev = clean_dev_train_votes['accents'].unique()

overlap = len(set(unique_test) & set(unique_train_dev)) / len(set(unique_test)) * 100
print(f"Percentage of overlap between test and train+dev: {overlap:.2f}%")

Percentage of overlap between test and train+dev: 11.11%


In [27]:
# drop different accents from test???

# Get unique accents in train+dev
# unique_train_dev_accents = set(clean_dev_train_votes['accents'].unique())

# # Filter test_clean_votes to only include rows with accents in train+dev
# test_clean_votes = test_clean_votes[test_clean_votes['accents'].isin(unique_train_dev_accents)]

# # Check the filtered DataFrame
# print(f"Filtered test set size: Rows: {test_clean_votes.shape[0]}, Columns: {test_clean_votes.shape[1]}")

In [28]:
# make gender 0/1
test_clean_gen_enc = test_clean_votes.copy()
test_clean_gen_enc = test_clean_gen_enc[test_clean_gen_enc['gender'] != 'other']
test_clean_gen_enc['gen_enc'] = test_clean_gen_enc['gender'].replace({'male': 1, 'female': 0})
test_clean_gen_enc = test_clean_gen_enc.drop(columns='gender')

clean_dev_train_gen_enc = clean_dev_train_votes.copy()
clean_dev_train_gen_enc = clean_dev_train_gen_enc[clean_dev_train_gen_enc['gender'] != 'other']
clean_dev_train_gen_enc['gen_enc'] = clean_dev_train_gen_enc['gender'].replace({'male': 1, 'female': 0})
clean_dev_train_gen_enc = clean_dev_train_gen_enc.drop(columns='gender')

  test_clean_gen_enc['gen_enc'] = test_clean_gen_enc['gender'].replace({'male': 1, 'female': 0})
  clean_dev_train_gen_enc['gen_enc'] = clean_dev_train_gen_enc['gender'].replace({'male': 1, 'female': 0})


In [29]:
clean_dev_train_gen_enc

Unnamed: 0,client_id,path,sentence,age,accents,votes_diff,gen_enc
0,1744cff9d33b70420c7e5517881e81ec4bab1ac475e3b8...,common_voice_en_32233439.mp3,All tracks are produced by Dan Auerbach.,twenties,United States English,2,1
1,1744cff9d33b70420c7e5517881e81ec4bab1ac475e3b8...,common_voice_en_32233441.mp3,Goswami hails from Bandel in Hooghly district.,twenties,United States English,2,1
2,1744cff9d33b70420c7e5517881e81ec4bab1ac475e3b8...,common_voice_en_32233442.mp3,The competition was the first to feature separ...,twenties,United States English,2,1
3,1744cff9d33b70420c7e5517881e81ec4bab1ac475e3b8...,common_voice_en_32233444.mp3,Sources differ on the total number of floors.,twenties,United States English,2,1
4,1744cff9d33b70420c7e5517881e81ec4bab1ac475e3b8...,common_voice_en_32233472.mp3,He was raised in New Jersey and attended Juill...,twenties,United States English,2,1
...,...,...,...,...,...,...,...
5089,6fe2ee910553691f58721e526b2c255075a174ef8ce2ab...,common_voice_en_32427644.mp3,The Brellochs method uses formaldehyde to inse...,twenties,United States English,2,1
5090,6fe2ee910553691f58721e526b2c255075a174ef8ce2ab...,common_voice_en_32427649.mp3,Some historians such as have interpreted it as...,twenties,United States English,2,1
5091,6fe2ee910553691f58721e526b2c255075a174ef8ce2ab...,common_voice_en_32427653.mp3,A nearby house was damaged.,twenties,United States English,2,1
5092,6fe2ee910553691f58721e526b2c255075a174ef8ce2ab...,common_voice_en_32427654.mp3,After passing through several owners it became...,twenties,United States English,2,1


In [30]:
test_clean_gen_enc

Unnamed: 0,client_id,path,sentence,age,accents,votes_diff,gen_enc
4,06c53aec09bca54d5587147ee1bfcc0a9e2b4162fb75da...,common_voice_en_32825553.mp3,Finally he returns home to rebuild and rehabil...,fourties,Irish English,2,1
5,0716c31f93060f09df88184c88e347d6e191286ef6d3b6...,common_voice_en_32661886.mp3,It has also been identified in Soviet involvem...,fourties,England English,2,1
8,07f310f0cc039ba964ea9c79e060ca7dee1abf0483339d...,common_voice_en_32681548.mp3,Eleven of the losses were by six points are fe...,fourties,United States English,2,1
13,09f6eb49ac59d66919189af0aa0c173b8fbd9996c736a4...,common_voice_en_32269930.mp3,He was the son of Admiral Henrik Bielke.,fifties,England English,4,1
35,13846ce67d4be59aa0ed0d4afe1b1c28e708c61fc83c85...,common_voice_en_32813159.mp3,The monastery is constructed with stone and mu...,thirties,United States English,2,1
...,...,...,...,...,...,...,...
2911,6eadb22dab2ab811ab54ba036b25da8bc184677b2695ef...,common_voice_en_31896028.mp3,See references in table above.,twenties,"India and South Asia (India, Pakistan, Sri Lanka)",2,1
2912,6eadb22dab2ab811ab54ba036b25da8bc184677b2695ef...,common_voice_en_31896029.mp3,These routes were to take residents to the new...,twenties,"India and South Asia (India, Pakistan, Sri Lanka)",2,1
2913,6eadb22dab2ab811ab54ba036b25da8bc184677b2695ef...,common_voice_en_31896071.mp3,Trace activity of herbivorous invertebrates al...,twenties,"India and South Asia (India, Pakistan, Sri Lanka)",4,1
2914,6eadb22dab2ab811ab54ba036b25da8bc184677b2695ef...,common_voice_en_31896073.mp3,Vigils and protests of thousands of participan...,twenties,"India and South Asia (India, Pakistan, Sri Lanka)",4,1


In [31]:
# make age encoded
unique_test = test_clean_gen_enc['age'].unique()
unique_train_dev = clean_dev_train_gen_enc['age'].unique()

print(unique_train_dev)
print(unique_test)

['twenties' 'thirties' 'fifties' 'teens' 'sixties' 'fourties']
['fourties' 'fifties' 'thirties' 'teens' 'twenties' 'seventies' 'sixties'
 'eighties']


In [32]:
from sklearn.preprocessing import LabelEncoder
test_clean_age_enc = test_clean_gen_enc.copy()
clean_dev_train_age_enc = clean_dev_train_gen_enc.copy()

# Fit on the train set and transform both DataFrames
encoder = LabelEncoder()
encoder.fit(test_clean_age_enc['age'])

clean_dev_train_age_enc['age_enc'] = encoder.transform(clean_dev_train_age_enc['age'])
test_clean_age_enc['age_enc'] = encoder.transform(test_clean_age_enc['age'])
clean_dev_train_age_enc = clean_dev_train_age_enc.drop(columns='age')
test_clean_age_enc = test_clean_age_enc.drop(columns='age')

In [33]:
clean_dev_train_age_enc

Unnamed: 0,client_id,path,sentence,accents,votes_diff,gen_enc,age_enc
0,1744cff9d33b70420c7e5517881e81ec4bab1ac475e3b8...,common_voice_en_32233439.mp3,All tracks are produced by Dan Auerbach.,United States English,2,1,7
1,1744cff9d33b70420c7e5517881e81ec4bab1ac475e3b8...,common_voice_en_32233441.mp3,Goswami hails from Bandel in Hooghly district.,United States English,2,1,7
2,1744cff9d33b70420c7e5517881e81ec4bab1ac475e3b8...,common_voice_en_32233442.mp3,The competition was the first to feature separ...,United States English,2,1,7
3,1744cff9d33b70420c7e5517881e81ec4bab1ac475e3b8...,common_voice_en_32233444.mp3,Sources differ on the total number of floors.,United States English,2,1,7
4,1744cff9d33b70420c7e5517881e81ec4bab1ac475e3b8...,common_voice_en_32233472.mp3,He was raised in New Jersey and attended Juill...,United States English,2,1,7
...,...,...,...,...,...,...,...
5089,6fe2ee910553691f58721e526b2c255075a174ef8ce2ab...,common_voice_en_32427644.mp3,The Brellochs method uses formaldehyde to inse...,United States English,2,1,7
5090,6fe2ee910553691f58721e526b2c255075a174ef8ce2ab...,common_voice_en_32427649.mp3,Some historians such as have interpreted it as...,United States English,2,1,7
5091,6fe2ee910553691f58721e526b2c255075a174ef8ce2ab...,common_voice_en_32427653.mp3,A nearby house was damaged.,United States English,2,1,7
5092,6fe2ee910553691f58721e526b2c255075a174ef8ce2ab...,common_voice_en_32427654.mp3,After passing through several owners it became...,United States English,2,1,7


In [34]:
test_clean_age_enc

Unnamed: 0,client_id,path,sentence,accents,votes_diff,gen_enc,age_enc
4,06c53aec09bca54d5587147ee1bfcc0a9e2b4162fb75da...,common_voice_en_32825553.mp3,Finally he returns home to rebuild and rehabil...,Irish English,2,1,2
5,0716c31f93060f09df88184c88e347d6e191286ef6d3b6...,common_voice_en_32661886.mp3,It has also been identified in Soviet involvem...,England English,2,1,2
8,07f310f0cc039ba964ea9c79e060ca7dee1abf0483339d...,common_voice_en_32681548.mp3,Eleven of the losses were by six points are fe...,United States English,2,1,2
13,09f6eb49ac59d66919189af0aa0c173b8fbd9996c736a4...,common_voice_en_32269930.mp3,He was the son of Admiral Henrik Bielke.,England English,4,1,1
35,13846ce67d4be59aa0ed0d4afe1b1c28e708c61fc83c85...,common_voice_en_32813159.mp3,The monastery is constructed with stone and mu...,United States English,2,1,6
...,...,...,...,...,...,...,...
2911,6eadb22dab2ab811ab54ba036b25da8bc184677b2695ef...,common_voice_en_31896028.mp3,See references in table above.,"India and South Asia (India, Pakistan, Sri Lanka)",2,1,7
2912,6eadb22dab2ab811ab54ba036b25da8bc184677b2695ef...,common_voice_en_31896029.mp3,These routes were to take residents to the new...,"India and South Asia (India, Pakistan, Sri Lanka)",2,1,7
2913,6eadb22dab2ab811ab54ba036b25da8bc184677b2695ef...,common_voice_en_31896071.mp3,Trace activity of herbivorous invertebrates al...,"India and South Asia (India, Pakistan, Sri Lanka)",4,1,7
2914,6eadb22dab2ab811ab54ba036b25da8bc184677b2695ef...,common_voice_en_31896073.mp3,Vigils and protests of thousands of participan...,"India and South Asia (India, Pakistan, Sri Lanka)",4,1,7


In [35]:
# Paths
audio_dir = Path('./cv-corpus-10.0-delta-2022-07-04/en/clips')

def process_audio(mp3_path):
    try:
        # Define file path
        mp3_file = audio_dir / mp3_path

        # Load the audio file using Librosa
        audio_data, sample_rate = librosa.load(mp3_file, sr=None)  # sr=None preserves the original sample rate

        print(f"Processed {mp3_file}: {audio_data.shape}, Sample rate: {sample_rate}")
        return audio_data, sample_rate
    except Exception as e:
        print(f"Error processing {mp3_path}: {e}")
        return None, None

test_path = audio_dir / "common_voice_en_32233439.mp3"
print(f"Testing WAV file: {test_path}")

try:
    # Load the WAV file using Librosa instead of torchaudio
    audio_data, sample_rate = librosa.load(test_path, sr=None)
    print(f"Loaded successfully: {audio_data.shape}, Sample rate: {sample_rate}")
except Exception as e:
    print(f"Error loading WAV: {e}")

Testing WAV file: cv-corpus-10.0-delta-2022-07-04\en\clips\common_voice_en_32233439.mp3
Loaded successfully: (127872,), Sample rate: 32000


In [36]:
# Copy the original DataFrames
clean_dev_train_audio = clean_dev_train_age_enc.copy()
test_clean_audio = test_clean_age_enc.copy()

# Load with Librosa
def process_audio(mp3_path):
    try:
        # Define file path
        mp3_file = audio_dir / mp3_path

        # Load the audio file using Librosa
        audio_data, sample_rate = librosa.load(mp3_file, sr=None)  # sr=None preserves the original sample rate

        print(f"Processed {mp3_file}: {audio_data.shape}, Sample rate: {sample_rate}")
        return audio_data, sample_rate
    except Exception as e:
        print(f"Error processing {mp3_path}: {e}")
        return None, None

# Apply the function to the DataFrame columns
clean_dev_train_audio['audio_data'], clean_dev_train_audio['sample_rate'] = zip(*clean_dev_train_audio['path'].apply(process_audio))
test_clean_audio['audio_data'], test_clean_audio['sample_rate'] = zip(*test_clean_audio['path'].apply(process_audio))

Processed cv-corpus-10.0-delta-2022-07-04\en\clips\common_voice_en_32233439.mp3: (127872,), Sample rate: 32000
Processed cv-corpus-10.0-delta-2022-07-04\en\clips\common_voice_en_32233441.mp3: (213120,), Sample rate: 32000
Processed cv-corpus-10.0-delta-2022-07-04\en\clips\common_voice_en_32233442.mp3: (213120,), Sample rate: 32000
Processed cv-corpus-10.0-delta-2022-07-04\en\clips\common_voice_en_32233444.mp3: (139392,), Sample rate: 32000
Processed cv-corpus-10.0-delta-2022-07-04\en\clips\common_voice_en_32233472.mp3: (187776,), Sample rate: 32000
Processed cv-corpus-10.0-delta-2022-07-04\en\clips\common_voice_en_32233520.mp3: (172800,), Sample rate: 32000
Processed cv-corpus-10.0-delta-2022-07-04\en\clips\common_voice_en_32233561.mp3: (279936,), Sample rate: 32000
Processed cv-corpus-10.0-delta-2022-07-04\en\clips\common_voice_en_32233585.mp3: (173952,), Sample rate: 32000
Processed cv-corpus-10.0-delta-2022-07-04\en\clips\common_voice_en_32233587.mp3: (149760,), Sample rate: 32000
P

In [37]:
clean_dev_train_audio

Unnamed: 0,client_id,path,sentence,accents,votes_diff,gen_enc,age_enc,audio_data,sample_rate
0,1744cff9d33b70420c7e5517881e81ec4bab1ac475e3b8...,common_voice_en_32233439.mp3,All tracks are produced by Dan Auerbach.,United States English,2,1,7,"[0.0, -8.076328e-13, -1.883682e-13, 7.001708e-...",32000
1,1744cff9d33b70420c7e5517881e81ec4bab1ac475e3b8...,common_voice_en_32233441.mp3,Goswami hails from Bandel in Hooghly district.,United States English,2,1,7,"[0.0, -8.569839e-16, -2.2644301e-14, -1.131620...",32000
2,1744cff9d33b70420c7e5517881e81ec4bab1ac475e3b8...,common_voice_en_32233442.mp3,The competition was the first to feature separ...,United States English,2,1,7,"[0.0, 2.0893694e-15, 9.79272e-14, 4.9259255e-1...",32000
3,1744cff9d33b70420c7e5517881e81ec4bab1ac475e3b8...,common_voice_en_32233444.mp3,Sources differ on the total number of floors.,United States English,2,1,7,"[0.0, -9.379639e-14, -1.1176625e-13, -1.194842...",32000
4,1744cff9d33b70420c7e5517881e81ec4bab1ac475e3b8...,common_voice_en_32233472.mp3,He was raised in New Jersey and attended Juill...,United States English,2,1,7,"[0.0, -2.3281524e-14, -1.2230301e-14, 1.657632...",32000
...,...,...,...,...,...,...,...,...,...
5089,6fe2ee910553691f58721e526b2c255075a174ef8ce2ab...,common_voice_en_32427644.mp3,The Brellochs method uses formaldehyde to inse...,United States English,2,1,7,"[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...",32000
5090,6fe2ee910553691f58721e526b2c255075a174ef8ce2ab...,common_voice_en_32427649.mp3,Some historians such as have interpreted it as...,United States English,2,1,7,"[0.0, -4.9501098e-12, -4.116861e-12, -2.71275e...",32000
5091,6fe2ee910553691f58721e526b2c255075a174ef8ce2ab...,common_voice_en_32427653.mp3,A nearby house was damaged.,United States English,2,1,7,"[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...",32000
5092,6fe2ee910553691f58721e526b2c255075a174ef8ce2ab...,common_voice_en_32427654.mp3,After passing through several owners it became...,United States English,2,1,7,"[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...",32000


In [38]:
test_clean_audio

Unnamed: 0,client_id,path,sentence,accents,votes_diff,gen_enc,age_enc,audio_data,sample_rate
4,06c53aec09bca54d5587147ee1bfcc0a9e2b4162fb75da...,common_voice_en_32825553.mp3,Finally he returns home to rebuild and rehabil...,Irish English,2,1,2,"[0.0, 1.6935916e-12, -1.0817007e-12, -2.155534...",32000
5,0716c31f93060f09df88184c88e347d6e191286ef6d3b6...,common_voice_en_32661886.mp3,It has also been identified in Soviet involvem...,England English,2,1,2,"[0.0, 7.70718e-13, 6.5683266e-13, 1.3705617e-1...",32000
8,07f310f0cc039ba964ea9c79e060ca7dee1abf0483339d...,common_voice_en_32681548.mp3,Eleven of the losses were by six points are fe...,United States English,2,1,2,"[0.0, -6.7051763e-12, -8.250374e-12, -6.525591...",32000
13,09f6eb49ac59d66919189af0aa0c173b8fbd9996c736a4...,common_voice_en_32269930.mp3,He was the son of Admiral Henrik Bielke.,England English,4,1,1,"[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...",32000
35,13846ce67d4be59aa0ed0d4afe1b1c28e708c61fc83c85...,common_voice_en_32813159.mp3,The monastery is constructed with stone and mu...,United States English,2,1,6,"[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...",32000
...,...,...,...,...,...,...,...,...,...
2911,6eadb22dab2ab811ab54ba036b25da8bc184677b2695ef...,common_voice_en_31896028.mp3,See references in table above.,"India and South Asia (India, Pakistan, Sri Lanka)",2,1,7,"[0.0, -8.412998e-13, -3.4097654e-13, 4.170608e...",32000
2912,6eadb22dab2ab811ab54ba036b25da8bc184677b2695ef...,common_voice_en_31896029.mp3,These routes were to take residents to the new...,"India and South Asia (India, Pakistan, Sri Lanka)",2,1,7,"[0.0, -5.356376e-14, -3.4476545e-15, 9.459626e...",32000
2913,6eadb22dab2ab811ab54ba036b25da8bc184677b2695ef...,common_voice_en_31896071.mp3,Trace activity of herbivorous invertebrates al...,"India and South Asia (India, Pakistan, Sri Lanka)",4,1,7,"[0.0, -4.590405e-14, -8.9902905e-14, 1.3634872...",32000
2914,6eadb22dab2ab811ab54ba036b25da8bc184677b2695ef...,common_voice_en_31896073.mp3,Vigils and protests of thousands of participan...,"India and South Asia (India, Pakistan, Sri Lanka)",4,1,7,"[0.0, -5.4108736e-15, 9.850256e-15, 1.2000195e...",32000


### __Modeling__

In [39]:
# Multiclass Classification (gender: 0, 1, 2)
def extract_features(audio_data, sample_rate):
    # Extract MFCCs
    mfccs = librosa.feature.mfcc(y=audio_data, sr=sample_rate, n_mfcc=13)
    mfccs_mean = np.mean(mfccs.T, axis=0)

    # Extract Spectral Centroid
    spec_centroid = librosa.feature.spectral_centroid(y=audio_data, sr=sample_rate)
    spec_centroid_mean = np.mean(spec_centroid.T, axis=0)

    # Extract Zero Crossing Rate
    zcr = librosa.feature.zero_crossing_rate(y=audio_data)
    zcr_mean = np.mean(zcr.T, axis=0)

    # Concatenate all features
    features = np.hstack([mfccs_mean, spec_centroid_mean, zcr_mean])
    return features

# Step 1: Filter DataFrames to Only Include `gen_enc` Values 0 and 1
clean_dev_train_audio_binary = clean_dev_train_audio[clean_dev_train_audio['gen_enc'].isin([0, 1])]
test_clean_audio_binary = test_clean_audio[test_clean_audio['gen_enc'].isin([0, 1])]

# Function to extract features from the DataFrame
def extract_features_from_df(df):
    features_list = []

    for index, row in df.iterrows():
        # Audio features
        audio = row['audio_data']
        sr = row['sample_rate']
        audio_features = extract_features(audio, sr)

        # Accents feature (we will convert to numerical form, i.e., hash the string)
        accents = hash(row['accents']) % 10**5  # Using hash to convert text to a numerical value, with reduced size
        
        # Age encoding
        age_encoded = row['age_enc']

        # Votes_diff feature (numeric)
        votes_diff = row['votes_diff']

        # Concatenate all features
        combined_features = np.hstack([audio_features, accents, age_encoded, votes_diff])
        features_list.append(combined_features)

    return np.array(features_list)

# Extract features from the training and test datasets
X_train = extract_features_from_df(clean_dev_train_audio_binary)
X_test = extract_features_from_df(test_clean_audio_binary)

# Extract target variables
y_train = clean_dev_train_audio_binary['gen_enc'].values
y_test = test_clean_audio_binary['gen_enc'].values

# Normalize the features
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

# Verify shapes
print("X_train shape:", X_train_scaled.shape)
print("y_train shape:", y_train.shape)
print("X_test shape:", X_test_scaled.shape)
print("y_test shape:", y_test.shape)

# Train and Evaluate the Model
# Initialize the model
rf_model = RandomForestClassifier(n_estimators=100, random_state=42)

# Train the model
rf_model.fit(X_train_scaled, y_train)

# Make predictions
y_pred = rf_model.predict(X_test_scaled)

# Evaluate the model
print(classification_report(y_test, y_pred))
accuracy = accuracy_score(y_test, y_pred)
print(f'Accuracy Score: {accuracy:.4f}')

X_train shape: (4841, 18)
y_train shape: (4841,)
X_test shape: (1420, 18)
y_test shape: (1420,)
              precision    recall  f1-score   support

           0       0.40      0.64      0.50       332
           1       0.87      0.71      0.78      1088

    accuracy                           0.70      1420
   macro avg       0.64      0.68      0.64      1420
weighted avg       0.76      0.70      0.72      1420

Accuracy Score: 0.6958


In [40]:
import numpy as np
import librosa
import tensorflow as tf
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report, accuracy_score

# Filter DataFrames to only include `gen_enc` values 0 and 1
clean_dev_train_audio_binary = clean_dev_train_audio[clean_dev_train_audio['gen_enc'].isin([0, 1])]
test_clean_audio_binary = test_clean_audio[test_clean_audio['gen_enc'].isin([0, 1])]

# Function to extract MFCCs from audio data for deep learning input
def extract_mfcc_features(audio_data, sample_rate, max_length=100):
    # Extract MFCCs
    mfccs = librosa.feature.mfcc(y=audio_data, sr=sample_rate, n_mfcc=40)
    
    # Pad or truncate MFCCs to a fixed length
    if mfccs.shape[1] < max_length:
        mfccs = np.pad(mfccs, ((0, 0), (0, max_length - mfccs.shape[1])), mode='constant')
    else:
        mfccs = mfccs[:, :max_length]

    return mfccs

# Extract features from the DataFrame
def extract_features_from_df(df):
    features_list = []
    labels = []

    for index, row in df.iterrows():
        # Audio data and sample rate
        audio = row['audio_data']
        sr = row['sample_rate']

        # Extract MFCC features
        mfcc_features = extract_mfcc_features(audio, sr)

        # Append features and labels
        features_list.append(mfcc_features)
        labels.append(row['gen_enc'])

    return np.array(features_list), np.array(labels)

# Extract features from the training and test datasets
X_train, y_train = extract_features_from_df(clean_dev_train_audio_binary)
X_test, y_test = extract_features_from_df(test_clean_audio_binary)

# Reshape data for CNN input (batch_size, height, width, channels)
X_train = X_train[..., np.newaxis]  # Adding channel dimension
X_test = X_test[..., np.newaxis]    # Adding channel dimension

# Verify shapes
print("X_train shape:", X_train.shape)
print("y_train shape:", y_train.shape)
print("X_test shape:", X_test.shape)
print("y_test shape:", y_test.shape)

# Step 3: Create and Compile a CNN Model
model = tf.keras.models.Sequential([
    tf.keras.layers.Conv2D(32, (3, 3), activation='relu', input_shape=(40, 100, 1)),  # (n_mfcc, max_length, channels)
    tf.keras.layers.MaxPooling2D((2, 2)),
    tf.keras.layers.Conv2D(64, (3, 3), activation='relu'),
    tf.keras.layers.MaxPooling2D((2, 2)),
    tf.keras.layers.Conv2D(128, (3, 3), activation='relu'),
    tf.keras.layers.Flatten(),
    tf.keras.layers.Dense(128, activation='relu'),
    tf.keras.layers.Dropout(0.5),
    tf.keras.layers.Dense(1, activation='sigmoid')  # Sigmoid for binary classification
])

model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

# Print model summary
model.summary()

# Step 4: Train the Model
history = model.fit(X_train, y_train, epochs=20, batch_size=32, validation_data=(X_test, y_test))

# Step 5: Evaluate the Model
# Predict on the test set
y_pred_prob = model.predict(X_test)
y_pred = (y_pred_prob > 0.5).astype(int).flatten()

# Evaluate performance
print(classification_report(y_test, y_pred))
accuracy = accuracy_score(y_test, y_pred)
print(f'Accuracy Score: {accuracy:.4f}')

X_train shape: (4841, 40, 100, 1)
y_train shape: (4841,)
X_test shape: (1420, 40, 100, 1)
y_test shape: (1420,)


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


Epoch 1/20
[1m152/152[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 24ms/step - accuracy: 0.8172 - loss: 1.2707 - val_accuracy: 0.8761 - val_loss: 0.4348
Epoch 2/20
[1m152/152[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 21ms/step - accuracy: 0.9503 - loss: 0.1227 - val_accuracy: 0.8127 - val_loss: 0.6471
Epoch 3/20
[1m152/152[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 21ms/step - accuracy: 0.9684 - loss: 0.0790 - val_accuracy: 0.8606 - val_loss: 0.6211
Epoch 4/20
[1m152/152[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 21ms/step - accuracy: 0.9734 - loss: 0.0856 - val_accuracy: 0.8662 - val_loss: 0.6952
Epoch 5/20
[1m152/152[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 22ms/step - accuracy: 0.9850 - loss: 0.0497 - val_accuracy: 0.8430 - val_loss: 0.6314
Epoch 6/20
[1m152/152[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 21ms/step - accuracy: 0.9889 - loss: 0.0310 - val_accuracy: 0.8077 - val_loss: 0.6963
Epoch 7/20
[1m152/152

In [None]:
# later if we have time: age, (sentence & accent)