In [None]:
import os
import pandas as pd
import numpy as np
import librosa
import soundfile as sf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout, Activation, Flatten
from tensorflow.keras.optimizers import Adam
from sklearn.preprocessing import LabelEncoder
from tensorflow.keras.utils import to_categorical
from sklearn.model_selection import train_test_split, GridSearchCV
from tensorflow.keras.wrappers.scikit_learn import KerasClassifier

In [None]:
# Global parameters
# Folder code on disk -> human-readable language label used as the class name.
language_dic = {"it" : "Italian", "es" : "Spanish"}
analysis_window_length = 0.01  # 10 ms in seconds
# Root folder containing one sub-folder per language code.
# Set the LANGUAGE_MP3_PATH environment variable to run the notebook on another
# machine; when it is unset the original location is used unchanged.
language_mp3_path = os.environ.get(
    "LANGUAGE_MP3_PATH",
    "/Users/Andee/Documents/CBS - Data Science/Second Semester/Machine Learning/Assignments/MLAssignments/FinalProject/languages",
)
min_clip = 1.3035  # presumably the shortest clip duration in seconds -- TODO confirm
x_seconds = 5  # target length (seconds) every clip is looped/truncated to
# Number of copies of the shortest clip needed to cover x_seconds.
num_reps = int(np.ceil(x_seconds / min_clip))

In [None]:
# Function to repeat the audio for a fixed length
def repeat_audio_x_seconds(track, dur, num_repeats=10, fs=16000):
    """Loop ``track`` along axis 0 and cut it to exactly ``dur`` seconds.

    Parameters
    ----------
    track : array-like
        Audio samples. Repetition and truncation happen along the first axis.
    dur : float
        Target duration in seconds.
    num_repeats : int, default 10
        Minimum number of copies to concatenate. If that many copies would not
        reach ``int(fs * dur)`` samples, more copies are added automatically so
        the result is never shorter than requested.
    fs : int, default 16000
        Sampling rate in Hz.

    Returns
    -------
    numpy.ndarray
        The first ``int(fs * dur)`` samples of the looped track. An empty
        ``track`` is returned unchanged because there is nothing to repeat.
    """
    track = np.asarray(track)
    num_samples_xs = int(fs * dur)

    track_len = track.shape[0]
    if track_len == 0:
        return track

    # Ceiling division: copies required to cover the requested sample count.
    copies_needed = -(-num_samples_xs // track_len)
    copies = max(int(num_repeats), copies_needed, 1)

    track = np.concatenate([track] * copies, axis=0)
    return track[0:num_samples_xs]

In [None]:
# Data Loading and Feature Extraction
# Column-wise accumulator: every processed clip adds one entry to each list.
data = {"filename": [], "language": [], "tracks": [], "mfccs": []}
# os.listdir returns entries in arbitrary order; sort for reproducible runs.
language_list = sorted(os.listdir(language_mp3_path))

for language in language_list:
    language_path = os.path.join(language_mp3_path, language)
    if not os.path.isdir(language_path):
        continue

    # Folder codes missing from language_dic are labelled "Unknown language!".
    language_name = language_dic.get(language, "Unknown language!")
    clips_path = os.path.join(language_path, "clips")

    if not os.path.isdir(clips_path):
        print(f"No 'clips' directory found in {language_path}")
        continue

    # Keep regular, non-hidden files only (e.g. skips macOS ".DS_Store", which
    # cannot be decoded as audio) and sort so the same subset is picked each run.
    mp3_list = sorted(
        entry for entry in os.listdir(clips_path)
        if not entry.startswith(".") and os.path.isfile(os.path.join(clips_path, entry))
    )[:200]  # limit the number of mp3 files processed

    for mp3 in mp3_list:
        audio_path = os.path.join(clips_path, mp3)
        audio_samples, fs = sf.read(audio_path)

        # soundfile returns (frames, channels) for multi-channel files; average
        # the channels so the feature extractor always receives a mono signal.
        if audio_samples.ndim > 1:
            audio_samples = audio_samples.mean(axis=1)

        # Loop/truncate the clip to a fixed x_seconds duration.
        audio_samples = repeat_audio_x_seconds(audio_samples, x_seconds, num_repeats=num_reps, fs=fs)

        y = audio_samples
        sr = fs
        # Hop between analysis frames, in samples.
        hop_length = int(analysis_window_length * sr)
        mfcc = librosa.feature.mfcc(y=y, sr=sr, n_mfcc=128, hop_length=hop_length)
        # Average over the frame axis -> one value per MFCC coefficient.
        mfccs_scaled_features = np.mean(mfcc.T,axis=0)

        # Append all four columns together, only after the clip was processed,
        # so the lists always stay the same length.
        data["filename"].append(mp3)
        data["language"].append(language_name)
        data["tracks"].append(audio_samples)
        data["mfccs"].append(mfccs_scaled_features)

df = pd.DataFrame(data)

In [None]:
# Prepare data for model
# Feature matrix: one row of averaged MFCC features per clip.
X = np.array(df['mfccs'].to_list())

# Targets: language names -> integer class ids -> one-hot rows.
language_labels = np.array(df['language'].to_list())
language_ids = LabelEncoder().fit_transform(language_labels)
y = to_categorical(language_ids)

# Split data into train and test sets (80 % train, fixed seed)
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    train_size=0.8,
    random_state=42,
)

In [None]:
# Define the model architecture
def create_model(optimizer='Adam'):
    """Build and compile the dense classifier used on the averaged MFCC features.

    Parameters
    ----------
    optimizer : str or optimizer instance, default 'Adam'
        Passed straight to ``model.compile``. Exposed as an argument so a
        scikit-learn grid search can tune it through ``KerasClassifier``;
        the wrapper only accepts hyper-parameters that the build function
        (or fit/predict) declares.

    Returns
    -------
    Sequential
        Compiled model with three hidden Dense/ReLU/Dropout(0.5) stages of
        100, 200 and 100 units and a softmax output layer.

    Notes
    -----
    The output width is read from the module-level one-hot target array ``y``
    (``y.shape[1]``), so ``y`` must be defined before this function is called.
    """
    model = Sequential()

    # Input is a 128-dimensional feature vector.
    model.add(Dense(100,input_shape=(128,)))
    model.add(Activation('relu'))
    model.add(Dropout(0.5))

    for hidden_units in (200, 100):
        model.add(Dense(hidden_units))
        model.add(Activation('relu'))
        model.add(Dropout(0.5))

    model.add(Dense(y.shape[1]))  # number of classes
    model.add(Activation('softmax'))

    model.compile(loss='categorical_crossentropy', metrics=['accuracy'], optimizer=optimizer)
    return model

In [None]:
# Create the Keras classifier
# Wrap the model factory so scikit-learn can treat it as an estimator.
model = KerasClassifier(build_fn=create_model, verbose=0)

# Define the grid search parameters
# 10 batch sizes x 10 epoch counts x 7 optimizers = 700 combinations.
tens_up_to_hundred = list(range(10, 101, 10))
param_grid = {
    'batch_size': list(tens_up_to_hundred),
    'epochs': list(tens_up_to_hundred),
    'optimizer': ['SGD', 'RMSprop', 'Adagrad', 'Adadelta', 'Adam', 'Adamax', 'Nadam'],
}

# Create Grid Search
# Exhaustive search, 3-fold cross-validation, using all available cores.
grid = GridSearchCV(
    estimator=model,
    param_grid=param_grid,
    n_jobs=-1,
    cv=3,
)
grid_result = grid.fit(X_train, y_train)

In [None]:
# Summarize results
# Report the best score found by the search and the parameters that gave it.
best_score = grid_result.best_score_
best_params = grid_result.best_params_
print("Best: %f using %s" % (best_score, best_params))

# Evaluate the model with the test data
# Score the fitted search object on the held-out split.
test_accuracy = grid.score(X_test, y_test)
print("Test accuracy: ", test_accuracy)