# Pitch Synchronous Approach

In [2]:
import sys

from google.colab import drive

# Mount Google Drive so the project folder is reachable from the Colab VM.
drive.mount("/content/gdrive")

# Root folder of the project on Drive. It is put first on sys.path so that
# modules stored there can be imported by later cells.
MAIN_DIR = "/content/gdrive/MyDrive/SSP"
sys.path.insert(0, MAIN_DIR)

Mounted at /content/gdrive


In [None]:
!pip install python_speech_features

In [5]:
import numpy as np
import pandas as pd
import librosa
import glob
import os
import scipy
import scipy.signal
import python_speech_features
import pickle as pkl
from sklearn.mixture import GaussianMixture
from functions import getVoiced, get_pitch_sync_frames

In [14]:
def get_mfcc(audio, sr, use_librosa=False):
    """Compute one 13-coefficient MFCC vector per pitch-synchronous frame.

    The signal is split with `get_pitch_sync_frames` and each resulting frame
    is analysed on its own, using a window as long as the frame itself.

    Args:
        audio: Audio samples, forwarded unchanged to `get_pitch_sync_frames`.
        sr: Sampling rate of `audio` in Hz.
        use_librosa: If True use `librosa.feature.mfcc`, otherwise use
            `python_speech_features.mfcc` (which also reads the global N_FFT).

    Returns:
        `np.array` built from the flattened per-frame coefficient arrays, one
        entry per frame. When no frames are produced this is an empty array.
    """
    mfcc = []
    for frame in get_pitch_sync_frames(audio, sr):
        if use_librosa:
            # `y` must be passed by keyword: librosa >= 0.10 made the audio
            # argument keyword-only, so a positional call raises TypeError.
            # The hop is one sample longer than the frame — presumably so that
            # only a single analysis window is produced per frame.
            mfcc_coeffs = librosa.feature.mfcc(
                y=frame, sr=sr, n_mfcc=13, hop_length=len(frame) + 1, win_length=len(frame)
            )
        else:
            # Window length and step are both set to the frame's duration.
            frame_duration = len(frame) / sr
            mfcc_coeffs = python_speech_features.mfcc(
                signal=frame, samplerate=sr, numcep=13, winlen=frame_duration, winstep=frame_duration, nfft=N_FFT
            )
        mfcc.append(mfcc_coeffs.flatten())
    return np.array(mfcc)

In [16]:
def extract_train_mfcc(use_librosa=False):
    """Extract training MFCCs and save them as one `.npy` file per language.

    Every sub-entry of TRAIN_DIR is treated as a language. The MFCC vectors of
    all its `*.wav` files (loaded at sample rate SR) are pooled into a single
    array and written to MFCC_TRAIN_DIR with `np.save`. A file that raises
    during loading or feature extraction is reported on stdout and skipped.

    Args:
        use_librosa: Forwarded to `get_mfcc`; also selects the `lib` / `psf`
            tag used in the output file name.
    """
    backend = "lib" if use_librosa else "psf"
    for language_dir in glob.glob(f"{TRAIN_DIR}/*"):
        language = os.path.basename(language_dir)
        print("Extracting Train MFCC features for", language)
        pooled_vectors = []
        for wav_path in sorted(glob.glob(f"{TRAIN_DIR}/{language}/*.wav")):
            try:
                audio, sr = librosa.load(wav_path, sr=SR)
                per_frame = get_mfcc(audio, sr, use_librosa)
                # Iterating the array yields its rows, i.e. one vector per frame.
                pooled_vectors.extend(list(per_frame))
            except Exception as e:
                # Best effort: report the offending file and move on.
                print(wav_path, e)
        filename = f"{MFCC_TRAIN_DIR}/{language}_{backend}_pitch_sync.npy"
        np.save(filename, np.array(pooled_vectors))
        print("Saved MFCC features for", language, "in", filename)
        print()

In [20]:
def extract_test_mfcc(use_librosa=False):
    """Extract test MFCCs and pickle them as one list per language.

    Every sub-entry of TEST_DIR is treated as a language. Unlike the training
    extraction, features are kept per utterance: the saved object is a list
    with one MFCC array per `*.wav` file. A file that raises during loading or
    feature extraction, or that yields no frames, is reported and skipped.

    Note that the output is written with `pickle`, even though the file name
    ends in `.npy`; it has to be read back with `pickle.load`, not `np.load`.

    Args:
        use_librosa: Forwarded to `get_mfcc`; also selects the `lib` / `psf`
            tag used in the output file name.
    """
    backend = "lib" if use_librosa else "psf"
    languages = [os.path.basename(x) for x in glob.glob(f"{TEST_DIR}/*")]
    for language in languages:
        print("Extracting Test MFCC features for", language)
        wav_files = sorted(glob.glob(f"{TEST_DIR}/{language}/*.wav"))
        mfcc_features = []
        for wav_path in wav_files:
            try:
                audio, sr = librosa.load(wav_path, sr=SR)
                # Forward the backend flag. It used to be dropped here, so
                # files tagged `lib` would still have contained
                # python_speech_features output.
                mfcc = get_mfcc(audio, sr, use_librosa)
            except Exception as e:
                # Best effort: report the offending file and move on.
                print(wav_path, e)
                continue
            if mfcc.size == 0:
                # An utterance without any frame cannot be scored by a model
                # later on, so it is reported and left out instead of stored.
                print(wav_path, "produced no MFCC frames; skipping")
                continue
            mfcc_features.append(mfcc)
        filename = f"{MFCC_TEST_DIR}/{language}_{backend}_pitch_sync.npy"
        # Separate name for the handle so it does not shadow the wav path.
        with open(filename, "wb") as out_file:
            pkl.dump(mfcc_features, out_file)
        print("Saved MFCC features for", language, "in", filename)
        print()

In [21]:
def train(n_gaussians, use_deltas=True, use_librosa=False):
    """Fit one diagonal-covariance GMM per training language.

    The languages are the entries of TRAIN_DIR; the features of each one are
    loaded from the matching `.npy` file in MFCC_TRAIN_DIR.

    Args:
        n_gaussians: Number of mixture components of every GMM.
        use_deltas: Accepted for interface compatibility; not read here.
        use_librosa: Selects the `lib` / `psf` tag of the feature file names.

    Returns:
        Dict mapping language name to its fitted `GaussianMixture`.
    """
    backend = "lib" if use_librosa else "psf"
    models = {}
    for language_dir in glob.glob(f"{TRAIN_DIR}/*"):
        language = os.path.basename(language_dir)
        features = np.load(f"{MFCC_TRAIN_DIR}/{language}_{backend}_pitch_sync.npy")
        print(f"Training GMM for {language}")
        gmm = GaussianMixture(n_gaussians, covariance_type="diag", max_iter=MAX_ITER)
        # fit() returns the estimator itself.
        models[language] = gmm.fit(features)
    return models

In [22]:
def test(models, use_deltas=True, use_librosa=False):
    """Classify every test utterance and build the confusion matrix.

    For each language found in TEST_DIR the pickled list of per-utterance MFCC
    arrays is loaded from MFCC_TEST_DIR. Each utterance is assigned to the
    language whose model returns the highest `score`; on ties the model that
    comes first in `models` wins.

    Args:
        models: Dict mapping language name to an object with a `score(X)`
            method (presumably the GaussianMixture models from `train`).
        use_deltas: Accepted for interface compatibility; not read here.
        use_librosa: Selects the `lib` / `psf` tag of the feature file names.

    Returns:
        Tuple `(accuracy, cf_matrix, language_mappings)`: `cf_matrix[r][c]`
        counts utterances of true language `r` predicted as language `c`,
        `language_mappings` maps language name to its row/column index
        (alphabetical order), and accuracy is trace / total.
    """
    backend = "lib" if use_librosa else "psf"
    languages = sorted(os.path.basename(d) for d in glob.glob(f"{TEST_DIR}/*"))
    language_mappings = {language: idx for idx, language in enumerate(languages)}
    cf_matrix = np.zeros((len(languages), len(languages)))

    for language in languages:
        mfcc_filename = f"{MFCC_TEST_DIR}/{language}_{backend}_pitch_sync.npy"
        with open(mfcc_filename, "rb") as file:
            utterances = pkl.load(file)
        row = language_mappings[language]
        for mfcc in utterances:
            # Running arg-max over the models; a strict comparison keeps the
            # earliest model when two scores are equal.
            pred, best_score = "", None
            for lang, model in models.items():
                score = model.score(mfcc)
                if best_score is None or best_score < score:
                    pred, best_score = lang, score
            cf_matrix[row][language_mappings[pred]] += 1

    return cf_matrix.trace() / cf_matrix.sum(), cf_matrix, language_mappings

In [23]:
# Locations below the project root on Drive: raw audio per split, the
# extracted MFCC features per split, and the pickled GMMs.
TRAIN_DIR = MAIN_DIR + "/dataset/train"
TEST_DIR = MAIN_DIR + "/dataset/test"
MFCC_TRAIN_DIR = MAIN_DIR + "/mfcc/train"
MFCC_TEST_DIR = MAIN_DIR + "/mfcc/test"
MODELS_DIR = MAIN_DIR + "/models"

In [24]:
# Make sure every output directory exists before anything is written to it.
# exist_ok=True replaces the separate isdir() check and is safe to re-run.
for output_dir in (MFCC_TRAIN_DIR, MFCC_TEST_DIR, MODELS_DIR):
    os.makedirs(output_dir, exist_ok=True)

In [26]:
# Sample rate (Hz) requested from librosa.load for every wav file.
SR = 8000
# MFCC backend switch: True -> librosa, False -> python_speech_features.
USE_LIBROSA = False
# Passed to train()/test() as `use_deltas`; neither function reads it.
USE_DELTAS = False
# FFT size given to python_speech_features.mfcc (`nfft`); the librosa call does not use it.
N_FFT = 1024
# `max_iter` of every GaussianMixture fitted in train().
MAX_ITER = 200

In [None]:
# Extract the training features: one pooled .npy file per language in MFCC_TRAIN_DIR.
# NOTE(review): the recorded output below lists `*_lib.npy` paths, which does not
# match the current naming (`*_psf_pitch_sync.npy` while USE_LIBROSA is False) —
# it looks like output from an earlier version; re-run to refresh it.
extract_train_mfcc(USE_LIBROSA)

Extracting Train MFCC features for manipuri
Saved MFCC features for manipuri in /content/gdrive/MyDrive/SSP/mfcc/train/manipuri_lib.npy

Extracting Train MFCC features for assamese
Saved MFCC features for assamese in /content/gdrive/MyDrive/SSP/mfcc/train/assamese_lib.npy

Extracting Train MFCC features for gujarathi
Saved MFCC features for gujarathi in /content/gdrive/MyDrive/SSP/mfcc/train/gujarathi_lib.npy

Extracting Train MFCC features for telugu
Saved MFCC features for telugu in /content/gdrive/MyDrive/SSP/mfcc/train/telugu_lib.npy

Extracting Train MFCC features for odia
Saved MFCC features for odia in /content/gdrive/MyDrive/SSP/mfcc/train/odia_lib.npy

Extracting Train MFCC features for marathi
Saved MFCC features for marathi in /content/gdrive/MyDrive/SSP/mfcc/train/marathi_lib.npy

Extracting Train MFCC features for bengali
Saved MFCC features for bengali in /content/gdrive/MyDrive/SSP/mfcc/train/bengali_lib.npy



In [None]:
# Extract the test features: one pickled list of per-utterance arrays per language in MFCC_TEST_DIR.
# NOTE(review): the recorded output below lists `*_lib.npy` paths, which does not
# match the current naming (`*_psf_pitch_sync.npy` while USE_LIBROSA is False) —
# it looks like output from an earlier version; re-run to refresh it.
extract_test_mfcc(USE_LIBROSA)

Extracting Test MFCC features for assamese
Saved MFCC features for assamese in /content/gdrive/MyDrive/SSP/mfcc/test/assamese_lib.npy

Extracting Test MFCC features for gujarathi
Saved MFCC features for gujarathi in /content/gdrive/MyDrive/SSP/mfcc/test/gujarathi_lib.npy

Extracting Test MFCC features for manipuri
Saved MFCC features for manipuri in /content/gdrive/MyDrive/SSP/mfcc/test/manipuri_lib.npy

Extracting Test MFCC features for marathi
Saved MFCC features for marathi in /content/gdrive/MyDrive/SSP/mfcc/test/marathi_lib.npy

Extracting Test MFCC features for telugu
Saved MFCC features for telugu in /content/gdrive/MyDrive/SSP/mfcc/test/telugu_lib.npy

Extracting Test MFCC features for odia
Saved MFCC features for odia in /content/gdrive/MyDrive/SSP/mfcc/test/odia_lib.npy

Extracting Test MFCC features for bengali
Saved MFCC features for bengali in /content/gdrive/MyDrive/SSP/mfcc/test/bengali_lib.npy



In [None]:
# Sweep over the number of mixture components and keep the set of models
# that reaches the highest accuracy.
# NOTE(review): the selection is made on the test split itself, so the best
# accuracy reported here is an optimistic estimate.
N = [8, 16, 32, 64, 128, 256]
best_models, best_accuracy = {}, 0
for n in N:
    models = train(n, USE_DELTAS, USE_LIBROSA)
    print("\nTesting the performance")
    acc, cf_matrix, language_mappings = test(models, USE_DELTAS, USE_LIBROSA)
    if acc > best_accuracy:
        # Shallow copy: the dict is new, the fitted models are shared.
        best_accuracy, best_models = acc, models.copy()
    print(f"Accuracy using {n} gaussians:", acc, end="\n\n")

Training GMM for manipuri
Training GMM for assamese
Training GMM for gujarathi
Training GMM for telugu
Training GMM for odia
Training GMM for marathi
Training GMM for bengali

Testing the performance
Accuracy using 8 gaussians: 0.176056338028169

Training GMM for manipuri
Training GMM for assamese
Training GMM for gujarathi
Training GMM for telugu
Training GMM for odia
Training GMM for marathi
Training GMM for bengali

Testing the performance
Accuracy using 16 gaussians: 0.14788732394366197

Training GMM for manipuri
Training GMM for assamese
Training GMM for gujarathi
Training GMM for telugu
Training GMM for odia
Training GMM for marathi
Training GMM for bengali

Testing the performance
Accuracy using 32 gaussians: 0.19014084507042253

Training GMM for manipuri
Training GMM for assamese
Training GMM for gujarathi
Training GMM for telugu
Training GMM for odia
Training GMM for marathi
Training GMM for bengali

Testing the performance
Accuracy using 64 gaussians: 0.14788732394366197

Tra

ValueError: ignored

In [None]:
# Pickle the best GMM of every language into MODELS_DIR, one file per language.
for language, model in best_models.items():
    filename = f"{MODELS_DIR}/{language}_{'lib' if USE_LIBROSA else 'psf'}{'_pitch_sync'}.pkl"
    with open(filename, "wb") as file:
        pkl.dump(model, file)

In [None]:
# Re-evaluate the best model set from the sweep so that acc, cf_matrix and
# language_mappings describe that set rather than the last configuration tried.
acc, cf_matrix, language_mappings = test(best_models, USE_DELTAS, USE_LIBROSA)

In [None]:
# Report the results of the best model set. In cf_matrix the row is the true
# language and the column the predicted one; language_mappings gives the
# index of each language.
print("Accuracy:",  acc)
print(language_mappings)
print("Confusion Matrix:\n", cf_matrix)

In [None]:
# Label the confusion matrix with the language names and save it as CSV.
# Rows are the true language and columns the predicted one, i.e. the same
# orientation as `cf_matrix`. The table used to be built column by column from
# the rows of `cf_matrix`, which stored it transposed.
labels = sorted(language_mappings, key=language_mappings.get)  # names in index order
df = pd.DataFrame(cf_matrix.astype(np.int32), index=labels, columns=labels)
df.to_csv(f"{MAIN_DIR}/{'lib' if USE_LIBROSA else 'psf'}{'_pitch_sync'}.csv", columns=labels, index=True)
df