# Pitch Synchronous Approach

In [13]:
MAIN_DIR = "."

In [3]:
import numpy as np
import pandas as pd
import librosa
import glob
import os
import scipy
import scipy.signal
import python_speech_features
import pickle as pkl
from sklearn.mixture import GaussianMixture
from functions import getVoiced, get_pitch_sync_frames

In [4]:
def get_mfcc(audio, sr, use_librosa=False):
    mfcc = []
    frames = get_pitch_sync_frames(audio, sr)
    for frame in frames:
        if use_librosa:
            mfcc_coeffs = librosa.feature.mfcc(
                frame, sr=sr, n_mfcc=13, hop_length=len(frame) + 1, win_length=len(frame)
            )
        else:
            mfcc_coeffs = python_speech_features.mfcc(
                signal=frame, samplerate=sr, numcep=13, winlen=len(frame) / sr, winstep=len(frame) / sr, nfft=N_FFT
            )
        mfcc.append(mfcc_coeffs.flatten())
    return np.array(mfcc)

In [5]:
def extract_train_mfcc(use_librosa=False):
    languages = [os.path.basename(x) for x in glob.glob(f"{TRAIN_DIR}/*")]
    for language in languages:
        print("Extracting Train MFCC features for", language)
        wav_files = sorted(glob.glob(f"{TRAIN_DIR}/{language}/*.wav"))
        mfcc_features = []
        for file in wav_files:
            try:
                audio, sr = librosa.load(file, sr=SR)
                mfcc = get_mfcc(audio, sr, use_librosa)
                mfcc_features.extend([mfcc[i] for i in range(mfcc.shape[0])])
            except Exception as e:
                print(file, e)
                continue
        filename = f"{MFCC_TRAIN_DIR}/{language}_{'lib' if use_librosa else 'psf'}{'_pitch_sync'}.npy"
        np.save(filename, np.array(mfcc_features))
        print("Saved MFCC features for", language, "in", filename)
        print()

In [6]:
def extract_test_mfcc(use_librosa=False):
    languages = [os.path.basename(x) for x in glob.glob(f"{TEST_DIR}/*")]
    for language in languages:
        print("Extracting Test MFCC features for", language)
        wav_files = sorted(glob.glob(f"{TEST_DIR}/{language}/*.wav"))
        mfcc_features = []
        for file in wav_files:
            try:
                audio, sr = librosa.load(file, sr=SR)
                mfcc = get_mfcc(audio, sr)
                mfcc_features.append(mfcc)
            except Exception as e:
                print(file, e)
                continue
        filename = f"{MFCC_TEST_DIR}/{language}_{'lib' if use_librosa else 'psf'}{'_pitch_sync'}.npy"
        with open(filename, "wb") as file:
            pkl.dump(mfcc_features, file)
        print("Saved MFCC features for", language, "in", filename)
        print()

In [7]:
def train(n_gaussians, use_deltas=True, use_librosa=False):
    dirs = glob.glob(f"{TRAIN_DIR}/*")
    languages = [os.path.basename(d) for d in dirs]
    models = {}
    for language in languages:
        mfcc_filename = f"{MFCC_TRAIN_DIR}/{language}_{'lib' if use_librosa else 'psf'}{'_pitch_sync'}.npy"
        mfcc_features = np.load(mfcc_filename)
        print(f"Training GMM for {language}")
        models[language] = GaussianMixture(n_gaussians, covariance_type="diag", max_iter=MAX_ITER).fit(mfcc_features)
    return models

In [8]:
def test(models, use_deltas=True, use_librosa=False):
    dirs = glob.glob(f"{TEST_DIR}/*")
    languages = sorted([os.path.basename(d) for d in dirs])
    conf_matrix = {language: {lang: 0 for lang in languages} for language in languages}
    for language in languages:
        mfcc_filename = f"{MFCC_TEST_DIR}/{language}_{'lib' if use_librosa else 'psf'}{'_pitch_sync'}.npy"
        with open(mfcc_filename, "rb") as file:
            mfcc_features = pkl.load(file)
        for mfcc in mfcc_features:
            pred = ""
            scores = {}
            for lang in models:
                scores[lang] = models[lang].score(mfcc)
                if pred == "" or scores[pred] < scores[lang]:
                    pred = lang
            conf_matrix[language][pred] += 1
    cf_matrix = np.zeros((len(languages), len(languages)))
    language_mappings = {}
    for i, language in enumerate(languages):
        language_mappings[language] = i
    for language in conf_matrix:
        r = language_mappings[language]
        for lang in conf_matrix[language]:
            c = language_mappings[lang]
            cf_matrix[r][c] = conf_matrix[language][lang]
    return cf_matrix.trace() / cf_matrix.sum(), cf_matrix, language_mappings

In [9]:
TRAIN_DIR = f"{MAIN_DIR}/dataset/train"
TEST_DIR = f"{MAIN_DIR}/dataset/test"
MFCC_TRAIN_DIR = f"{MAIN_DIR}/mfcc/train"
MFCC_TEST_DIR = f"{MAIN_DIR}/mfcc/test"
MODELS_DIR = f"{MAIN_DIR}/models"

In [10]:
if not os.path.isdir(MFCC_TRAIN_DIR):
    os.makedirs(MFCC_TRAIN_DIR)
if not os.path.isdir(MFCC_TEST_DIR):
    os.makedirs(MFCC_TEST_DIR)
if not os.path.isdir(MODELS_DIR):
    os.makedirs(MODELS_DIR)

In [11]:
SR = 8000
USE_LIBROSA = False
USE_DELTAS = False
N_FFT = 1024
MAX_ITER = 200

In [12]:
extract_train_mfcc(USE_LIBROSA)

Extracting Train MFCC features for gujarathi
Saved MFCC features for gujarathi in .//mfcc/train/gujarathi_psf_pitch_sync.npy

Extracting Train MFCC features for manipuri
Saved MFCC features for manipuri in .//mfcc/train/manipuri_psf_pitch_sync.npy

Extracting Train MFCC features for telugu
Saved MFCC features for telugu in .//mfcc/train/telugu_psf_pitch_sync.npy

Extracting Train MFCC features for assamese
Saved MFCC features for assamese in .//mfcc/train/assamese_psf_pitch_sync.npy

Extracting Train MFCC features for odia
Saved MFCC features for odia in .//mfcc/train/odia_psf_pitch_sync.npy

Extracting Train MFCC features for marathi
Saved MFCC features for marathi in .//mfcc/train/marathi_psf_pitch_sync.npy

Extracting Train MFCC features for bengali
.//dataset/train/bengali/f4_16.wav v cannot be empty
Saved MFCC features for bengali in .//mfcc/train/bengali_psf_pitch_sync.npy



In [14]:
extract_test_mfcc(USE_LIBROSA)

Extracting Test MFCC features for gujarathi
Saved MFCC features for gujarathi in .//mfcc/test/gujarathi_psf_pitch_sync.npy

Extracting Test MFCC features for manipuri
Saved MFCC features for manipuri in .//mfcc/test/manipuri_psf_pitch_sync.npy

Extracting Test MFCC features for telugu
Saved MFCC features for telugu in .//mfcc/test/telugu_psf_pitch_sync.npy

Extracting Test MFCC features for assamese
Saved MFCC features for assamese in .//mfcc/test/assamese_psf_pitch_sync.npy

Extracting Test MFCC features for odia
Saved MFCC features for odia in .//mfcc/test/odia_psf_pitch_sync.npy

Extracting Test MFCC features for marathi
Saved MFCC features for marathi in .//mfcc/test/marathi_psf_pitch_sync.npy

Extracting Test MFCC features for bengali
Saved MFCC features for bengali in .//mfcc/test/bengali_psf_pitch_sync.npy



In [20]:
N = [8, 16, 32, 64, 128, 256, 1024, 2048, 4096]
best_models = {}
best_accuracy = 0
for n in N:
    models = train(n, USE_DELTAS, USE_LIBROSA)
    print()
    print("Testing the performance")
    acc, cf_matrix, language_mappings = test(models, USE_DELTAS, USE_LIBROSA)
    if acc > best_accuracy:
        best_accuracy = acc
        best_models = models.copy()
    print(f"Accuracy using {n} gaussians:", acc)
    print()

Training GMM for gujarathi
Training GMM for manipuri
Training GMM for telugu
Training GMM for assamese
Training GMM for odia
Training GMM for marathi
Training GMM for bengali

Testing the performance
Accuracy using 8 gaussians: 0.9295774647887324

Training GMM for gujarathi
Training GMM for manipuri
Training GMM for telugu
Training GMM for assamese
Training GMM for odia
Training GMM for marathi
Training GMM for bengali

Testing the performance
Accuracy using 16 gaussians: 0.9295774647887324

Training GMM for gujarathi
Training GMM for manipuri
Training GMM for telugu
Training GMM for assamese
Training GMM for odia
Training GMM for marathi
Training GMM for bengali

Testing the performance
Accuracy using 32 gaussians: 0.9577464788732394

Training GMM for gujarathi
Training GMM for manipuri
Training GMM for telugu
Training GMM for assamese
Training GMM for odia
Training GMM for marathi
Training GMM for bengali

Testing the performance
Accuracy using 64 gaussians: 0.971830985915493

Traini

KeyboardInterrupt: 

In [None]:
# saving models
for language in best_models:
    filename = f"{MODELS_DIR}/{language}_{'lib' if USE_LIBROSA else 'psf'}{'_pitch_sync'}.pkl"
    with open(filename, "wb") as file:
        pkl.dump(best_models[language], file)

In [None]:
acc, cf_matrix, language_mappings = test(best_models, USE_DELTAS, USE_LIBROSA)

In [None]:
print("Accuracy:",  acc)
print(language_mappings)
print("Confusion Matrix:\n", cf_matrix)

Accuracy: 0.9859154929577465
{'assamese': 0, 'bengali': 1, 'gujarathi': 2, 'manipuri': 3, 'marathi': 4, 'odia': 5, 'telugu': 6}
Confusion Matrix:
 [[20.  0.  0.  0.  0.  0.  0.]
 [ 0. 21.  0.  0.  0.  0.  0.]
 [ 1.  0. 20.  0.  0.  0.  0.]
 [ 0.  0.  0. 20.  0.  0.  0.]
 [ 0.  0.  0.  0. 20.  0.  0.]
 [ 0.  0.  0.  0.  0. 20.  0.]
 [ 0.  0.  1.  0.  0.  0. 19.]]


In [None]:
df = {}
for i, language in enumerate(language_mappings):
    df[language] = cf_matrix[i].astype(np.int32)
df = pd.DataFrame(df, columns=language_mappings, index=language_mappings)
df.to_csv(f"{MAIN_DIR}/{'lib' if USE_LIBROSA else 'psf'}{'_pitch_sync'}.csv", columns=language_mappings, index=True)
df

Unnamed: 0,assamese,bengali,gujarathi,manipuri,marathi,odia,telugu
assamese,20,0,1,0,0,0,0
bengali,0,21,0,0,0,0,0
gujarathi,0,0,20,0,0,0,1
manipuri,0,0,0,20,0,0,0
marathi,0,0,0,0,20,0,0
odia,0,0,0,0,0,20,0
telugu,0,0,0,0,0,0,19
