<a href="https://colab.research.google.com/github/ffer200395/The-Joe-Rogan-Experience/blob/main/Tuned_threshold_GMM.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
from sklearn.mixture import GaussianMixture
from scipy.io.wavfile import read, write
import numpy as np
import pandas as pd
import scipy.special
import math
import os
from tqdm import tqdm

In [2]:
def get_signal(file_name, samples_per_ms=16):
    """Load a .wav file and return its amplitude envelope, linear and log10.

    The rectified signal is averaged over consecutive frames of
    ``samples_per_ms`` samples; trailing samples that do not fill a whole
    frame are discarded.

    Args:
        file_name: Path of the .wav file to read.
        samples_per_ms: Samples averaged into one output value. The default
            of 16 makes one frame equal to 1 ms for 16 kHz audio.

    Returns:
        A tuple ``(s_abs, s_log)``: the per-frame mean absolute amplitude
        (exact zeros replaced by 0.1) and its base-10 logarithm.
    """
    # Obtain raw signal from audio (the sample rate is not used here)
    _, signal = read(file_name)
    # Promote to float before rectifying: for int16 PCM, abs(-32768) wraps
    # back to -32768, which would stay negative and become NaN after log10.
    s_abs = np.abs(signal.astype(np.float64))
    # Multi-channel data is (n_samples, n_channels); average the channels so
    # that a frame always spans `samples_per_ms` consecutive time steps.
    if s_abs.ndim > 1:
        s_abs = s_abs.mean(axis=1)
    # Drop the incomplete trailing frame, then average each frame
    cut = len(s_abs) - len(s_abs) % samples_per_ms
    s_abs = s_abs[:cut].reshape(-1, samples_per_ms).mean(axis=1)
    # Convert into a logarithmic scale; zeros are lifted to 0.1 so that
    # log10 stays finite (silence maps to -1).
    s_abs[s_abs == 0] = 0.1
    s_log = np.log10(s_abs)
    return s_abs, s_log
    
def get_gmm_result(s_log, random_state=None):
    """Derive a log-amplitude threshold from a two-component Gaussian mixture.

    A 2-component GMM is fitted to ``s_log`` and every value is assigned to
    one component. The threshold is the smallest value assigned to the
    component whose assigned values have the larger mean.

    Args:
        s_log: 1-D array of log10 amplitudes.
        random_state: Forwarded to ``GaussianMixture`` so a fit can be made
            reproducible; ``None`` keeps the library default.

    Returns:
        The threshold, on the same log10 scale as ``s_log``.

    Raises:
        ValueError: If the selected component received no samples.
    """
    # Fit a GMM and apply it to the signal (one feature column)
    samples = s_log.reshape(-1, 1)
    gm = GaussianMixture(n_components=2, random_state=random_state).fit(samples)
    yhat = gm.predict(samples)
    # Split the values by assigned component and compare their empirical means
    component_0 = s_log[yhat == 0]
    component_1 = s_log[yhat == 1]
    mu_1 = np.mean(component_0)
    mu_2 = np.mean(component_1)
    louder = component_0 if mu_1 > mu_2 else component_1
    # Vectorised minimum: the builtin min() would iterate the array in Python
    return louder.min()

def consecutive(data, stepsize=1):
    """Split ``data`` into runs whose neighbouring values differ by ``stepsize``.

    Returns the list of sub-arrays produced by ``np.split``; a new run starts
    right after every position where the difference is not ``stepsize``.
    """
    is_gap = np.diff(data) != stepsize
    cut_points = np.nonzero(is_gap)[0] + 1
    return np.split(data, cut_points)

def hms(total_ms):
    """Format a duration in milliseconds as ``HH:MM:SS:mmm``.

    Args:
        total_ms: Duration in milliseconds. Integers, numpy integers and
            floats are accepted; a fractional part is truncated.

    Returns:
        The formatted string. Hours wrap around at 24.
    """
    # Coerce first: the ':03d' format code rejects floats such as 1500.0
    total_ms = int(total_ms)
    total_s, ms = divmod(total_ms, 1000)
    total_m, s = divmod(total_s, 60)
    total_h, m = divmod(total_m, 60)
    h = total_h % 24
    return '{:02d}:{:02d}:{:02d}:{:03d}'.format(h, m, s, ms)

def get_start_end_time(tup):
    """Format the first two items of ``tup`` with ``hms`` and return them as a pair."""
    return tuple(hms(tup[position]) for position in (0, 1))

def get_noises_intervals(s_log, s_abs, threshold):
    """Locate the runs of consecutive frames whose amplitude exceeds the threshold.

    Args:
        s_log: Unused; kept so existing callers keep working.
        s_abs: 1-D array of linear amplitudes, one value per frame.
        threshold: Threshold on the log10 scale; it is converted back to the
            linear scale before comparing against ``s_abs``.

    Returns:
        A list of ``(start, end)`` frame-index pairs, one per run. For a run
        longer than one frame ``end`` is the index of its last frame; for a
        single-frame run ``end`` is ``start + 1``. The list is empty when no
        frame exceeds the threshold.
    """
    # From which value it is considered "no silence"
    linear_threshold = 10**threshold
    # Find all frames over threshold
    idx_noise = np.where(s_abs > linear_threshold)[0]
    if idx_noise.size == 0:
        # Nothing above the threshold: indexing an empty run would raise
        return []
    # Positions in idx_noise after which the next index is not adjacent
    breaks = np.where(np.diff(idx_noise) != 1)[0]
    starts = idx_noise[np.concatenate(([0], breaks + 1))]
    ends = idx_noise[np.concatenate((breaks, [idx_noise.size - 1]))]
    # Single-frame runs are reported as (start, start + 1)
    ends = np.where(ends == starts, starts + 1, ends)
    return list(zip(starts, ends))
    
def get_hist(signal, threshold, name):
    """Save a histogram of ``signal`` with the threshold marked as a red line.

    The figure is written to ``histograms/histogram_<name>.jpg`` and the
    current pyplot figure is cleared afterwards.

    Args:
        signal: Values to histogram (bins cover 0 to 5 in steps of 0.05).
        threshold: Cut position on the same scale as ``signal``; the label
            also shows ``10**threshold``.
        name: Identifier used in the title and in the output file name.
    """
    # pyplot is not imported at the top of the visible file, so resolve it
    # here; otherwise every call fails with NameError.
    import matplotlib.pyplot as plt

    normal_t = 10**threshold
    # savefig does not create missing directories
    os.makedirs('histograms', exist_ok=True)
    try:
        plt.hist(signal, bins=np.arange(0, 5, 0.05), color='blue', alpha=0.7, rwidth=0.85)
        plt.title(f"Histogram for {name} (log scale)")
        plt.axvline(threshold, color='red', linewidth=2)
        _, max_ylim = plt.ylim()
        plt.text(threshold*0.55, max_ylim*0.95, f'Cut {round(threshold,2)} ({round(normal_t,2)})')
        plt.savefig(f'histograms/histogram_{name}.jpg')
    finally:
        # Always reset the shared figure so a failure cannot leak bars into
        # the next histogram.
        plt.clf()

def run_all(path_audio):
    """Process every file in ``path_audio`` and write the per-episode outputs.

    For each file: read the signal, estimate the threshold with the GMM,
    extract the above-threshold intervals, save them to
    ``noises/<stem>.csv``, save a histogram, and record the linear
    threshold. A file that fails at any step is reported on stdout and
    skipped. Finally all recorded thresholds are written to
    ``episode_threshold.csv``.

    Args:
        path_audio: Directory containing the audio files. The episode number
            is parsed from the file stem after its first three characters.
    """
    ls_thresholds = []
    # to_csv / savefig do not create missing output directories
    os.makedirs('noises', exist_ok=True)
    os.makedirs('histograms', exist_ok=True)
    # List of files to be processed, in a deterministic order
    files = sorted(os.listdir(path_audio))
    for file in tqdm(files):
        # join() works whether or not path_audio ends with a separator
        file_name = os.path.join(path_audio, file)
        stem = file.split('.')[0]
        try:
            # Read .wav file
            s_abs, s_log = get_signal(file_name)
            # Apply Gaussian Mixture Model assuming 2 distributions
            threshold = get_gmm_result(s_log)
            linear_threshold = 10**threshold
            # Get noise intervals
            noise_start_end = get_noises_intervals(s_log, s_abs, threshold)
            # Convert time format
            new_list = [get_start_end_time(pair) for pair in noise_start_end]
            # Create and save a dataframe of noise intervals for each episode
            df = pd.DataFrame(new_list, columns=['start', 'end'])
            df.to_csv('noises/'+stem+'.csv')
            # Create a histogram
            get_hist(s_log, threshold, stem)
            # Keep record of thresholds
            ls_thresholds.append((int(stem[3:]), linear_threshold))
            print(len(new_list), linear_threshold)
        except Exception as e:
            # Best effort: report the failing file and continue with the rest
            print(file, e)
    df_treshold = pd.DataFrame(ls_thresholds, columns=['Episode','Threshold'])
    df_treshold.to_csv('episode_threshold.csv',index=False)

In [None]:
# Audio path: directory whose files run_all lists and processes one by one
path_audio = 'data/podcasts_wav/'
# Runs the full pipeline over that directory
run_all(path_audio)