<a href="https://colab.research.google.com/github/BadrOuannas/DD2476-Project/blob/master/phonemeRecognitionDNNHMM.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

### Import python libraries and packages


In [0]:
import numpy as np
import scipy.io.wavfile as scipywav
import scipy as scipy
import scipy.signal as signal
import math
import unittest
import matplotlib.pyplot as plt
from scipy.fftpack import fft
import matplotlib.colors as mcolors
from matplotlib.pyplot import specgram 
import pandas as pd 
import IPython.display as ipd
import matplotlib.style
import matplotlib as mpl
mpl.style.use('ggplot')
import plotly.graph_objects as go
couleurs = mcolors.CSS4_COLORS
Tcolors = list(couleurs.keys())
import tensorflow as tf
import tensorflow.keras as keras
from collections import deque
import os
from pysndfile import sndio
import soundfile as sf

### Utils ###

In [0]:


def path2info(path):
    """
    path2info: splits a TIDIGITS-style path into speaker and utterance fields.

    The last three path components are read as
    .../<gender>/<speakerID>/<filename>, and the filename is cut five
    characters from its end: the character at that position is the
    repetition letter and everything before it is the digit string
    (this matches names such as 'z9z6531a.wav').

    Example:
    path2info('tidigits/disc_4.1.1/tidigits/train/man/ae/z9z6531a.wav')
    returns ('man', 'ae', 'z9z6531', 'a')
    """
    speaker_dir, filename = os.path.split(path)
    gender_dir, speakerID = os.path.split(speaker_dir)
    gender = os.path.split(gender_dir)[1]
    digits, repetition = filename[:-5], filename[-5]
    return gender, speakerID, digits, repetition


def loadAudio(filename):
    """
    loadAudio: loads audio data from file using the soundfile package

    The samples are requested with dtype='int16' so that they are returned
    as 16-bit integers instead of being converted to floating point numbers.

    Args:
        filename: path of the audio file to read
    Returns:
        the value returned by soundfile.read, i.e. a (samples, samplingrate)
        tuple
    """
    # The file used to be read a first time with pysndfile and the result
    # thrown away; only the soundfile read below was ever returned.
    return sf.read(filename, dtype='int16')


def frames2trans(sequence, outfilename=None, timestep=0.01):
    """
    Outputs a standard transcription given a frame-by-frame
    list of strings.

    Consecutive identical symbols are merged into one segment and every
    segment is written as a line "<start> <end> <symbol>", where start and
    end are obtained by accumulating `timestep` once per frame.

    Args:
        sequence: frame-by-frame sequence of string symbols
        outfilename: if not None, the transcription is also written to this file
        timestep: duration of one frame, added to the running end time per frame
    Returns:
        the transcription as a single string (empty for an empty sequence)

    Example (using functions from Lab 1 and Lab 2):
    phones = ['sil', 'sil', 'sil', 'ow', 'ow', 'ow', 'ow', 'ow', 'sil', 'sil']
    trans = frames2trans(phones, 'oa.lab')

    Then you can use, for example wavesurfer to open the wav file and the transcription
    """
    lines = []
    # len() rather than truthiness so that numpy arrays are accepted too.
    if len(sequence) > 0:
        sym = sequence[0]
        start = 0
        end = 0
        for current in sequence:
            if current != sym:
                # The symbol changed: close the segment that just ended.
                lines.append(str(start) + ' ' + str(end) + ' ' + sym + '\n')
                sym = current
                start = end
            end = end + timestep
        # Close the last open segment.
        lines.append(str(start) + ' ' + str(end) + ' ' + sym + '\n')
    trans = ''.join(lines)
    if outfilename is not None:
        with open(outfilename, 'w') as f:
            f.write(trans)
    return trans


### From Lab 1 

In [0]:
from scipy import fftpack

def enframe(samples, Wlen, Wshift):
    """
    Cuts the input samples into (possibly overlapping) windows.

    Args:
        samples: sequence of samples to slice
        Wlen : window length in samples.
        Wshift: shift of consecutive windows in samples
    Returns:
        a pair of numpy arrays built from the same windows:
        the first is [N x Wlen] and the second is its transpose [Wlen x N],
        where N is the number of windows that fit in the input signal
     tests :
     frames, _ = enframe(np.array([1,2,3,4,5,6,7,8]),4, 1)
     print(frames)
    """
    # Number of complete windows that fit when stepping by Wshift.
    n_windows = math.floor((len(samples) - Wlen) / Wshift) + 1
    offsets = (k * Wshift for k in range(n_windows))
    windows = [samples[first : first + Wlen] for first in offsets]
    return np.array(windows), np.array(windows).T

def preemp(frames, coef=0.97):
    """
    Pre-emphasis filter, applied independently to every frame.

    Each row is passed through the filter y[n] = x[n] - coef * x[n-1]
    using scipy.signal.lfilter.

    Args:
        frames: array of speech frames [N x M] where N is the number of frames and
               M the samples per frame
        coef: pre-emphasis factor (defaults to the value specified in the exercise)

    Output:
        array of pre-emphasised speech samples, one row per input frame
    """
    n_frames, _ = np.shape(frames)

    # Filter coefficients: numerator [1, -coef], denominator [1].
    numerator = np.asarray([1.0, -coef])
    denominator = np.asarray([1.0])

    emphasised = [
        scipy.signal.lfilter(numerator, denominator, frames[k, :]).tolist()
        for k in range(n_frames)
    ]
    return np.array(emphasised)

def windowing(input):
    """
    Applies hamming window to the input frames.

    Args:
        input: array of speech samples [N x M] where N is the number of frames and
               M the samples per frame
    Output:
        tuple (res, w) where res is the array of windowed speech samples
        [N x M] and w is the window of length M that was applied
    Note: the window comes from scipy.signal.windows.hamming with sym=False
    (the older alias scipy.signal.hamming is no longer available in recent
    SciPy releases).

    # plotting hamming window
    w = signal.windows.hamming(100, sym=False)
    plt.plot(np.linspace(-1, 1, 100), w)
    # test on frames
    print(example_frames)
    xw, _ = windowing(example_frames)
    print(xw)
    """
    w = signal.windows.hamming(input.shape[1], sym=False)
    # Multiply by ones first so that the result is a fresh floating point
    # array and the caller's input is never modified in place.
    res = np.ones(input.shape) * input
    res *= w
    return res, w

def powerSpectrum(data, nfft=512):
    """
    Calculates the power spectrum of the input signal, that is the square of the modulus of the FFT

    Args:
        data: array of speech samples [N x M] where N is the number of frames and
               M the samples per frame
        nfft: length of the FFT
    Output:
        array of power spectra [N x nfft]
    Note: uses the function fft from scipy.fftpack
    ps = powerSpectrum(xw, 512)
    plt.subplot(1, 2, 1)
    plt.pcolormesh(ps)
    plt.subplot(1, 2, 2)
    plt.pcolormesh(spec)
    plt.show()
    """
    spectrum = fft(data, nfft)

    return np.square(np.abs(spectrum))

def trfbank(fs, nfft, lowfreq=133.33, linsc=200/3., logsc=1.0711703, nlinfilt=13, nlogfilt=27, equalareas=False):
    """Compute triangular filterbank for MFCC computation.

    Inputs:
    fs:         sampling frequency (rate)
    nfft:       length of the fft
    lowfreq:    frequency of the lowest filter
    linsc:      scale for the linear filters
    logsc:      scale for the logarithmic filters
    nlinfilt:   number of linear filters
    nlogfilt:   number of log filters
    equalareas: if True every filter has unit peak height, otherwise the
                height of each filter is 2 / (its bandwidth)

    Outputs:
    res:  array with shape [N, nfft], with filter amplitudes for each column.
            (N=nlinfilt+nlogfilt)
    From scikits.talkbox"""
    # Total number of filters
    nfilt = nlinfilt + nlogfilt

    #------------------------
    # Compute the filter bank
    #------------------------
    # Compute start/middle/end points of the triangular filters in spectral
    # domain
    freqs = np.zeros(nfilt+2)
    freqs[:nlinfilt] = lowfreq + np.arange(nlinfilt) * linsc
    freqs[nlinfilt:] = freqs[nlinfilt-1] * logsc ** np.arange(1, nlogfilt + 3)
    if equalareas:
        heights = np.ones(nfilt)
    else:
        heights = 2./(freqs[2:] - freqs[0:-2])

    # Compute filterbank coeff (in fft domain, in bins)
    fbank = np.zeros((nfilt, nfft))
    # FFT bins (in Hz)
    nfreqs = np.arange(nfft) / (1. * nfft) * fs
    for i in range(nfilt):
        low = freqs[i]
        cen = freqs[i+1]
        hi = freqs[i+2]

        # Bin indices on the rising (lid) and falling (rid) slope of the
        # triangle. The builtin int is used as dtype because the np.int
        # alias no longer exists in current NumPy releases.
        lid = np.arange(np.floor(low * nfft / fs) + 1,
                        np.floor(cen * nfft / fs) + 1, dtype=int)
        lslope = heights[i] / (cen - low)
        rid = np.arange(np.floor(cen * nfft / fs) + 1,
                        np.floor(hi * nfft / fs) + 1, dtype=int)
        rslope = heights[i] / (hi - cen)
        fbank[i][lid] = lslope * (nfreqs[lid] - low)
        fbank[i][rid] = rslope * (hi - nfreqs[rid])

    return fbank

def logMelSpectrum(input, samplingrate):
    """
    Applies the triangular filterbank from trfbank to a power spectrum and
    takes the natural logarithm of the filter outputs.

    Args:
        input: array of power spectrum coefficients [N x nfft] where N is the number of frames and
               nfft the length of each spectrum
        samplingrate: sampling rate of the original signal (used to calculate the filterbank shapes)
    Output:
        tuple (log_outputs, filterbank): log_outputs is the array of filterbank
        log outputs [N x nmelfilters], filterbank is the [nmelfilters x nfft]
        matrix returned by trfbank

    lms, _ = logMelSpectrum(ps, sampling_rate)
    plt.subplot(1, 2, 1)
    plt.pcolormesh(lms)
    plt.subplot(1, 2, 2)
    plt.pcolormesh(mspec)
    plt.show()
    """
    filterbank = trfbank(samplingrate, input.shape[1])
    filter_outputs = np.matmul(input, filterbank.T)
    return np.log(filter_outputs), filterbank

def lifter(mfcc, lifter=22):
    """
    Applies liftering to improve the relative range of MFCC coefficients.

    Coefficient k of every frame is scaled by
    1 + lifter/2 * sin(pi * k / lifter).

       mfcc: NxM matrix where N is the number of frames and M the number of MFCC coefficients
       lifter: liftering coefficient

    Returns:
       NxM array with liftered coefficients
    """
    _, n_coeffs = mfcc.shape
    coeff_index = np.arange(n_coeffs)
    weights = 1.0 + lifter/2.0 * np.sin(np.pi * coeff_index / lifter)
    # Broadcasting applies the same weight vector to every frame (row).
    return np.multiply(mfcc, weights[np.newaxis, :])

def cepstrum(input, nceps = 13):
    """
    Computes cepstral coefficients as the leading DCT coefficients of each row.

    Args:
        input: array [N x M], one row per frame (indexed as 2-D below)
        nceps: number of coefficients to keep per frame

    Returns:
        array with the first nceps columns of the DCT of input, taken along
        the last axis with scipy.fftpack.dct's default settings
    """
    # Use the public scipy.fftpack.dct entry point rather than reaching it
    # through the deprecated scipy.fftpack.realtransforms namespace.
    return fftpack.dct(input)[:, :nceps]

def mspec(samples, winlen = 400, winshift = 200, preempcoeff=0.97, nfft=512, samplingrate=20000):
    """Computes Mel Filterbank features.

    Chains enframe -> preemp -> windowing -> powerSpectrum -> logMelSpectrum.

    Args:
        samples: array of speech samples with shape (N,)
        winlen: length of the analysis window
        winshift: number of samples to shift the analysis window at every time step
        preempcoeff: pre-emphasis coefficient
        nfft: length of the Fast Fourier Transform (power of 2, >= winlen)
        samplingrate: sampling rate of the original signal

    Returns:
        N x nfilters array with mel filterbank features (see trfbank for nfilters)
    """
    # Several helpers return a pair; only the first element is needed here.
    framed = enframe(samples, winlen, winshift)[0]
    emphasised = preemp(framed, preempcoeff)
    windowed = windowing(emphasised)[0]
    power = powerSpectrum(windowed, nfft)
    log_mel, _ = logMelSpectrum(power, samplingrate)

    return log_mel

def mfcc_func(samples, winlen = 400, winshift = 200, preempcoeff=0.97, nfft=512, nceps=13, samplingrate=20000, liftercoeff=22):
    """Computes Mel Frequency Cepstrum Coefficients.

    The filterbank features from mspec are passed through cepstrum and
    then through lifter.

    Args:
        samples: array of speech samples with shape (N,)
        winlen: length of the analysis window
        winshift: number of samples to shift the analysis window at every time step
        preempcoeff: pre-emphasis coefficient
        nfft: length of the Fast Fourier Transform (power of 2, >= winlen)
        nceps: number of cepstrum coefficients to compute
        samplingrate: sampling rate of the original signal
        liftercoeff: liftering coefficient used to equalise scale of MFCCs

    Returns:
        N x nceps array with liftered MFCC coefficients
    """
    filterbank_features = mspec(samples, winlen, winshift, preempcoeff, nfft, samplingrate)
    return lifter(cepstrum(filterbank_features, nceps), liftercoeff)

### From Lab 2 

In [0]:
def concatTwoHMMs(hmm1, hmm2):
    """Concatenates two HMMs into a single left-to-right model.

    Both models are dictionaries with the keys 'name', 'startprob',
    'transmat', 'means' and 'covars'. 'startprob' has one more entry than
    there are rows in 'means', and 'transmat' is square with that same size;
    the extra last entry/row/column is treated as the exit of the model.
    In the result the exit of hmm1 is merged with the entry of hmm2:
    probability mass that leaves hmm1 is redistributed over the states of
    hmm2 according to hmm2's start probabilities.

    Args:
        hmm1: first model (left part of the concatenation)
        hmm2: second model (right part of the concatenation)

    Returns:
        dictionary with the same keys as the inputs, where
        name is hmm1['name'] + '_' + hmm2['name'],
        startprob has len(hmm1 startprob) + len(hmm2 startprob) - 1 entries,
        transmat is square with that size, and means / covars are the
        row-wise concatenation of the two models' means / covars
    """
    A_startprob = np.asarray(hmm1['startprob'], dtype=float)
    A_transmat = np.asarray(hmm1['transmat'], dtype=float)
    B_startprob = np.asarray(hmm2['startprob'], dtype=float)
    B_transmat = np.asarray(hmm2['transmat'], dtype=float)

    # Number of hmm1 states kept as they are (everything but its exit).
    n_A = len(A_startprob) - 1
    new_length = n_A + len(B_startprob)

    # Start either inside hmm1, or skip hmm1 entirely (its last startprob
    # entry) and start in hmm2 according to hmm2's start distribution.
    startprob = np.concatenate((A_startprob[:-1], A_startprob[-1] * B_startprob))

    transmat = np.zeros((new_length, new_length))
    # Transitions that stay inside hmm1.
    transmat[:n_A, :n_A] = A_transmat[:n_A, :n_A]
    # Leaving hmm1 (last column of its transmat) and entering hmm2.
    # The outer product handles every combination of model sizes; the
    # reshape-based special case used previously failed when hmm1 had a
    # single emitting state and hmm2 had more than one.
    transmat[:n_A, n_A:] = np.outer(A_transmat[:n_A, -1], B_startprob)
    # Transitions inside hmm2, including its own exit.
    transmat[n_A:, n_A:] = B_transmat

    means = np.concatenate((hmm1['means'], hmm2['means']))
    covars = np.concatenate((hmm1['covars'], hmm2['covars']))

    combinedHmms = {'name': hmm1['name'] + '_' + hmm2['name'],
                    'startprob': startprob,
                    'transmat': transmat,
                    'means': means,
                    'covars': covars}

    return combinedHmms

def concatHMMs(hmmmodels, namelist):
    """Concatenates, left to right, the models named in namelist.

    Args:
        hmmmodels: mapping from model name to HMM
        namelist: sequence of model names, in the order they are chained

    Returns:
        the model for namelist[0] if there is a single name, otherwise the
        result of folding concatTwoHMMs over the named models
    """
    combined = hmmmodels[namelist[0]]
    for name in namelist[1:]:
        combined = concatTwoHMMs(combined, hmmmodels[name])
    return combined

def log_multivariate_normal_density_diag(X, means, covars):
    """Compute Gaussian log-density at X for a diagonal model

    The squared distance is expanded into three terms so that the
    observation-dependent parts can be computed with matrix products.

    Args:
        X: array like, shape (n_observations, n_features)
        means: array like, shape (n_components, n_features)
        covars: array like, shape (n_components, n_features)

    Output:
        lpr: array like, shape (n_observations, n_components)
    From scikit-learn/sklearn/mixture/gmm.py
    """
    _, n_dim = X.shape
    # Terms that depend only on the component parameters.
    per_component = (n_dim * np.log(2 * np.pi) + np.sum(np.log(covars), 1)
                     + np.sum((means ** 2) / covars, 1))
    # Terms that couple every observation with every component.
    cross_term = 2 * np.dot(X, (means / covars).T)
    square_term = np.dot(X ** 2, (1.0 / covars).T)
    return -0.5 * (per_component - cross_term + square_term)
    
def viterbi(log_emlik, log_startprob, log_transmat, forceFinalState=True):
    """Viterbi path.

    Args:
        log_emlik: NxM array of emission log likelihoods, N frames, M states
        log_startprob: log probability to start in state i; the last entry
                  is dropped before use, so it has M+1 entries
        log_transmat: transition log probability from state i to j; the last
                  row and column are dropped before use, so it is (M+1)x(M+1)
        forceFinalState: if True, start backtracking from the final state in
                  the model, instead of the best state at the last time step

    Output:
        viterbi_loglik: maximum over states of the accumulated log likelihood
                  at the last time step (independent of forceFinalState)
        viterbi_path: deque with the state index chosen for each frame
    """
    N, M = log_emlik.shape

    # initialization
    viterbi_loglik = np.zeros((N, M))
    memo_path = np.zeros((N, M), dtype=int)
    viterbi_loglik[0, :] = log_startprob[:-1] + log_emlik[0, :]

    # incoming[j, i] is the log probability of moving from state i to state j
    incoming = log_transmat[:-1, :-1].T

    # induction
    for t in range(1, N):
        # scores[j, i]: best log likelihood of being in i at t-1, then moving to j
        scores = viterbi_loglik[t-1, :] + incoming
        memo_path[t, :] = np.argmax(scores, 1)
        viterbi_loglik[t, :] = np.max(scores, 1) + log_emlik[t, :]

    # backtracking
    if forceFinalState:
        lastState = M-1
    else:
        # The best final state is the one with the highest accumulated log
        # likelihood, not the one with the largest backpointer value.
        lastState = int(np.argmax(viterbi_loglik[-1, :]))

    viterbi_path = deque([lastState])
    for t in range(N-1, 0, -1):
        # memo_path[t, s] is the predecessor of state s at time t
        viterbi_path.appendleft(memo_path[t, viterbi_path[0]])

    return max(viterbi_loglik[-1, :]), viterbi_path

### Data handling functions

In [15]:
# Mount Google Drive at /content/drive. The google.colab module is only
# importable inside a Colab runtime, and mounting asks for authorization.
from google.colab import drive
drive.mount('/content/drive')

Go to this URL in a browser: https://accounts.google.com/o/oauth2/auth?client_id=947318989803-6bn6qk8qdgf4n4g3pfee6491hc0brc4i.apps.googleusercontent.com&redirect_uri=urn%3aietf%3awg%3aoauth%3a2.0%3aoob&response_type=code&scope=email%20https%3a%2f%2fwww.googleapis.com%2fauth%2fdocs.test%20https%3a%2f%2fwww.googleapis.com%2fauth%2fdrive%20https%3a%2f%2fwww.googleapis.com%2fauth%2fdrive.photos.readonly%20https%3a%2f%2fwww.googleapis.com%2fauth%2fpeopleapi.readonly

Enter your authorization code:
··········
Mounted at /content/drive


In [0]:
def read_data(filename='lab2_data.npz'):
    """Loads the first entry of the 'data' array stored in an .npz archive.

    Args:
        filename: path of the archive to read (defaults to 'lab2_data.npz')

    Returns:
        element 0 of the array saved under the key 'data'
    """
    # NOTE: allow_pickle=True unpickles Python objects from the archive, which
    # can execute arbitrary code; only load files from a trusted source.
    with np.load(filename, allow_pickle=True) as archive:
        return archive['data'][0]

In [14]:
# data = read_data()
# print(data)

{'filename': '/home/giampi/corpora/tidigits/disc_4.2.1/tidigits/test/man/bm/oa.wav', 'samplingrate': 20000, 'gender': 'man', 'speaker': 'bm', 'digit': 'o', 'repetition': 'a', 'samples': array([-2, -1, -3, ..., 10,  8,  5], dtype=int16), 'lmfcc': array([[  64.7657709 ,  -65.84854216,   96.90766069, ...,  -19.82632821,
          11.63249931,  159.50790488],
       [  57.72402791,  -79.93732698,  105.80893583, ...,  -11.84501965,
         109.29701349,  138.79176124],
       [  62.16944867, -121.81014369,   61.89899215, ...,   13.28252915,
        -127.7140298 , -128.75563442],
       ...,
       [ 190.29491187,   97.34488369,  175.61357289, ...,  -62.63035172,
         138.96640354,   82.95734165],
       [ 201.37123403,   80.09496653,   50.98133557, ...,  219.16711476,
          94.41131525, -100.10312668],
       [ 194.17067167,   70.85205455,   40.456538  , ...,   -3.81053312,
         143.83615904,  -42.56212766]])}


# Code Implementation

## 4. Data preparation for DNN training

### 4.1. Target class definition

In [0]:
def tr(filename) :
    """Builds the sorted list of state names for the phone HMMs in an archive.

    Args:
        filename: path of an .npz archive whose 'phoneHMMs' entry holds a
                  dictionary mapping phone name to a model with a 'means' array

    Returns:
        list of strings '<phone>_<index>', phones in sorted order and, for
        each phone, one entry per row of its 'means' array
    """
    # The models are stored as a pickled Python dictionary, which np.load
    # refuses to read unless allow_pickle=True is passed.
    # NOTE: unpickling can execute arbitrary code; only load trusted files.
    with np.load(filename, allow_pickle=True) as archive:
        hmms = archive['phoneHMMs'].item()
    phones = sorted(hmms.keys())
    nstates = {phone: hmms[phone]['means'].shape[0] for phone in phones}
    states = [ph + '_' + str(idx) for ph in phones for idx in range(nstates[ph])]
    return states

In [0]:
states = tr('lab2_models_all.npz')
# np.save expects the destination first and the array second.
# NOTE(review): np.save appends '.npy' to names that do not already end with
# it, so this writes 'stateList.npz.npy' -- confirm the intended file name.
np.save("stateList.npz", states)

### 4.2. Forced Alignment

In [0]:
def words2phones(wordList, pronDict, addSilence=True, addShortPause=True):
    """ word2phones: converts word level to phone level transcription adding silence

    Args:
       wordList: list of word symbols
       pronDict: pronunciation dictionary. The keys correspond to words in wordList
       addSilence: if True, add initial and final silence
       addShortPause: if True, add short pause model "sp" at end of each word
    Output:
       list of phone symbols
    """
    sil = ['sil'] if addSilence else []
    sp = ['sp'] if addShortPause else []

    # Start from a copy: extending a list that is the very same object as
    # `sil` would also grow `sil`, and the final extend would then append
    # the whole transcription to itself.
    res = list(sil)
    for w in wordList:
        res.extend(pronDict[w])
        res.extend(sp)
    res.extend(sil)
    return res

In [0]:
def forcedAlignment(lmfcc, phoneHMMs, phoneTrans):
    """ forcedAlignment: aligns a phonetic transcription at the state level

    The phone models listed in phoneTrans are concatenated into a single
    utterance model, and the Viterbi path of lmfcc through that model is
    mapped back to per-phone state names.

    Args:
       lmfcc: NxD array of MFCC feature vectors (N vectors of dimension D)
              computed the same way as for the training of phoneHMMs
       phoneHMMs: set of phonetic Gaussian HMM models
       phoneTrans: list of phonetic symbols to be aligned including initial and
                   final silence

    Returns:
       list of strings in the form phoneme_index specifying, for each time step
       the state from phoneHMMs corresponding to the viterbi path.
    """
    utteranceHMM = concatHMMs(phoneHMMs, phoneTrans)

    # Number of states of every phone model = number of rows of its means.
    states_per_phone = {
        name: phoneHMMs[name]['means'].shape[0]
        for name in sorted(phoneHMMs.keys())
    }
    # Name of every state of the utterance model, in model order.
    state_labels = []
    for phone in phoneTrans:
        for stateid in range(states_per_phone[phone]):
            state_labels.append(phone + '_' + str(stateid))

    obs_loglik = log_multivariate_normal_density_diag(
        lmfcc, utteranceHMM['means'], utteranceHMM['covars'])
    start_log = np.log(utteranceHMM['startprob'])
    trans_log = np.log(utteranceHMM['transmat'])

    best_path = viterbi(obs_loglik, start_log, trans_log)[1]

    return [state_labels[state] for state in best_path]

### 4.3. Features Extraction

### 4.4. Training and Validation sets

### 4.5. Acoustic Context (Dynamic Features)

### 4.6. Feature Standardisation

## 5. Phoneme Recognition with Deep Neural Networks

In [0]:
# model building: hyper-parameters shared by the model-building and training cells
batch_size = 256  # passed as batch_size to model.fit
max_epochs  = 10  # passed as epochs to model.fit
depth = 2  # passed to build_model as depth
units = 256  # passed to build_model as units (width of the Dense layers)
classes = 12 # TODO: change depending on output (size of the softmax layer)

def build_model(input_shape, units, depth, classes):
    """Builds a fully connected classifier as a keras Sequential model.

    The network is one ReLU input layer, followed by `depth` further ReLU
    hidden layers and a softmax output layer.

    Args:
        input_shape: shape of one input example, passed to the first layer
        units: number of units in every hidden layer
        depth: number of hidden layers added after the input layer
        classes: number of units of the softmax output layer

    Returns:
        the (uncompiled) keras.Sequential model
    """
    model = keras.Sequential()
    model.add(keras.layers.Dense(units, input_shape=input_shape, activation='relu'))
    for _ in range(depth):
        # Sequential.add returns None, so each layer is added exactly once
        # (nesting add calls would try to add None as a layer). Only the
        # first layer needs an input shape; later ones infer theirs.
        model.add(keras.layers.Dense(units, activation='relu'))
    model.add(keras.layers.Dense(classes, activation='softmax'))
    return model

# NOTE(review): `input_shape` is not defined anywhere in the visible part of
# the notebook, so this line raises NameError until it is set.
model = build_model(input_shape, units, depth, classes ) # TODO: add input shape

In [0]:
# compiling + training
# Uses the sparse variants of loss and metrics.
# NOTE(review): these presumably expect integer class indices as targets,
# not one-hot vectors -- confirm against how y_train / y_valid are built.
model.compile(
    optimizer='adam', 
    loss='sparse_categorical_crossentropy',
    metrics =['sparse_categorical_accuracy',
              'sparse_categorical_crossentropy']              
            )

# TODO: get training + validation data
# NOTE(review): x_train, y_train, x_valid and y_valid are not defined in the
# visible part of the notebook; this call fails until they are provided.
model.fit(x_train, y_train, batch_size=batch_size, epochs=max_epochs, validation_data=(x_valid, y_valid))

### 5.1. Detailed Evaluation

### 5.2. Possible questions

# Code Demonstration and Answers to questions

In [0]:
# prondict 
prondict = {} 
prondict['o'] = ['ow']
prondict['z'] = ['z', 'iy', 'r', 'ow']
prondict['1'] = ['w', 'ah', 'n']
prondict['2'] = ['t', 'uw']
prondict['3'] = ['th', 'r', 'iy']
prondict['4'] = ['f', 'ao', 'r']
prondict['5'] = ['f', 'ay', 'v']
prondict['6'] = ['s', 'ih', 'k', 's']
prondict['7'] = ['s', 'eh', 'v', 'ah', 'n']
prondict['8'] = ['ey', 't']
prondict['9'] = ['n', 'ay', 'n']

In [10]:
# Show HMMs
# Load the dictionary of phone models stored under the key 'phoneHMMs'.
# allow_pickle=True is required to read the pickled dictionary, and .item()
# unwraps it from the array returned by np.load.
# NOTE: unpickling can execute arbitrary code; only load trusted files.
phoneHMMs = np.load('lab2_models_all.npz', allow_pickle=True)['phoneHMMs'].item()
# print(phoneHMMs)

{'name': 'sp', 'startprob': array([0.1216644, 0.8783357]), 'transmat': array([[0.8656793, 0.1343206],
       [0.       , 1.       ]]), 'means': array([[  38.44177  , -129.2183   ,   54.57983  ,   15.29446  ,
          35.78878  ,   -6.647676 ,  -11.7913   ,  -25.44149  ,
         -20.31962  ,  -27.77863  ,   -5.970622 ,    0.9313969,
          22.0168   ]]), 'covars': array([[ 1672.14 ,  2563.211,  2475.374,  3410.646,  4425.932,  5530.672,
         7249.499,  9209.617, 10143.23 ,  9683.029,  8902.944,  8219.629,
         7589.251]])}


In [4]:
# show example for lab3
# `example` is first bound to the object returned by np.load and then
# re-bound to the dictionary stored under the key 'example'.
# NOTE: allow_pickle=True can execute arbitrary code; only load trusted files.
example = np.load('lab3_example.npz', allow_pickle=True)
example = example['example'].item()
# print(example)

{'filename': 'tidigits/disc_4.1.1/tidigits/train/man/nw/z43a.wav', 'samples': array([ 1,  1,  1, ..., -3, -2, -2], dtype=int16), 'gender': 'man', 'speaker': 'nw', 'digits': 'z43', 'repetition': 'a', 'lmfcc': array([[ -22.91392389, -189.9582419 ,    7.40105799, ..., -112.7883752 ,
          41.12045179,  -12.04285405],
       [  -6.81710936, -187.14843532,  -24.39690243, ...,  -94.4723144 ,
        -198.74795293, -127.98350389],
       [ -11.92109946, -204.41816753,  -53.65419198, ...,  -95.38523624,
         -14.04657624,  -66.81715806],
       ...,
       [ 154.81267871,   41.12510125,  -11.03644377, ..., -251.4617779 ,
         -86.72418937,  159.01415406],
       [ 163.1202934 ,   15.30942337,  -15.99373122, ...,  -38.20996944,
         -78.60225893,  233.63308676],
       [ 143.95962157,    4.19763652,  -38.44629773, ...,  -57.65568221,
          23.2052515 ,  225.60552906]]), 'wordTrans': ['z', '4', '3'], 'phoneTrans': ['sil', 'z', 'iy', 'r', 'ow', 'sp', 'f', 'ao', 'r', 'sp', 'th'