The first few cells set up Phonetisaurus; they are adapted from the build instructions in the README of its GitHub repository.

In [None]:
%%capture
# System packages needed by the from-source builds in the following cells.
# NOTE: %%capture swallows all output of this cell, so a failed install is
# not visible here; remove it temporarily when debugging.
# Compiler and autotools toolchain
!apt-get -y install git g++ autoconf-archive make libtool
# Python bindings
# NOTE(review): these are the Python 2 package names; a Python 3 runtime
# presumably needs python3-setuptools / python3-dev instead — confirm.
!apt-get -y install python-setuptools python-dev
# mitlm (to build a quick play model)
!apt-get -y install gfortran

In [None]:
%%capture
# Download and unpack the OpenFst 1.6.2 sources into the current directory.
!wget http://www.openfst.org/twiki/pub/FST/FstDownload/openfst-1.6.2.tar.gz
!tar -xvzf openfst-1.6.2.tar.gz
# %cd (unlike !cd) changes the notebook's working directory, so the
# following commands run inside the source tree.
%cd openfst-1.6.2
# Minimal configure, compatible with current defaults for Kaldi
!./configure --enable-static --enable-shared --enable-far --enable-ngram-fsts
# Compile with 4 parallel jobs.
!make -j 4
# Now wait a while...
# Install the build results (presumably under /usr/local, which the
# LD_LIBRARY_PATH cell relies on — confirm).
!make install

In [17]:
import os

# Directories that have to be on the loader path so the freshly installed
# OpenFst shared libraries can be found.
fst_lib_dirs = ['/usr/local/lib', '/usr/local/lib/fst']

# LD_LIBRARY_PATH may be unset on a fresh runtime, so fall back to an empty
# value instead of raising KeyError. Empty components are dropped so the
# result never starts with or contains a stray ':'.
search_path = [p for p in os.environ.get('LD_LIBRARY_PATH', '').split(':') if p]

# Append only what is missing: re-running this cell must not keep growing
# the variable with duplicate entries.
search_path += [d for d in fst_lib_dirs if d not in search_path]
os.environ['LD_LIBRARY_PATH'] = ':'.join(search_path)

# Last expression of the cell: display the resulting value.
os.environ['LD_LIBRARY_PATH']

'/usr/local/nvidia/lib:/usr/local/nvidia/lib64:/usr/local/lib:/usr/local/lib/fst'

In [None]:
%%capture
%cd /content
!git clone https://github.com/AdolfVonKleist/Phonetisaurus.git
%cd Phonetisaurus
!./configure
!make
!make install

In [36]:
# Leave the Phonetisaurus source tree so the next clone lands in /content.
%cd /content/

/content


We also need MITLM, which is used below to train the n-gram model.

In [None]:
%%capture
# Fetch, build and install MITLM from source.
# The clone is relative to the current directory; %%capture hides all build
# output, including errors.
!git clone https://github.com/mitlm/mitlm
%cd mitlm
# Generate the configure script with autotools, then run the usual
# configure / make / make install sequence.
!autoreconf -i
!./configure
!make
!make install

In [38]:
# Back to /content: the following cells use file names relative to it.
%cd /content

/content


The TIMIT dictionary is relatively clean, so only a few small changes are needed for Phonetisaurus: dropping the comment lines that start with `;`, removing the `/` characters, and collapsing repeated spaces.

In [None]:
# Expects TIMITDIC.txt in the current directory.
# Pipeline: skip lines starting with ';', delete every '/', collapse runs of
# spaces into a single space, and write the result to TIMIT.clean.
!grep -v '^;' TIMITDIC.txt | tr -d '/' | sed -e 's/  */ /g' > TIMIT.clean

In [39]:
# Align the cleaned dictionary with EM and write the aligned corpus.
# NOTE(review): the Phonetisaurus README example feeds the aligner
# tab-separated word/pronunciation pairs; confirm that the space-separated
# TIMIT.clean is parsed as intended.
!phonetisaurus-align --input=TIMIT.clean --ofile=TIMIT.clean.corpus --seq1_del=false
# Train an n-gram model (5s-10s):
# -o 8 requests an order-8 model; the result is written to timit.arpa.
!estimate-ngram -o 8 -t TIMIT.clean.corpus -wl timit.arpa
# Convert to OpenFst format (10s-20s):
# timit.fst is the model file passed to phoneticize.py with -m.
!phonetisaurus-arpa2wfst --lm=timit.arpa --ofile=timit.fst

GitRevision: 0.9.1
Loading input file: TIMIT.clean
Starting EM...
Finished first iter...
Iteration: 1 Change: 7.52353
Iteration: 2 Change: 0.00586414
Iteration: 3 Change: 0.00986576
Iteration: 4 Change: 0.00996876
Iteration: 5 Change: 0.00853443
Iteration: 6 Change: 0.00628281
Iteration: 7 Change: 0.00513172
Iteration: 8 Change: 0.00343132
Iteration: 9 Change: 0.0027523
Iteration: 10 Change: 0.00179482
Iteration: 11 Change: 0.00112724
Last iteration: 
0.001	Loading corpus TIMIT.clean.corpus...
0.105	Smoothing[1] = ModKN
0.105	Smoothing[2] = ModKN
0.105	Smoothing[3] = ModKN
0.105	Smoothing[4] = ModKN
0.105	Smoothing[5] = ModKN
0.105	Smoothing[6] = ModKN
0.105	Smoothing[7] = ModKN
0.105	Smoothing[8] = ModKN
0.105	Set smoothing algorithms...
0.105	Y 8.181818e-01
0.105	Y 5.155185e-01
0.105	Y 6.079470e-01
0.105	Y 7.203451e-01
0.106	Y 8.125633e-01
0.107	Y 8.845738e-01
0.108	Y 9.304152e-01
0.109	Y 9.185127e-01
0.109	Estimating full n-gram model...
0.114	Saving LM to timit.arpa...
GitRevision:

In [45]:
# NOTE: phoneticize.py is created by the %%writefile cell further down; on a
# fresh runtime that cell has to be executed before this one.
# Decode the single word "test" with the trained model.
!python phoneticize.py -m timit.fst -w test

Traceback (most recent call last):
  File "phoneticize.py", line 2, in <module>
    import phonetisaurus
ModuleNotFoundError: No module named 'phonetisaurus'


In [43]:
%%writefile phoneticize.py
#!/usr/bin/env python
import phonetisaurus
try:
    from itertools import izip
except ImportError:
    # Python 3 removed itertools.izip; the built-in zip is already lazy.
    izip = zip

def Phoneticize(model, args):
    """Decode one token with a g2p model and print the hypotheses.

    Most basic possible example of using the g2p bindings; intended as a
    template for doing something more useful.

    For every result returned by the model this prints the path weight and
    the space-joined output symbols, a ``-------`` separator, and then one
    ``input:output:weight`` line per aligned label.

    The built-in ``zip`` is used instead of ``itertools.izip`` so the
    function runs on Python 3 as well as Python 2.

    Args:
        model (obj): An already loaded g2p model object. It must provide
            ``Phoneticize``, ``FindIsym`` and ``FindOsym``.
        args (obj): The argparse namespace with the user specified options.
            The attributes ``token``, ``nbest``, ``beam``, ``thresh``,
            ``write_fsts``, ``accumulate`` and ``pmass`` are read.

    Returns:
        None. All results are written to stdout.
    """
    results = model.Phoneticize(
        args.token,
        args.nbest,
        args.beam,
        args.thresh,
        args.write_fsts,
        args.accumulate,
        args.pmass,
    )

    for result in results:
        # Map the numeric output labels back to their symbol strings.
        uniques = [model.FindOsym(u) for u in result.Uniques]
        print("{0:0.2f}\t{1}".format(result.PathWeight, " ".join(uniques)))
        print("-------")

        # The three label sequences should always be of equal length.
        for ilab, olab, weight in zip(result.ILabels,
                                      result.OLabels,
                                      result.PathWeights):
            print("{0}:{1}:{2:0.2f}".format(
                model.FindIsym(ilab),
                model.FindOsym(olab),
                weight
            ))

    return


if __name__ == "__main__":
    # Command line entry point: decode either a single word (--word) or
    # every line of a word list file (--wlist) with the given model.
    import argparse, io, sys

    example = "{0} --model model.fst --word \"test\"".format(sys.argv[0])
    parser = argparse.ArgumentParser(description=example)
    parser.add_argument("--model", "-m", help="Phonetisaurus G2P model.",
                        required=True)
    # Exactly one of --word / --wlist has to be supplied.
    group = parser.add_mutually_exclusive_group(required=True)
    group.add_argument("--word", "-w", help="Input word in lower case.")
    group.add_argument("--wlist", "-wl", help="Provide a wordlist.")
    parser.add_argument("--nbest", "-n", help="NBest",
                        default=1, type=int)
    parser.add_argument("--beam", "-b", help="Search beam",
                        default=500, type=int)
    parser.add_argument("--thresh", "-t", help="NBest threshold.",
                        default=10., type=float)
    parser.add_argument("--write_fsts", "-wf", help="Write decoded fsts "
                        "to disk", default=False, action="store_true")
    parser.add_argument("--accumulate", "-a", help="Accumulate probs across "
                        "unique pronunciations.", default=False,
                        action="store_true")
    parser.add_argument("--pmass", "-p", help="Target probability mass.",
                        default=0.0, type=float)
    parser.add_argument("--verbose", "-v", help="Verbose mode.",
                        default=False, action="store_true")
    args = parser.parse_args()

    if args.verbose:
        # dict.iteritems() exists only on Python 2; items() works on both.
        for key, val in vars(args).items():
            print("{0}:  {1}".format(key, val))

    model = phonetisaurus.Phonetisaurus(args.model)

    if args.word:
        args.token = args.word
        Phoneticize(model, args)

    else:
        # io.open decodes the file as UTF-8 on Python 2 and 3 alike, which
        # replaces the Python 2 only `word.decode("utf8")` call.
        with io.open(args.wlist, "r", encoding="utf8") as ifp:
            for word in ifp:
                word = word.strip()
                if not word:
                    # Blank lines carry no token to decode.
                    continue
                args.token = word
                Phoneticize(model, args)
                print("-----------------------")
                print("")

Writing phoneticize.py
