In [3]:
import json
import csv
import os

In [24]:
#load list of phones with number values as dictionary

phone_dict = 'phone-dict.csv'

# Build the phoneme -> number table used by phone_to_num.
# newline='' is how the csv module asks for its input files to be opened, so
# that line endings are handled by the csv parser rather than by open().
with open(phone_dict, mode='r', newline='') as inp:
    reader = csv.reader(inp)
    # Column 0 is the phoneme label, column 1 its number (kept as a string).
    # Rows with fewer than two columns (e.g. a trailing blank line, which the
    # reader yields as an empty list) are skipped instead of raising IndexError.
    # The header row is not skipped, so it shows up in the table as the
    # ordinary entry 'Phone' -> 'Index'.
    lookup_dict = {row[0]: row[1] for row in reader if len(row) >= 2}

print(lookup_dict)


{'Phone': 'Index', 'aa': '1', 'ae': '2', 'ah': '3', 'ao': '4', 'aw': '5', 'ay': '6', 'b': '7', 'ch': '8', 'd': '9', 'dh': '10', 'eh': '11', 'er': '12', 'ey': '13', 'f': '14', 'g': '15', 'hh': '16', 'ih': '17', 'iy': '18', 'jh': '19', 'k': '20', 'l': '21', 'm': '22', 'n': '23', 'ng': '24', 'ow': '25', 'oy': '26', 'p': '27', 'r': '28', 's': '29', 'sh': '30', 't': '31', 'th': '32', 'uh': '33', 'uw': '34', 'v': '35', 'w': '36', 'y': '37', 'z': '38', 'zh': '39'}


In [20]:
#function to find phones in a json file and convert them to the number in the dictionary

def phone_to_num(file, lookup=None):
    """Convert the boundary phonemes of the first word in a gentle JSON file to numbers.

    Only ``data['words'][0]`` is inspected. Interior phonemes (tagged "_I") are
    ignored; every other phoneme has its position tag ("_B", "_E", ...) stripped
    and is replaced by its number from the lookup table. A label missing from
    the table is kept as-is.

    The placeholder ``['0', '0']`` is produced when:
      * the word's 'case' contains 'not-found-in-audio',
      * any phoneme of the word is tagged "_S",
      * the 'words' list is empty.

    Args:
        file: Path to a JSON file with a top-level 'words' list.
        lookup: Optional mapping of phoneme label -> number string. Defaults to
            the notebook-level ``lookup_dict``.

    Returns:
        The list of number strings. The same list is also stored on
        ``phone_to_num.phones`` for callers that read the result from there.

    Raises:
        KeyError: if 'words', 'case' or (for a found word) 'phones' is missing.
    """
    # Fall back to the table loaded from the phone dictionary CSV.
    table = lookup_dict if lookup is None else lookup

    # Parse first so the file handle is released before any processing.
    with open(file) as read_file:
        data = json.load(read_file)

    words = data['words']

    # For words that were not successfully given phoneme data by gentle, we give
    # an arbitrary value of ['0', '0'] for the first and last phonemes. This
    # avoids errors and means the word is still accessible in the Max patch.
    # A file with no words at all is treated the same way.
    if not words or 'not-found-in-audio' in words[0]['case']:
        phone_to_num.phones = ['0', '0']
        return phone_to_num.phones

    phones = []
    for entry in words[0]['phones']:
        label = entry.get('phone')

        # "_S" is treated as another gentle failure along the lines of
        # 'not-found-in-audio'. Stop right away so that later phonemes are not
        # appended after the placeholder.
        # NOTE(review): in Kaldi-style position tags "_S" may instead mark a
        # single-phoneme word rather than an error -- TODO confirm.
        if "_S" in label:
            phones = ['0', '0']
            break

        # Currently only the first and last phoneme are kept; interior ones,
        # tagged "_I", are ignored.
        if "_I" in label:
            continue

        # Remove the "_B" or "_E" tag before comparing to the dictionary.
        base = label.split('_', 1)[0]

        # Look the phoneme up in the dictionary, replace with its number value.
        phones.append(" ".join(table.get(tok, tok) for tok in base.split()))

    phone_to_num.phones = phones
    return phones
            

# Try the converter on a single file; the converted phones are read back from
# the attribute the function stores them on.
output = phone_to_num(
    '/Users/iandouglas-moore/church-text/phone-jsons/ian-1-1/https-i-1.json'
)
print(phone_to_num.phones)

['0', '0']


In [6]:
# This function, borrowed from https://thispointer.com/python-three-ways-to-check-if-a-file-is-empty/ ,
# tests if the phone database file is empty so the make_phone_coll function knows where to write data.

def is_file_empty(file_path):
    """Return True only when ``file_path`` exists and has a size of zero bytes.

    A path that does not exist yields False.
    """
    if not os.path.exists(file_path):
        return False
    return os.path.getsize(file_path) == 0
    

In [7]:
# To test the function / file before running the whole thing.

# Report whether the phone database file currently holds any data.
file_path = '/Users/iandouglas-moore/church-text/new-phone/phone-lists/phone-coll-new.txt'
is_empty = is_file_empty(file_path)

print('File is empty' if is_empty else 'File is not empty')

File is not empty


In [7]:
# Opens a directory of json files with phone info, gets a list of that info for each file,
# and writes the list to a text file in a way that my Max patch can read.
# Output file must exist as a .txt file, but it can be empty.

def make_phone_coll(phones_json_dir, phone_list):
    """Append one line of phoneme data per JSON file to the phone database file.

    Each non-hidden file in ``phones_json_dir`` (taken in sorted order) is run
    through ``phone_to_num`` and written to ``phone_list`` in this format:

        [index number], 0 [first phone] [last phone];

    The '0' is there because the Max patch needs a time value for the data;
    '0' indicates the start of the file.

    Args:
        phones_json_dir: Directory of JSON files with phoneme information. A
            trailing path separator is optional.
        phone_list: Path of the output text file. It must already exist, but it
            can be empty. Existing rows are kept and new rows are added after
            them.

    Raises:
        FileNotFoundError: if ``phone_list`` does not exist (it is opened 'r+').
        ValueError: if the index of the last existing row is not an integer.
    """
    # Sort directory alphabetically and ignore hidden files (names starting
    # with "."), such as the directory structure file.
    json_names = sorted(f for f in os.listdir(phones_json_dir) if not f.startswith("."))

    fieldnames = ['index', 'phones']

    # newline='' leaves line endings to the csv module, as its docs require.
    with open(phone_list, 'r+', newline='') as file:
        raw_lines = file.readlines()
        existing_rows = list(
            csv.DictReader(raw_lines, delimiter=",", quotechar='"', fieldnames=fieldnames)
        )
        csvwriter = csv.DictWriter(file, delimiter=",", quotechar='"', fieldnames=fieldnames)

        # Max patch needs an index (starting from 1) so that each line refers to
        # a specific buffer. With no existing rows the first index is 1;
        # otherwise it is the last index value + 1. Deciding from the parsed
        # rows (rather than the file size) also covers a file that contains
        # nothing but blank lines.
        first_index = int(existing_rows[-1]['index']) + 1 if existing_rows else 1

        # Make sure writing starts at the very end of the file.
        file.seek(0, os.SEEK_END)

        # If the last existing line has no line ending, finish it first so the
        # first new row is not glued onto it. '\r\n' is the csv writer's
        # default line terminator.
        if raw_lines and not raw_lines[-1].endswith(('\r', '\n')):
            file.write('\r\n')

        for linenum, name in enumerate(json_names, start=first_index):
            # os.path.join works with or without a trailing separator on the
            # directory argument.
            phone_to_num(os.path.join(phones_json_dir, name))

            # phone_to_num stores its result on the function attribute `phones`.
            first_phone = phone_to_num.phones[0]
            last_phone = phone_to_num.phones[1]

            csvwriter.writerow({
                'index': linenum,
                'phones': ' 0 {} {};'.format(first_phone, last_phone),
            })


In [16]:
# call make_phone_coll
# 1st arg -- directory of json files giving phoneme information
# 2nd arg -- file to put compiled phoneme info into. file must already exist as .txt, but can be empty

# output = make_phone_coll('/Users/iandouglas-moore/church-text/phone-jsons/aaron-1-1_16bit/', '/Users/iandouglas-moore/church-text/new-phone/phone-lists/phone-coll-new.txt')

# output = make_phone_coll('/Users/iandouglas-moore/church-text/phone-jsons/ian-1-1/', '/Users/iandouglas-moore/church-text/new-phone/phone-lists/phone-coll-new.txt')

