In this notebook you can find functions to:
1) Get the phonological LPC form of a word or a whole sentence

2) Get the number of LPC gestures of a word or a whole sentence

3) Get the LPC code for a word per gesture

You can use these functions by running the following code at the beginning of other notebooks:

import io
import nbformat

def execute_notebook(nbfile):
    """Run every code cell of the notebook stored at `nbfile` in the current IPython shell.

    The file is read as UTF-8 and parsed as notebook format version 4.
    Cells whose type is not 'code' are skipped.
    """
    with io.open(nbfile, encoding="utf8") as handle:
        notebook = nbformat.read(handle, as_version=4)

    shell = get_ipython()

    code_cells = (cell for cell in notebook.cells if cell.cell_type == 'code')
    for code_cell in code_cells:
        shell.run_cell(code_cell.source)
        
        
execute_notebook(r"C:\Users\hagar\OneDrive - mail.tau.ac.il\Desktop\Stage\LPC_2022\basic lpc functions\LPC_Usefull_Functions.ipynb")


General functions:

In [2]:
def get_number_of_words(sentence):
    """Return the number of words in `sentence`.

    Hyphens are treated as word separators, so 'jean-pierre' counts as two words.

    Parameters:
        sentence (str): the text to count words in.

    Returns:
        int: number of whitespace/hyphen separated words; 0 for an empty
        or whitespace-only sentence.
    """
    # split() without an argument collapses runs of whitespace and ignores
    # leading/trailing whitespace, so no empty "words" are counted.
    return len(sentence.replace('-', ' ').split())


In [3]:
def get_number_of_letters(sentence):
    """Count the characters of `sentence` that are not a comma, a period or a space.

    The sentence is lower-cased before counting; every other character
    (including hyphens and apostrophes) is counted.
    """
    ignored = (',', '.', ' ')
    return sum(1 for char in sentence.lower() if char not in ignored)
    

# Get the phonological LPC form of a word or a whole sentence

In [13]:
# Opening the Lexique database and building the per-word lookup tables

import numpy as np
import pandas as pd


# NOTE(review): absolute path to the author's machine — the notebook cannot run
# elsewhere until this points to a local copy of the CSV.
lex = pd.read_csv(r"C:\Users\hagar\OneDrive - mail.tau.ac.il\Desktop\Stage\LPC_2022\basic lpc functions\Lexique380.utf8.csv")
# Keep only rows whose spelling contains neither a hyphen nor a space and whose
# phonological form does not contain '°'.
lex = lex[(lex.ortho.str.contains('-| ') == False) & (lex.phon.str.contains('°') == False)]  # suppress schwa
# One row per spelling: when a spelling appears several times, the first row wins.
lex = lex.drop_duplicates(subset='ortho', keep="first")
lex = lex[['ortho','phon', 'p_cvcv','nbhomogr','cv-cv','syll']]
# dic maps each remaining column name to a {spelling: value} dictionary.
dic = lex.set_index('ortho').to_dict()

# Per-column lookup tables, all keyed by spelling.
p_dic = dic['p_cvcv']
n_dic = dic['nbhomogr']
cv_dic = dic['cv-cv']
p_cv_dic = dic['syll']
phon_dic = dic['phon']

# List of the '-'-separated parts of each word's 'cv-cv' pattern.
lex['cv-div'] = lex['cv-cv'].apply(lambda x: x.split('-'))

In [14]:

# Creating the possible configurations for 1 spoken syllable

# Every syllable pattern of every word, flattened into a single list.
flat_list = [item for sublist in lex['cv-div'] for item in sublist]
flat_set = set(flat_list)
# Sorted so the table has the same row order on every run: iterating a set of
# strings gives an order that can change from one interpreter session to the next.
flatset_l = sorted(flat_set)

# One row per distinct spoken-syllable pattern.
lpc_syl_config = pd.DataFrame(flatset_l, columns=['spoken_config'])


#lpc_syl_config.to_csv('lpc_syl_config.csv')


In [15]:
# Number of LPC gestures for each spoken-syllable configuration

dev_syl = pd.read_csv(r"C:\Users\hagar\OneDrive - mail.tau.ac.il\Desktop\Stage\LPC_2022\basic lpc functions\lpc_syl_configurations.csv")
# An LPC configuration is a '-'-separated string; lpc_n is its number of parts.
dev_syl['lpc_n'] = dev_syl['LPC_config'].apply(lambda config: len(config.split('-')))

# Lookup tables keyed by spoken-syllable configuration.
dic2 = dev_syl.set_index('spoken_config').to_dict()
g_cv_dic, syl_dic = dic2['LPC_config'], dic2['lpc_n']



In [3]:
def get_LPC_cv(word):
    """Return the LPC consonant/vowel pattern of `word`.

    Each '-'-separated part of the word's entry in `cv_dic` is replaced by
    its LPC configuration from `g_cv_dic`, and the results are joined with '-'.
    A word that is not in `cv_dic` is returned unchanged.
    """
    if word not in cv_dic:
        return word

    spoken_parts = cv_dic[word].split('-')
    return '-'.join(g_cv_dic[part] for part in spoken_parts)

In [4]:
def get_LPC_p(word):
    """Return the phonological form of `word` cut into LPC units separated by '-'.

    When the LPC pattern of the word equals its entry in `cv_dic`, the
    syllabified form stored in `p_cv_dic` is returned as is. Otherwise the
    phonological form from `phon_dic` is sliced into consecutive chunks whose
    lengths match the parts of the LPC pattern.

    Raises KeyError when `word` is missing from the lookup tables.
    """
    lpc_pattern = get_LPC_cv(word)
    phonemes = phon_dic[word]
    if lpc_pattern == cv_dic[word]:
        return p_cv_dic[word]

    chunks = []
    start = 0
    for unit in lpc_pattern.split('-'):
        stop = start + len(unit)
        chunks.append(phonemes[start:stop])
        start = stop
    return '-'.join(chunks)


In [5]:
def get_LPC_p_sentence(sentence):
    """Return the LPC phonological form of every word of `sentence`.

    The sentence is lower-cased, commas and periods are removed and hyphens
    are treated as word separators.

    Parameters:
        sentence (str): the sentence to convert.

    Returns:
        str: the LPC form of each known word, each followed by three spaces.
        When some words have an `n_dic` value other than 1, their
        phonological forms are appended after ',check the phon for: '.
        When some words are missing from `cv_dic`, they are listed after
        ',except '.
    """
    cleaned = sentence.lower()
    for mark in (',', '.'):
        cleaned = cleaned.replace(mark, '')

    # split() without an argument drops the empty tokens produced by doubled,
    # leading or trailing spaces; they used to be reported as unknown words.
    words = cleaned.replace('-', ' ').split()

    new_sentence = ''
    no_in_db = []
    more_ortho = {}

    for word in words:
        if word in cv_dic:
            new_sentence += get_LPC_p(word) + '   '
            if n_dic[word] != 1:
                more_ortho[word] = phon_dic[word]
        else:
            no_in_db.append(word)

    if no_in_db:
        return new_sentence + f',except {no_in_db}, check the phon for: {more_ortho}'
    if more_ortho:
        # more_ortho holds phonological forms, so the hint says "phon" here too.
        return new_sentence + f',check the phon for: {more_ortho}'
    return new_sentence

# Get the number of LPC gestures of a word or a whole sentence

In [6]:
def get_LPC_w(word):
    """Return the number of LPC gestures needed for `word`.

    The count is the sum of the `syl_dic` values of the '-'-separated parts
    of the word's entry in `cv_dic`. A word that is not in `cv_dic` is
    returned unchanged (so the result is then the word itself, not a number).
    """
    if word not in cv_dic:
        return word

    return sum(syl_dic[part] for part in cv_dic[word].split('-'))

In [7]:
def get_LPC_g(sentence):
    """Return the number of LPC gestures needed for `sentence`, as a string.

    The sentence is lower-cased, commas and periods are removed and hyphens
    are treated as word separators. Gesture counts come from get_LPC_w().

    Parameters:
        sentence (str): the sentence to analyse.

    Returns:
        str: the total gesture count of the known words. When some words have
        an `n_dic` value other than 1, their individual counts are appended
        after ', check the cv for: '. When some words are missing from
        `cv_dic`, they are listed after ', except '.
    """
    cleaned = sentence.lower()
    for mark in (',', '.'):
        cleaned = cleaned.replace(mark, '')

    # split() without an argument drops the empty tokens produced by doubled,
    # leading or trailing spaces; they used to be reported as unknown words.
    words = cleaned.replace('-', ' ').split()

    LPC_gustures = 0
    no_in_db = []
    more_ortho = {}

    for word in words:
        if word in cv_dic:
            word_gestures = get_LPC_w(word)
            LPC_gustures += word_gestures
            if n_dic[word] != 1:
                more_ortho[word] = word_gestures
        else:
            no_in_db.append(word)

    # Emptiness is tested by truthiness: more_ortho is a dict, so comparing it
    # with [] was always False and the plain-count result could never be returned.
    if no_in_db:
        return f'{LPC_gustures}, except {no_in_db}, check the cv for: {more_ortho}'
    if more_ortho:
        return f'{LPC_gustures}, check the cv for: {more_ortho}'
    return f'{LPC_gustures}'


In [8]:
def get_LPC_g1(sentence):
    """Return the number of LPC gestures for `sentence`, counted from `p_dic` patterns.

    For each known word the count is the number of 'C' characters in its
    `p_dic` pattern, plus one when the pattern starts with 'V'. The sentence
    is lower-cased, commas and periods are removed and hyphens are treated as
    word separators.

    Parameters:
        sentence (str): the sentence to analyse.

    Returns:
        str: the total gesture count of the known words. When some words have
        an `n_dic` value other than 1, their patterns are appended after
        ', check the cv for: '. When some words are missing from `p_dic`,
        they are listed after ', except '.
    """
    cleaned = sentence.lower()
    # Commas are stripped as well as periods, like in get_LPC_g, so that a
    # word followed by a comma is still found in the lexicon.
    for mark in (',', '.'):
        cleaned = cleaned.replace(mark, '')

    # split() without an argument drops the empty tokens produced by doubled,
    # leading or trailing spaces; they used to be reported as unknown words.
    words = cleaned.replace('-', ' ').split()

    LPC_gustures = 0
    no_in_db = []
    more_ortho = {}

    for word in words:
        if word not in p_dic:
            no_in_db.append(word)
            continue

        pattern = p_dic[word]
        # startswith() is safe on an empty pattern, unlike pattern[0].
        if pattern.startswith('V'):
            LPC_gustures += 1
        LPC_gustures += pattern.count('C')
        if n_dic[word] != 1:
            more_ortho[word] = pattern

    # Emptiness is tested by truthiness: more_ortho is a dict, so comparing it
    # with [] was always False and the plain-count result could never be returned.
    if no_in_db:
        return f'{LPC_gustures}, except {no_in_db}, check the cv for: {more_ortho}'
    if more_ortho:
        return f'{LPC_gustures}, check the cv for: {more_ortho}'
    return f'{LPC_gustures}'


# Get the LPC code for word per gesture

In [30]:
# LPC coding tables, based on the article:
# https://hal.archives-ouvertes.fr/hal-00266052/document
# Keys are single phoneme symbols, values are one-character codes.

# Vowel symbol -> position code.
position = {
    'a': '0', 'o': '0', '9': '0',
    '5': '1', '2': '1',
    'i': '2', '§': '2', '@': '2',
    'E': '3', 'u': '3', 'O': '3',
    '1': '4', 'y': '4', 'e': '4',
}

# Consonant symbol -> configuration code.
configuration = {
    'p': '0', 'd': '0', 'Z': '0',
    'k': '1', 'v': '1', 'z': '1',
    's': '2', 'R': '2',
    'b': '3', 'n': '3', '8': '3',
    't': '4', 'm': '4', 'f': '4',
    'l': '5', 'S': '5', 'N': '5', 'w': '5',
    'g': '6',
    'j': '7', 'G': '7',
}

# Consonant symbol -> poa code ('*' for 'j', '8' and 'w').
# NOTE(review): "poa" presumably stands for place of articulation — confirm against the article.
poa = {
    'p': '0', 'b': '0', 'm': '0',
    'f': '1', 'v': '1',
    't': '2', 'd': '2', 's': '2', 'z': '2', 'n': '2', 'N': '2', 'G': '2',
    'S': '3', 'Z': '3',
    'k': '4', 'g': '4', 'R': '4', 'l': '4',
    'j': '*', '8': '*', 'w': '*',
}

# Vowel symbol -> rounding code.
rounding = {
    'O': '0', 'y': '0', 'o': '0', '2': '0', 'u': '0',
    'a': '1', '5': '1', 'i': '1', '1': '1', 'e': '1', 'E': '1',
    '@': '2', '§': '2', '9': '2',
}


def lpc_code(word):
    """Return the LPC code of `word`, one '-'-separated group per LPC unit.

    The word is first cut into units with get_LPC_p(). Each phoneme is coded
    with two characters: configuration + poa when it is a key of
    `configuration`, position + rounding otherwise. A unit made of a single
    phoneme is padded: '0/' is appended after a `configuration` phoneme, and
    '5/' is put in front of any other phoneme.

    Raises KeyError for a phoneme that is missing from the coding tables.
    """
    def encode(phoneme):
        # Two-character code of one phoneme.
        if phoneme in configuration:
            return configuration[phoneme] + poa[phoneme]
        return position[phoneme] + rounding[phoneme]

    coded_units = []
    for unit in get_LPC_p(word).split("-"):
        if len(unit) != 1:
            coded_units.append(''.join(encode(phoneme) for phoneme in unit))
        elif unit in configuration:
            coded_units.append(encode(unit) + '0/')
        else:
            coded_units.append('5/' + encode(unit))
    return '-'.join(coded_units)


In [37]:
print(get_LPC_p('table'))
lpc_code('table')



ta-b-l


'4201-300/-540/'

# Mapping all possible combinations (not sure it's useful, but it's nice to have)


# Table of every (rounding, poa, position, shape) index combination.
# The value lists get their own names: calling them `rounding`, `poa` and
# `position` replaced the coding dictionaries that lpc_code() reads, so
# lpc_code() stopped working once this cell had been run.
rounding_values = [0,1,2]
poa_values = [0,1,2,3,4]
position_values = [0,1,2,3,4]
shape = [0,1,2,3,4,5,6,7]

mat = [
    [rounding_value, poa_value, position_value, shape_value]
    for rounding_value in rounding_values
    for poa_value in poa_values
    for position_value in position_values
    for shape_value in shape
]

mat = np.array(mat)
mat = pd.DataFrame(mat)
#mat.to_csv('cv_combinations.csv')
