# The Code for the Phonological Complexity in Andic Languages

In [57]:
import csv
import json

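This cell counts the dictionary entries per language: it tallies the rows whose `id_meaning` is 1, and splits Botlikh (`botl1242`) by source dictionary.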

In [58]:
# Tally, per language, the rows of the dictionary whose id_meaning is '1'.
words_for_lang = {}

with open('andic_dicts.csv', encoding='utf8') as csv_database:
    database = csv.DictReader(csv_database, delimiter=',')

    for row in database:
        # Rows with any other id_meaning are not counted.
        if row['id_meaning'] != '1':
            continue

        lang = row['glottocode']
        # Botlikh is keyed by glottocode plus reference, so each source
        # gets its own total.
        if lang == 'botl1242':
            lang = lang + ' ' + row['reference']

        words_for_lang[lang] = words_for_lang.get(lang, 0) + 1

words_for_lang

{'akhv1239': 8006,
 'andi1255': 6144,
 'bagv1239': 7893,
 'botl1242 Saidova, Abusov 2012': 6815,
 'botl1242 Alekseev 2006': 8453,
 'cham1309': 7085,
 'ghod1238': 5711,
 'kara1474': 5165,
 'tind1238': 7837,
 'toki1238': 218}

## 1. Preprocessing the data

In [59]:
# Glottocodes, in the order in which the cells below step through the CSV.
languages = [
    'akhv1239',
    'andi1255',
    'bagv1239',
    'botl1242',
    'cham1309',
    'ghod1238',
    'kara1474',
    'tind1238',
]

# Same sequence, with 'botl1242' replaced by the two labels used for the
# per-source Botlikh output files ('bot6...' and 'bot12...').
languages2 = languages[:3] + ['bot6', 'bot12'] + languages[4:]

In [60]:
# Inventory of consonant symbols recognised by the analysis.  Any token that
# is not in this list is treated as a vowel ('V') when contexts are built.
# Each symbol appears exactly once ('kʷ' used to be listed twice).
consonants = ['b', 'bː', 'bʷ',
              'p', 'pː', "p'",
              'd', 'dː', 'dʷ', 'dːʷ',
              # NOTE(review): 'tsːʷ' sits in the t-row although it is spelled
              # like an affricate; 'tːʷ' may have been intended -- confirm.
              't', 'tː', "t'", "t'ː", 'tʷ', "t'ʷ", 'tsːʷ', 'tʲ',
              'ɡ', 'ɡʷ', 'ɡʲ',
              'k', "k'", 'kː', "k'ː", 'kʷ', "k'ʷ", 'kːʷ', "k'ːʷ", 'kʲ', "kʲ'", 'kʲː',
              'ɢ','ɢʷ', 
              'q', "q'", 'qː', "q'ː", 'qʷ', "q'ʷ", 'qːʷ', "q'ːʷ",
              'ʔ', 'ʔʷ', 
              'dz',
              'ts', 'tsː', "ts'", "tsː'", "ts'ː", 'tsʷ', "ts'ʷ", "ts'ːʷ",
              'dʒ', 'dʒʷ', 'tʃ', 'tʃː', "tʃ'", "tʃː'", "tʃ'ː", 'tʃʷ', "tʃ'ʷ", 'tʃːʷ', "tʃ'ːʷ",
              'tɬ', "tɬ'", 'tɬː', "tɬː'", "tɬ'ː", 'tɬʷ', "tɬ'ʷ", 'tɬːʷ', "tɬ'ːʷ",
              'qχ', "qχ'", 'qχː', "qχː'", 'qχʷ', "qχ'ʷ",
              'f', 
              'z', 'zː', 'zʷ',
              's', 'sː', "s'", "s'ː", 'sʷ', "s'ʷ", 'sːʷ', "s'ːʷ",
              'ʒ', 'ʒʷ',
              'ʃ', 'ʃː', "ʃ'ː", 'ʃʷ', 'ʃːʷ', "ʃ'ːʷ",
              'ɬ', 'ɬː', 'ɬʷ', 'ɬːʷ', 'ɬʲ',
              'x', 'xː', 'xʷ', 'xːʷ', 'xʲ', 
              'ʁ', 'ʁʷ',
              'χ', 'χː', 'χʷ', 'χːʷ',
              'ʕ', 'ʕʷ',
              'ħ', 'ħː', 'ħʷ',
              'h', 'hʷ',
              'm', 'mː',
              'w',
              'n', 'nː', 'nʷ', 'nʲ',
              'r', 'rʷ', 'rʲ', 
              'l', 'lː', 'lʷ', 'lʲ',
              'j']

# Inventory of vowel symbols (plain, long, nasalised, and variants written
# with a leading 'ˌ' or a trailing apostrophe).
vowels = ['a', 'aː','ˌa', 'ˌaː', "a'", 'ã', 'ãː',
          'i', 'ˌi', 'iː', 'ĩ', 'ĩː',
          'e', 'ˌe', 'eː', 'ẽ', 'ẽː',
          'o', 'ˌo', 'oː', 'õ', 'õː',
          'u', 'uː', 'ˌu', "u'", 'ũ', 'ũː']


Preprocessing for the level of phonetics

In [61]:
def _tally_phonemes(ipa, counts):
    """Add every phoneme token of one IPA transcription to ``counts``.

    ipa    -- transcription string; segments are separated by '-' and a
              segment may contain several tokens separated by spaces.
    counts -- dict mapping token -> number of occurrences; updated in place.
    """
    for segment in ipa.split('-'):
        # Brackets and the 'ˌ' mark are removed before counting.
        for mark in ('(', ')', 'ˌ'):
            segment = segment.replace(mark, '')
        for token in segment.split(' '):
            # A leading apostrophe is dropped (presumably a stress mark --
            # TODO confirm); a trailing apostrophe stays part of the token.
            if token.startswith("'"):
                token = token[1:]
            # Empty tokens (double/trailing spaces, a bare apostrophe) are
            # skipped: they used to raise IndexError or be counted as ''.
            if token:
                counts[token] = counts.get(token, 0) + 1


def _dump_counts(counts, path):
    """Write one frequency dictionary to ``path`` as readable UTF-8 JSON."""
    with open(path, 'w', encoding='utf-8') as f:
        json.dump(counts, f, ensure_ascii=False, indent=2)


with open('andic_dicts.csv', encoding='utf8') as csv_database:
    database = csv.DictReader(csv_database, delimiter=',')

    i = 0           # index into `languages` of the language being collected
    phon_dict = {}  # counts for the current non-Botlikh language
    b6 = {}         # Botlikh counts for every reference other than Saidova, Abusov 2012
    b12 = {}        # Botlikh counts for 'Saidova, Abusov 2012'

    for row in database:
        # Rows with a non-empty 'bor' field are left out of the counts.
        if row['bor']:
            continue

        code = row['glottocode']

        # The rows are expected to be grouped by language in the order of
        # `languages`: a different glottocode means the previous language is
        # complete and its counts can be written out.
        if code != languages[i]:
            if languages[i] != 'botl1242':
                _dump_counts(phon_dict, languages[i] + 'phonemes.json')
                phon_dict = {}
            else:
                _dump_counts(b12, 'bot12phonemes.json')
                _dump_counts(b6, 'bot6phonemes.json')
            i += 1

        # 'toki1238' is not analysed; reaching it ends the pass (and is what
        # triggers the write of the last language above).
        if code == 'toki1238':
            break

        if code == 'botl1242':
            target = b12 if row['reference'] == 'Saidova, Abusov 2012' else b6
        else:
            target = phon_dict
        _tally_phonemes(row['ipa'], target)

Preprocessing for the level of phonology

List of consonants by place of articulation

In [62]:
# Consonants grouped by place of articulation.  `places` and `places_id` are
# parallel: places_id[k] is the label of places[k].
# NOTE(review): 'f' is in `consonants` but in none of these groups, so it is
# never assigned a place -- confirm whether that is intended.
bilabial = ['b', 'bː', 'bʷ',
            'p', 'pː', "p'",
            'm', 'mː',
            'w',]

dental = ['d', 'dː', 'dʷ', 'dːʷ',
          't', 'tː', "t'", "t'ː", 'tʷ', "t'ʷ", 'tsːʷ', 'tʲ',
          'dz',
          'ts', 'tsː', "ts'", "tsː'", "ts'ː", 'tsʷ', "ts'ʷ", "ts'ːʷ",
          'z', 'zː', 'zʷ',
          's', 'sː', "s'", "s'ː", 'sʷ', "s'ʷ", 'sːʷ', "s'ːʷ",
          'n', 'nː', 'nʷ', 'nʲ']

# The labialised tʃ-series ('tʃʷ', "tʃ'ʷ", 'tʃːʷ', "tʃ'ːʷ") is listed in
# `consonants`; it was missing here, so those sounds had no place at all.
alveolar = ['dʒ', 'dʒʷ',
            'tʃ', 'tʃː', "tʃ'", "tʃː'", "tʃ'ː", 'tʃʷ', "tʃ'ʷ", 'tʃːʷ', "tʃ'ːʷ",
            'ʒ', 'ʒʷ',
            'ʃ', 'ʃː', "ʃ'ː", 'ʃʷ', 'ʃːʷ', "ʃ'ːʷ",
            'r', 'rʷ', 'rʲ']

lateral = ['tɬ', "tɬ'", 'tɬː', "tɬː'", "tɬ'ː", 'tɬʷ', "tɬ'ʷ", 'tɬːʷ', "tɬ'ːʷ",
           'ɬ', 'ɬː', 'ɬʷ', 'ɬːʷ', 'ɬʲ',
           'l', 'lː', 'lʷ', 'lʲ']

palatal = ['j']

# 'kʷ' used to appear twice in this list; it is now listed once.
velar = ['ɡ', 'ɡʷ', 'ɡʲ',
         'k', "k'", 'kː', "k'ː", 'kʷ', "k'ʷ", 'kːʷ', "k'ːʷ", 'kʲ', "kʲ'", 'kʲː',
         'x', 'xː', 'xʷ', 'xːʷ', 'xʲ']

uvular = ['ɢ','ɢʷ',
          'q', "q'", 'qː', "q'ː", 'qʷ', "q'ʷ", 'qːʷ', "q'ːʷ",
          'qχ', "qχ'", 'qχː', "qχː'", 'qχʷ', "qχ'ʷ",
          'ʁ', 'ʁʷ',
          'χ', 'χː', 'χʷ', 'χːʷ']

pharyngeal = ['ʕ', 'ʕʷ',
              'ħ', 'ħː', 'ħʷ']

laryngeal = ['ʔ', 'ʔʷ',
             'h', 'hʷ']

places = [bilabial, dental, alveolar, lateral, palatal, velar, uvular, pharyngeal, laryngeal]
places_id = {0: 'bilabial', 1: 'dental', 2: 'alveolar', 3: 'lateral', 4: 'palatal', 5: 'velar',
             6: 'uvular', 7: 'pharyngeal', 8: 'laryngeal'}

Lists of consonants by manner of articulation

In [63]:
# Consonants grouped by manner of articulation.  `manners` and `manners_id`
# are parallel: manners_id[k] is the label of manners[k].  A symbol is
# classified by the first group that contains it, so every symbol should
# appear in exactly one group ('kʷ' used to be listed twice under stops).
stops = ['b', 'bː', 'bʷ',
         'p', 'pː', "p'",
         'd', 'dː', 'dʷ', 'dːʷ',
         # NOTE(review): this row used to contain 'tsːʷ'; it has been moved to
         # `affricates`.  If 'tːʷ' was the intended symbol, add it here.
         't', 'tː', "t'", "t'ː", 'tʷ', "t'ʷ", 'tʲ',
         'ɡ', 'ɡʷ', 'ɡʲ',
         'k', "k'", 'kː', "k'ː", 'kʷ', "k'ʷ", 'kːʷ', "k'ːʷ", 'kʲ', "kʲ'", 'kʲː',
         'ɢ','ɢʷ', 
         'q', "q'", 'qː', "q'ː", 'qʷ', "q'ʷ", 'qːʷ', "q'ːʷ",
         'ʔ', 'ʔʷ']

# 'tsːʷ' belongs with the rest of the ts-series; it was filed under stops.
affricates = ['dz',
              'ts', 'tsː', "ts'", "tsː'", "ts'ː", 'tsʷ', "ts'ʷ", 'tsːʷ', "ts'ːʷ",
              'dʒ', 'dʒʷ',
              'tʃ', 'tʃː', "tʃ'", "tʃː'", "tʃ'ː", 'tʃʷ', "tʃ'ʷ", 'tʃːʷ', "tʃ'ːʷ",
              'tɬ', "tɬ'", 'tɬː', "tɬː'", "tɬ'ː", 'tɬʷ', "tɬ'ʷ", 'tɬːʷ', "tɬ'ːʷ",
              'qχ', "qχ'", 'qχː', "qχː'", 'qχʷ', "qχ'ʷ"]

fricative = ['f', 
             'z', 'zː', 'zʷ',
             's', 'sː', "s'", "s'ː", 'sʷ', "s'ʷ", 'sːʷ', "s'ːʷ",
             'ʒ', 'ʒʷ',
             'ʃ', 'ʃː', "ʃ'ː", 'ʃʷ', 'ʃːʷ', "ʃ'ːʷ",
             'ɬ', 'ɬː', 'ɬʷ', 'ɬːʷ', 'ɬʲ',
             'x', 'xː', 'xʷ', 'xːʷ', 'xʲ', 
             'ʁ', 'ʁʷ',
             'χ', 'χː', 'χʷ', 'χːʷ',
             'ʕ', 'ʕʷ',
             'ħ', 'ħː', 'ħʷ',
             'h', 'hʷ']

sonorant = ['m', 'mː',
            'w',
            'n', 'nː', 'nʷ', 'nʲ',
            'r', 'rʷ', 'rʲ', 
            'l', 'lː', 'lʷ', 'lʲ',
            'j']

manners = [stops, affricates, fricative, sonorant]
manners_id = {0: 'stops', 1: 'affricates', 2: 'fricative', 3: 'sonorant'}

This cell writes, for each language, a JSON file listing every position (context such as `#_V` or `V_C`) in which the consonants of each manner of articulation occur.

In [74]:
def _ipa_tokens(segment):
    """Return the bare phoneme tokens of one '-'-delimited IPA segment.

    Brackets and the 'ˌ' mark are removed, the segment is split on spaces,
    a leading apostrophe is stripped from each token (presumably a stress
    mark -- TODO confirm) and empty tokens are dropped.
    """
    for mark in ('(', ')', 'ˌ'):
        segment = segment.replace(mark, '')
    tokens = []
    for token in segment.split(' '):
        if token.startswith("'"):
            token = token[1:]
        if token:
            tokens.append(token)
    return tokens


def _cv(tokens, index):
    """Return 'C' if tokens[index] is in `consonants`, else 'V'.

    An empty token list also yields 'V', which is what the comparison against
    the raw text produced for an empty segment.
    """
    return 'C' if tokens and tokens[index] in consonants else 'V'


def _class_of(token, classes, class_names):
    """Return the label of the first class containing ``token``, or None."""
    for index, members in enumerate(classes):
        if token in members:
            return class_names[index]
    return None


def _positions(ipa, classes, class_names):
    """Yield (class label, context) for each classified token of ``ipa``.

    A context is '<left>_<right>' where each side is 'C', 'V' or '#'.
    Neighbours are normalised with `_ipa_tokens` before being classified, so
    a bracketed or stress-marked consonant next to the target counts as 'C';
    the left neighbour is the LAST token of the previous segment and the
    right neighbour the FIRST token of the next one.
    """
    segments = [_ipa_tokens(segment) for segment in ipa.split('-')]
    last = len(segments) - 1
    for j, tokens in enumerate(segments):
        for u, token in enumerate(tokens):
            label = _class_of(token, classes, class_names)
            if label is None:
                continue
            if len(tokens) > 1:
                # NOTE(review): a segment holding several space-separated
                # tokens is treated as a sequence of its own, with '#' at both
                # ends, exactly as before -- confirm this matches the data.
                left = '#' if u == 0 else _cv(tokens, u - 1)
                right = '#' if u == len(tokens) - 1 else _cv(tokens, u + 1)
            else:
                left = '#' if j == 0 else _cv(segments[j - 1], -1)
                right = '#' if j == last else _cv(segments[j + 1], 0)
            yield label, left + '_' + right


def _record(store, label, context):
    """Append ``context`` to store[label] unless it is already listed."""
    seen = store.setdefault(label, [])
    if context not in seen:
        seen.append(context)


with open ('andic_dicts.csv', encoding='utf8') as csv_database:
    database = csv.DictReader(csv_database, delimiter=',')

    i = 0            # index into `languages` of the language being collected
    place_dict = {}  # manner label -> list of distinct contexts

    for row in database:
        # Rows with a non-empty 'bor' field are left out.
        if row['bor']:
            continue

        # A different glottocode means the previous language is complete.
        if row['glottocode'] != languages[i]:
            with open(languages[i] + 'places by manners.json', 'w', encoding='utf-8') as f:
                json.dump(place_dict, f, ensure_ascii=False, indent=2)
            i += 1
            place_dict = {}

        # 'toki1238' is not analysed; reaching it ends the pass.
        if row['glottocode'] == 'toki1238':
            break

        for label, context in _positions(row['ipa'], manners, manners_id):
            _record(place_dict, label, context)

### The code below is only for Botlikh

In [73]:
def _ipa_tokens(segment):
    """Return the bare phoneme tokens of one '-'-delimited IPA segment.

    Brackets and the 'ˌ' mark are removed, the segment is split on spaces,
    a leading apostrophe is stripped from each token (presumably a stress
    mark -- TODO confirm) and empty tokens are dropped.
    """
    for mark in ('(', ')', 'ˌ'):
        segment = segment.replace(mark, '')
    tokens = []
    for token in segment.split(' '):
        if token.startswith("'"):
            token = token[1:]
        if token:
            tokens.append(token)
    return tokens


def _cv(tokens, index):
    """Return 'C' if tokens[index] is in `consonants`, else 'V'.

    An empty token list also yields 'V'.
    """
    return 'C' if tokens and tokens[index] in consonants else 'V'


def _class_of(token, classes, class_names):
    """Return the label of the first class containing ``token``, or None."""
    for index, members in enumerate(classes):
        if token in members:
            return class_names[index]
    return None


def _positions(ipa, classes, class_names):
    """Yield (class label, context) for each classified token of ``ipa``.

    A context is '<left>_<right>' where each side is 'C', 'V' or '#'.
    Neighbours are normalised with `_ipa_tokens` before being classified;
    the left neighbour is the LAST token of the previous segment and the
    right neighbour the FIRST token of the next one.
    """
    segments = [_ipa_tokens(segment) for segment in ipa.split('-')]
    last = len(segments) - 1
    for j, tokens in enumerate(segments):
        for u, token in enumerate(tokens):
            label = _class_of(token, classes, class_names)
            if label is None:
                continue
            if len(tokens) > 1:
                # NOTE(review): a segment holding several space-separated
                # tokens is treated as a sequence of its own, with '#' at both
                # ends, exactly as before -- confirm this matches the data.
                left = '#' if u == 0 else _cv(tokens, u - 1)
                right = '#' if u == len(tokens) - 1 else _cv(tokens, u + 1)
            else:
                left = '#' if j == 0 else _cv(segments[j - 1], -1)
                right = '#' if j == last else _cv(segments[j + 1], 0)
            yield label, left + '_' + right


def _record(store, label, context):
    """Append ``context`` to store[label] unless it is already listed."""
    seen = store.setdefault(label, [])
    if context not in seen:
        seen.append(context)


with open ('andic_dicts.csv', encoding='utf8') as csv_database:
    database = csv.DictReader(csv_database, delimiter=',')

    b6 = {}   # contexts from every Botlikh reference other than Saidova, Abusov 2012
    b12 = {}  # contexts from 'Saidova, Abusov 2012'

    for row in database:
        # Only Botlikh rows with an empty 'bor' field are analysed.
        if row['bor'] or row['glottocode'] != 'botl1242':
            continue

        # Each source accumulates into its own dictionary.  The nested
        # if/else that did this used to be mis-indented in two of its three
        # copies: a repeated context reset the b12 list, a new b12 key was
        # never created, and b6 lists were overwritten on every hit.
        store = b12 if row['reference'] == 'Saidova, Abusov 2012' else b6
        for label, context in _positions(row['ipa'], manners, manners_id):
            _record(store, label, context)

    with open('bot6places by manners.json', 'w', encoding='utf-8') as f:
        json.dump(b6, f, ensure_ascii=False, indent=2)
    with open('bot12places by manners.json', 'w', encoding='utf-8') as f:
        json.dump(b12, f, ensure_ascii=False, indent=2)

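Help: a separate pass over the entries from Saidova, Abusov 2012 only, meant to rebuild `bot12places by manners.json`.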

In [78]:
def _ipa_tokens(segment):
    """Return the bare phoneme tokens of one '-'-delimited IPA segment.

    Brackets and the 'ˌ' mark are removed, the segment is split on spaces,
    a leading apostrophe is stripped from each token (presumably a stress
    mark -- TODO confirm) and empty tokens are dropped.
    """
    for mark in ('(', ')', 'ˌ'):
        segment = segment.replace(mark, '')
    tokens = []
    for token in segment.split(' '):
        if token.startswith("'"):
            token = token[1:]
        if token:
            tokens.append(token)
    return tokens


def _cv(tokens, index):
    """Return 'C' if tokens[index] is in `consonants`, else 'V'.

    An empty token list also yields 'V'.
    """
    return 'C' if tokens and tokens[index] in consonants else 'V'


def _class_of(token, classes, class_names):
    """Return the label of the first class containing ``token``, or None."""
    for index, members in enumerate(classes):
        if token in members:
            return class_names[index]
    return None


def _positions(ipa, classes, class_names):
    """Yield (class label, context) for each classified token of ``ipa``.

    A context is '<left>_<right>' where each side is 'C', 'V' or '#'.
    Neighbours are normalised with `_ipa_tokens` before being classified;
    the left neighbour is the LAST token of the previous segment and the
    right neighbour the FIRST token of the next one.
    """
    segments = [_ipa_tokens(segment) for segment in ipa.split('-')]
    last = len(segments) - 1
    for j, tokens in enumerate(segments):
        for u, token in enumerate(tokens):
            label = _class_of(token, classes, class_names)
            if label is None:
                continue
            if len(tokens) > 1:
                # NOTE(review): a segment holding several space-separated
                # tokens is treated as a sequence of its own, with '#' at both
                # ends, exactly as before -- confirm this matches the data.
                left = '#' if u == 0 else _cv(tokens, u - 1)
                right = '#' if u == len(tokens) - 1 else _cv(tokens, u + 1)
            else:
                left = '#' if j == 0 else _cv(segments[j - 1], -1)
                right = '#' if j == last else _cv(segments[j + 1], 0)
            yield label, left + '_' + right


def _record(store, label, context):
    """Append ``context`` to store[label] unless it is already listed."""
    seen = store.setdefault(label, [])
    if context not in seen:
        seen.append(context)


with open ('andic_dicts.csv', encoding='utf8') as csv_database:
    database = csv.DictReader(csv_database, delimiter=',')

    place_dict = {}  # manner label -> list of distinct contexts

    for row in database:
        # Only rows with an empty 'bor' field that come from this one
        # reference are analysed.
        if row['bor'] or row['reference'] != 'Saidova, Abusov 2012':
            continue
        for label, context in _positions(row['ipa'], manners, manners_id):
            _record(place_dict, label, context)

    # Write the dictionary built above.  The cell used to dump `b12`, a
    # variable left over from another cell, so its own result was discarded.
    with open('bot12places by manners.json', 'w', encoding='utf-8') as f:
        json.dump(place_dict, f, ensure_ascii=False, indent=2)

This piece of code writes, for each language, a file listing all the positions (contexts) in which consonants occur, with the consonants grouped by their place of articulation

In [68]:
def write_place_contexts(csv_path='andic_dicts.csv',
                         skipped=('botl1242',),
                         stop_at='toki1238'):
    """Write one '<glottocode>places by places.json' file per language.

    Every row of ``csv_path`` with an empty 'bor' field is processed, except
    rows whose glottocode is in ``skipped`` (botl1242 is handled by its own
    cell).  Reading stops at the first row whose glottocode is ``stop_at``.

    The row's 'ipa' field is split on '-'.  Each segment that is found in one
    of the ``places`` entries gets a context string '<before>_<after>', where
    each side is 'C' (neighbour is in ``consonants``), 'V' (it is not) or '#'
    (there is no neighbour).  The distinct contexts are collected under the
    matching ``places_id`` label and dumped whenever the glottocode changes.

    Differences from the previous inline version of this cell:
      * the language switch is driven by the rows themselves instead of an
        index into ``languages``; the old index advanced only one step per
        row, so after the skipped botl1242 rows the first following row was
        written to a spurious 'botl1242places by places.json' and was missing
        from its own language's file;
      * the last language is also written when no ``stop_at`` row follows;
      * a leading apostrophe is stripped from a multi-character segment
        without spaces, as was already done for space-separated tokens.

    NOTE(review): assumes the rows of one language are contiguous in the CSV;
    a language that appears in two separate runs would overwrite its file.
    """

    def token_class(token):
        """Return 'C' if token is listed in consonants, else 'V'."""
        return 'C' if token in consonants else 'V'

    def segment_class(segment):
        """Classify a neighbouring '-'-delimited segment as 'C' or 'V'.

        A longer segment is judged by its first space-separated token, after
        dropping a first token that is exactly an apostrophe.
        """
        if len(segment) != 1:
            tokens = segment.split(' ')
            if tokens[0] == "'":
                tokens = tokens[1:]
            segment = tokens[0]
        return token_class(segment)

    def context(items, index, classify):
        """Return '<before>_<after>' for items[index]; '#' marks an edge."""
        before = '#' if index == 0 else classify(items[index - 1])
        after = '#' if index == len(items) - 1 else classify(items[index + 1])
        return before + '_' + after

    def place_of(sound):
        """Return the places_id label of the first places entry containing sound."""
        for y in range(len(places)):
            if places[y].count(sound) > 0:
                return places_id[y]
        return None

    def record(store, label, ctx):
        """Add ctx to store[label] unless it is already listed there."""
        seen = store.setdefault(label, [])
        if ctx not in seen:
            seen.append(ctx)

    def flush(lang, store):
        with open(lang + 'places by places.json', 'w', encoding='utf-8') as f:
            json.dump(store, f, ensure_ascii=False, indent=2)

    current_lang = None
    place_dict = {}

    with open(csv_path, encoding='utf8') as csv_database:
        for row in csv.DictReader(csv_database, delimiter=','):
            if row['bor']:
                continue
            lang = row['glottocode']
            if lang in skipped:
                continue

            if lang != current_lang:
                # A new language starts: save what was gathered for the old one.
                if current_lang is not None:
                    flush(current_lang, place_dict)
                current_lang, place_dict = lang, {}

            if lang == stop_at:
                # Nothing is collected for stop_at itself, so nothing is pending.
                current_lang = None
                break

            phonemes = row['ipa'].split('-')
            for j, raw in enumerate(phonemes):
                sound = raw.replace('(', '').replace(')', '').replace("ˌ", '')

                if len(sound) > 1 and ' ' in sound:
                    # Space-separated tokens: neighbours are taken from within
                    # this segment only, not from the adjacent segments.
                    tokens = sound.split(' ')
                    for u in range(len(tokens)):
                        if tokens[u][:1] == "'":
                            # Written back so that the next token sees the
                            # stripped form as its left neighbour.
                            tokens[u] = tokens[u][1:]
                        if not tokens[u]:
                            continue
                        label = place_of(tokens[u])
                        if label is not None:
                            record(place_dict, label,
                                   context(tokens, u, token_class))
                elif sound:
                    if len(sound) > 1 and sound[0] == "'":
                        sound = sound[1:]
                    label = place_of(sound)
                    if label is not None:
                        record(place_dict, label,
                               context(phonemes, j, segment_class))

    if current_lang is not None:
        flush(current_lang, place_dict)


write_place_contexts()

### This code is only for Botlikh

In [69]:
def collect_botlikh_place_contexts(csv_path='andic_dicts.csv'):
    """Return (b6, b12): place-of-articulation contexts for the two botl1242 sources.

    Only rows with glottocode 'botl1242' and an empty 'bor' field are used.
    Rows whose 'reference' is 'Saidova, Abusov 2012' feed ``b12``; every other
    botl1242 row feeds ``b6``.

    The row's 'ipa' field is split on '-'.  Each segment that is found in one
    of the ``places`` entries gets a context string '<before>_<after>', where
    each side is 'C' (neighbour is in ``consonants``), 'V' (it is not) or '#'
    (there is no neighbour).  Distinct contexts are collected under the
    matching ``places_id`` label.

    Differences from the previous inline version of this cell:
      * the source is read from the current row's 'reference' field; ``ref``
        was never assigned in the cell and came from leftover kernel state;
      * a context is recorded only for the place that actually contains the
        sound; the recording code used to sit outside that test, so it ran
        for non-matching places with a stale (or undefined) context, and the
        search stopped at the wrong moment;
      * the neighbour classification uses the same apostrophe rule in every
        branch.
    """

    def token_class(token):
        """Return 'C' if token is listed in consonants, else 'V'."""
        return 'C' if token in consonants else 'V'

    def segment_class(segment):
        """Classify a neighbouring '-'-delimited segment as 'C' or 'V'.

        A longer segment is judged by its first space-separated token, after
        dropping a first token that is exactly an apostrophe.
        """
        if len(segment) != 1:
            tokens = segment.split(' ')
            if tokens[0] == "'":
                tokens = tokens[1:]
            segment = tokens[0]
        return token_class(segment)

    def context(items, index, classify):
        """Return '<before>_<after>' for items[index]; '#' marks an edge."""
        before = '#' if index == 0 else classify(items[index - 1])
        after = '#' if index == len(items) - 1 else classify(items[index + 1])
        return before + '_' + after

    def place_of(sound):
        """Return the places_id label of the first places entry containing sound."""
        for y in range(len(places)):
            if places[y].count(sound) > 0:
                return places_id[y]
        return None

    def record(store, label, ctx):
        """Add ctx to store[label] unless it is already listed there."""
        seen = store.setdefault(label, [])
        if ctx not in seen:
            seen.append(ctx)

    b6 = {}
    b12 = {}

    with open(csv_path, encoding='utf8') as csv_database:
        for row in csv.DictReader(csv_database, delimiter=','):
            if row['bor'] or row['glottocode'] != 'botl1242':
                continue

            # Pick the dictionary that belongs to this row's source.
            store = b12 if row['reference'] == 'Saidova, Abusov 2012' else b6

            phonemes = row['ipa'].split('-')
            for j, raw in enumerate(phonemes):
                sound = raw.replace('(', '').replace(')', '').replace("ˌ", '')

                if len(sound) > 1 and ' ' in sound:
                    # Space-separated tokens: neighbours are taken from within
                    # this segment only, not from the adjacent segments.
                    tokens = sound.split(' ')
                    for u in range(len(tokens)):
                        if tokens[u][:1] == "'":
                            # Written back so that the next token sees the
                            # stripped form as its left neighbour.
                            tokens[u] = tokens[u][1:]
                        if not tokens[u]:
                            continue
                        label = place_of(tokens[u])
                        if label is not None:
                            record(store, label,
                                   context(tokens, u, token_class))
                elif sound:
                    if len(sound) > 1 and sound[0] == "'":
                        sound = sound[1:]
                    label = place_of(sound)
                    if label is not None:
                        record(store, label,
                               context(phonemes, j, segment_class))

    return b6, b12


b6, b12 = collect_botlikh_place_contexts()

with open('bot6places by places.json', 'w', encoding='utf-8') as f:
    json.dump(b6, f, ensure_ascii=False, indent=2)
with open('bot12places by places.json', 'w', encoding='utf-8') as f:
    json.dump(b12, f, ensure_ascii=False, indent=2)

## Some statistics of Phonetics

In [70]:
# Split the phonemes stored for every language into consonants and vowels
# and save the result as one JSON file.
all_phonemes = {}

for language in languages2:
    with open(language + 'phonemes.json', encoding='utf8') as f:
        inventory = json.load(f)

    # Only the keys (the phoneme strings) matter here; the stored values are
    # not used.  A sound listed in `consonants` is never counted as a vowel.
    found_consonants = [sound for sound in inventory if sound in consonants]
    found_vowels = [sound for sound in inventory
                    if sound not in consonants and sound in vowels]

    all_phonemes[language] = {'consonants': found_consonants,
                              'vowels': found_vowels}

with open('phonemes by language.json', 'w', encoding='utf8') as f:
    json.dump(all_phonemes, f, ensure_ascii=False, indent=2)

Here we count, for every phoneme, the number of languages in which it appears, so that we can look at the distribution

In [71]:
# For every phoneme, count in how many languages it occurs; consonants and
# vowels are tallied separately.
c_num = {}
v_num = {}

with open('phonemes by language.json', encoding='utf8') as f:
    dct = json.load(f)

for inventory in dct.values():
    for kind, sounds in inventory.items():
        if kind == 'consonants':
            tally = c_num
        elif kind == 'vowels':
            tally = v_num
        else:
            continue
        for s in sounds:
            tally[s] = tally.get(s, 0) + 1

# Order from most to least widespread.  The sort is stable, so phonemes with
# equal counts keep the order in which they were first seen.
c_num_sort = dict(sorted(c_num.items(), key=lambda item: item[1], reverse=True))
v_num_sort = dict(sorted(v_num.items(), key=lambda item: item[1], reverse=True))

print(c_num_sort)
print(v_num_sort)

{'b': 9, 'ʒ': 9, 'r': 9, 'χ': 9, 'ʃ': 9, 'w': 9, 'd': 9, 'ɬ': 9, 'l': 9, 'j': 9, 'ʁ': 9, 'z': 9, 'h': 9, 'ɡ': 9, 'n': 9, 'k': 9, 't': 9, 'sː': 9, "q'": 9, 'χʷ': 9, "k'": 9, 'ɬː': 9, 'ʔ': 9, 'χː': 9, 'm': 9, 'tʃ': 9, "tʃ'": 9, "t'": 9, "k'ʷ": 9, 'ʃː': 9, 'ʕ': 9, 'q': 9, 'ts': 9, 'ʁʷ': 9, 'χːʷ': 9, 'ɡʷ': 9, "ts'": 9, 'tʃː': 9, 's': 9, 'kʷ': 9, 'p': 9, 'tɬ': 8, 'ħ': 8, "tɬ'": 8, 'qʷ': 8, 'tʃʷ': 8, "q'ʷ": 8, 'kː': 8, "tʃ'ʷ": 8, 'dʒ': 8, 'zʷ': 8, 'x': 8, "k'ː": 7, "p'": 7, 'tsː': 7, "k'ːʷ": 7, "t'ʷ": 7, "tʃ'ː": 7, "tɬ'ʷ": 7, 'ʒʷ': 6, 'dʷ': 6, "ts'ː": 6, 'sːʷ': 6, 'tʷ': 6, 'hʷ': 6, 'ʃːʷ': 6, 'xʷ': 6, "ts'ːʷ": 5, 'tɬʷ': 5, 'dː': 5, 'sʷ': 5, 'tsːʷ': 4, 'lː': 4, 'ħʷ': 4, "tʃ'ːʷ": 4, "ts'ʷ": 4, 'kːʷ': 4, 'tsʷ': 4, 'ʃʷ': 3, 'xː': 3, 'ɬːʷ': 3, 'ʔʷ': 3, 'mː': 3, 'bː': 3, "kʲ'": 3, 'nː': 3, 'kʲ': 3, "q'ː": 2, 'tɬː': 2, 'xːʷ': 2, 'ɬʷ': 2, 'tː': 2, 'pː': 2, "s'ː": 2, "s'": 2, "t'ː": 2, 'rʷ': 2, 'nʷ': 2, 'lʷ': 2, 'zː': 2, 'lʲ': 2, 'ɡʲ': 2, 'xʲ': 2, 'kʲː': 2, 'tʃːʷ': 2, 'bʷ': 2, 'qː': 1, "tɬ'ː": 1, "tɬ'

Here we count the number of phonemes in each language to figure out which languages are the most complex at the phonetic level.

In [72]:
# Print the size of the consonant and vowel inventory of every language.
with open('phonemes by language.json', encoding='utf8') as f:
    phon = json.load(f)

for language, inventory in phon.items():
    print(language,
          'consonants:', len(inventory['consonants']),
          'vowels', len(inventory['vowels']))

akhv1239 consonants: 85 vowels 18
andi1255 consonants: 67 vowels 5
bagv1239 consonants: 84 vowels 18
bot6 consonants: 66 vowels 13
bot12 consonants: 65 vowels 13
cham1309 consonants: 89 vowels 19
ghod1238 consonants: 66 vowels 14
kara1474 consonants: 79 vowels 17
tind1238 consonants: 89 vowels 20
