# Phonological Distance in Kenyan Sign Language

This code extracts minimal pairs from a dataset of signs, each coded with 43 phonological feature variables.

**Block 1**: Import the "pandas" Python library in order to generate a dataframe ("df")

In [None]:
# block 1
import pandas as pd

**Block 2**: Open the dataset (CSV file of phonological coding) as a pandas dataframe and show the first 5 rows of the data (to visually confirm it is the correct data)

In [None]:
# block 2
# Read every column as text (dtype=str) so that codes such as "03" keep their
# leading zero instead of being parsed as numbers.
df = pd.read_csv('hope_data_43_variables_Sep06.csv', dtype=str)
# Number of rows (signs) in the dataset; the later blocks loop over range(num_sign).
num_sign = len(df)
# Keep this expression last: a notebook cell only displays the value of its
# final expression, so the preview was never shown while an assignment followed it.
df.head()

**Block 3**: Specify optional parameters in the phonological coding:

- Select for comparison: either variables related to **whole handshape** or **handshape features** (5 features per handshape)

- Select the Orientation variables (Variable #20, #21), or disregard them

- Apply one of the two conditionalities for dynamic handshapes (Variable #8, #32)

In [None]:
# block 3
# Rule applied to variable 8 (index 7) in block 6: "8a" or "8b".
handshape_movement = "8a"
# False drops indices 19 and 20 from the comparison in block 6.
orientation = True
# True drops indices 9-18 from the comparison in block 6; False drops indices 6, 7 and 8 instead.
whole_handshape = True

In [None]:
# Mount Google Drive in the Colab runtime at /content/drive.
# NOTE(review): block 2 opens the CSV by a bare file name rather than a
# /content/drive path - confirm whether this mount is still needed and, if the
# data lives on Drive, whether this cell has to run before block 2.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


**Block 4**

- This block creates a duplicate list of "feature strings" (the phonological values) for each sign (aka "gloss"), and this duplicate list will form the output. The original imported data remains in a separate dataframe ("df").

- A "feature string" is a nested list of pairs, each consisting of the index of a variable (the index is created as a byproduct of the scripting and counts from 0, so Variable #7 has index 6) and its value (e.g., the value "03" in Variable #7 refers to a fist handshape).

- The elements of these strings in this duplicate list will be changed after applying the conditionalities.

In [None]:
# block 4(a)
# feature string for gloss1
# Builds one [gloss, [[variable_index, value], ...]] entry per row of df.
# `data` stays defined here because block 4b reads its rows from it.
data = pd.DataFrame(df)
sign_list_gloss1 = []
for row in range(num_sign):
    # Second column holds the gloss; the coded values start at the third column.
    gloss = data.iloc[row, 1]
    values = data.iloc[row, 2:].values.tolist()
    # variable_index counts from 0 over the coded columns.
    indexed_values = [[position, value] for position, value in enumerate(values)]
    sign_list_gloss1.append([gloss, indexed_values])

In [None]:
# block 4b
# feature string for gloss2
# Builds one [gloss, [[variable_index, value], ...]] entry per row of df.
# Rows are read from data2, the frame created in this block, so the block no
# longer depends on `data` having been defined by an earlier cell.
data2 = pd.DataFrame(df)
sign_list_gloss2 = []
for row in range(num_sign):
    # Second column holds the gloss; the coded values start at the third column.
    sign_name = data2.iloc[row, 1]
    sign_string = data2.iloc[row, 2:].values.tolist()
    # variable_index counts from 0 over the coded columns.
    sign_string2 = [[idx, feature] for idx, feature in enumerate(sign_string)]
    sign_list_gloss2.append([sign_name, sign_string2])

In [None]:
# block 5a
# feature string base for gloss1
# Same [gloss, [[variable_index, value], ...]] layout as block 4; block 6 reads
# its rule conditions from this list.
data = pd.DataFrame(df)
sign_list_base_gloss1 = []
for row_number in range(num_sign):
    row_gloss = data.iloc[row_number, 1]
    row_values = data.iloc[row_number, 2:].values.tolist()
    numbered = [[number, value] for number, value in enumerate(row_values)]
    sign_list_base_gloss1.append([row_gloss, numbered])

In [None]:
# block 5b
# feature string base for gloss2
# Same [gloss, [[variable_index, value], ...]] layout as block 4; block 6 reads
# its rule conditions for the second sign of each pair from this list.
data = pd.DataFrame(df)
sign_list_base_gloss2 = []
for row_number in range(num_sign):
    row_gloss = data.iloc[row_number, 1]
    row_values = data.iloc[row_number, 2:].values.tolist()
    numbered = [[number, value] for number, value in enumerate(row_values)]
    sign_list_base_gloss2.append([row_gloss, numbered])
# Dump the full list for a visual check of the structure.
print(sign_list_base_gloss2)

[['ABOUT-1', [[0, '2'], [1, 'S'], [2, 'B'], [3, 'A'], [4, 'U'], [5, '0'], [6, '02'], [7, '00'], [8, '02'], [9, 'E'], [10, 'S'], [11, 'N'], [12, 'R'], [13, '0'], [14, 'E'], [15, 'S'], [16, 'N'], [17, 'R'], [18, '0'], [19, 'D'], [20, 'C'], [21, 'N'], [22, '01'], [23, '00'], [24, 'S'], [25, 'P'], [26, 'C'], [27, 'M'], [28, '0'], [29, 'L'], [30, '0'], [31, '0'], [32, '0'], [33, '0'], [34, 'M'], [35, 'E'], [36, '0'], [37, '0'], [38, '0'], [39, 'U'], [40, '0'], [41, '0'], [42, '0']]], ['ABOUT-2', [[0, '2'], [1, 'S'], [2, 'B'], [3, 'A'], [4, 'U'], [5, '0'], [6, '12'], [7, '00'], [8, '12'], [9, 'H'], [10, 'S'], [11, 'N'], [12, 'S'], [13, '0'], [14, 'H'], [15, 'S'], [16, 'N'], [17, 'S'], [18, '0'], [19, 'D'], [20, 'A'], [21, 'N'], [22, '01'], [23, '00'], [24, 'S'], [25, 'P'], [26, 'C'], [27, 'H'], [28, '0'], [29, 'L'], [30, '0'], [31, '0'], [32, '0'], [33, '0'], [34, 'M'], [35, 'E'], [36, '0'], [37, '0'], [38, '0'], [39, 'U'], [40, '0'], [41, '0'], [42, '0']]], ['ABOVE', [[0, '1'], [1, '0'], [2

**Block 6**

- Apply the conditionalities and compare every sign pair in the dataset

- The output includes a .csv file consisting of the number of similarities and differences for each variable for all compared pairs

In [None]:
# block 6
from numpy import e

# define a function to delete certain variable in the sign string
def delete_feature(index2):
    """Remove variable ``index2`` from both feature strings being compared.

    Works in place on the module-level lists ``sign1_feature`` and
    ``sign2_feature`` (lists of ``[variable_index, value]`` pairs), so both
    names must be bound before the call. Asking for an index that is not
    present (for example because it was already removed) is a no-op.

    Args:
        index2: variable index to drop from both lists.

    Returns:
        The updated ``sign1_feature`` list; ``sign2_feature`` is updated too
        but is not returned.
    """
    # Rebuild through slice assignment instead of `del` inside an `enumerate`
    # loop: deleting while iterating skips the element after every hit.
    sign1_feature[:] = [pair for pair in sign1_feature if pair[0] != index2]
    sign2_feature[:] = [pair for pair in sign2_feature if pair[0] != index2]
    return sign1_feature

# Result columns: every compared pair appends exactly one entry to each list.
number_of_differences = []
number_of_similarities_40 = []
number_of_similarities = []

gloss1_name = []
gloss2_name = []

different_items_all = []
similar_items_all = []

# loop through each sign, compare it with all of the following signs, apply the conditionalities, and delete the variables that do not qualify the conditionalities
#
# Conventions used below:
# - Indices are zero-based, so "variable N" in the comments is index N - 1.
# - Conditions are read from the *_base lists; deletions act on the working
#   lists sign1_feature / sign2_feature.
# - delete_feature() reads sign1_feature and sign2_feature as module-level
#   names, so this loop has to stay at module level and keep those two names.
# - delete_feature() does nothing when the index is already gone, so repeated
#   deletions need no try/except guard.
for index in range(0, num_sign):
    sign1_feature_base = sign_list_base_gloss1[index][1]
    sign1_feature_dict = dict(sign_list_gloss1[index][1])
    sign1_name = sign_list_gloss1[index][0]
    for idx in range(index + 1, num_sign):
        different_items_true = []
        difference = 0
        similar_items_true = []
        similar = 0
        sign2_feature_dict = dict(sign_list_gloss2[idx][1])
        # Fresh working copies for this pair; the rules below shrink them.
        sign1_feature = [[key, value] for key, value in sign1_feature_dict.items()]
        sign2_feature_base = sign_list_base_gloss2[idx][1]
        sign2_name = sign_list_gloss2[idx][0]
        sign2_feature = [[key, value] for key, value in sign2_feature_dict.items()]
        # variable 1
        # NOTE(review): this first test reads index 22, which is compared with
        # '06'/'15' further down; the two tests after it read index 0 for 'W'.
        # Index 0 was probably intended here - confirm against the coding manual.
        if sign1_feature_base[22][1] == 'W' and sign2_feature_base[22][1] == 'W':
            delete_feature(0)
        if sign1_feature_base[0][1] != 'W' and sign2_feature_base[0][1] == 'W':
            delete_feature(0)
        if sign1_feature_base[0][1] == 'W' and sign2_feature_base[0][1] != 'W':
            delete_feature(0)
        # NOTE(review): both tests below look only at sign1 and are logically the
        # same test, so a hit is counted twice; one side was presumably meant to
        # be sign2, and index 21 is elsewhere tested against 'N','H','T','K'.
        # Left unchanged because a correction would alter the results - confirm.
        if sign1_feature_base[21][1] == "E" and sign1_feature_base[21][1] != '0':
            similar = similar + 1
            similar_items_true.append(1)
            delete_feature(0)
        if sign1_feature_base[21][1] != "0" and sign1_feature_base[21][1] == 'E':
            similar = similar + 1
            similar_items_true.append(1)
            delete_feature(0)
        # variable 3
        if sign1_feature_base[0][1] not in ['2','E'] or sign2_feature_base[0][1] not in ['2','E']:
            delete_feature(2)
        # variable 4
        if sign1_feature_base[2][1] != 'B' or sign2_feature_base[2][1] != 'B':
            delete_feature(3)
        # variable 5
        if sign1_feature_base[0][1] not in ['2','E','B'] or sign2_feature_base[0][1] not in ['2','E','B']:
            delete_feature(4)
        # variable 8
        if handshape_movement == "8a":
            if sign1_feature_base[7][1] == '00' or sign2_feature_base[7][1] == '00':
                delete_feature(7)
        if handshape_movement == "8b":
            if sign1_feature_base[30][1] != 'H' and sign2_feature_base[30][1] != 'H':
                delete_feature(7)
        # variable 9
        if sign1_feature_base[1][1] == 'S' and sign2_feature_base[1][1] == 'S':
            delete_feature(8)
        if sign1_feature_base[8][1] == '00' and sign2_feature_base[8][1] != '00':
            delete_feature(8)
        if sign1_feature_base[8][1] != '00' and sign2_feature_base[8][1] == '00':
            delete_feature(8)
        # variable 24
        if sign1_feature_base[22][1] != sign2_feature_base[22][1]:
            delete_feature(23)
        if sign1_feature_base[23][1] == "00" and sign2_feature_base[23][1] == "00":
            delete_feature(23)
        # variable 25
        if sign1_feature_base[21][1] not in ['N','H','T','K'] or sign2_feature_base[21][1] not in ['N','H','T','K']:
            delete_feature(24)
        # NOTE(review): only the order sign1 == '1', sign2 == '2' is handled;
        # confirm whether the reverse order should be treated the same way.
        if sign1_feature_base[0][1] == '1' and sign2_feature_base[0][1] == '2':
            if sign1_feature_base[22][1] in ['06','15'] and sign2_feature_base[22][1] in ['06','15']:
                delete_feature(24)
        # variable 27
        if sign1_feature_base[25][1] != 'P' and sign2_feature_base[25][1] != 'P':
            delete_feature(26)
        if sign1_feature_base[25][1] != 'P' and sign2_feature_base[25][1] == 'P':
            delete_feature(26)
        if sign1_feature_base[25][1] == 'P' and sign2_feature_base[25][1] != 'P':
            delete_feature(26)
        # variable 28
        if sign1_feature_base[25][1] != 'P' and sign2_feature_base[25][1] != 'P':
            delete_feature(27)
        if sign1_feature_base[25][1] != 'P' and sign2_feature_base[25][1] == 'P':
            delete_feature(27)
        if sign1_feature_base[25][1] == 'P' and sign2_feature_base[25][1] != 'P':
            delete_feature(27)
        # variable 29
        # NOTE(review): sign1 is read at index 28 but sign2 at index 25; index 28
        # was probably intended for both. Left unchanged because a correction
        # would alter which pairs keep variable 29 - confirm.
        if sign1_feature_base[28][1] == '0' and sign2_feature_base[25][1] == '0':
            delete_feature(28)
        if sign1_feature_base[28][1] != '0' and sign2_feature_base[25][1] == '0':
            delete_feature(28)
        if sign1_feature_base[28][1] == '0' and sign2_feature_base[25][1] != '0':
            delete_feature(28)
        # variable 30
        if sign1_feature_base[27][1] != sign2_feature_base[27][1]:
            delete_feature(29)
        if sign1_feature_base[27][1] == sign2_feature_base[27][1]:
            # 'B' against anything else is recorded as a similarity on variable 30.
            if sign1_feature_base[29][1] == 'B' and sign2_feature_base[29][1] != 'B':
                similar = similar + 1
                similar_items_true.append(30)
                delete_feature(29)
            if sign1_feature_base[29][1] != 'B' and sign2_feature_base[29][1] == 'B':
                similar = similar + 1
                similar_items_true.append(30)
                delete_feature(29)
            if sign1_feature_base[29][1] == '0' and sign2_feature_base[29][1] == '0':
                delete_feature(29)
        # variable 34
        if sign1_feature_base[32][1] != 'R' and sign2_feature_base[32][1] != 'R':
            delete_feature(33)
        # variable 35
        if sign1_feature_base[34][1] in ['1','M'] and sign2_feature_base[34][1] == 'E':
            similar = similar + 1
            similar_items_true.append(35)
            delete_feature(34)
        if sign2_feature_base[34][1] in ['1','M'] and sign1_feature_base[34][1] == 'E':
            similar = similar + 1
            similar_items_true.append(35)
            delete_feature(34)
        # variable 36
        if sign1_feature_base[34][1] not in ['M','E'] or sign2_feature_base[34][1] not in ['M','E']:
            delete_feature(35)
        # variable 37
        if sign1_feature_base[34][1] not in ['M','E'] or sign2_feature_base[34][1] not in ['M','E']:
            delete_feature(36)
        # A mismatch on index 36 also drops index 35 (variable 36).
        if sign1_feature_base[36][1] != sign2_feature_base[36][1]:
            delete_feature(35)
        # variable 38
        if sign1_feature_base[34][1] not in ['M','E'] or sign2_feature_base[34][1] not in ['M','E']:
            delete_feature(37)
        # A mismatch on index 37 also drops index 35 (variable 36).
        if sign1_feature_base[37][1] != sign2_feature_base[37][1]:
            delete_feature(35)
        # variable 39
        if sign1_feature_base[34][1] not in ['M','E'] or sign2_feature_base[34][1] not in ['M','E']:
            delete_feature(38)
        # A mismatch on index 38 also drops index 35 (variable 36).
        if sign1_feature_base[38][1] != sign2_feature_base[38][1]:
            delete_feature(35)
        # variable 40
        if sign1_feature_base[34][1] in ['M','E'] and sign2_feature_base[34][1] in ['M','E']:
            if sign1_feature_base[39][1] not in ['U','B'] or sign2_feature_base[39][1] not in ['U','B']:
                delete_feature(39)
        else:
            delete_feature(39)
        # variable 41
        if sign1_feature_base[40][1] != "0" and sign2_feature_base[40][1] == "0":
            delete_feature(40)
        if sign1_feature_base[40][1] == "0" and sign2_feature_base[40][1] != "0":
            delete_feature(40)
        if sign1_feature_base[40][1] == "0" and sign2_feature_base[40][1] == "0":
            delete_feature(40)

        # Optional parameters from block 3. These rebind the module-level names,
        # which delete_feature() picks up on its next call.
        if whole_handshape == False:
            sign1_feature = [nested_list for nested_list in sign1_feature if nested_list[0] not in [6,7,8]]
            sign2_feature = [nested_list for nested_list in sign2_feature if nested_list[0] not in [6,7,8]]
        if whole_handshape == True:
            sign1_feature = [nested_list for nested_list in sign1_feature if nested_list[0] not in [9,10,11,12,13,14,15,16,17,18]]
            sign2_feature = [nested_list for nested_list in sign2_feature if nested_list[0] not in [9,10,11,12,13,14,15,16,17,18]]
        if orientation == False:
            sign1_feature = [nested_list for nested_list in sign1_feature if nested_list[0] not in [19,20]]
            sign2_feature = [nested_list for nested_list in sign2_feature if nested_list[0] not in [19,20]]

        # Variables 2, 22, 32, 34, 42 and 43 are never compared.
        for excluded_index in (1, 21, 31, 33, 41, 42):
            delete_feature(excluded_index)

        # Both working lists had the same indices removed, so they line up
        # position by position. Reported variable numbers are index + 1.
        for feature1, feature2 in zip(sign1_feature, sign2_feature):
            if feature1 != feature2:
                difference = difference + 1
                different_items_true.append(feature1[0] + 1)
            else:
                similar = similar + 1
                similar_items_true.append(feature1[0] + 1)
        different_items_all.append(different_items_true)
        similar_items_all.append(similar_items_true)
        # Two similarity figures: against a fixed total of 40, and against the
        # number of variables that survived the rules for this pair.
        # NOTE(review): `similar` is tallied above but is not appended to any
        # result list; the exported counts come from `difference` only - confirm.
        similarity_1 = 40 - difference
        similarity = len(sign1_feature) - difference
        gloss1_name.append(sign1_name)
        gloss2_name.append(sign2_name)
        number_of_differences.append(difference)
        number_of_similarities.append(similarity)
        number_of_similarities_40.append(similarity_1)

# convert the result into a pandas dataframe
# One column per result list filled by the comparison loop; one row per compared pair.
result_dict_with_condition = {
    'gloss1': gloss1_name,
    'gloss2': gloss2_name,
    'number of differences': number_of_differences,
    'number of similarities': number_of_similarities,
    'number of similarities [40]': number_of_similarities_40,
    'different items': different_items_all,
    'similar items': similar_items_all,
}
result = pd.DataFrame(result_dict_with_condition)

# print the total number of signs and number of compared strings
print('number of signs:', ' ', num_sign)
print('number of strings compared:', ' ', len(number_of_differences))
# Save the per-pair statistics, then dump the differing variables of every pair.
result.to_csv('output_statistics.csv')
print(different_items_all)


**Block 7**:
 Generate a .csv file of either minimal pairs (pairs that differ in exactly one variable) or homophones (identical pairs). Set the value compared against 'number of differences' to 1 to get minimal pairs, or to 0 to get identical pairs (homophones).

In [None]:
# block 7
# Keep the pairs whose 'number of differences' equals 1 and write them to CSV.
differs_by_one = result['number of differences'] == 1
minimal_pair = result[differs_by_one]
minimal_pair.to_csv('minimal_pairs_whole_handshape.csv')