### Step 1: Importing Required Libraries

In [1]:
import re
from collections import defaultdict

### Step 2: Implementing the Soundex Algorithm

In [45]:
def get_soundex_code(word):
    
    # Step 1: Convert the word to uppercase
    word = word.upper()
    
    # Step 2: Remove non-alphabetic characters
    word = re.sub(r'[^A-Z]', '', word)
    
    # Step 3: Handle special cases
    if not word:
        return ''
    
    # Step 4: Encode the first letter
    soundex_code = word[0]
    
    # Step 5: Replace consonants with digits
    encoding_map = {'BFPV': '1', 'CGJKQSXZ': '2', 'DT': '3', 'L': '4', 'MN': '5', 'R': '6'}
    for encoding, digit in encoding_map.items():
        soundex_code += ''.join(digit if letter in encoding else '' for letter in word[1:])
    
    # Step 6: Remove adjacent duplicate digits
    soundex_code = re.sub(r'(.)\1+', r'\1', soundex_code)
    
    # Step 7: Remove vowels and the encoded first letter if it appears after the first position
    #soundex_code = soundex_code.replace(soundex_code[0], '', 1)
    soundex_code =soundex_code[0]+re.sub(r'[AEIOUY]', '', soundex_code[1:])
    
    # Step 8: Pad or truncate the code to a length of 4 characters
    soundex_code = soundex_code.ljust(4, '0')
    soundex_code = soundex_code[:4]
    
    return soundex_code

### Step 3: Building a Soundex Index

In [46]:
def build_soundex_index(words):
    soundex_index = defaultdict(list)
    for word in words:
        soundex_code = get_soundex_code(word)
        soundex_index[soundex_code].append(word)
    return soundex_index

### Step 4: Finding Similar-Sounding Words

In [47]:
def find_similar_sounding_words(query, soundex_index):
    soundex_code = get_soundex_code(query)
    return soundex_index[soundex_code]

### Step 5: Putting it All Together

In [48]:
# Sample List of Words
words = [
    'Robert', 'Rupert', 'Rubin', 'Robby', 'Rabat', 'Rabbit',
    'Smith', 'Smyth', 'Smythe', 'Schmidt', 'Schmitz', 'Mohammed', 'Muhamed'
]



# Build the Soundex Index
soundex_index = build_soundex_index(words)
print(soundex_index.items())

# Find Similar-Sounding Words
query = 'Mohammed'
similar_words = find_similar_sounding_words(query, soundex_index)
# Print the Results
print(f"Words similar to '{query}':")
print(similar_words)

dict_items([('R136', ['Robert', 'Rupert']), ('R150', ['Rubin']), ('R100', ['Robby']), ('R130', ['Rabat', 'Rabbit']), ('S350', ['Smith', 'Smyth', 'Smythe']), ('S235', ['Schmidt', 'Schmitz']), ('M350', ['Mohammed', 'Muhamed'])])
Words similar to 'Mohammed':
['Mohammed', 'Muhamed']
