In [2]:
def levenshtein_distance(s1, s2):
    """Return the Levenshtein (edit) distance between two sequences.

    The distance is the minimum number of single-element insertions,
    deletions and substitutions needed to turn one sequence into the
    other. Only two rows of the dynamic-programming table are kept at a
    time.

    Args:
        s1: First sequence (anything supporting ``len`` and iteration,
            e.g. a string).
        s2: Second sequence of the same kind.

    Returns:
        The edit distance as a non-negative integer.
    """
    # Put the longer sequence on the outer loop so each stored row has
    # the length of the shorter one.
    longer, shorter = (s2, s1) if len(s1) < len(s2) else (s1, s2)

    width = len(shorter)
    if width == 0:
        # Turning an empty sequence into `longer` takes one insertion
        # per element.
        return len(longer)

    # Row 0: distance from the empty prefix to each prefix of `shorter`.
    above = list(range(width + 1))

    for row_number, item_long in enumerate(longer, start=1):
        # Column 0: distance from a prefix of `longer` to the empty prefix.
        row = [row_number]

        for col, item_short in enumerate(shorter):
            cost_insert = above[col + 1] + 1
            cost_delete = row[col] + 1
            # The bool adds 0 when the elements match, 1 otherwise.
            cost_substitute = above[col] + (item_long != item_short)
            row.append(min(cost_insert, cost_delete, cost_substitute))

        above = row

    return above[-1]

def find_most_similar_domain(domain, domain_list):
    """Return the entry of ``domain_list`` closest to ``domain``.

    Closeness is measured with ``levenshtein_distance``; the smaller the
    distance, the more similar the candidate.

    Args:
        domain: The domain to compare against.
        domain_list: Iterable of candidate domains.

    Returns:
        The candidate with the smallest edit distance to ``domain``. When
        several candidates tie, the first one encountered is returned.
        ``None`` if ``domain_list`` is empty.
    """
    # min() keeps the first minimal item on ties, and `default` covers
    # the empty-input case.
    return min(
        domain_list,
        key=lambda candidate: levenshtein_distance(domain, candidate),
        default=None,
    )

# Demo: look up the closest match for a sample domain among a few candidates.
input_domain = "webrashim.hit.ac.il"
domain_list = [
    "mihash",
    "hit.ac.il",
    "webrashim.hit.ac.il",
    "hit.webrashim.ac.il",
    "mihss",
]

# The sample domain itself is among the candidates, so it is the match
# with distance 0.
most_similar_domain = find_most_similar_domain(input_domain, domain_list)
print("Most similar domain:", most_similar_domain)


Most similar domain: webrashim.hit.ac.il
