In [1]:
# NOTE(review): the cells visible here compute distances with the pure-Python
# levenshtein_distance defined below and never import this package — confirm
# whether the install is still needed.
!pip install python-Levenshtein



In [4]:
def levenshtein_distance(s1, s2):
    """Return the Levenshtein (edit) distance between two sequences.

    The distance is the minimum number of single-element insertions,
    deletions and substitutions (each costing 1) that turn one sequence
    into the other. The result is symmetric in its arguments.

    Args:
        s1: First sequence (anything supporting ``len`` and iteration).
        s2: Second sequence.

    Returns:
        The edit distance as a non-negative integer.
    """
    # Put the longer sequence in the outer loop so that each DP row only
    # needs len(shorter) + 1 cells.
    if len(s1) < len(s2):
        longer, shorter = s2, s1
    else:
        longer, shorter = s1, s2

    # Nothing to align against: every element of the longer one is an edit.
    if not len(shorter):
        return len(longer)

    # Only two rows of the DP table are alive at any time.
    row_above = list(range(len(shorter) + 1))
    for row_number, item_long in enumerate(longer, start=1):
        row = [row_number]
        for col, item_short in enumerate(shorter):
            cost_insert = row_above[col + 1] + 1
            cost_delete = row[col] + 1
            # The comparison adds 1 only when the two elements differ.
            cost_substitute = row_above[col] + (item_long != item_short)
            row.append(min(cost_insert, cost_delete, cost_substitute))
        row_above = row

    return row_above[-1]


In [12]:
def get_neighbors(words, candidates, target, k, distance):
    """Return the ``k`` candidates closest to ``target``.

    Args:
        words: Not used by this function; kept so existing call sites
            keep working.
        candidates: Iterable of hashable items to rank. Duplicates
            collapse into a single entry.
        target: Item every candidate is compared against.
        k: Slice bound applied to the ranked candidates (``ranked[:k]``).
        distance: Callable invoked as ``distance(target, candidate)``
            returning a sortable score; smaller means closer.

    Returns:
        A dict mapping each selected candidate to its distance, in
        ascending distance order. Ties keep the order in which the
        candidates were first seen.
    """
    scores = {}
    for candidate in candidates:
        scores[candidate] = distance(target, candidate)

    # sorted() is stable, so equal scores stay in first-seen order.
    ranked = sorted(scores, key=scores.get)
    return {candidate: scores[candidate] for candidate in ranked[:k]}



In [15]:
def vote_distance_weights(city, neighbors):
    """Attach to each neighbor its share of the total neighbor distance.

    Args:
        city: The query item; returned unchanged as the first tuple element.
        neighbors: Iterable of ``(neighbor, distance)`` pairs. Any iterable
            is accepted, including one-shot iterators and generators.

    Returns:
        ``(city, weighted_neighbors)`` where ``weighted_neighbors`` is a
        list of ``(neighbor, distance / total_distance)`` pairs in input
        order. When the total distance is zero (for example every neighbor
        is an exact match) each neighbor gets the uniform weight
        ``1 / len(neighbors)`` instead of raising ``ZeroDivisionError``.
        An empty input yields an empty list.
    """
    # The pairs are walked twice (sum, then weights). Materialise them once
    # so a one-shot iterator is not already exhausted on the second pass.
    pairs = list(neighbors)
    total_distance = sum(distance for _, distance in pairs)

    if total_distance == 0:
        # No distance to share out: fall back to equal weights. The
        # comprehension body never runs for an empty list, so there is no
        # division by zero here either.
        weighted_neighbors = [(neighbor, 1 / len(pairs)) for neighbor, _ in pairs]
    else:
        # NOTE(review): the weight grows with distance, so farther neighbors
        # receive larger weights — confirm this is the intended weighting.
        weighted_neighbors = [
            (neighbor, distance / total_distance) for neighbor, distance in pairs
        ]
    return (city, weighted_neighbors)



In [17]:
# Load the candidate city names, one per line. The ``with`` block closes the
# file handle as soon as the names are read.
with open("data/city_names.txt") as city_file:
    # Strip surrounding whitespace/newlines and drop blank lines so an empty
    # string never competes as a candidate name.
    cities = [line.strip() for line in city_file if line.strip()]

# For each sample query, find its two nearest city names by edit distance and
# print each neighbor's share of the total distance.
for city in ["Freiburg", "Frieburg", "Freiborg", "Hamborg", "Saarluis"]:
    neighbors = get_neighbors(cities, cities, city, 2, distance=levenshtein_distance)
    weights = vote_distance_weights(city, neighbors.items())
    print("vote_distance_weights:", weights)


vote_distance_weights: ('Freiburg', [])
vote_distance_weights: ('Frieburg', [])
vote_distance_weights: ('Freiborg', [])
vote_distance_weights: ('Hamborg', [])
vote_distance_weights: ('Saarluis', [])
