# In [17]:
from collections import defaultdict
from itertools import combinations

def jaccard_similarity(set1, set2) -> float:
    """Return the Jaccard similarity of two sets.

    The similarity is ``len(set1 & set2) / len(set1 | set2)``: 1.0 for
    identical non-empty sets, 0.0 for disjoint sets.

    Args:
        set1: First set.
        set2: Second set.

    Returns:
        The ratio of shared elements to total distinct elements. When both
        sets are empty the ratio is undefined (0 / 0); 0.0 is returned in
        that case instead of raising ``ZeroDivisionError``.
    """
    union = len(set1 | set2)
    if union == 0:
        # Both inputs are empty: there is nothing in common to measure.
        return 0.0
    return len(set1 & set2) / union

def top_k_similar_websites(k, visits):
    """Return the ``k`` most similar website pairs.

    Two websites are compared by the Jaccard similarity of the sets of
    users that visited them.

    Args:
        k: Maximum number of pairs to return. A value of zero or less
            yields an empty list.
        visits: Iterable of ``(website, user)`` pairs. Repeated visits of
            the same user to the same website count once.

    Returns:
        A list of ``(website_a, website_b)`` tuples ordered from most to
        least similar. Pairs with equal similarity keep the order in which
        they were generated (websites in first-seen order). Fewer than
        ``k`` pairs are returned when fewer pairs exist.
    """
    # Without this guard a negative k would slice from the end of the ranked
    # list and return "all but the last |k|" pairs instead of none.
    if k <= 0:
        return []

    # Map each website to the set of distinct users that visited it.
    website_users = defaultdict(set)
    for website, user in visits:
        website_users[website].add(user)

    # Score every unordered pair of websites; iterating the dict yields the
    # websites in first-seen order.
    similarities = [
        (jaccard_similarity(website_users[w1], website_users[w2]), w1, w2)
        for w1, w2 in combinations(website_users, 2)
    ]

    # Sort on the score only: the sort is stable, so tied pairs stay in
    # generation order rather than being ordered by website name.
    similarities.sort(key=lambda entry: entry[0], reverse=True)
    return [(w1, w2) for _, w1, w2 in similarities[:k]]

# Input reading
# Sample input: line 0 is k, line 1 is the number of visit records, and each
# following line is one "<website> <user_id>" record.
data = '''
4
14
x 1
x 2
x 3
y 2
y 3
y 4
y 3
z 4
z 5
z 10
m 2
m 5
n 6
n 1

'''.strip().split('\n')
k = int(data[0])
n = int(data[1])

# The records occupy lines 2 .. n+1. Iterating over a slice avoids using `_`
# as a live loop index (which also shadows IPython's last-result variable).
records = data[2:n + 2]
if len(records) != n:
    raise ValueError(f"expected {n} visit records, found {len(records)}")

visits = []
for line in records:
    website, user = line.split()
    visits.append((website, int(user)))

# Output
result = top_k_similar_websites(k, visits)
print(result)


# Printed result: [('x', 'y'), ('x', 'm'), ('x', 'n'), ('y', 'm')]
