In [12]:
import random
import numpy as np
import hashlib
from datasketch import MinHash, MinHashLSH, MinHashLSHForest

In [1]:
orientations =  [0, 45, -45, 90]
NUM_PLIES=18
POP_SIZE = 10000

In [3]:
# Generate population function
def generate_population2(num_plies, pop_size):
    
    population = []
    for i in range(pop_size):
        individual = [random.choice(orientations) for _ in range(num_plies)]
        # Enforce symmetry constraints by flipping the orientation of the i-th ply
        # if the (num_plies - i - 1)-th ply has the same orientation
        for j in range(num_plies // 2):
            if individual[j] == individual[num_plies - j - 1]:
                individual[num_plies - j - 1] = random.choice(orientations)
        population.append(individual)
    return population

In [4]:
def jaccard_distance(p1, p2, th):
    set1=set(p1)
    set2=set(p2)
    hash1= hash(frozenset(set1))
    hash2= hash(frozenset(set2))
    if hash1 & hash2 and float(len(set1 & set2))/ len(set1|set2)>= th:
        print(f'Populations are more than {(1-th)}% different')


In [5]:
p1=generate_population2(18, POP_SIZE)

In [6]:
p2=generate_population2(18, POP_SIZE)

In [7]:
def HashMin(population):
    minhash_seq=MinHash()
    for sequence in population:
        for orientation in sequence:
            minhash_seq.update(str(orientation).encode('utf-8'))
    return minhash_seq


In [13]:
def JaccardSimilarity(pop_1, pop_2):
    minhash1=HashMin(pop_1)
    minhash2=HashMin(pop_2)
    # Jaccard similarity between the two MinHash objects
    jaccard_similarity = minhash1.jaccard(minhash2)

    # Print the Jaccard similarity
    print(f"The Jaccard similarity between population 1 and population 2 is {jaccard_similarity}")

In [14]:
JaccardSimilarity(p1, p2)

The Jaccard similarity between population 1 and population 2 is 1.0


In [15]:
def LSH_index(pop_1, pop_2, th, n_perm):
    minhash1=HashMin(pop_1)
    minhash2=HashMin(pop_2)
    # Initialize LSH index
    lsh = MinHashLSH(threshold=th, num_perm=n_perm)

    # Add minhash1 to the LSH index
    lsh.insert("minhash1", minhash1)

    # Check if minhash2 is a near neighbor of any element in the LSH index
    if lsh.query(minhash2):
        print("The populations are similar.")
    else:
        print("The populations are not similar.")

In [16]:
LSH_index(p1, p2, .5, 128)

The populations are similar.


In [19]:
def LSH_forest(pop_1, pop_2, n_perm):
    minhash1=HashMin(pop_1)
    minhash2=HashMin(pop_2)
    # Initialize LSH Forest index
    forest = MinHashLSHForest(num_perm=n_perm)

    # Add minhash1 to the LSH Forest index
    forest.add("minhash1", minhash1)

    # Index the LSH Forest
    forest.index()

    # Check if minhash2 is a near neighbor of any element in the LSH Forest
    if forest.query(minhash2, k=1):
        print("The populations are similar.")
    else:
        print("The populations are not similar.")

In [20]:
LSH_forest(p1, p2, 128)

The populations are similar.


In [21]:
def HashMin_order(population):
    minhash_seq=MinHash()
    for sequence in population:
        for index, orientation in enumerate(sequence):
            minhash_seq.update((str(orientation) + str(index)).encode('utf-8'))
    return minhash_seq

In [24]:
# Order of the list elements matters
def LSH_forest_order(pop_1, pop_2, n_perm):
    minhash1=HashMin_order(pop_1)
    minhash2=HashMin_order(pop_2)
    # Initialize LSH Forest index
    forest = MinHashLSHForest(num_perm=n_perm)

    # Add minhash1 to the LSH Forest index
    forest.add("minhash1", minhash1)

    # Index the LSH Forest
    forest.index()

    # Check if minhash2 is a near neighbor of any element in the LSH Forest
    if forest.query(minhash2, k=1):
        print("The populations are similar.")
    else:
        print("The populations are not similar.")

In [25]:
LSH_forest_order(p1, p2, 128)

The populations are similar.


In [28]:
LSH_forest_order(p1[:10], p2[:10], 128)

The populations are similar.


In [26]:
p1[:10]

[[-45, 0, 90, -45, 0, 0, 0, 45, 45, -45, 0, 90, 0, -45, 0, 90, 90, 0],
 [-45, 90, -45, 45, 0, 45, 0, 90, 90, 45, -45, 45, -45, -45, 45, 45, -45, 45],
 [90, -45, 45, 90, -45, -45, 0, 0, 90, 45, -45, 90, 0, 0, -45, 0, 90, 45],
 [45, 45, -45, -45, -45, -45, 90, 90, 90, 90, 45, -45, 45, 45, 0, 45, 90, -45],
 [0, 45, 45, 90, 45, 0, -45, 45, 45, 90, 90, 45, -45, -45, 45, 90, 0, 45],
 [90, -45, 90, 0, -45, 90, 90, 0, 90, -45, 0, 0, -45, 0, 90, -45, 45, -45],
 [0, 45, 0, 90, 0, 0, 90, 90, 0, 90, 45, 0, 45, -45, 45, -45, -45, 45],
 [-45, 45, 45, 45, 0, 45, 45, 0, 45, -45, 90, 90, 90, 45, -45, -45, 0, 45],
 [0, 0, 0, 45, 45, 45, 45, -45, 45, 90, 45, -45, -45, -45, 0, 90, -45, -45],
 [0, 90, 0, 0, 90, 90, -45, -45, 90, 0, 0, -45, 0, -45, 45, 90, 45, 90]]

In [27]:
p2[:10]

[[90, 90, 0, 0, -45, 90, 45, -45, 45, 0, 90, 90, 45, 90, 90, 90, -45, 0],
 [45, -45, 45, 0, 90, 90, 0, 90, 90, -45, 45, -45, 45, 45, 45, 90, 90, 0],
 [-45, -45, 90, 45, 45, 90, 90, 90, 90, 0, 45, -45, 0, 0, -45, 0, 0, 45],
 [90, 45, -45, 45, -45, 45, 90, 45, 0, 0, 0, 0, 0, 0, 0, 0, 0, 45],
 [0, 0, 0, 45, 90, 90, 0, 90, 0, 90, -45, 0, 90, 45, 0, -45, -45, 45],
 [45, 0, 90, 0, 45, 90, 0, 45, 45, 90, 90, 90, 45, 90, -45, -45, -45, 90],
 [90, 45, -45, -45, -45, 0, 0, 90, 90, 90, 0, 45, 0, 45, 45, 90, -45, 0],
 [-45, -45, 90, 90, -45, 45, 90, -45, 90, 0, 0, 0, 90, 90, 45, 0, -45, 45],
 [90, 45, 90, 90, 45, 90, -45, 90, 0, -45, 45, 45, 0, 90, 0, 0, 90, -45],
 [45, 45, 0, 0, 45, -45, 90, 45, -45, 45, 90, 0, 0, -45, 45, -45, 90, -45]]

In [30]:
h1=HashMin_order(p1[:10])
h2=HashMin_order(p2[:10])
jaccard_similarity = h1.jaccard(h2)
print(f"The Jaccard similarity between population 1 and population 2 is {jaccard_similarity}")

The Jaccard similarity between population 1 and population 2 is 0.9140625
