In [3]:
import numpy as np
import pandas as pd
import os

Run this cell to check your current working directory. It should return the top folder "machine-learning-assisted-khovanov-homology"

In [4]:
os.getcwd()

'/mnt/c/Users/wuwj2/Desktop/jupyter/MAT_180_ML_Projects/machine-learning-assisted-khovanov-homology/notebooks'

In [5]:
#Run this cell once if still in the notebooks folder.
#Note that running this command multiple times might get you too high in the directory tree so be 
#cautious running this cell
os.chdir("../")

In [6]:
# The following functions cannot be put into .py script files because they
# contain SageMath language that cannot be run outside of the SageMath kernel

def free_part(kh):
    res = {}
    # Not the optimal way; while also not the bottleneck for computation
    # The size of kh is not big
    # TODO: improve this function
    for key1 in kh.keys():
        for key2 in kh[key1].keys():
            a = kh[key1][key2]
            gens = a.gens()
            n = len([gen for gen in gens if gen.additive_order() == +Infinity])
            if n>0:
                res[(key1, key2)] = n 
    return res

def torsion_part(kh): # key - orders of the generator; value - number of generator with that order
    res = {}
    # Not the optimal way; while also not the bottleneck for computation
    # The size of kh is not big
    # TODO: improve this function
    for key1 in kh.keys():
        for key2 in kh[key1].keys():
            val = {}
            a = kh[key1][key2]
            gens = a.gens()
            for gen in gens:
                b = gen.additive_order()
                if b == +Infinity:
                    continue
                elif b in val.keys():
                    val[b] += 1
                else:
                    val[b] = 1   
            if len(list(val.keys()))>0:
                res[(key1, key2)] = val
    return res

In [7]:
# import functions built for generating and parsing data to a standard form
from scripts.dataGeneration import getRandomWord, count_FP, count_TP, count_FP_per_col, count_FP_per_row

S = [-3, -2, -1, 1, 2, 3]
B = BraidGroup(4)

In [25]:
def generate_dataset(n,m,name):
# n is the desired size of the dataset
# m determines how long the words used to generate the links are going to be
# WARNING: m >= 9 will make the khovanov homology very long so be careful.
# I am personally using m = 8
    
    # desired features
    data = {
        'braid': [],
        'components': [],
        'khovanov_homology': [],
        'free_part': [],
        'torsion_part': [],
        'free_part_count': [],
        'torsion_part_count': [],
        'total_num_FP_per_row': [],
        'total_num_FP_per_column': [],
        'jones_polynomial': []
        
    }
    k = n // 100
    if k == 0:
        k = 1
    for i in range(n):
        if i%k == 0:
            # Print every ~1% of progress
            print(f'Currently on iteration {i+1}')
            
        W = getRandomWord(m, S)
        link = Link(B(W))
        components = link.number_of_components()
        
        if components > 3:
            continue
        
        kh = link.khovanov_homology()
        
        if kh in data["khovanov_homology"]:
            continue
            
        FP = free_part(kh)
        TP = torsion_part(kh)
        
        if TP == {}:
            continue

        data["braid"].append(W)
        data['components'].append(components)
        data["khovanov_homology"].append(kh)
        data["free_part"].append(FP)
        data["torsion_part"].append(TP)
        data["free_part_count"].append(count_FP(FP))
        data["torsion_part_count"].append(count_TP(TP))
        data["total_num_FP_per_row"].append(count_FP_per_row(FP))
        data["total_num_FP_per_column"].append(count_FP_per_col(FP))
        
        jp = {}
        # parse and append jones_polynomial dictionary: index = degree of t, val = coefficient
        for term in link.jones_polynomial().coefficients():
            jp[term[1]] = term[0]
        data['jones_polynomial'].append(jp)

    df = pd.DataFrame(data)
    
    df.to_csv(f'data/{name}.csv', index=False)

In [26]:
generate_dataset(50, 9, "today")

Currently on iteration 1
Currently on iteration 2
Currently on iteration 3
Currently on iteration 4
Currently on iteration 5
Currently on iteration 6
Currently on iteration 7
Currently on iteration 8
Currently on iteration 9
Currently on iteration 10
Currently on iteration 11
Currently on iteration 12
Currently on iteration 13
Currently on iteration 14
Currently on iteration 15
Currently on iteration 16
Currently on iteration 17
Currently on iteration 18
Currently on iteration 19
Currently on iteration 20
Currently on iteration 21
Currently on iteration 22
Currently on iteration 23
Currently on iteration 24
Currently on iteration 25
Currently on iteration 26
Currently on iteration 27
Currently on iteration 28
Currently on iteration 29
Currently on iteration 30
Currently on iteration 31
Currently on iteration 32
Currently on iteration 33
Currently on iteration 34
Currently on iteration 35
Currently on iteration 36
Currently on iteration 37
Currently on iteration 38
Currently on iteratio

PermissionError: [Errno 13] Permission denied: 'data/today.csv'