In [193]:
import pandas as pd
import numpy as np

import tensorflow as tf
from tensorflow import keras
from keras import layers

In [194]:
# Load the pre-parsed knot table; the columns used later hold each knot's
# equivalent braid words ("Equivalent Braids") and its Y/N labels
# ("Positive", "Q-Positive").
# NOTE(review): this path is specific to one machine - consider a configurable
# data directory before sharing the notebook.
knot_data_path = '~/Desktop/Math 498/data/knot_data.csv'
knot_data = pd.read_csv(knot_data_path)
knot_data.head()

Unnamed: 0,Name,Braid Notation,Positive,Q-Positive,Braid List,Equivalent Braids
0,3_1,{1;1;1},Y,Y,"[1, 1, 1]","[[-1, 2, 2, 2, 1, -2], [1, 1, 1, 1, -1, 1, -1]..."
1,4_1,{1;-2;1;-2},N,N,"[1, -2, 1, -2]","[[-1, -2, 1, -3, 4, -3, -2, 4], [-1, -2, 3, -2..."
2,5_1,{1;1;1;1;1},Y,Y,"[1, 1, 1, 1, 1]","[[1, 2, 2, 2, 2, 2], [1, 2, 1, 1, 1, 1, 1, -1,..."
3,5_2,{1;1;1;2;-1;2},Y,Y,"[1, 1, 1, 2, -1, 2]","[[1, 2, -1, 3, 1, 1, 2, -1, 2, 1, -3, 1, -2], ..."
4,6_1,{1;1;2;-1;-3;2;-3},N,N,"[1, 1, 2, -1, -3, 2, -3]","[[1, 1, 2, -1, -3, 1, -1, 2, 1, -3, -1], [-1, ..."


In [195]:
# one hot encoding
# note: better to use built in preprocessing from Keras if possible
#       one-hot encoding is built in but not sure how to do it on a list
#       possibility: multiply each crossing by slot^crossing to get an integer, then one-hot
def one_hot(braid, maxStrands, maxCrossings):
    """One-hot encode a braid word as a flat, zero-padded vector.

    Every crossing occupies ``2 * maxStrands`` consecutive slots. A negative
    generator ``-k`` sets slot ``maxStrands - k`` and a positive generator
    ``k`` sets slot ``maxStrands + k - 1``. The crossings are laid out back to
    back and the result is zero-padded up to ``maxCrossings`` crossings.

    Parameters
    ----------
    braid : iterable of int
        Signed, non-zero generator indices with ``abs(b) <= maxStrands``.
    maxStrands : int
        Largest generator magnitude the encoding has room for.
    maxCrossings : int
        Number of crossings every encoded braid is padded to.

    Returns
    -------
    numpy.ndarray
        1-D float array of length ``2 * maxStrands * maxCrossings``.

    Raises
    ------
    ValueError
        If the braid has more than ``maxCrossings`` crossings, or contains 0
        (which would otherwise share a slot with generator -1).
    IndexError
        If a generator's magnitude exceeds ``maxStrands``. Without this check
        a too-negative generator produces a negative index that silently
        wraps around to the wrong slot.
    """
    crossings = list(braid)
    nGenerators = maxStrands * 2

    if len(crossings) > maxCrossings:
        raise ValueError(
            f"braid has {len(crossings)} crossings but maxCrossings is {maxCrossings}"
        )

    # Allocate the padded result once, one row per crossing, instead of
    # growing it with np.append (which copies the whole buffer every time).
    enc = np.zeros((maxCrossings, nGenerators))

    for position, b in enumerate(crossings):
        if b == 0:
            raise ValueError("0 is not a valid braid generator")
        if abs(b) > maxStrands:
            raise IndexError(
                f"generator {b} is out of range for maxStrands={maxStrands}"
            )
        # Negative generators fill slots [0, maxStrands); positive generators
        # fill slots [maxStrands, 2 * maxStrands).
        slot = b + maxStrands if b < 0 else b + maxStrands - 1
        enc[position, slot] = 1

    # Row-major flattening reproduces the crossing-after-crossing layout.
    return enc.reshape(-1)

In [196]:
# Encoding dimensions shared by one_hot and the network input size.
maxCrossings = 19  # every encoded braid is zero-padded to this many crossings
maxStrands = 6     # one_hot reserves 2 * maxStrands slots per crossing

In [216]:
# Split the knots into training and held-out sets, then expand the training
# knots into one example per equivalent braid.
# Whole rows are sampled, so every equivalent braid of a knot lands in the
# same set and no knot contributes examples to both sides of the split.
train_split = .7

# Fixed seed: without it the split (and every number reported below) changes
# on each Restart Kernel -> Run All.
split_seed = 0

train_data = knot_data.sample(frac=train_split, random_state=split_seed)
test_data = knot_data.drop(train_data.index)

# The two frames must partition the table; this fails if the index has
# duplicate labels, in which case drop() would remove more rows than sampled.
assert len(train_data) + len(test_data) == len(knot_data)

# NOTE(review): eval() executes whatever text is stored in the CSV column.
# Presumably the file is generated locally and trusted - prefer
# ast.literal_eval if it can ever come from elsewhere.
train_braids = train_data["Equivalent Braids"].apply(eval)

# Y/N flags become 1/0 integers.
train_answers = train_data["Q-Positive"].eq("Y").astype("int64")
train_positive = train_data["Positive"].eq("Y").astype("int64")

x_train = []
y_train = []

for equivalentBraids, positive, answer in zip(train_braids, train_positive, train_answers):
    for braid in equivalentBraids:
        # Feature vector = one-hot braid encoding followed by the knot's
        # "Positive" flag; the label is the knot's "Q-Positive" flag.
        x_train.append(np.append(one_hot(braid, maxStrands, maxCrossings), positive))
        y_train.append(answer)

x_train = np.asarray(x_train)
y_train = np.asarray(y_train)


In [220]:
# Expand the held-out knots into one example per equivalent braid.
# NOTE(review): eval() executes the text stored in the CSV column; acceptable
# only for trusted data.
test_braids = test_data["Equivalent Braids"].apply(eval)

# Y/N flags become 1/0 integers.
test_answers = test_data["Q-Positive"].eq("Y").astype("int64")
test_positive = test_data["Positive"].eq("Y").astype("int64")

# Each example pairs (one-hot braid encoding + "Positive" flag) with the
# knot's "Q-Positive" label.
test_examples = [
    (np.append(one_hot(word, maxStrands, maxCrossings), is_positive), label)
    for words, is_positive, label in zip(test_braids, test_positive, test_answers)
    for word in words
]

x_test = np.asarray([features for features, _ in test_examples])
y_test = np.asarray([label for _, label in test_examples])


In [224]:
# Network hyper-parameters.
# One entry per hidden Dense layer, giving that layer's width.
hidden_layer_sizes = [100, 100]
# 2 * maxStrands one-hot slots for each of the maxCrossings padded crossings,
# plus one extra input for the knot's "Positive" flag.
input_size = 2 * maxStrands * maxCrossings + 1

In [225]:
# Build a fully connected binary classifier: ReLU hidden layers sized by
# hidden_layer_sizes, followed by a single sigmoid output unit.
model = keras.Sequential()

# Keras documents `shape` as a tuple excluding the batch dimension. A trailing
# comma inside the call parentheses does not create a tuple, so the shape is
# spelled out explicitly here.
model.add(keras.Input(shape=(input_size,)))

# Layers are taken from keras.layers so they come from the same
# tensorflow.keras namespace as Sequential and Input (the bare `layers` name
# is imported from the standalone keras package).
for size in hidden_layer_sizes:
    model.add(keras.layers.Dense(size, activation="relu"))

model.add(keras.layers.Dense(1, activation="sigmoid"))

# Binary cross-entropy matches the single sigmoid output and 0/1 labels.
model.compile(loss="binary_crossentropy", optimizer="adam", metrics=["accuracy"])

In [226]:
# Train on the expanded training braids and keep the value returned by fit()
# for later inspection.
history = model.fit(x=x_train, y=y_train, epochs=5, batch_size=16)

Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


In [227]:
# Score the trained model on the held-out braids.
results = model.evaluate(x=x_test, y=y_test, batch_size=128)

