In [1]:
import ast
import statistics
import time

import numpy as np
import pandas as pd
import tensorflow as tf
from tensorflow import keras
from keras import layers

In [2]:
# one-hot encode braid word and pad to consistent length
def encode_braid(braid, maxStrands, maxCrossings):
    """One-hot encode a signed braid word and zero-pad it to ``maxCrossings`` rows.

    Each entry of ``braid`` is a non-zero integer generator ``g`` with
    ``1 <= abs(g) <= maxStrands``. Generators are mapped to columns so that
    every signed generator has its own column:

    * positive ``g``  -> column ``g - 1``
    * negative ``g``  -> column ``maxStrands + abs(g) - 1``

    The previous implementation indexed columns with the raw signed value,
    which wrapped negative indices around and made ``+maxStrands`` and
    ``-maxStrands`` share one column.

    Args:
        braid: sequence of non-zero ints (the braid word); may be empty.
        maxStrands: largest absolute generator value that can occur.
        maxCrossings: number of rows to pad the encoding to.

    Returns:
        float32 array of shape ``(max(len(braid), maxCrossings), maxStrands * 2)``.
        Rows past ``len(braid)`` are all zero. A braid longer than
        ``maxCrossings`` is not truncated.

    Raises:
        ValueError: if any generator is 0 or has absolute value above
            ``maxStrands``.
    """
    generators = np.asarray(braid, dtype=int).reshape(-1)
    magnitudes = np.abs(generators)

    if generators.size and (magnitudes.min() < 1 or magnitudes.max() > maxStrands):
        raise ValueError(
            "braid generators must be non-zero with absolute value <= maxStrands"
        )

    # Allocate the padded matrix once instead of appending one zero row at a time.
    n_rows = max(generators.size, maxCrossings)
    encoded = np.zeros((n_rows, maxStrands * 2), dtype=np.float32)

    # Positive generators fill the first half of the columns, negative the second half.
    columns = np.where(generators > 0, generators - 1, maxStrands + magnitudes - 1)
    encoded[np.arange(generators.size), columns] = 1

    return encoded

In [3]:
# turn list of lists of equivalent braids with corresponding answers into trainable data
def parse_equivalent_braids(allBraids, answers, maxStrands, maxCrossings):
    """Flatten groups of equivalent braids into encoded inputs and labels.

    ``allBraids`` and ``answers`` are iterated in lockstep: every braid in a
    group receives that group's answer as its label. Each braid is then
    encoded with ``encode_braid(braid, maxStrands, maxCrossings)``.

    Returns:
        A pair ``(x, y)`` of numpy arrays: the encoded braids and their
        labels, in the same order.
    """
    # one (braid, label) pair per braid, preserving group order
    labelled = [
        (word, label)
        for group, label in zip(allBraids, answers)
        for word in group
    ]

    features = [encode_braid(word, maxStrands, maxCrossings) for word, _ in labelled]
    targets = [label for _, label in labelled]

    return np.asarray(features), np.asarray(targets)

In [4]:
# read pre-parsed knot data with braid words and equivalent braids

knot_data = pd.read_csv('../data/knot_data_jones.csv')
# The "Equivalent Braids" column is stored as text. Parse it with
# ast.literal_eval instead of eval: literal_eval only accepts Python literals,
# so the CSV content cannot execute arbitrary code.
knot_data["Equivalent Braids"] = knot_data["Equivalent Braids"].apply(ast.literal_eval)

In [5]:
eq = knot_data["Equivalent Braids"]

# length of the longest braid word across every group of equivalent braids
maxCrossings = max(max(len(word) for word in group) for group in eq)
# largest absolute generator value that appears in any braid word
maxStrands = max(
    max(max(abs(g) for g in word) for word in group)
    for group in eq
)

# inclusive power range and the number of coefficients it spans
# NOTE(review): presumably the exponent range of the Jones polynomial — confirm
minPower, maxPower = -13, 16
nCoefficients = maxPower - minPower + 1

In [15]:
# sample standard deviation of the "Volume" column, used to rescale the target
volumes = knot_data["Volume"]
stdev = statistics.stdev(volumes)

In [16]:
# Divide the whole column at once instead of calling a Python lambda per row.
knot_data["Volume Normalized"] = knot_data["Volume"] / stdev

In [17]:
# split dataset for training

train_split = .8
# Fixed seed so the train/test partition is the same on every run of the
# notebook; without it DataFrame.sample draws a different split each time.
split_seed = 0

train_data = knot_data.sample(frac=train_split, random_state=split_seed)
# everything not drawn into the training sample becomes the test set
test_data = knot_data.drop(train_data.index)

x_train, y_train = parse_equivalent_braids(train_data["Equivalent Braids"],
                    train_data["Volume Normalized"], maxStrands, maxCrossings)

x_test, y_test = parse_equivalent_braids(test_data["Equivalent Braids"],
                    test_data["Volume Normalized"], maxStrands, maxCrossings)

In [18]:
# "Linear" (fully connected) model: flatten the one-hot braid matrix, pass it
# through two 64-unit ReLU layers, and emit a single scalar.
inputs = keras.Input(shape=(maxCrossings, maxStrands*2))

x = layers.Flatten()(inputs)
for units in (64, 64):
    x = layers.Dense(units, activation="relu")(x)

outputs = layers.Dense(1)(x)

In [19]:
# Wrap the graph in a model named "volume", train it with Adam against mean
# squared error, and print the layer table.
model = keras.Model(inputs=inputs, outputs=outputs, name="volume")

model.compile(optimizer="adam", loss="mean_squared_error")

model.summary()

Model: "volume"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 input_2 (InputLayer)        [(None, 19, 12)]          0         
                                                                 
 flatten_1 (Flatten)         (None, 228)               0         
                                                                 
 dense_3 (Dense)             (None, 64)                14656     
                                                                 
 dense_4 (Dense)             (None, 64)                4160      
                                                                 
 dense_5 (Dense)             (None, 1)                 65        
                                                                 
Total params: 18,881
Trainable params: 18,881
Non-trainable params: 0
_________________________________________________________________


In [20]:
# Early-stopping callback watching validation loss (patience 3, min_delta 0.01).
earlyStop = tf.keras.callbacks.EarlyStopping(
    monitor='val_loss',
    min_delta=0.01,
    patience=3,
)

In [22]:
# Fit on the training braids (10% held out for validation) and time the fit.
start_time = time.time()

history_volume_linear = model.fit(
    x_train,
    y_train,
    epochs=50,
    batch_size=32,
    validation_split=0.1,
    callbacks=[earlyStop],
    verbose=1,
)

# elapsed seconds cover the fit only; the test-set evaluation runs afterwards
volume_linear_time = time.time() - start_time
volume_linear_eval = model.evaluate(x_test, y_test)

Epoch 1/50
Epoch 2/50

KeyboardInterrupt: 