In [14]:
import random
import tensorflow as tf

options = 4

# The following methods are used for randomized data generation

def generateKey(q):
    key = []
    for i in range(q):
        key.append(random.randint(1, options))
    return key

normal_accuracy = 0.80

def generateAns(key):
    answers = []
    for i in key:
        if (random.random() > normal_accuracy):
            answers.append(random.choice([n for n in range(1, options) if n != i]))
        else: 
            answers.append(i)
    return answers

# List contains a 1 or 0 depending on whether the key and ans list match
def generateAnsList(key, ans):
    ansL = []
    for i in range(len(key)):
        if (key[i] == ans[i]):
            ansL.append(1)
        else:
            ansL.append(0) 
    return ansL

def calcAcc(key, ans):
    ansL = generateAnsList(key, ans)
    return sum(ansL)/len(ansL)
    
def frameShift(ans, f):
    shiftedAns = ans.copy()
    a = 1
    while (a > f):
        shift = random.choice([n for n in range(-len(ans), len(ans)) if n != 0])
        if (shift < 0): 
            shift = -(shift + 1)
            shiftedAns.pop(shift)
            shiftedAns.append(random.randint(1, options))
        else: 
            shift = shift - 1
            shiftedAns.pop(-1)
            shiftedAns.insert(shift, random.randint(1, options))
        
        a = random.random()
        
    return shiftedAns

In [15]:
# Method that builds a neural network to predict if the input list has been shifted or not
# input_list must either contain 1 or 0s according to whether it matches the key

def detectFrameshift(input_list):
    q = len(input_list)
    model = tf.keras.models.Sequential([
        tf.keras.layers.Input(shape=(q,)),
        tf.keras.layers.Dense(q / 2, activation='relu'),
        tf.keras.layers.Dense(10, activation='relu'), 
        tf.keras.layers.Dense(1, activation='sigmoid')
    ])
    
    model.compile(optimizer='adam',
              loss = tf.keras.losses.BinaryCrossentropy(),
              metrics=['accuracy'])
    
    samples = q * 75
    shiftProb = 0.80


    ansInputs = []
    ansOutputs = []
    ansProbs = []

    for i in range(samples):
        key = generateKey(q)
        ansL = generateAnsList(key, generateAns(key))
        ansInputs.append(ansL)
        ansOutputs.append(0)
        ansProbs.append(calcAcc(key, ansL))
        ansL = generateAnsList(key, frameShift(generateAns(key), shiftProb))
        ansInputs.append(ansL)
        ansOutputs.append(1)
        ansProbs.append(calcAcc(key, ansL))   

    ansInputTrain = ansInputs[:int(samples*.8)]
    ansOutputTrain = ansOutputs[:int(samples*.8)]

    ansInputTest = ansInputs[int(samples*.8):]
    ansOutputTest = ansOutputs[int(samples*.8):]

    model.fit(ansInputTrain, ansOutputTrain, epochs=25)
    
    print(model.evaluate(ansInputTrain, ansOutputTrain))
    print(model.evaluate(ansInputTest, ansOutputTest))
    
    pred = model.predict([input_list])
    
    print(pred)
    
    return pred

In [16]:
# Method that builds a neural network to predict the initial accuracy of a frameshifted input list
# input_list must either contain 1 or 0s according to whether it matches the key list

def unshiftAccuracy(input_list):
    q = len(input_list)

    model = tf.keras.models.Sequential([
        tf.keras.layers.Input(shape=(q,)),
        tf.keras.layers.Dense(q / 2, activation='relu'),
        tf.keras.layers.Dense(q / 2, activation='relu'),
        tf.keras.layers.Dense(q / 2, activation='relu'),
        tf.keras.layers.Dense(1, activation='sigmoid')
    ])

    model.compile(
        optimizer='sgd',
        loss='mae',
        metrics=[tf.keras.metrics.MeanAbsolutePercentageError()])

    samples = q * 25
    shiftProb = 0.80


    ansInputs = []
    ansOutputs = []
    ansProbs = []

    for n in range(samples):
        for i in range(int(100/options),95,5):
            normal_accuracy = i/100
            key = generateKey(q)
            ansL = generateAnsList(key, frameShift(generateAns(key), shiftProb))
            ansInputs.append(ansL)
            ansOutputs.append(normal_accuracy)
            ansProbs.append(calcAcc(key, ansL))

    samples = len(ansInputs)

    ansInputTrain = ansInputs[:int(samples*.8)]
    ansOutputTrain = ansOutputs[:int(samples*.8)]


    ansInputTest = ansInputs[int(samples*.8):]
    ansOutputTest = ansOutputs[int(samples*.8):]

    print(len(ansInputTest))

    model.fit(ansInputTrain, ansOutputTrain, epochs=25)

    print(model.evaluate(ansInputTrain, ansOutputTrain))
    print(model.evaluate(ansInputTest, ansOutputTest))

    pred = model.predict([input_list])

    print(pred)

In [9]:
test1 = [1, 1, 1, 1, 1, 1, 1, 
        1, 1, 0, 0, 0, 0, 0, 0, 1]

test2 = [1, 1, 1, 1, 1, 1, 
          1, 1, 1, 1, 1, 0, 1, 1, 0, 0, 1, 1, 1, 1, 1, 0, 
          1, 0, 1, 0, 0, 1, 0, 0]

In [10]:
detectFrameshift(test1)

Epoch 1/25
Epoch 2/25
Epoch 3/25
Epoch 4/25
Epoch 5/25
Epoch 6/25
Epoch 7/25
Epoch 8/25
Epoch 9/25
Epoch 10/25
Epoch 11/25
Epoch 12/25
Epoch 13/25
Epoch 14/25
Epoch 15/25
Epoch 16/25
Epoch 17/25
Epoch 18/25
Epoch 19/25
Epoch 20/25
Epoch 21/25
Epoch 22/25
Epoch 23/25
Epoch 24/25
Epoch 25/25
[0.28793007135391235, 0.8864583373069763]
[0.31567099690437317, 0.8569444417953491]
[[0.96375537]]


array([[0.96375537]], dtype=float32)

In [11]:
detectFrameshift(test2)

Epoch 1/25
Epoch 2/25
Epoch 3/25
Epoch 4/25
Epoch 5/25
Epoch 6/25
Epoch 7/25
Epoch 8/25
Epoch 9/25
Epoch 10/25
Epoch 11/25
Epoch 12/25
Epoch 13/25
Epoch 14/25
Epoch 15/25
Epoch 16/25
Epoch 17/25
Epoch 18/25
Epoch 19/25
Epoch 20/25
Epoch 21/25
Epoch 22/25
Epoch 23/25
Epoch 24/25
Epoch 25/25
[0.18855465948581696, 0.9255555272102356]
[0.23279350996017456, 0.9085184931755066]
[[0.99470973]]


array([[0.99470973]], dtype=float32)

In [12]:
unshiftAccuracy(test1)

1120
Epoch 1/25
Epoch 2/25
Epoch 3/25
Epoch 4/25
Epoch 5/25
Epoch 6/25
Epoch 7/25
Epoch 8/25
Epoch 9/25
Epoch 10/25
Epoch 11/25
Epoch 12/25
Epoch 13/25
Epoch 14/25
Epoch 15/25
Epoch 16/25
Epoch 17/25
Epoch 18/25
Epoch 19/25
Epoch 20/25
Epoch 21/25
Epoch 22/25
Epoch 23/25
Epoch 24/25
Epoch 25/25
[0.17474059760570526, 37.387611389160156]
[0.17467837035655975, 37.397220611572266]
[[0.580899]]


In [13]:
unshiftAccuracy(test2)

2100
Epoch 1/25
Epoch 2/25
Epoch 3/25
Epoch 4/25
Epoch 5/25
Epoch 6/25
Epoch 7/25
Epoch 8/25
Epoch 9/25
Epoch 10/25
Epoch 11/25
Epoch 12/25
Epoch 13/25
Epoch 14/25
Epoch 15/25
Epoch 16/25
Epoch 17/25
Epoch 18/25
Epoch 19/25
Epoch 20/25
Epoch 21/25
Epoch 22/25
Epoch 23/25
Epoch 24/25
Epoch 25/25
[0.17473508417606354, 37.553550720214844]
[0.1748654693365097, 37.61775588989258]
[[0.56386065]]


In [18]:
input_list = test1

q = len(input_list)

model = tf.keras.models.Sequential([
    tf.keras.layers.Input(shape=(q,)),
    tf.keras.layers.Dense(q / 2, activation='relu'),
    tf.keras.layers.Dense(q / 2, activation='relu'),
    tf.keras.layers.Dense(q / 2, activation='relu'),
    tf.keras.layers.Dense(1, activation='sigmoid')
])

model.compile(
    optimizer='sgd',
    loss='mae',
    metrics=[tf.keras.metrics.MeanAbsolutePercentageError()])

samples = q * 75
shiftProb = 0.80


ansInputs = []
ansOutputs = []
ansProbs = []

for n in range(samples):
    for i in range(int(100/options),95,5):
        normal_accuracy = i/100
        key = generateKey(q)
        ansL = generateAnsList(key, frameShift(generateAns(key), shiftProb))
        ansInputs.append(ansL)
        ansOutputs.append(normal_accuracy)
        ansProbs.append(calcAcc(key, ansL))

samples = len(ansInputs)

ansInputTrain = ansInputs[:int(samples*.8)]
ansOutputTrain = ansOutputs[:int(samples*.8)]


ansInputTest = ansInputs[int(samples*.8):]
ansOutputTest = ansOutputs[int(samples*.8):]

print(len(ansInputTest))

model.fit(ansInputTrain, ansOutputTrain, epochs=25)

print(model.evaluate(ansInputTrain, ansOutputTrain))
print(model.evaluate(ansInputTest, ansOutputTest))

pred = model.predict([input_list])

print(pred)

3360
Epoch 1/25
Epoch 2/25
Epoch 3/25
Epoch 4/25
Epoch 5/25
Epoch 6/25
Epoch 7/25
Epoch 8/25
Epoch 9/25
Epoch 10/25
Epoch 11/25
Epoch 12/25
Epoch 13/25
Epoch 14/25
Epoch 15/25
Epoch 16/25
Epoch 17/25
Epoch 18/25
Epoch 19/25
Epoch 20/25
Epoch 21/25
Epoch 22/25
Epoch 23/25
Epoch 24/25
Epoch 25/25
[0.13163353502750397, 26.49607276916504]
[0.1316196322441101, 26.57259178161621]
[[0.8240852]]


In [20]:
input_list = test2

q = len(input_list)

model = tf.keras.models.Sequential([
    tf.keras.layers.Input(shape=(q,)),
    tf.keras.layers.Dense(q / 2, activation='relu'),
    tf.keras.layers.Dense(q / 2, activation='relu'),
    tf.keras.layers.Dense(q / 2, activation='relu'),
    tf.keras.layers.Dense(1, activation='sigmoid')
])

model.compile(
    optimizer='sgd',
    loss='mae',
    metrics=[tf.keras.metrics.MeanAbsolutePercentageError()])

samples = q * 25
shiftProb = 0.80


ansInputs = []
ansOutputs = []
ansProbs = []

for n in range(samples):
    for i in range(int(100/options),95,5):
        normal_accuracy = i/100
        key = generateKey(q)
        ansL = generateAnsList(key, frameShift(generateAns(key), shiftProb))
        ansInputs.append(ansL)
        ansOutputs.append(normal_accuracy)
        ansProbs.append(calcAcc(key, ansL))

samples = len(ansInputs)

ansInputTrain = ansInputs[:int(samples*.8)]
ansOutputTrain = ansOutputs[:int(samples*.8)]


ansInputTest = ansInputs[int(samples*.8):]
ansOutputTest = ansOutputs[int(samples*.8):]

print(len(ansInputTest))

model.fit(ansInputTrain, ansOutputTrain, epochs=25)

print(model.evaluate(ansInputTrain, ansOutputTrain))
print(model.evaluate(ansInputTest, ansOutputTest))

pred = model.predict([input_list])

print(pred)

2100
Epoch 1/25
Epoch 2/25
Epoch 3/25
Epoch 4/25
Epoch 5/25
Epoch 6/25
Epoch 7/25
Epoch 8/25
Epoch 9/25
Epoch 10/25
Epoch 11/25
Epoch 12/25
Epoch 13/25
Epoch 14/25
Epoch 15/25
Epoch 16/25
Epoch 17/25
Epoch 18/25
Epoch 19/25
Epoch 20/25
Epoch 21/25
Epoch 22/25
Epoch 23/25
Epoch 24/25
Epoch 25/25
[0.11564060300588608, 23.082149505615234]
[0.11904745548963547, 23.546825408935547]
[[0.8612501]]
