In [1]:
import os
os.system('color')
import random
from termcolor import colored
from typing import List, Optional
from copy import deepcopy
import csv
from enum import Enum

import tensorflow as tf
from keras import layers, Model
import numpy as np
from sklearn.preprocessing import OneHotEncoder
# import matplotlib.pyplot as plt
# from matplotlib.ticker import MaxNLocator

import utility as ut

os.system('color')
random.seed(0)
np.random.seed(0)

DATAPATH = os.path.join(os.getcwd(), "newdatasets")
RESULTS_PATH = os.path.join(os.getcwd(), "autoencoder_results")

sh: 1: color: not found
2023-08-17 08:45:58.808426: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  AVX2 AVX512F AVX512_VNNI FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.
2023-08-17 08:45:58.939577: I tensorflow/core/util/util.cc:169] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.
2023-08-17 08:45:58.965060: E tensorflow/stream_executor/cuda/cuda_blas.cc:2981] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
2023-08-17 08:45:59.457967: W tensorflow/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic lib

In [7]:
'''
Trial summary
Hyperparameters:
conv1_filter_size: 10
conv2_filter_size: 9
conv1_filters: 50
conv2_filters: 66
bridge_filters: 120
max_pool: 2
dropout_: : 0.35
Score: 0.9727120995521545


Hyperparameters:
conv1_filter_size: 5
conv2_filter_size: 9
conv1_filters: 46
conv2_filters: 90
bridge_filters: 160
bridge_filter_size: 5
max_pool: 2
dropout_: : 0.5
Score: 0.9742878675460815
'''

# =============================================================================
class Encoder(layers.Layer):
    """Encoder part of autoencoder"""

    # -------------------------------------------------------------------------
    def __init__(self, conv1_filters, conv2_filters, conv1_filter_size=10, conv2_filter_size=9, maxpool=2, dropout=0.25, name="encoder", **kwargs):
        super().__init__(name=name, **kwargs)

        self.conv1 = layers.Conv1D(conv1_filters, conv1_filter_size, padding="same", activation="relu")
        self.conv2 = layers.Conv1D(conv2_filters, conv2_filter_size, padding="same", activation="relu")
        self.maxpool = layers.MaxPooling1D(maxpool, padding="same")
        self.dropout = layers.Dropout(dropout)
        

    # -------------------------------------------------------------------------
    def call(self, inputs):
        return self.dropout(self.maxpool(self.conv2(self.dropout(self.maxpool(self.conv1(inputs))))))

# =============================================================================
class Decoder(layers.Layer):
    """Decoder part of autoencoder"""

    # -------------------------------------------------------------------------
    def __init__(self, conv1_filters, conv2_filters, conv1_filter_size=5, conv2_filter_size=9, maxpool=2, dropout=0.25, name="decoder", **kwargs):
        super().__init__(name=name, **kwargs)

        self.conv1 = layers.Conv1DTranspose(conv1_filters, conv1_filter_size, padding="same", activation="relu")
        self.conv2 = layers.Conv1DTranspose(conv2_filters, conv2_filter_size, padding="same", activation="relu")
        self.upsample = layers.UpSampling1D(maxpool)
        self.dropout = layers.Dropout(dropout)

    # -------------------------------------------------------------------------
    def call(self, inputs):
        return self.dropout(self.upsample(self.conv2(self.dropout(self.upsample(self.conv1(inputs))))))

# =============================================================================
class Autoencoder(Model):
    """Autoencoder"""
    # -------------------------------------------------------------------------
    def __init__(self, num_classes, name="autoencoder", **kwargs):
        super().__init__(name=name, **kwargs)

        self.num_classes = num_classes
        self.hyperparameters = {
            "conv1_filters": 46,
            "conv2_filters": 90,
            "conv1_filter_size": 5,
            "conv2_filter_size": 9,
            "bridge_filters": 160,
            "bridge_filter_size": 5,
            "dropout": 0.50,
            "maxpool": 5,
        }

        hp = self.hyperparameters

        self.encoder = Encoder(hp["conv1_filters"], hp["conv2_filters"])
        self.bridge = layers.Conv1D(hp["bridge_filters"], hp["bridge_filter_size"], padding="same", activation="relu")
        self.decoder = Decoder(hp["conv2_filters"], hp["conv1_filters"])
        self.finallayer = layers.Conv1D(self.num_classes, hp["conv1_filter_size"], padding="same", activation="softmax")
    # -------------------------------------------------------------------------
    def call(self, inputs):
        return self.finallayer(self.decoder(self.bridge(self.encoder(inputs))))
# =============================================================================
class PaddingType(Enum):
    ZERO=1
    TRUNCATE=2
    EMPTY=3

# =============================================================================
class NoisificationMethod(Enum):
    RANDOMSCATTER=1
    RANDOMCONTIG=2

weights = None
# =============================================================================
class ProteinScaffoldFixer():
    """Class to correct errors in a protein scaffold and fill gaps"""

    # -------------------------------------------------------------------------
    def __init__(self, 
                output_seqs, 
                paddingtype=PaddingType.EMPTY, 
                noise_percent=0.2, 
                noisemethod=NoisificationMethod.RANDOMSCATTER, 
                numgaps=5, 
                mingapsize=3, 
                mincontigsize=1, 
                epochs=300,
                optimizer="adam",
                early_stopping=False):

        self.epochs = epochs
        self.optimizer = optimizer
        self.paddingtype = paddingtype
        self.noise_percent = noise_percent
        self.early_stopping = early_stopping

        self.classes = np.array(["-", 'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'K', 'L', 'M', 'N', 'P', 'Q', 'R', 'S', 'T', 'V', 'W', 'X', 'Y', 'Z'])
        self.ohe = OneHotEncoder(sparse_output=False, categories=[self.classes])

        self.max_seq_length = max((len(seq) for seq in output_seqs))
        self.maxpool = 2
        while self.max_seq_length % (self.maxpool * self.maxpool) != 0:
            self.max_seq_length += 1

        self.output_seqs = output_seqs
        self.input_seqs = self.noisify_sequences(output_seqs, noise_percent, noisemethod, numgaps, mingapsize, mincontigsize)

        self.train_x = self.preprocess_sequences(self.input_seqs)
        self.train_y = self.preprocess_sequences(self.output_seqs)

        #self.history = None

        self.autoencoder = Autoencoder(len(self.classes))

    # =============================================================================
    def noisify_sequences(self, seqs, noise_percent, noisemethod, numgaps=5, mingapsize=3, mincontigsize=3):
        if noisemethod == NoisificationMethod.RANDOMSCATTER:
            return self.noisify_by_random_scatter(seqs, noise_percent)
        elif noisemethod == NoisificationMethod.RANDOMCONTIG:
            return self.noisify_by_random_contigs(seqs, noise_percent, numgaps, mingapsize, mincontigsize)

    # =============================================================================
    def noisify_by_random_scatter(self, seqs, noise_percent):
        sequences = deepcopy(seqs)
        # To noisify our input data, we will replace random amino acids with something else
        for seq in sequences:

            # We randomly sample from all the possible indices of seq
            indices_to_replace = random.sample(range(len(seq)), int(noise_percent * len(seq)))
            seq[indices_to_replace] = "-"

        return sequences

    # =============================================================================
    def noisify_by_random_contigs(self, seqs, noise_percent, numgaps, mingapsize, mincontigsize):

        sequences = deepcopy(seqs)
        for seqind, seq in enumerate(sequences):

            amino_acid_length = len(seq)
            amino_acids_to_replace = int(amino_acid_length * noise_percent)

            # The idea is to build a gap queue, randomly putting each amino acid to replace into each gap
            # Then, we build a contig queue, randomly putting each amino acid into each contig
            gap_queue = np.zeros(numgaps).astype(int)
            for _ in range(amino_acids_to_replace):

                # Any gap can be considered
                valid_gaps = np.arange(numgaps)

                # ... so long as we have no underfilled gaps
                underfilled = gap_queue < mingapsize
                # If we have any underfilled, then consider only those until they are no longer underfilled
                if np.any(underfilled):
                    valid_gaps = valid_gaps[np.where(underfilled)]
                
                # Once valid gap indices have been determined, randomly pick one to increment
                gap_queue[np.random.choice(valid_gaps)] += 1

            # There can always be one more contig than gaps (if there's a contig at beginnning and end of sequence)
            contig_queue = np.zeros(numgaps+1).astype(int)

            # We have to allocate all the amino acids NOT in gaps into contigs BETWEEN the gaps
            for _ in range(amino_acid_length - amino_acids_to_replace):

                # Any contig can be considered
                valid_contigs = np.arange(len(contig_queue))

                # ... so long as we have no underfilled contigs
                underfilled = contig_queue < mincontigsize

                # The exceptions are the first and last contigs. They are never considered underfilled
                underfilled[0] = False
                underfilled[-1] = False

                # If we have any underfilled, then consider only those until they are no longer underfilled
                if np.any(underfilled):
                    valid_contigs = valid_contigs[np.where(underfilled)]
                
                # Once valid gap indices have been determined, randomly pick one to increment
                contig_queue[np.random.choice(valid_contigs)] += 1
            
            # Once we have determined gap_queue and contig_queue, we iterate over them to set the gaps equal
            # to our blank amino acid character

            sequence_pointer = 0
            iscontig=True
            while sequence_pointer < len(seq):
                if iscontig:
                    # Don't do anything for the contig except increment the pointer and pop off the contig queue
                    sequence_pointer += contig_queue[0]
                    contig_queue = np.delete(contig_queue, 0)
                else:
                    seq[sequence_pointer:sequence_pointer+gap_queue[0]] = '-'
                    sequence_pointer += gap_queue[0]
                    gap_queue = np.delete(gap_queue, 0)

                # We alternate between contigs and gaps
                iscontig = not iscontig

        return sequences

    # -------------------------------------------------------------------------
    def predict_sequence(self, seq, predict_only_gaps):

        scaffold = self.preprocess_sequences([seq])
        pred = self.autoencoder.predict(scaffold).reshape(self.max_seq_length, len(self.classes))

        # Set the probability of empty "-" to zero, since we always want to predict something
        emptyclass = np.where(self.ohe.transform(np.array("-").reshape(-1, 1))[0])[0][0]
        pred[:, emptyclass] = 0.0

        # Convert the probability distribution to a one-hot encoded vector
        mask = pred == np.amax(pred, axis=1).reshape(pred.shape[0], 1)
        indices = list((i, np.where(mask[i])[0][0]) for i in range(mask.shape[0]))

        pred = np.zeros(pred.shape)
        for i in indices:
            pred[i] = 1

        # Then we can use our one hot encoder to convert back to the original sequence of classes
        pred = self.ohe.inverse_transform(pred[:len(seq), :len(self.classes)]).reshape(len(seq))

        if predict_only_gaps:
            # We only care about predicting the gaps in seq, so replace amino acids in prediction with original nongaps
            nongaps = np.where(seq != '-')[0]
            pred[nongaps] = seq[nongaps]

        return pred

    # =============================================================================
    def preprocess_sequences(self, seqs: List[np.array]) -> np.array:

        # the value -1 lets numpy know to infer the shape. So it's just a column vector of length num_samples
        seqs = [np.array(seq).reshape(-1, 1) for seq in seqs]

        # One-hot encode each sequence
        seqs = [self.ohe.fit_transform(seq) for seq in seqs]

        if self.paddingtype == PaddingType.ZERO:
            # The sequences may have different lengths, so we will pad them with zeros
            # We are padding to fill up to max length, then we can turn it into a single numpy tensor
            return np.array([np.pad(seq, ((0, self.max_seq_length-len(seq)), (0, 0))) for seq in seqs])

        elif self.paddingtype == PaddingType.EMPTY:

            # We pad with the "empty" class
            emptyclass = self.ohe.fit_transform(np.array("-").reshape(-1, 1))[0]

            # dynamically extend each seq by enough emptyvals to make a single sequence length
            return np.array([np.vstack((seq, *(emptyclass for _ in range(self.max_seq_length-len(seq))))) for seq in seqs])

        elif self.paddingtype == PaddingType.TRUNCATE:
            # FIXME: Implement me
            raise Exception("Have not implemented Padding Type TRUNCATE!")

    # =============================================================================
    def train(self, verbose="auto", optimizer="adam", epochs=None):
        self.optimizer = optimizer
        if epochs:
            self.epochs = epochs

        self.autoencoder.compile(optimizer=self.optimizer, loss="categorical_crossentropy", metrics=["accuracy"])
        callbacks = []
        if self.early_stopping:
            callbacks.append(tf.keras.callbacks.EarlyStopping(monitor='val_loss', patience=10))
        self.history = self.autoencoder.fit(self.train_x, self.train_y, epochs=self.epochs, validation_split=0.15, verbose=verbose, callbacks=callbacks)
        print(self.autoencoder.summary())
        
# =============================================================================
def highlight_indices(seq: np.array, indices: np.array, color: str):
    # We use deepcopy to prevent mutation and we cast to object so that we can treat contents as python strings
    # otherwise, it gets messed up as it treats each element as a single character
    newseq = deepcopy(seq).astype('object')
    newseq[indices] = np.vectorize(lambda x: colored(x, color, attrs=["bold"]))(seq[indices])
    return newseq

# =============================================================================
def print_sequence(seq, 
                   header: str=None, 
                   incorrect_indices: Optional[np.array]=None, 
                   correct_indices: Optional[np.array]=None):

    newseq = deepcopy(seq)
    if correct_indices is not None and correct_indices.size != 0:
        newseq = highlight_indices(newseq, correct_indices, "green")
    if incorrect_indices is not None and incorrect_indices.size != 0:
        newseq = highlight_indices(newseq, incorrect_indices, "red")

    line_length = 40
    if header:
        print(header)
    print("=" * line_length)

    i = 0
    while i < len(newseq):
        print(" ".join(newseq[i: i+line_length]))
        i += line_length

# =============================================================================
def get_sequences(fasta_file: str) -> List[np.array]:
    sequences = []
    lines = []
    with open(fasta_file, "r") as input_file:
        lines = list(filter(None, input_file.read().split("\n")))

    parts = []
    for line in lines:
        if line.startswith(">"):
            if parts:
                sequences.append(np.array([c for c in "".join(parts)]))
            parts = []
        else:
            parts.append(line)
    if parts:
        sequences.append(np.array([c for c in "".join(parts)]))
    return sequences

# =============================================================================
def snake_case_prettify(s):
    return " ".join(w.capitalize() for w in s.split("_"))

# =============================================================================
def save_models(models):
    modeldir = os.path.join(os.getcwd(), "1Dmodels")
    if not os.path.exists(modeldir):
        os.makedirs(modeldir)

    for name, model in models.items():
        model.save(os.path.join(modeldir, name))

# =============================================================================
def load_models():
    modeldir = os.path.join(os.getcwd(), "1Dmodels")
    if os.path.exists(modeldir):
        return {f: tf.keras.models.load_model(os.path.join(modeldir, f)) for f in os.listdir(modeldir)}
    return {}

# =============================================================================
def print_diff_between_target_and_de_novo():

    dataset_dir = os.path.join(os.getcwd(), "newdatasets")
    
    de_novo_sequence = get_sequences(os.path.join(dataset_dir, "denovo_0.20_6.txt"))[0]
    target_sequence = get_sequences(os.path.join(dataset_dir, "target_sequence.txt"))[0]

    gap_indices = np.where(de_novo_sequence == '-')[0]
    incorrect_indices = np.where(de_novo_sequence != target_sequence)[0]
    correct_indices = np.where(de_novo_sequence == target_sequence)[0]
    incorrect_non_gaps = np.setdiff1d(incorrect_indices, gap_indices)
    print(f"Length of target: {len(target_sequence)}")
    print(f"Number of incorrect non-gaps: {len(incorrect_non_gaps)}")
    print(f"Number of gaps: {len(gap_indices)}")

    print_sequence(de_novo_sequence, "Protein Scaffold", incorrect_indices, correct_indices)

# =============================================================================
def test_original_protein_scaffold():

    ut.mkdir_if_not_exists(RESULTS_PATH)
    ut.mkdir_if_not_exists(DATAPATH)

    epochs = 300
    noise_percent = 0.25

    cwd = os.getcwd()

    # change to the location of the data for reading
    os.chdir(DATAPATH)
    de_novo_sequence = get_sequences("de_novo_sequence.txt")[0]
    target_sequence = get_sequences("target_sequence.txt")[0]
    alltrainingdata = get_sequences("training_sequences.txt")
    metadata = "training_sequences_metadata.csv"

    # then change to the results directory for writing
    os.chdir(RESULTS_PATH)

    sequences_to_train_on = [1000]
    # sequences_to_train_on = [100, 500, 1000, 2000]

    trainfilename = "cda_breakdown_breakdown_of_numtrain.csv"
    headers = ["Num Training Instances", "Full Accuracy", "Gap Accuracy", "Nongap Accuracy"]
    with open(trainfilename, "w+", encoding="utf-8", newline="") as f:
        train_writer = csv.writer(f)
        train_writer.writerow(headers)

    train_file = open(trainfilename, "a+", encoding="utf-8", newline="")
    train_writer = csv.writer(train_file)

    for numtrain in sequences_to_train_on:

        trainingdata = alltrainingdata[:numtrain]
        random.shuffle(trainingdata)

        incorrect_indices = np.where(de_novo_sequence != target_sequence)[0]
        correct_indices = np.where(de_novo_sequence == target_sequence)[0]

        fixer = ProteinScaffoldFixer(trainingdata, noise_percent=noise_percent, epochs=epochs)
        weights = fixer.train(verbose=1)

        hist = fixer.history.history
        training_loss = hist["loss"]
        validation_loss = hist["val_loss"]
        training_acc = hist["accuracy"]
        validation_acc = hist["val_accuracy"]

        headers = ["Training Accuracy", "Validation Accuracy", "Training Loss", "Validation Loss"]
        with open(f"autoencoder_results_on_original_{numtrain}.csv", "w", encoding="utf-8", newline="") as f:
            writer = csv.writer(f)
            writer.writerow(headers)
            writer.writerows(zip(training_acc, validation_acc, training_loss, validation_loss))

        predict_only_gaps = False
        pred = fixer.predict_sequence(de_novo_sequence, predict_only_gaps)

        incorrect_indices = np.where(target_sequence != pred)[0]
        correct_indices = np.where(target_sequence == pred)[0]
        indices_to_predict = np.where(de_novo_sequence != target_sequence)[0]
        correct_indices_to_predict = np.intersect1d(correct_indices, indices_to_predict)

        gap_indices = np.where(de_novo_sequence == "-")[0]
        error_indices = np.setdiff1d(indices_to_predict, gap_indices)

        correct_gap_indices = np.intersect1d(gap_indices, correct_indices)
        correct_error_indices = np.intersect1d(error_indices, correct_indices)

        gap_acc = len(correct_gap_indices) / len(gap_indices)
        err_acc = len(correct_error_indices) / len(error_indices)
        full_acc = len(np.where(target_sequence == pred)[0]) / len(target_sequence)

        # print to console the prediction
        print_sequence(pred, f"Predicted for {numtrain}", incorrect_indices, correct_indices_to_predict)

        # write the final accuracies according to the number of training instances
        with open(trainfilename, "a+", encoding="utf-8", newline="") as f:
            train_writer = csv.writer(f)
            train_writer.writerow([numtrain, full_acc, gap_acc, err_acc])

        # and save the predictions as images for use in the paper
        ut.write_protein_scaffold_image(pred, incorrect_indices, correct_indices_to_predict, f"cda_predictions_on_original_{numtrain}")
    
    
    train_file.close()
    os.chdir(cwd)
    return weights

# =============================================================================
def test_generated_datasets():

    ut.mkdir_if_not_exists(RESULTS_PATH)
    ut.mkdir_if_not_exists(DATAPATH)

    epochs = 200
    noise_percent = 0.25

    cwd = os.getcwd()

    os.chdir(DATAPATH)
    target_sequence = get_sequences("target_sequence.txt")[0]

    sequences_to_train_on = np.array([1000])
    # sequences_to_train_on = np.array([100, 500, 1000, 2000])
    percent_missings = np.array([0.20, 0.30, 0.40])
    num_gaps = np.array([4, 6, 8, 10])

    resultsfilename = "cda_generated_accuracies.csv"

    os.chdir(RESULTS_PATH)
    with open(resultsfilename, "w", encoding="utf-8", newline="") as f:
        writer = csv.writer(f)
        writer.writerow(["Percent Missing", "Num Gaps", "Training Instances", "Full Accuracy", "Gap Accuracy", "Non-gap Accuracy", "Most Similar Reference", "Least Similar Reference"])

    for percent_missing in percent_missings:
        for num_gap in num_gaps:

            os.chdir(DATAPATH)

            scaffold_sequence = get_sequences(f"denovo_{percent_missing:.2f}_{num_gap}.txt")[0]
            alltrainingdata = get_sequences(f"training_{percent_missing:.2f}_{num_gap}.txt")
            metadata = []

            with open(f"training_{percent_missing:.2f}_{num_gap}_metadata.csv", "r") as f:
                metadata = list(csv.reader(f))

            for num_train in sequences_to_train_on:

                most_similar = metadata[0][2]
                least_similar = metadata[num_train-1][2]

                trainingdata = alltrainingdata[:num_train]
                fixer = ProteinScaffoldFixer(trainingdata, noise_percent=noise_percent, epochs=epochs, early_stopping=True)
                fixer.train(verbose=1)

                predict_only_gaps = False
                pred = fixer.predict_sequence(scaffold_sequence, predict_only_gaps)

                incorrect_indices = np.where(target_sequence != pred)[0]
                correct_indices = np.where(target_sequence == pred)[0]

                indices_to_predict = np.where(scaffold_sequence != target_sequence)[0]
                correct_indices_to_predict = np.intersect1d(correct_indices, indices_to_predict)

                gap_indices = np.where(scaffold_sequence == "-")[0]
                error_indices = np.setdiff1d(indices_to_predict, gap_indices)

                correct_gap_indices = np.intersect1d(gap_indices, correct_indices)
                correct_error_indices = np.intersect1d(error_indices, correct_indices)

                gap_acc = len(correct_gap_indices) / len(gap_indices)
                err_acc = len(correct_error_indices) / len(error_indices)
                full_acc = len(np.where(target_sequence == pred)[0]) / len(target_sequence)

                # Print to console
                print(f"Gap Acc for CDA: {gap_acc}")
                print(f"Err Acc for CDA: {err_acc}")
                print(f"Full Acc for CDA: {full_acc}")
                print_sequence(pred, f"Results on gap={num_gap}, {percent_missing}, {num_train}", incorrect_indices, correct_indices_to_predict)

                # And save results 
                os.chdir(RESULTS_PATH)
                with open(resultsfilename, "a+", encoding="utf-8", newline="") as f:
                    writer = csv.writer(f)
                    writer.writerow([percent_missing, num_gap, num_train, full_acc, gap_acc, err_acc, most_similar, least_similar])

                # Also save the images for the paper
                ut.write_protein_scaffold_image(pred, incorrect_indices, correct_indices_to_predict, f"de_novo_{percent_missing:.2f}_{num_gap}_{num_train}")

    os.chdir(cwd)


In [8]:
test_original_protein_scaffold()

Epoch 1/300
Epoch 2/300
Epoch 3/300
Epoch 4/300
Epoch 5/300
Epoch 6/300
Epoch 7/300
Epoch 8/300
Epoch 9/300
Epoch 10/300
Epoch 11/300
Epoch 12/300
Epoch 13/300
Epoch 14/300
Epoch 15/300
Epoch 16/300
Epoch 17/300
Epoch 18/300
Epoch 19/300
Epoch 20/300
Epoch 21/300
Epoch 22/300
Epoch 23/300
Epoch 24/300
Epoch 25/300
Epoch 26/300
Epoch 27/300
Epoch 28/300
Epoch 29/300
Epoch 30/300
Epoch 31/300
Epoch 32/300
Epoch 33/300
Epoch 34/300
Epoch 35/300
Epoch 36/300
Epoch 37/300
Epoch 38/300
Epoch 39/300
Epoch 40/300
Epoch 41/300
Epoch 42/300
Epoch 43/300
Epoch 44/300
Epoch 45/300
Epoch 46/300
Epoch 47/300
Epoch 48/300
Epoch 49/300
Epoch 50/300
Epoch 51/300
Epoch 52/300
Epoch 53/300
Epoch 54/300
Epoch 55/300
Epoch 56/300
Epoch 57/300
Epoch 58/300


Epoch 59/300
Epoch 60/300
Epoch 61/300
Epoch 62/300
Epoch 63/300
Epoch 64/300
Epoch 65/300
Epoch 66/300
Epoch 67/300
Epoch 68/300
Epoch 69/300
Epoch 70/300
Epoch 71/300
Epoch 72/300
Epoch 73/300
Epoch 74/300
Epoch 75/300
Epoch 76/300
Epoch 77/300
Epoch 78/300
Epoch 79/300
Epoch 80/300
Epoch 81/300
Epoch 82/300
Epoch 83/300
Epoch 84/300
Epoch 85/300
Epoch 86/300
Epoch 87/300
Epoch 88/300
Epoch 89/300
Epoch 90/300
Epoch 91/300
Epoch 92/300
Epoch 93/300
Epoch 94/300
Epoch 95/300
Epoch 96/300
Epoch 97/300
Epoch 98/300
Epoch 99/300
Epoch 100/300
Epoch 101/300
Epoch 102/300
Epoch 103/300
Epoch 104/300
Epoch 105/300
Epoch 106/300
Epoch 107/300
Epoch 108/300
Epoch 109/300
Epoch 110/300
Epoch 111/300
Epoch 112/300
Epoch 113/300
Epoch 114/300
Epoch 115/300


Epoch 116/300
Epoch 117/300
Epoch 118/300
Epoch 119/300
Epoch 120/300
Epoch 121/300
Epoch 122/300
Epoch 123/300
Epoch 124/300
Epoch 125/300
Epoch 126/300
Epoch 127/300
Epoch 128/300
Epoch 129/300
Epoch 130/300
Epoch 131/300
Epoch 132/300
Epoch 133/300
Epoch 134/300
Epoch 135/300
Epoch 136/300
Epoch 137/300
Epoch 138/300
Epoch 139/300
Epoch 140/300
Epoch 141/300
Epoch 142/300
Epoch 143/300
Epoch 144/300
Epoch 145/300
Epoch 146/300
Epoch 147/300
Epoch 148/300
Epoch 149/300
Epoch 150/300
Epoch 151/300
Epoch 152/300
Epoch 153/300
Epoch 154/300
Epoch 155/300
Epoch 156/300
Epoch 157/300
Epoch 158/300
Epoch 159/300
Epoch 160/300
Epoch 161/300
Epoch 162/300
Epoch 163/300
Epoch 164/300
Epoch 165/300
Epoch 166/300
Epoch 167/300
Epoch 168/300
Epoch 169/300
Epoch 170/300
Epoch 171/300


Epoch 172/300
Epoch 173/300
Epoch 174/300
Epoch 175/300
Epoch 176/300
Epoch 177/300
Epoch 178/300
Epoch 179/300
Epoch 180/300
Epoch 181/300
Epoch 182/300
Epoch 183/300
Epoch 184/300
Epoch 185/300
Epoch 186/300
Epoch 187/300
Epoch 188/300
Epoch 189/300
Epoch 190/300
Epoch 191/300
Epoch 192/300
Epoch 193/300
Epoch 194/300
Epoch 195/300
Epoch 196/300
Epoch 197/300
Epoch 198/300
Epoch 199/300
Epoch 200/300
Epoch 201/300
Epoch 202/300
Epoch 203/300
Epoch 204/300
Epoch 205/300
Epoch 206/300
Epoch 207/300
Epoch 208/300
Epoch 209/300
Epoch 210/300
Epoch 211/300
Epoch 212/300
Epoch 213/300
Epoch 214/300
Epoch 215/300
Epoch 216/300
Epoch 217/300
Epoch 218/300
Epoch 219/300
Epoch 220/300
Epoch 221/300
Epoch 222/300
Epoch 223/300
Epoch 224/300
Epoch 225/300
Epoch 226/300
Epoch 227/300
Epoch 228/300


Epoch 229/300
Epoch 230/300
Epoch 231/300
Epoch 232/300
Epoch 233/300
Epoch 234/300
Epoch 235/300
Epoch 236/300
Epoch 237/300
Epoch 238/300
Epoch 239/300
Epoch 240/300
Epoch 241/300
Epoch 242/300
Epoch 243/300
Epoch 244/300
Epoch 245/300
Epoch 246/300
Epoch 247/300
Epoch 248/300
Epoch 249/300
Epoch 250/300
Epoch 251/300
Epoch 252/300
Epoch 253/300
Epoch 254/300
Epoch 255/300
Epoch 256/300
Epoch 257/300
Epoch 258/300
Epoch 259/300
Epoch 260/300
Epoch 261/300
Epoch 262/300
Epoch 263/300
Epoch 264/300
Epoch 265/300
Epoch 266/300
Epoch 267/300
Epoch 268/300
Epoch 269/300
Epoch 270/300
Epoch 271/300
Epoch 272/300
Epoch 273/300
Epoch 274/300
Epoch 275/300
Epoch 276/300
Epoch 277/300
Epoch 278/300
Epoch 279/300
Epoch 280/300
Epoch 281/300
Epoch 282/300
Epoch 283/300
Epoch 284/300
Epoch 285/300


Epoch 286/300
Epoch 287/300
Epoch 288/300
Epoch 289/300
Epoch 290/300
Epoch 291/300
Epoch 292/300
Epoch 293/300
Epoch 294/300
Epoch 295/300
Epoch 296/300
Epoch 297/300
Epoch 298/300
Epoch 299/300
Epoch 300/300
Model: "autoencoder"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 encoder (Encoder)           multiple                  48436     
                                                                 
 conv1d_14 (Conv1D)          multiple                  72160     
                                                                 
 decoder (Decoder)           multiple                  109396    
                                                                 
 conv1d_15 (Conv1D)          multiple                  5544      
                                                                 
Total params: 235,536
Trainable params: 235,536
Non-trainable params: 0
_____________________________________

In [3]:
test_generated_datasets()

Epoch 1/200


2023-06-10 09:51:43.405748: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  AVX2 AVX512F AVX512_VNNI FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.
2023-06-10 09:51:45.097786: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1613] Created device /job:localhost/replica:0/task:0/device:GPU:0 with 14759 MB memory:  -> device: 0, name: Quadro RTX 5000, pci bus id: 0000:19:00.0, compute capability: 7.5
2023-06-10 09:51:45.098456: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1613] Created device /job:localhost/replica:0/task:0/device:GPU:1 with 14759 MB memory:  -> device: 1, name: Quadro RTX 5000, pci bus id: 0000:1a:00.0, compute capability: 7.5
2023-06-10 09:51:45.098924: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1613] Created device /job:localhost/replica:0/tas

Epoch 2/200
Epoch 3/200
Epoch 4/200
Epoch 5/200
Epoch 6/200
Epoch 7/200
Epoch 8/200
Epoch 9/200
Epoch 10/200
Epoch 11/200
Epoch 12/200
Epoch 13/200
Epoch 14/200
Epoch 15/200
Epoch 16/200
Epoch 17/200
Epoch 18/200
Epoch 19/200
Epoch 20/200
Epoch 21/200
Epoch 22/200
Epoch 23/200
Epoch 24/200
Epoch 25/200
Epoch 26/200
Epoch 27/200
Epoch 28/200
Epoch 29/200
Epoch 30/200
Epoch 31/200
Epoch 32/200
Epoch 33/200
Epoch 34/200
Epoch 35/200
Epoch 36/200
Epoch 37/200
Epoch 38/200
Epoch 39/200
Epoch 40/200
Epoch 41/200
Epoch 42/200
Epoch 43/200
Epoch 44/200
Epoch 45/200
Epoch 46/200
Epoch 47/200
Epoch 48/200
Epoch 49/200
Epoch 50/200
Epoch 51/200
Epoch 52/200
Epoch 53/200
Epoch 54/200
Epoch 55/200
Epoch 56/200
Epoch 57/200
Epoch 58/200
Epoch 59/200
Epoch 60/200
Epoch 61/200
Epoch 62/200
Epoch 63/200
Epoch 64/200
Epoch 65/200
Epoch 66/200
Epoch 67/200
Epoch 68/200
Epoch 69/200
Epoch 70/200
Epoch 71/200
Epoch 72/200
Epoch 73/200
Epoch 74/200
Epoch 75/200
Epoch 76/200
Epoch 77/200
Epoch 78/200
Epoch 7

Epoch 1/200
Epoch 2/200
Epoch 3/200
Epoch 4/200
Epoch 5/200
Epoch 6/200
Epoch 7/200
Epoch 8/200
Epoch 9/200
Epoch 10/200
Epoch 11/200
Epoch 12/200
Epoch 13/200
Epoch 14/200
Epoch 15/200
Epoch 16/200
Epoch 17/200
Epoch 18/200
Epoch 19/200
Epoch 20/200
Epoch 21/200
Epoch 22/200
Epoch 23/200
Epoch 24/200
Epoch 25/200
Epoch 26/200
Epoch 27/200
Epoch 28/200
Epoch 29/200
Epoch 30/200
Epoch 31/200
Epoch 32/200
Epoch 33/200
Epoch 34/200
Epoch 35/200
Epoch 36/200
Epoch 37/200
Epoch 38/200
Epoch 39/200
Epoch 40/200
Epoch 41/200
Epoch 42/200
Epoch 43/200
Epoch 44/200
Epoch 45/200
Epoch 46/200
Epoch 47/200
Epoch 48/200
Epoch 49/200
Epoch 50/200
Epoch 51/200
Epoch 52/200
Epoch 53/200
Epoch 54/200
Epoch 55/200
Epoch 56/200
Epoch 57/200
Epoch 58/200
Epoch 59/200
Epoch 60/200
Epoch 61/200
Epoch 62/200
Epoch 63/200
Epoch 64/200
Epoch 65/200
Epoch 66/200
Epoch 67/200
Epoch 68/200
Epoch 69/200
Epoch 70/200
Epoch 71/200
Epoch 72/200
Epoch 73/200
Epoch 74/200
Epoch 75/200
Epoch 76/200
Epoch 77/200
Epoch 78

Epoch 116/200
Epoch 117/200
Epoch 118/200
Epoch 119/200
Epoch 120/200
Epoch 121/200
Epoch 122/200
Epoch 123/200
Epoch 124/200
Epoch 125/200
Epoch 126/200
Epoch 127/200
Epoch 128/200
Epoch 129/200
Epoch 130/200
Epoch 131/200
Epoch 132/200
Epoch 133/200
Epoch 134/200
Gap Acc for CDA: 1.0
Err Acc for CDA: 0.9411764705882353
Full Acc for CDA: 0.9299065420560748
Results on gap=6, 0.2, 1000
D I Q M T Q S P S S L S A S V G [1m[32mD[0m [1m[32mR[0m [1m[32mV[0m [1m[32mT[0m [1m[32mI[0m [1m[32mT[0m [1m[32mC[0m [1m[31mR[0m A S [1m[32mQ[0m [1m[31mD[0m I [1m[31mS[0m [1m[31mN[0m Y L N [1m[32mW[0m Y Q Q [1m[32mK[0m P
G K A P K L L I Y [1m[31mD[0m [1m[31mA[0m [1m[31mS[0m N L [1m[31mE[0m [1m[32mT[0m [1m[32mG[0m [1m[32mV[0m P S R F S G S [1m[32mG[0m [1m[32mS[0m [1m[32mG[0m [1m[32mT[0m [1m[32mD[0m [1m[32mF[0m T [1m[31mL[0m T I S [1m[32mS[0m [1m[32mL[0m [1m[32mQ[0m [1m[32mP[0m
E D I A T Y Y C [1m[31mQ[0m Q [1

Epoch 29/200
Epoch 30/200
Epoch 31/200
Epoch 32/200
Epoch 33/200
Epoch 34/200
Epoch 35/200
Epoch 36/200
Epoch 37/200
Epoch 38/200
Epoch 39/200
Epoch 40/200
Epoch 41/200
Epoch 42/200
Epoch 43/200
Epoch 44/200
Epoch 45/200
Epoch 46/200
Epoch 47/200
Epoch 48/200
Epoch 49/200
Epoch 50/200
Epoch 51/200
Epoch 52/200
Epoch 53/200
Epoch 54/200
Epoch 55/200
Epoch 56/200
Epoch 57/200
Epoch 58/200
Epoch 59/200
Epoch 60/200
Epoch 61/200
Epoch 62/200
Epoch 63/200
Epoch 64/200
Epoch 65/200
Epoch 66/200
Epoch 67/200
Epoch 68/200
Epoch 69/200
Epoch 70/200
Epoch 71/200
Epoch 72/200
Epoch 73/200
Epoch 74/200
Epoch 75/200
Epoch 76/200
Epoch 77/200
Epoch 78/200
Epoch 79/200
Epoch 80/200
Epoch 81/200
Epoch 82/200
Epoch 83/200
Epoch 84/200
Epoch 85/200
Epoch 86/200
Epoch 87/200
Epoch 88/200
Epoch 89/200
Epoch 90/200
Epoch 91/200
Epoch 92/200
Epoch 93/200
Epoch 94/200
Epoch 95/200
Epoch 96/200
Epoch 97/200
Epoch 98/200
Epoch 99/200
Epoch 100/200
Epoch 101/200
Epoch 102/200
Epoch 103/200
Epoch 104/200
Epoch 1

Epoch 144/200
Epoch 145/200
Epoch 146/200
Epoch 147/200
Epoch 148/200
Epoch 149/200
Epoch 150/200
Epoch 151/200
Epoch 152/200
Epoch 153/200
Epoch 154/200
Epoch 155/200
Epoch 156/200
Epoch 157/200
Epoch 158/200
Epoch 159/200
Epoch 160/200
Epoch 161/200
Epoch 162/200
Epoch 163/200
Epoch 164/200
Epoch 165/200
Epoch 166/200
Epoch 167/200
Epoch 168/200
Epoch 169/200
Epoch 170/200
Epoch 171/200
Epoch 172/200
Epoch 173/200
Gap Acc for CDA: 0.72
Err Acc for CDA: 1.0
Full Acc for CDA: 0.9345794392523364
Results on gap=8, 0.2, 1000
D I Q M T Q S P [1m[32mS[0m [1m[32mS[0m L S A S V [1m[32mG[0m [1m[32mD[0m R [1m[32mV[0m T I T C [1m[31mR[0m [1m[32mA[0m S Q [1m[31mD[0m I [1m[31mS[0m [1m[31mN[0m Y [1m[32mL[0m N W Y Q Q K P
G K [1m[32mA[0m P K L L I Y [1m[31mD[0m [1m[31mA[0m [1m[31mS[0m [1m[32mN[0m [1m[32mL[0m [1m[31mE[0m [1m[31mS[0m G [1m[32mV[0m [1m[32mP[0m [1m[32mS[0m R F S G S G S [1m[32mG[0m [1m[32mT[0m D F T F [1m[32mT[0m

Epoch 76/200
Epoch 77/200
Epoch 78/200
Epoch 79/200
Epoch 80/200
Epoch 81/200
Epoch 82/200
Epoch 83/200
Epoch 84/200
Epoch 85/200
Epoch 86/200
Epoch 87/200
Epoch 88/200
Epoch 89/200
Epoch 90/200
Epoch 91/200
Epoch 92/200
Epoch 93/200
Epoch 94/200
Epoch 95/200
Epoch 96/200
Epoch 97/200
Epoch 98/200
Epoch 99/200
Epoch 100/200
Epoch 101/200
Epoch 102/200
Epoch 103/200
Epoch 104/200
Epoch 105/200
Epoch 106/200
Epoch 107/200
Epoch 108/200
Epoch 109/200
Epoch 110/200
Epoch 111/200
Epoch 112/200
Epoch 113/200
Epoch 114/200
Epoch 115/200
Epoch 116/200
Epoch 117/200
Epoch 118/200
Epoch 119/200
Epoch 120/200
Epoch 121/200
Epoch 122/200
Epoch 123/200
Epoch 124/200
Epoch 125/200
Epoch 126/200
Epoch 127/200
Epoch 128/200
Epoch 129/200
Epoch 130/200
Epoch 131/200
Epoch 132/200
Epoch 133/200
Epoch 134/200
Epoch 135/200
Epoch 136/200
Epoch 137/200
Epoch 138/200
Epoch 139/200
Epoch 140/200
Epoch 141/200
Epoch 142/200
Epoch 143/200
Epoch 144/200
Epoch 145/200
Epoch 146/200
Epoch 147/200
Epoch 148/200
Ep

Epoch 20/200
Epoch 21/200
Epoch 22/200
Epoch 23/200
Epoch 24/200
Epoch 25/200
Epoch 26/200
Epoch 27/200
Epoch 28/200
Epoch 29/200
Epoch 30/200
Epoch 31/200
Epoch 32/200
Epoch 33/200
Epoch 34/200
Epoch 35/200
Epoch 36/200
Epoch 37/200
Epoch 38/200
Epoch 39/200
Epoch 40/200
Epoch 41/200
Epoch 42/200
Epoch 43/200
Epoch 44/200
Epoch 45/200
Epoch 46/200
Epoch 47/200
Epoch 48/200
Epoch 49/200
Epoch 50/200
Epoch 51/200
Epoch 52/200
Epoch 53/200
Epoch 54/200
Epoch 55/200
Epoch 56/200
Epoch 57/200
Epoch 58/200
Epoch 59/200
Epoch 60/200
Epoch 61/200
Epoch 62/200
Epoch 63/200
Epoch 64/200
Epoch 65/200
Epoch 66/200
Epoch 67/200
Epoch 68/200
Epoch 69/200
Epoch 70/200
Epoch 71/200
Epoch 72/200
Epoch 73/200
Epoch 74/200
Epoch 75/200
Epoch 76/200
Epoch 77/200
Epoch 78/200
Epoch 79/200
Epoch 80/200
Epoch 81/200
Epoch 82/200
Epoch 83/200
Epoch 84/200
Epoch 85/200
Epoch 86/200
Epoch 87/200
Epoch 88/200
Epoch 89/200
Epoch 90/200
Epoch 91/200
Epoch 92/200
Epoch 93/200
Epoch 94/200
Epoch 95/200
Epoch 96/200

Epoch 134/200
Epoch 135/200
Epoch 136/200
Epoch 137/200
Epoch 138/200
Epoch 139/200
Epoch 140/200
Epoch 141/200
Epoch 142/200
Epoch 143/200
Epoch 144/200
Epoch 145/200
Epoch 146/200
Epoch 147/200
Epoch 148/200
Epoch 149/200
Epoch 150/200
Gap Acc for CDA: 0.8157894736842105
Err Acc for CDA: 0.8846153846153846
Full Acc for CDA: 0.9205607476635514
Results on gap=4, 0.3, 1000
D I Q M T [1m[32mQ[0m [1m[32mS[0m P S S L [1m[32mS[0m A S V G D R V T I T C [1m[31mR[0m A [1m[32mS[0m Q [1m[31mD[0m I [1m[31mS[0m K Y L N W Y Q Q K P
G [1m[32mK[0m A [1m[32mP[0m [1m[32mK[0m [1m[32mL[0m [1m[32mL[0m [1m[32mI[0m [1m[32mY[0m [1m[31mG[0m [1m[31mA[0m [1m[31mS[0m [1m[31mS[0m [1m[32mL[0m [1m[31mG[0m T G V [1m[32mP[0m S [1m[32mR[0m F S G [1m[32mS[0m G [1m[32mS[0m G T D F T [1m[31mL[0m T I S S L Q P
E D [1m[31mF[0m [1m[32mA[0m T Y Y C [1m[31mQ[0m Q [1m[31mY[0m [1m[31mN[0m S [1m[31mY[0m [1m[32mP[0m [1m[31mW[0m T [

Epoch 82/200
Epoch 83/200
Epoch 84/200
Epoch 85/200
Epoch 86/200
Epoch 87/200
Epoch 88/200
Epoch 89/200
Epoch 90/200
Epoch 91/200
Epoch 92/200
Epoch 93/200
Epoch 94/200
Epoch 95/200
Epoch 96/200
Epoch 97/200
Epoch 98/200
Epoch 99/200
Epoch 100/200
Epoch 101/200
Epoch 102/200
Epoch 103/200
Epoch 104/200
Epoch 105/200
Epoch 106/200
Epoch 107/200
Epoch 108/200
Epoch 109/200
Epoch 110/200
Epoch 111/200
Epoch 112/200
Epoch 113/200
Epoch 114/200
Epoch 115/200
Epoch 116/200
Epoch 117/200
Epoch 118/200
Epoch 119/200
Epoch 120/200
Epoch 121/200
Epoch 122/200
Epoch 123/200
Epoch 124/200
Epoch 125/200
Epoch 126/200
Epoch 127/200
Epoch 128/200
Epoch 129/200
Epoch 130/200
Epoch 131/200
Epoch 132/200
Epoch 133/200
Epoch 134/200
Epoch 135/200
Epoch 136/200


Gap Acc for CDA: 1.0
Err Acc for CDA: 0.9230769230769231
Full Acc for CDA: 0.9392523364485982
Results on gap=6, 0.3, 1000
D I Q M T Q [1m[32mS[0m P S S L [1m[32mS[0m A [1m[32mS[0m V G D R [1m[32mV[0m T I T C [1m[31mQ[0m A S Q [1m[31mD[0m I [1m[31mN[0m K Y L N W Y [1m[32mQ[0m [1m[32mQ[0m [1m[31mE[0m [1m[32mP[0m
G K A P [1m[32mK[0m L L [1m[32mI[0m Y [1m[31mD[0m [1m[31mA[0m [1m[31mS[0m N L [1m[31mE[0m T G V P S R F S G S G S G T D F T F [1m[32mT[0m [1m[32mI[0m [1m[32mS[0m [1m[32mS[0m [1m[32mL[0m [1m[32mQ[0m [1m[32mP[0m
[1m[32mE[0m D I A T Y Y C [1m[31mQ[0m Q [1m[31mY[0m [1m[31mD[0m [1m[31mN[0m [1m[31mL[0m P R T F [1m[32mG[0m [1m[32mQ[0m [1m[32mG[0m [1m[32mT[0m [1m[32mK[0m [1m[32mV[0m [1m[32mE[0m [1m[32mI[0m K R [1m[32mT[0m V A A P S V F [1m[32mI[0m [1m[32mF[0m P P
S D E Q L K S G T A S V [1m[32mV[0m C [1m[32mL[0m [1m[32mL[0m N N [1m[32mF[0m Y P [1m[32mR

Epoch 48/200
Epoch 49/200
Epoch 50/200
Epoch 51/200
Epoch 52/200
Epoch 53/200
Epoch 54/200
Epoch 55/200
Epoch 56/200
Epoch 57/200
Epoch 58/200
Epoch 59/200
Epoch 60/200
Epoch 61/200
Epoch 62/200
Epoch 63/200
Epoch 64/200
Epoch 65/200
Epoch 66/200
Epoch 67/200
Epoch 68/200
Epoch 69/200
Epoch 70/200
Epoch 71/200
Epoch 72/200
Epoch 73/200
Epoch 74/200
Epoch 75/200
Epoch 76/200
Epoch 77/200
Epoch 78/200
Epoch 79/200
Epoch 80/200
Epoch 81/200
Epoch 82/200
Epoch 83/200
Epoch 84/200
Epoch 85/200
Epoch 86/200
Epoch 87/200
Epoch 88/200
Epoch 89/200
Epoch 90/200
Epoch 91/200
Epoch 92/200
Epoch 93/200
Epoch 94/200
Epoch 95/200
Epoch 96/200
Epoch 97/200
Epoch 98/200
Epoch 99/200
Epoch 100/200
Epoch 101/200
Epoch 102/200
Epoch 103/200
Epoch 104/200
Epoch 105/200
Epoch 106/200
Epoch 107/200
Epoch 108/200
Epoch 109/200
Epoch 110/200
Epoch 111/200
Epoch 112/200
Epoch 113/200
Epoch 114/200
Epoch 115/200
Epoch 116/200
Epoch 117/200
Epoch 118/200
Epoch 119/200
Epoch 120/200
Epoch 121/200
Epoch 122/200
Ep

Epoch 1/200
Epoch 2/200
Epoch 3/200
Epoch 4/200
Epoch 5/200
Epoch 6/200
Epoch 7/200
Epoch 8/200
Epoch 9/200
Epoch 10/200
Epoch 11/200
Epoch 12/200
Epoch 13/200
Epoch 14/200
Epoch 15/200
Epoch 16/200
Epoch 17/200
Epoch 18/200
Epoch 19/200
Epoch 20/200
Epoch 21/200
Epoch 22/200
Epoch 23/200
Epoch 24/200
Epoch 25/200
Epoch 26/200
Epoch 27/200
Epoch 28/200
Epoch 29/200
Epoch 30/200
Epoch 31/200
Epoch 32/200
Epoch 33/200
Epoch 34/200
Epoch 35/200
Epoch 36/200
Epoch 37/200
Epoch 38/200
Epoch 39/200
Epoch 40/200
Epoch 41/200
Epoch 42/200
Epoch 43/200
Epoch 44/200
Epoch 45/200
Epoch 46/200
Epoch 47/200
Epoch 48/200
Epoch 49/200
Epoch 50/200
Epoch 51/200
Epoch 52/200
Epoch 53/200
Epoch 54/200
Epoch 55/200
Epoch 56/200
Epoch 57/200
Epoch 58/200
Epoch 59/200
Epoch 60/200
Epoch 61/200
Epoch 62/200
Epoch 63/200
Epoch 64/200
Epoch 65/200
Epoch 66/200
Epoch 67/200
Epoch 68/200
Epoch 69/200
Epoch 70/200
Epoch 71/200
Epoch 72/200
Epoch 73/200
Epoch 74/200
Epoch 75/200
Epoch 76/200
Epoch 77/200
Epoch 78

Epoch 1/200
Epoch 2/200
Epoch 3/200
Epoch 4/200
Epoch 5/200
Epoch 6/200
Epoch 7/200
Epoch 8/200
Epoch 9/200
Epoch 10/200
Epoch 11/200
Epoch 12/200
Epoch 13/200
Epoch 14/200
Epoch 15/200
Epoch 16/200
Epoch 17/200
Epoch 18/200
Epoch 19/200
Epoch 20/200
Epoch 21/200
Epoch 22/200
Epoch 23/200
Epoch 24/200
Epoch 25/200
Epoch 26/200
Epoch 27/200
Epoch 28/200
Epoch 29/200
Epoch 30/200
Epoch 31/200
Epoch 32/200
Epoch 33/200
Epoch 34/200
Epoch 35/200
Epoch 36/200
Epoch 37/200
Epoch 38/200
Epoch 39/200
Epoch 40/200
Epoch 41/200
Epoch 42/200
Epoch 43/200
Epoch 44/200
Epoch 45/200
Epoch 46/200
Epoch 47/200
Epoch 48/200
Epoch 49/200
Epoch 50/200
Epoch 51/200
Epoch 52/200
Epoch 53/200
Epoch 54/200
Epoch 55/200
Epoch 56/200
Epoch 57/200
Epoch 58/200
Epoch 59/200
Epoch 60/200
Epoch 61/200
Epoch 62/200
Epoch 63/200
Epoch 64/200
Epoch 65/200
Epoch 66/200
Epoch 67/200
Epoch 68/200
Epoch 69/200
Epoch 70/200
Epoch 71/200
Epoch 72/200
Epoch 73/200
Epoch 74/200
Epoch 75/200
Epoch 76/200
Epoch 77/200
Epoch 78

Epoch 116/200
Epoch 117/200
Epoch 118/200
Epoch 119/200
Epoch 120/200
Epoch 121/200
Epoch 122/200
Epoch 123/200
Epoch 124/200
Epoch 125/200
Epoch 126/200
Epoch 127/200
Epoch 128/200
Epoch 129/200
Epoch 130/200
Epoch 131/200
Epoch 132/200
Epoch 133/200
Epoch 134/200
Epoch 135/200
Epoch 136/200
Epoch 137/200
Epoch 138/200
Epoch 139/200
Epoch 140/200
Epoch 141/200
Epoch 142/200
Epoch 143/200
Epoch 144/200
Epoch 145/200
Epoch 146/200
Epoch 147/200
Epoch 148/200
Epoch 149/200
Epoch 150/200
Epoch 151/200
Epoch 152/200
Epoch 153/200
Epoch 154/200
Epoch 155/200
Epoch 156/200
Epoch 157/200
Epoch 158/200
Epoch 159/200
Epoch 160/200
Epoch 161/200
Epoch 162/200
Epoch 163/200
Epoch 164/200
Epoch 165/200
Epoch 166/200
Epoch 167/200
Epoch 168/200
Epoch 169/200
Epoch 170/200
Epoch 171/200


Epoch 172/200
Epoch 173/200
Epoch 174/200
Epoch 175/200
Epoch 176/200
Epoch 177/200
Epoch 178/200
Epoch 179/200
Epoch 180/200
Epoch 181/200
Epoch 182/200
Epoch 183/200
Epoch 184/200
Epoch 185/200
Epoch 186/200
Epoch 187/200
Gap Acc for CDA: 0.27450980392156865
Err Acc for CDA: 0.7941176470588235
Full Acc for CDA: 0.7616822429906542
Results on gap=4, 0.4, 1000
[1m[32mD[0m [1m[32mI[0m [1m[32mQ[0m [1m[32mM[0m [1m[32mT[0m Q S P S S L S A S V G D [1m[32mR[0m V [1m[32mT[0m I T C [1m[31mR[0m [1m[32mA[0m S Q N I [1m[31mS[0m [1m[31mS[0m Y L [1m[32mN[0m W Y Q Q [1m[32mK[0m [1m[32mP[0m
G K A P [1m[32mK[0m L L [1m[32mI[0m Y [1m[31mY[0m T [1m[31mS[0m N L Q [1m[31mG[0m G [1m[32mV[0m [1m[32mP[0m [1m[32mS[0m R [1m[32mF[0m [1m[32mS[0m [1m[32mG[0m [1m[32mS[0m [1m[32mG[0m [1m[32mS[0m [1m[32mG[0m T D F T [1m[31mT[0m T I S [1m[32mS[0m L [1m[32mQ[0m P
E [1m[32mD[0m I A T Y Y C [1m[31mQ[0m [1m[32mQ[0m [1

Epoch 87/200
Epoch 88/200
Epoch 89/200
Epoch 90/200
Epoch 91/200
Epoch 92/200
Epoch 93/200
Epoch 94/200
Epoch 95/200
Epoch 96/200
Epoch 97/200
Epoch 98/200
Epoch 99/200
Epoch 100/200
Epoch 101/200
Epoch 102/200
Epoch 103/200
Epoch 104/200
Epoch 105/200
Epoch 106/200
Epoch 107/200
Epoch 108/200
Epoch 109/200
Epoch 110/200
Epoch 111/200
Epoch 112/200
Epoch 113/200
Epoch 114/200
Epoch 115/200
Epoch 116/200
Epoch 117/200
Epoch 118/200
Epoch 119/200
Epoch 120/200
Epoch 121/200
Epoch 122/200
Epoch 123/200
Epoch 124/200
Epoch 125/200
Epoch 126/200
Epoch 127/200
Epoch 128/200
Epoch 129/200
Epoch 130/200
Epoch 131/200
Epoch 132/200
Epoch 133/200
Epoch 134/200
Epoch 135/200
Epoch 136/200
Epoch 137/200
Epoch 138/200
Epoch 139/200
Epoch 140/200
Epoch 141/200
Epoch 142/200
Epoch 143/200


Gap Acc for CDA: 0.19607843137254902
Err Acc for CDA: 0.6764705882352942
Full Acc for CDA: 0.5794392523364486
Results on gap=6, 0.4, 1000
D I [1m[32mQ[0m M T Q S P S S L S A S V [1m[32mG[0m D R V T I [1m[32mT[0m C [1m[31mQ[0m A S [1m[32mQ[0m [1m[31mD[0m [1m[32mI[0m [1m[31mS[0m [1m[31mN[0m [1m[32mY[0m L N W Y [1m[32mQ[0m Q [1m[32mK[0m P
G K A P [1m[32mK[0m [1m[32mL[0m [1m[32mL[0m I Y [1m[31mD[0m [1m[31mA[0m [1m[31mS[0m N L [1m[31mE[0m [1m[31mP[0m G V [1m[32mP[0m S R [1m[32mF[0m S G S G [1m[32mS[0m G T [1m[32mD[0m F [1m[32mT[0m [1m[31mL[0m [1m[32mT[0m I S S L [1m[32mQ[0m P
[1m[32mE[0m D [1m[31mF[0m A T Y Y [1m[32mC[0m [1m[31mQ[0m Q [1m[31mY[0m [1m[31mN[0m S [1m[31mY[0m P [1m[31mY[0m [1m[32mT[0m [1m[32mF[0m G Q [1m[32mG[0m [1m[32mT[0m [1m[32mK[0m [1m[31mL[0m E I K R T [1m[32mV[0m A [1m[32mA[0m P S V F I F P P
S [1m[32mD[0m E [1m[32mQ[0m L [1m[32mK[0m

Epoch 99/200
Epoch 100/200
Epoch 101/200
Epoch 102/200
Epoch 103/200
Epoch 104/200
Epoch 105/200
Epoch 106/200
Epoch 107/200
Epoch 108/200
Epoch 109/200
Epoch 110/200
Epoch 111/200
Epoch 112/200
Epoch 113/200
Epoch 114/200
Epoch 115/200
Epoch 116/200
Epoch 117/200
Epoch 118/200
Epoch 119/200
Epoch 120/200
Epoch 121/200
Epoch 122/200
Epoch 123/200
Epoch 124/200
Epoch 125/200
Epoch 126/200
Epoch 127/200
Epoch 128/200
Epoch 129/200
Epoch 130/200
Epoch 131/200
Epoch 132/200
Epoch 133/200
Epoch 134/200
Epoch 135/200
Epoch 136/200
Epoch 137/200
Epoch 138/200
Epoch 139/200
Epoch 140/200
Epoch 141/200
Epoch 142/200
Epoch 143/200
Epoch 144/200
Epoch 145/200
Epoch 146/200
Epoch 147/200
Epoch 148/200
Epoch 149/200
Epoch 150/200
Epoch 151/200
Epoch 152/200
Epoch 153/200
Epoch 154/200


Epoch 155/200
Epoch 156/200
Epoch 157/200
Epoch 158/200
Epoch 159/200
Gap Acc for CDA: 0.3137254901960784
Err Acc for CDA: 0.6470588235294118
Full Acc for CDA: 0.6542056074766355
Results on gap=8, 0.4, 1000
D I Q M T [1m[32mQ[0m S [1m[32mP[0m S S L S A S V G [1m[32mD[0m R V [1m[32mT[0m I T C [1m[31mQ[0m [1m[32mA[0m [1m[32mS[0m [1m[32mQ[0m [1m[31mD[0m [1m[32mI[0m [1m[31mS[0m [1m[31mN[0m [1m[32mY[0m L [1m[31mA[0m W Y Q Q K P
[1m[32mG[0m [1m[32mK[0m A P K L L I [1m[32mY[0m [1m[31mD[0m [1m[31mA[0m [1m[31mS[0m N [1m[32mL[0m [1m[31mE[0m T G V P [1m[32mS[0m R F S [1m[32mG[0m [1m[32mS[0m G S G T D F T F [1m[32mT[0m [1m[32mI[0m S [1m[32mS[0m L Q P
E D [1m[31mF[0m A T Y Y C L Q [1m[32mH[0m [1m[31mN[0m S [1m[31mY[0m P R T [1m[32mF[0m [1m[32mG[0m Q G [1m[32mT[0m K V [1m[31mD[0m I K [1m[32mR[0m [1m[32mT[0m [1m[31mA[0m [1m[32mA[0m [1m[32mA[0m [1m[31mA[0m [1m[31mA[0m [1m[

Epoch 96/200
Epoch 97/200
Epoch 98/200
Epoch 99/200
Epoch 100/200
Epoch 101/200
Epoch 102/200
Epoch 103/200
Epoch 104/200
Epoch 105/200
Epoch 106/200
Epoch 107/200
Epoch 108/200
Epoch 109/200
Epoch 110/200
Epoch 111/200
Epoch 112/200
Epoch 113/200
Epoch 114/200
Epoch 115/200
Epoch 116/200
Epoch 117/200
Epoch 118/200
Epoch 119/200
Epoch 120/200
Epoch 121/200
Epoch 122/200
Epoch 123/200
Epoch 124/200
Epoch 125/200
Epoch 126/200
Epoch 127/200
Epoch 128/200
Epoch 129/200
Epoch 130/200
Epoch 131/200
Epoch 132/200
Epoch 133/200
Epoch 134/200
Epoch 135/200
Epoch 136/200
Epoch 137/200
Epoch 138/200
Epoch 139/200
Epoch 140/200
Epoch 141/200
Epoch 142/200
Epoch 143/200
Epoch 144/200
Epoch 145/200
Epoch 146/200
Epoch 147/200
Epoch 148/200
Epoch 149/200
Epoch 150/200
Epoch 151/200
Epoch 152/200


Epoch 153/200
Epoch 154/200
Epoch 155/200
Epoch 156/200
Epoch 157/200
Epoch 158/200
Epoch 159/200
Epoch 160/200
Epoch 161/200
Epoch 162/200
Epoch 163/200
Epoch 164/200
Epoch 165/200
Epoch 166/200
Epoch 167/200
Epoch 168/200
Epoch 169/200
Epoch 170/200
Epoch 171/200
Epoch 172/200
Epoch 173/200
Epoch 174/200
Epoch 175/200
Epoch 176/200
Epoch 177/200
Epoch 178/200
Epoch 179/200
Epoch 180/200
Epoch 181/200
Epoch 182/200
Epoch 183/200
Epoch 184/200
Epoch 185/200
Epoch 186/200
Epoch 187/200
Epoch 188/200
Epoch 189/200
Epoch 190/200
Epoch 191/200
Epoch 192/200
Epoch 193/200
Epoch 194/200
Epoch 195/200
Epoch 196/200
Epoch 197/200
Epoch 198/200
Epoch 199/200
Epoch 200/200
Gap Acc for CDA: 0.6862745098039216
Err Acc for CDA: 0.8529411764705882
Full Acc for CDA: 0.8551401869158879
Results on gap=10, 0.4, 1000
D I [1m[32mQ[0m M T Q S P S [1m[32mS[0m [1m[32mL[0m [1m[32mS[0m [1m[32mA[0m [1m[32mS[0m V G [1m[32mD[0m [1m[32mR[0m [1m[32mV[0m [1m[32mT[0m [1m[32mI[0m T C 

In [3]:
test_original_protein_scaffold()

2023-06-11 16:48:48.242015: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  AVX2 AVX512F AVX512_VNNI FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.
2023-06-11 16:48:49.934996: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1613] Created device /job:localhost/replica:0/task:0/device:GPU:0 with 14759 MB memory:  -> device: 0, name: Quadro RTX 5000, pci bus id: 0000:19:00.0, compute capability: 7.5
2023-06-11 16:48:49.935610: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1613] Created device /job:localhost/replica:0/task:0/device:GPU:1 with 14759 MB memory:  -> device: 1, name: Quadro RTX 5000, pci bus id: 0000:1a:00.0, compute capability: 7.5
2023-06-11 16:48:49.936062: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1613] Created device /job:localhost/replica:0/tas

Epoch 1/10


2023-06-11 16:48:52.473834: I tensorflow/compiler/xla/stream_executor/cuda/cuda_dnn.cc:428] Loaded cuDNN version 8100
2023-06-11 16:48:53.752564: I tensorflow/compiler/xla/service/service.cc:173] XLA service 0x7f2b61f68f10 initialized for platform CUDA (this does not guarantee that XLA will be used). Devices:
2023-06-11 16:48:53.752586: I tensorflow/compiler/xla/service/service.cc:181]   StreamExecutor device (0): Quadro RTX 5000, Compute Capability 7.5
2023-06-11 16:48:53.752590: I tensorflow/compiler/xla/service/service.cc:181]   StreamExecutor device (1): Quadro RTX 5000, Compute Capability 7.5
2023-06-11 16:48:53.752593: I tensorflow/compiler/xla/service/service.cc:181]   StreamExecutor device (2): Quadro RTX 5000, Compute Capability 7.5
2023-06-11 16:48:53.752596: I tensorflow/compiler/xla/service/service.cc:181]   StreamExecutor device (3): Quadro RTX 5000, Compute Capability 7.5
2023-06-11 16:48:53.756227: I tensorflow/compiler/mlir/tensorflow/utils/dump_mlir_util.cc:268] disabl

Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Model: "autoencoder"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 encoder (Encoder)           multiple                  48436     
                                                                 
 conv1d_2 (Conv1D)           multiple                  72160     
                                                                 
 decoder (Decoder)           multiple                  109396    
                                                                 
 conv1d_3 (Conv1D)           multiple                  5544      
                                                                 
Total params: 235,536
Trainable params: 235,536
Non-trainable params: 0
_________________________________________________________________
None
Predicted for 1000
[1m[32mD[0m [1m[32mI[0m [1m[32mQ[0m M T Q S P S 

In [5]:
print(len(weights))

12


In [6]:
for i in weights:
    print(i.shape)

(10, 24, 46)
(46,)
(9, 46, 90)
(90,)
(5, 90, 160)
(160,)
(5, 90, 160)
(90,)
(9, 46, 90)
(46,)
(5, 46, 24)
(24,)


In [7]:
def calculate_conv1d_output(input_size, padding, strides, kernel_size):
    if padding == 'same':
        output_size = input_size
    else:
        output_size = input_size - kernel_size + 1
    output_size = (output_size - 1) // strides + 1
    return output_size


def calculate_maxpool1d_output(input_size, pool_size, strides):
    output_size = (input_size - pool_size) // strides + 1
    return output_size

In [8]:
calculate_conv1d_output((214*1),'same',1,5)

214

In [19]:
calculate_maxpool1d_output((214*1),,2)

100

In [34]:
def calculate_conv_output(input_size, filter_size, padding, stride=1):
    output_size = (((input_size + 2*padding - filter_size) / stride) + 1) 
    return int(output_size)

In [36]:
calculate_conv_output((224*1),5,0,1)

220

In [None]:
 (((n+2p-f)/s)+1)*(((n+2p-f)/s)+1)

In [17]:
p = (32-1)/2
p

15.5

In [5]:
string_mine = 'DIQRTQSPSSLSASVGOORVTTTCRASQN 1 DK ELNWEQQKPGRAPKLL J ENTRALQTGVPSRF SGSGSCTDF TF TISSLOPE DI ATYYCL QHISRPRTF GQGTKVE IKRTVAAPSVF IF PPSOEQLKSGTASVVCL L NNF YPREAKVQWKVDNAL QSGNSQESVTEQOSKDSTYSL SSTL TL SKADYEKHKVYACEVTHQGL SSPVTK SF NRGEC'


In [6]:
string_mine = string_mine.replace(' ','')
string_mine

'DIQRTQSPSSLSASVGOORVTTTCRASQN1DKELNWEQQKPGRAPKLLJENTRALQTGVPSRFSGSGSCTDFTFTISSLOPEDIATYYCLQHISRPRTFGQGTKVEIKRTVAAPSVFIFPPSOEQLKSGTASVVCLLNNFYPREAKVQWKVDNALQSGNSQESVTEQOSKDSTYSLSSTLTLSKADYEKHKVYACEVTHQGLSSPVTKSFNRGEC'