In [20]:
from math import exp
from random import random

class Layer:
    """A row of sigmoid neurons, each with one input and its own bias.

    Neuron i outputs activation(inputVector[i] - biases[i]).  The forward pass
    caches its input and output so that backPropagate / gradientDescent can use
    them afterwards.
    """

    def __init__(self, numberOfNeurons):
        """Create numberOfNeurons neurons with biases drawn uniformly from [-1, 1)."""
        self.numberOfNeurons = numberOfNeurons
        self.biases = [random() * 2 - 1 for _ in range(numberOfNeurons)]

    def activation(self, x):
        """Return the logistic sigmoid 1 / (1 + e**-x).

        The branch on the sign of x keeps the argument of exp() non-positive, so
        a large negative x saturates to 0.0 instead of raising OverflowError.
        """
        if x >= 0:
            return 1.0 / (1.0 + exp(-1 * x))
        expOfX = exp(x)
        return expOfX / (1.0 + expOfX)

    def activationPrime(self, x):
        """Return the sigmoid derivative at x, using s'(x) = s(x) * s(-x)."""
        return self.activation(x) * self.activation(-1 * x)

    def forwardPropagate(self, inputVector):
        """Apply bias and activation to each input; cache and return the outputs.

        Raises IndexError if inputVector has fewer than numberOfNeurons entries.
        """
        self.mostRecentInput = inputVector
        outputVector = [
            self.activation(inputVector[i] - self.biases[i])
            for i in range(self.numberOfNeurons)
        ]
        self.mostRecentOutput = outputVector
        return outputVector

    def backPropagate(self, inputVector):
        """Scale the incoming error signal by the activation derivative.

        inputVector holds one error value per neuron.  The result is stored in
        self.errors and returned.  Requires a prior forwardPropagate call.
        """
        # The derivative must be taken at the value the activation actually saw
        # in the forward pass, i.e. the input with the bias already subtracted.
        outputVector = [
            inputVector[i]
            * self.activationPrime(self.mostRecentInput[i] - self.biases[i])
            for i in range(self.numberOfNeurons)
        ]
        self.errors = outputVector
        return outputVector

    def gradientDescent(self, learningRate):
        """Update every bias from the errors stored by the last backPropagate.

        NeuralNetwork.backPropagate seeds the error signal with
        (expected - actual), and the bias enters the pre-activation with a minus
        sign, so subtracting errors[i] * learningRate moves the output toward
        the expected value.
        """
        for i in range(self.numberOfNeurons):
            self.biases[i] -= self.errors[i] * learningRate

class WeightMatrix:
    """Dense weights between two consecutive layers.

    weights[y][x] is the weight from input neuron y to output neuron x.
    """

    def __init__(self, numberOfInputNeurons, numberOfOutputNeurons):
        """Create the matrix with weights drawn uniformly from [-1, 1)."""
        self.numberOfInputNeurons = numberOfInputNeurons
        self.numberOfOutputNeurons = numberOfOutputNeurons
        self.weights = [
            [random() * 2 - 1 for _column in range(numberOfOutputNeurons)]
            for _row in range(numberOfInputNeurons)
        ]

    def forwardPropagate(self, inputVector):
        """Return the weighted sum arriving at each output neuron."""
        return [
            sum(
                self.weights[y][x] * inputVector[y]
                for y in range(self.numberOfInputNeurons)
            )
            for x in range(self.numberOfOutputNeurons)
        ]

    def backPropagate(self, inputVector):
        """Carry an output-side error signal back to the input side.

        inputVector holds one error value per output neuron; the result has one
        value per input neuron.
        """
        return [
            sum(
                self.weights[y][x] * inputVector[x]
                for x in range(self.numberOfOutputNeurons)
            )
            for y in range(self.numberOfInputNeurons)
        ]

    def calculateGradients(self, outputOfInputLayer, errorsOfOutputLayer):
        """Store the loss gradient for every weight in self.gradients.

        errorsOfOutputLayer is the error signal produced by Layer.backPropagate.
        Because NeuralNetwork.backPropagate seeds that signal with
        (expected - actual), it is the *negative* of the loss gradient with
        respect to the receiving neuron's pre-activation.  The gradient for
        weights[y][x] is therefore minus the product of the sending neuron's
        output and the receiving neuron's error.  Without the minus sign,
        gradientDescent would push the weights away from the target.
        """
        self.gradients = [
            [
                -outputOfInputLayer[y] * errorsOfOutputLayer[x]
                for x in range(self.numberOfOutputNeurons)
            ]
            for y in range(self.numberOfInputNeurons)
        ]

    def gradientDescent(self, learningRate):
        """Step every weight against its stored gradient.

        Requires a prior calculateGradients call.
        """
        for y in range(self.numberOfInputNeurons):
            for x in range(self.numberOfOutputNeurons):
                self.weights[y][x] -= self.gradients[y][x] * learningRate

class NeuralNetwork:
    """A feed-forward stack of Layer objects joined by WeightMatrix objects.

    layers[k] and layers[k + 1] are connected by weightMatrices[k].
    """

    def __init__(self, numberOfNeuronsInFirstLayer):
        """Start the network with a single layer and no weight matrices."""
        self.layers = [Layer(numberOfNeuronsInFirstLayer)]
        self.weightMatrices = []

    def addLayer(self, numberOfNeurons):
        """Append a layer, plus the weights connecting it to the current last layer."""
        previousSize = self.layers[-1].numberOfNeurons
        self.weightMatrices.append(WeightMatrix(previousSize, numberOfNeurons))
        self.layers.append(Layer(numberOfNeurons))

    def forwardPropagate(self, inputVector):
        """Run inputVector through every layer and return the last layer's output."""
        signal = self.layers[0].forwardPropagate(inputVector)
        for index, matrix in enumerate(self.weightMatrices):
            weighted = matrix.forwardPropagate(signal)
            signal = self.layers[index + 1].forwardPropagate(weighted)
        return signal

    def backPropagate(self, expectedOutput):
        """Propagate (expected - actual) backwards, then compute weight gradients.

        Uses the outputs cached by the most recent forwardPropagate call.
        """
        lastLayer = self.layers[-1]
        outputError = [
            expectedOutput[n] - lastLayer.mostRecentOutput[n]
            for n in range(lastLayer.numberOfNeurons)
        ]
        signal = lastLayer.backPropagate(outputError)
        # Walk the matrices from the output side back to the input side.
        for index in range(len(self.weightMatrices) - 1, -1, -1):
            signal = self.weightMatrices[index].backPropagate(signal)
            signal = self.layers[index].backPropagate(signal)
        # Every layer now holds its errors, so each matrix can pair the output of
        # the layer before it with the errors of the layer after it.
        for index, matrix in enumerate(self.weightMatrices):
            matrix.calculateGradients(
                self.layers[index].mostRecentOutput,
                self.layers[index + 1].errors,
            )

    def gradientDescent(self, learningRate):
        """Apply one update step to all layers, then to all weight matrices."""
        for layer in self.layers:
            layer.gradientDescent(learningRate)
        for matrix in self.weightMatrices:
            matrix.gradientDescent(learningRate)

# Disabled smoke test for the classes above: the code is wrapped in a bare string
# literal so it does not execute.  Because that string is the cell's last
# expression, the notebook echoes it back as the cell output.
# When enabled it trains a 1-4-3 network on the single sample [1] -> [1, 0, 1]
# for 10000 steps and prints the prediction (Python 2 print statement).
'''
myNeuralNet = NeuralNetwork(1)
myNeuralNet.addLayer(4)
myNeuralNet.addLayer(3)

i = 0
while i < 10000:
    myNeuralNet.forwardPropagate([1])
    myNeuralNet.backPropagate([1, 0, 1])
    myNeuralNet.gradientDescent(0.01)
    i += 1

print myNeuralNet.forwardPropagate([1])
'''

'\nmyNeuralNet = NeuralNetwork(1)\nmyNeuralNet.addLayer(4)\nmyNeuralNet.addLayer(3)\n\ni = 0\nwhile i < 10000:\n    myNeuralNet.forwardPropagate([1])\n    myNeuralNet.backPropagate([1, 0, 1])\n    myNeuralNet.gradientDescent(0.01)\n    i += 1\n\nprint myNeuralNet.forwardPropagate([1])\n'

In [21]:
# Alphabets used for one-hot encoding; a symbol's position in the string is its
# index in the one-hot vector.
# NOTE(review): "*" is presumably a placeholder for non-standard residues, and
# "CHE" presumably the three-state labels coil / helix / strand -- confirm
# against the dataset description.
POSSIBLE_AMINO_ACIDS = "ACDEFGHIKLMNPQRSTVWY*"
POSSIBLE_SECONDARY_STRUCTURES = "CHE"

# Length of one one-hot vector (21 and 3 with the alphabets above).
AMINO_ACID_VECTOR_SIZE = len(POSSIBLE_AMINO_ACIDS)
SECONDARY_STRUCTURE_VECTOR_SIZE = len(POSSIBLE_SECONDARY_STRUCTURES)

# Longest sequences the fixed-size encodings accept; shorter ones are zero-padded.
# NOTE(review): the training cell requires BOTH limits to hold for a row.  If a
# row's structure string is as long as its amino-acid string, the effective cap
# is 10, not 20 -- confirm this is intended.
MAX_AMINO_ACID_COUNT = 20
MAX_SECONDARY_STRUCTURE_COUNT = 10

# Flattened network input / output widths (21 * 20 = 420 and 3 * 10 = 30).
INPUT_SIZE = AMINO_ACID_VECTOR_SIZE * MAX_AMINO_ACID_COUNT
OUTPUT_SIZE = SECONDARY_STRUCTURE_VECTOR_SIZE * MAX_SECONDARY_STRUCTURE_COUNT

def aminoAcidToVector(aminoAcid):
    """One-hot encode a single amino-acid symbol.

    Returns a list of AMINO_ACID_VECTOR_SIZE zeros with a 1 at the symbol's
    position in POSSIBLE_AMINO_ACIDS.  Raises ValueError for a symbol that is
    not in that alphabet.
    """
    hotIndex = POSSIBLE_AMINO_ACIDS.index(aminoAcid)
    return [1 if slot == hotIndex else 0 for slot in range(AMINO_ACID_VECTOR_SIZE)]

def aminoAcidSequenceToVector(aminoAcidSequence):
    """Encode an amino-acid string as one flat vector of length INPUT_SIZE.

    The one-hot vectors of the individual symbols are concatenated in order and
    the remainder is filled with zeros.  Raises IndexError when the sequence is
    longer than MAX_AMINO_ACID_COUNT.
    """
    if MAX_AMINO_ACID_COUNT < len(aminoAcidSequence):
        raise IndexError("Too many amino acids.")
    encoded = []
    for residue in aminoAcidSequence:
        encoded.extend(aminoAcidToVector(residue))
    # Zero-pad up to the fixed input width.
    encoded.extend([0] * (INPUT_SIZE - len(encoded)))
    return encoded

def secondaryStructureToVector(secondaryStructure):
    """One-hot encode a single secondary-structure symbol.

    Returns a list of SECONDARY_STRUCTURE_VECTOR_SIZE zeros with a 1 at the
    symbol's position in POSSIBLE_SECONDARY_STRUCTURES.  Raises ValueError for a
    symbol that is not in that alphabet.
    """
    hotIndex = POSSIBLE_SECONDARY_STRUCTURES.index(secondaryStructure)
    return [
        1 if slot == hotIndex else 0
        for slot in range(SECONDARY_STRUCTURE_VECTOR_SIZE)
    ]

def secondaryStructureSequenceToVector(secondaryStructureSequence):
    """Encode a secondary-structure string as one flat vector of length OUTPUT_SIZE.

    The one-hot vectors of the individual symbols are concatenated in order and
    the remainder is filled with zeros.  Raises IndexError when the sequence is
    longer than MAX_SECONDARY_STRUCTURE_COUNT.
    """
    if MAX_SECONDARY_STRUCTURE_COUNT < len(secondaryStructureSequence):
        raise IndexError("Too many secondary structures.")
    encoded = []
    for symbol in secondaryStructureSequence:
        encoded.extend(secondaryStructureToVector(symbol))
    # Zero-pad up to the fixed output width.
    encoded.extend([0] * (OUTPUT_SIZE - len(encoded)))
    return encoded

def vectorToSecondaryStructureSequence(vector):
    """Decode a flat network output back into a secondary-structure string.

    The vector is read in consecutive groups of SECONDARY_STRUCTURE_VECTOR_SIZE
    scores, one group per position.  For each group the highest score wins (the
    later index wins ties), and its symbol is emitted only when that score is at
    least 0.5; positions below the threshold contribute nothing to the result.
    """
    decoded = []
    for position in range(MAX_SECONDARY_STRUCTURE_COUNT):
        start = position * SECONDARY_STRUCTURE_VECTOR_SIZE
        scores = vector[start:start + SECONDARY_STRUCTURE_VECTOR_SIZE]
        bestIndex = -1
        bestScore = 0
        for candidate in range(SECONDARY_STRUCTURE_VECTOR_SIZE):
            score = scores[candidate]
            # ">=" rather than ">" so that the later of two equal scores wins.
            if score >= bestScore:
                bestIndex, bestScore = candidate, score
        if bestScore >= 0.5:
            decoded.append(POSSIBLE_SECONDARY_STRUCTURES[bestIndex])
    return "".join(decoded)

In [26]:
import csv

# Training run: one pass over the CSV, one gradient step per usable row.
DATASET_PATH = "2018-06-06-ss.cleaned.csv"
SEQUENCE_COLUMN = 2   # 'seq' in the header row
STRUCTURE_COLUMN = 4  # 'sst3' in the header row
HIDDEN_LAYER_SIZE = 100
LEARNING_RATE = 0.01

myNeuralNet = NeuralNetwork(INPUT_SIZE)
myNeuralNet.addLayer(HIDDEN_LAYER_SIZE)
myNeuralNet.addLayer(OUTPUT_SIZE)

# The with-block closes the file even when the (long) loop is interrupted.
# "rb" is the mode Python 2's csv module expects; under Python 3 the file would
# have to be opened in text mode with newline="" instead.
with open(DATASET_PATH, "rb") as datasetFile:
    dataset = csv.reader(datasetFile)
    headers = next(dataset)
    print(headers)

    for row in dataset:
        aminoAcidSequence = row[SEQUENCE_COLUMN]
        secondaryStructureSequence = row[STRUCTURE_COLUMN]
        # Skip rows that do not fit the fixed-size encodings.
        if (len(aminoAcidSequence) <= MAX_AMINO_ACID_COUNT) and (len(secondaryStructureSequence) <= MAX_SECONDARY_STRUCTURE_COUNT):
            myNeuralNet.forwardPropagate(aminoAcidSequenceToVector(aminoAcidSequence))
            myNeuralNet.backPropagate(secondaryStructureSequenceToVector(secondaryStructureSequence))
            myNeuralNet.gradientDescent(LEARNING_RATE)

['pdb_id', 'chain_code', 'seq', 'sst8', 'sst3', 'len', 'has_nonstd_aa']


KeyboardInterrupt: 