In [44]:
#FINAL CODE
# !/usr/bin/env python3
import sys
from copy import deepcopy
import time

start_time = time.time()


########################################################################
# File:problem17.py
#  executable: problem17.py
#  purpose: Find a Cyclic Peptide with Theoretical Spectrum Matching an Ideal Spectrum
#
# Author: Arushi Mithal
#
#
# Notes:  1. To run the program from command line terminals:
#          Unix/Windows: python  problem17.py < input.txt > output.out
#
# Laptop, where test were running, specs:
#        Windows 10-64bit. Processor i-5 5200U CPU @2.20GHz 2.20 GHz
#        Internal RAM  4.00 GB
########################################################################

class CyclicPeptide:
    """
        Used to find a Cyclic Peptide with Theoretical Spectrum Matching an Ideal Spectrum

        Compare the input spectrum masses with the amino acid masses dictionary, and find the ones that are present
        in the spectrum. Make a list of these 1-mer peptides, then, expand and trim these peptides till you find
        the peptides that have a mass spectrum consistent with the input mass spectrum. Return the peptide strings
        with consistent mass spectrum.

        use commandline: python  problem17.py < input.txt > output.out

    """

    def __init__(self, spectrum):
        """  CyclicPeptide Constructor  """
        self.spectrum = spectrum
        self.geneticCode = {}

    def cycloPeptideSequencing(self):
        """ return every Peptide string for which its cyclospectrum is similar to input Spectrum """
        aminoAcid = {'G': 57, 'A': 71, 'S': 87, 'P': 97, 'V': 99, 'T': 101, 'C': 103, 'I': 113, 'N': 114,
                     'D': 115, 'K': 128, 'E': 129, 'M': 131, 'H': 137, 'F': 147, 'R': 156, 'Y': 163, 'W': 186}
        peptides = {"": 0}
        parentMass = max(self.spectrum)
        #print(parentMass)
        for i in range(len(self.spectrum)):
            for k, v in aminoAcid.items():
                if self.spectrum[i] == v:
                    self.geneticCode[k] = v
        output = []
        #start_time = time.time()
        while len(peptides) != 0:
            #print("inside while", peptides)
            peptides = self.expand(peptides)
            temp1 = deepcopy(peptides)
            for t, mass in temp1.items():
                if mass == parentMass:
                    if self.cyclospectrum(t) == self.spectrum:
                        output.append(t)
                    del peptides[t]
                elif mass not in self.spectrum:
                    del peptides[t]
        #print("%s seconds " % (time.time() - start_time))
        outputPeptide = []
        for pep in output:
            temp = ""
            for p in pep:
                temp += (str(aminoAcid[p]) + '-')
            outputPeptide.append(temp[:-1])
            
            print(temp[:-1])
        print(len(outputPeptide))
        #print("%s seconds " % (time.time() - start_time))
    
    
        return outputPeptide

    def expand(self, peptides):
        """ return a list of expanded peptides """
        expand = {}
        for peptide, mass in peptides.items():
            for aa in self.geneticCode:
                expand[peptide + aa] = mass + self.geneticCode[aa]
        return expand

    def cyclospectrum(self, peptide):
        """ return the collection of all of the masses of the sub-peptides of the input peptide """
        prefixMass = [0]
        for i in range(len(peptide)):
            for k, v in self.geneticCode.items():
                if k == peptide[i]:
                    prefixMass.append(prefixMass[i] + v)
        peptideMass = prefixMass[-1]
        cyclicSpectrum = [0]
        for i in range(len(peptide)):
            for j in range(i + 1, len(peptide) + 1):
                cyclicSpectrum.append(prefixMass[j] - prefixMass[i])
                if i > 0 and j < len(peptide):
                    cyclicSpectrum.append(peptideMass - (prefixMass[j] - prefixMass[i]))
        #print("sorted cyclic spectrum", sorted(cyclicSpectrum))
        return sorted(cyclicSpectrum)


def main():
    """ Used to execute the program"""
    filename1="rosalind_ba4e_762_2_dataset.txt"
    output_file=sys.argv[2]
    #print("first")
    with open(filename1) as file:
    #with sys.stdin as file:
        input1 = file.readlines()
    spec = input1[0].rstrip('\n').split(' ')
    spectrum = []
    for i in spec:
        spectrum.append(int(i))
    #print(spectrum)
    s1 = CyclicPeptide(spectrum)
    s1.cycloPeptideSequencing()


if __name__ == '__main__':
    main()
print("%s seconds " % (time.time() - start_time))



71-128-103-163-131-87-129-186
71-186-129-87-131-163-103-128
87-129-186-71-128-103-163-131
87-131-163-103-128-71-186-129
103-128-71-186-129-87-131-163
103-163-131-87-129-186-71-128
128-71-186-129-87-131-163-103
128-103-163-131-87-129-186-71
129-87-131-163-103-128-71-186
129-186-71-128-103-163-131-87
131-87-129-186-71-128-103-163
131-163-103-128-71-186-129-87
163-103-128-71-186-129-87-131
163-131-87-129-186-71-128-103
186-71-128-103-163-131-87-129
186-129-87-131-163-103-128-71
16
0.7523815631866455 seconds 


In [1]:
#FINAL WITH 100 LOOPING
# !/usr/bin/env python3
import sys
from copy import deepcopy
import time

start_time = time.time()


########################################################################
# File:problem17.py
#  executable: problem17.py
#  purpose: Find a Cyclic Peptide with Theoretical Spectrum Matching an Ideal Spectrum
#
# Author: Arushi Mithal
#
#
# Notes:  1. To run the program from command line terminals:
#          Unix/Windows: python  problem17.py < input.txt > output.out
#
# Laptop, where test were running, specs:
#        Windows 10-64bit. Processor i-5 5200U CPU @2.20GHz 2.20 GHz
#        Internal RAM  4.00 GB
########################################################################

class CyclicPeptide:
    """
        Used to find a Cyclic Peptide with Theoretical Spectrum Matching an Ideal Spectrum

        Compare the input spectrum masses with the amino acid masses dictionary, and find the ones that are present
        in the spectrum. Make a list of these 1-mer peptides, then, expand and trim these peptides till you find
        the peptides that have a mass spectrum consistent with the input mass spectrum. Return the peptide strings
        with consistent mass spectrum.

        use commandline: python  problem17.py < input.txt > output.out

    """

    def __init__(self, spectrum):
        """  CyclicPeptide Constructor  """
        self.spectrum = spectrum
        self.geneticCode = {}

    def cycloPeptideSequencing(self):
        """ return every Peptide string for which its cyclospectrum is similar to input Spectrum """
        aminoAcid = {'G': 57, 'A': 71, 'S': 87, 'P': 97, 'V': 99, 'T': 101, 'C': 103, 'I': 113, 'N': 114,
                     'D': 115, 'K': 128, 'E': 129, 'M': 131, 'H': 137, 'F': 147, 'R': 156, 'Y': 163, 'W': 186}
        peptides = {"": 0}
        parentMass = max(self.spectrum)
        #print(parentMass)
        for i in range(len(self.spectrum)):
            for k, v in aminoAcid.items():
                if self.spectrum[i] == v:
                    self.geneticCode[k] = v
        output = []
        while len(peptides) != 0:
            #print("inside while", peptides)
            peptides = self.expand(peptides)
            temp1 = deepcopy(peptides)
            for t, mass in temp1.items():
                if mass == parentMass:
                    if self.cyclospectrum(t) == self.spectrum:
                        output.append(t)
                    del peptides[t]
                elif mass not in self.spectrum:
                    del peptides[t]

        outputPeptide = []
        for pep in output:
            temp = ""
            for p in pep:
                temp += (str(aminoAcid[p]) + '-')
            outputPeptide.append(temp[:-1])
            #print(outputPeptide)
        return outputPeptide
            #print(temp[:-1])
        #print(len(outputPeptide))
        #print("%s seconds " % (time.time() - start_time))
    
    
        return outputPeptide

    def expand(self, peptides):
        """ return a list of expanded peptides """
        expand = {}
        for peptide, mass in peptides.items():
            for aa in self.geneticCode:
                expand[peptide + aa] = mass + self.geneticCode[aa]
        return expand

    def cyclospectrum(self, peptide):
        """ return the collection of all of the masses of the sub-peptides of the input peptide """
        prefixMass = [0]
        for i in range(len(peptide)):
            for k, v in self.geneticCode.items():
                if k == peptide[i]:
                    prefixMass.append(prefixMass[i] + v)
        peptideMass = prefixMass[-1]
        cyclicSpectrum = [0]
        for i in range(len(peptide)):
            for j in range(i + 1, len(peptide) + 1):
                cyclicSpectrum.append(prefixMass[j] - prefixMass[i])
                if i > 0 and j < len(peptide):
                    cyclicSpectrum.append(peptideMass - (prefixMass[j] - prefixMass[i]))
        #print("sorted cyclic spectrum", sorted(cyclicSpectrum))
        return sorted(cyclicSpectrum)


def main():
    """ Used to execute the program"""
    filename1="rosalind_ba4e_762_24_dataset.txt"
    output_file=sys.argv[2]
    #print("first")
    with open(filename1) as file:
    #with sys.stdin as file:
        input1 = file.readlines()
    spec = input1[0].rstrip('\n').split(' ')
    spectrum = []
    for i in spec:
        spectrum.append(int(i))
    #print(spectrum)
    for i in range(0,100):
        s1 = CyclicPeptide(spectrum)
        o = s1.cycloPeptideSequencing()
        if i == 99:
            for each in o:
                print(each)
            print(len(o))
   

if __name__ == '__main__':
    main()
print("%s seconds " % (time.time() - start_time))

101-113-137-114-129-131-113-103-186-114-131
101-131-114-186-103-113-131-129-114-137-113
103-113-131-129-114-137-113-101-131-114-186
103-186-114-131-101-113-137-114-129-131-113
113-101-131-114-186-103-113-131-129-114-137
113-103-186-114-131-101-113-137-114-129-131
113-131-129-114-137-113-101-131-114-186-103
113-137-114-129-131-113-103-186-114-131-101
114-129-131-113-103-186-114-131-101-113-137
114-131-101-113-137-114-129-131-113-103-186
114-137-113-101-131-114-186-103-113-131-129
114-186-103-113-131-129-114-137-113-101-131
129-114-137-113-101-131-114-186-103-113-131
129-131-113-103-186-114-131-101-113-137-114
131-101-113-137-114-129-131-113-103-186-114
131-113-103-186-114-131-101-113-137-114-129
131-114-186-103-113-131-129-114-137-113-101
131-129-114-137-113-101-131-114-186-103-113
137-113-101-131-114-186-103-113-131-129-114
137-114-129-131-113-103-186-114-131-101-113
186-103-113-131-129-114-137-113-101-131-114
186-114-131-101-113-137-114-129-131-113-103
22
258.82756757736206 seconds 
