In [None]:
import numpy
import matplotlib
import lxml
import pandas
import pyteomics
import csv
import math
import multiprocessing
from pyteomics import mass

In [None]:
# Show the kernel's current working directory (relies on IPython automagic for %pwd).
pwd

In [None]:
# Protein sequence as a string of one-letter residue codes; it is the input
# to every mass calculation and fragment search in this notebook.
vclh_seq = 'MNHKVHMHHHHHHADEQEEKAKVRTELIQELAQGLGGIEKKNFPTLGDEDLDHTYMTKLLTYLQEREQAENSWRKRLLKGIQDHALDLVPRGSPGLPGPRGEQGPTGPTGPAGPRGLQGLQGLQGERGEQGPTGPAGPRGLQGERGEQGPTGLAGKAGEAGAKGETGPAGPQGPRGEQGPQGLPGKDGEAGAQGRPGKRGKQGQKGEKGEPGTQGAKGDRGETGPVGPRGERGEAGPAGKDGERGPVGPAGKDGQNGQDGLPGKDGKDGQNGKDGLPGKDGKDGQNGKDGLPGKDGKDGQDGKDGLPGKDGKDGLPGKDGKDGQPGKPGKY'

In [None]:
# Quick check of the sequence's Python type.
type(vclh_seq)

In [None]:
# CSV file of observed masses, read by import_obs_masses.
# NOTE(review): absolute, machine-specific path; the notebook will not run
# on another machine without editing this line.
file = '/Users/user/Documents/fragment_finder/files/VCLH_T-145-DSP-04_input.csv'

In [None]:
def mass_cal(peptide_seq):
    """Return the average mass of ``peptide_seq`` rounded to one decimal place.

    The sequence is handed to ``mass.calculate_mass`` with ``average=True``.
    """
    average_mass = mass.calculate_mass(peptide_seq, average=True)
    return round(average_mass, 1)

In [None]:
# Average mass of the full-length sequence, rounded to one decimal place.
mass_cal(vclh_seq)

In [None]:
def import_obs_masses(file_location):
    """Read observed masses from the second column of a comma-separated file.

    The first row is treated as a header and skipped. Completely blank rows
    are ignored, and an empty file yields an empty list instead of raising.

    Args:
        file_location: Path of the CSV file to read.

    Returns:
        list[float]: The second-column value of every data row, in file order.

    Raises:
        IndexError: If a non-blank data row has fewer than two columns.
        ValueError: If a second-column value cannot be converted to float.
    """
    # newline="" lets the csv module do its own newline handling, as the
    # csv documentation requires for file objects passed to csv.reader.
    with open(file_location, "r", newline="") as csv_file:
        csv_reader = csv.reader(csv_file, delimiter=',')
        # Skip the header; the default keeps an empty file from raising StopIteration.
        next(csv_reader, None)
        # csv.reader yields [] for blank lines, so `if row` drops them.
        return [float(row[1]) for row in csv_reader if row]

In [None]:
def mass_diff(prot_mass, obs_masses):
    """Return the smallest ``prot_mass - observed`` gap in whole hundreds.

    Each observed mass is subtracted from ``prot_mass``; the smallest
    difference is floor-divided by 100 and returned as an ``int``.
    """
    smallest_gap = min(prot_mass - observed for observed in obs_masses)
    hundreds = smallest_gap // 100
    return int(hundreds)

In [None]:
def fragments(prot_seq, obs_masses, mass_diffs, tolerance):
    """Find slices of ``prot_seq`` whose average mass matches an observed mass.

    For every start position, candidate slices of increasing length are
    tried, beginning at ``int(min(obs_masses) // 140)`` residues. A candidate
    is kept when its average mass (rounded to one decimal place) is within
    ``tolerance`` of any value in ``obs_masses``. Each distinct matching
    sequence is reported once. The full-length sequence itself is never
    tested; only shorter slices are.

    Args:
        prot_seq: Sequence string; its slices are passed to
            ``mass.calculate_mass``.
        obs_masses: Non-empty list of observed masses.
        mass_diffs: Unused; kept so existing call sites keep working.
        tolerance: Absolute tolerance passed to ``math.isclose``.

    Returns:
        list: Flat list alternating a matched sequence and its rounded mass,
        e.g. ``[seq1, mass1, seq2, mass2, ...]``. The list is also printed.

    Raises:
        ValueError: If ``obs_masses`` is empty (raised by ``min``).
    """
    found = []
    matched = set()  # sequences already reported, for O(1) duplicate checks
    seq_len = len(prot_seq)
    # NOTE(review): 140 looks like a per-residue mass estimate used to bound
    # the shortest slice worth testing; confirm the intended value.
    min_len = int(min(obs_masses) // 140)

    for start in range(seq_len):
        # Same window as a [min_len, seq_len) range shifted right by `start`.
        for end in range(start + min_len, start + seq_len):
            candidate = prot_seq[start:end]
            if candidate not in matched:
                # The mass does not depend on the observed value, so compute it once.
                candidate_mass = round(mass.calculate_mass(candidate, average=True), 1)
                if any(math.isclose(candidate_mass, observed, abs_tol=tolerance)
                       for observed in obs_masses):
                    matched.add(candidate)
                    found.append(candidate)
                    found.append(candidate_mass)
            if end >= seq_len:
                # Any larger `end` slices to the same suffix; stop re-testing it.
                break

    print(found)
    return found

In [None]:
# Compute the shared inputs for the fragment searches.
whole_prot_mass = mass_cal(vclh_seq)  # rounded average mass of the full sequence
observed_masses = import_obs_masses(file)  # second CSV column as floats
mass_differences = mass_diff(whole_prot_mass, observed_masses)  # int: smallest gap // 100

In [None]:
# Argument tuple for a single-mass search: first observed mass, tolerance 0.5.
# NOTE(review): `trial` is not used anywhere else in the visible notebook.
trial = (vclh_seq, observed_masses[0], mass_differences, 0.5)

In [None]:
# Average mass of the first four residues, unrounded, as a manual spot check.
print(mass.calculate_mass(vclh_seq[0:4], average = True))

In [None]:
# Same slice without average=True, i.e. with calculate_mass's default mass type
# (presumably monoisotopic; confirm against the pyteomics documentation).
mass.calculate_mass(vclh_seq[0:4])

In [None]:
%%time
# Time a search over the first two observed masses with a 0.5 tolerance.
fragments(vclh_seq, observed_masses[0:2], mass_differences, 0.5)

In [None]:
def fragments_multi(prot_seq, obs_mass, mass_diffs, tolerance):
    """Find slices of ``prot_seq`` whose average mass matches ``obs_mass``.

    Single-mass variant intended to be mapped over many observed masses
    (e.g. with ``Pool.starmap``). For every start position, candidate slices
    of increasing length are tried, beginning at ``int(obs_mass) // 140``
    residues. A candidate is kept when its average mass (rounded to one
    decimal place) is within ``tolerance`` of ``obs_mass``. Each distinct
    matching sequence is reported once. The full-length sequence itself is
    never tested; only shorter slices are.

    Args:
        prot_seq: Sequence string; its slices are passed to
            ``mass.calculate_mass``.
        obs_mass: The single observed mass to match.
        mass_diffs: Unused; kept so existing call sites keep working.
        tolerance: Absolute tolerance passed to ``math.isclose``.

    Returns:
        list: Flat list alternating a matched sequence and its rounded mass.
        The list is also printed. Returning it lets callers that collect
        results (such as ``Pool.starmap``) receive the matches rather than
        ``None``.
    """
    found = []
    matched = set()  # sequences already reported, for O(1) duplicate checks
    seq_len = len(prot_seq)
    # NOTE(review): 140 looks like a per-residue mass estimate used to bound
    # the shortest slice worth testing; confirm the intended value.
    min_len = int(obs_mass) // 140

    for start in range(seq_len):
        # Same window as a [min_len, seq_len) range shifted right by `start`.
        for end in range(start + min_len, start + seq_len):
            candidate = prot_seq[start:end]
            if candidate not in matched:
                # Compute the mass once and reuse it for the check and the result.
                candidate_mass = round(mass.calculate_mass(candidate, average=True), 1)
                if math.isclose(candidate_mass, obs_mass, abs_tol=tolerance):
                    matched.add(candidate)
                    found.append(candidate)
                    found.append(candidate_mass)
            if end >= seq_len:
                # Any larger `end` slices to the same suffix; stop re-testing it.
                break

    print(found)
    return found

In [None]:
# Build one starmap argument tuple per observed mass.
# The iteration variable must not be called `mass`: at top level that rebinds
# the `mass` module imported from pyteomics to a float, after which every
# mass.calculate_mass(...) call in the notebook fails with AttributeError.
multi = [
    (vclh_seq, obs_mass, mass_differences, 0.5)
    for obs_mass in observed_masses
]

In [None]:
# Display the argument tuples that will be passed to the worker pool.
multi

In [None]:
%%time
# Run fragments_multi for every argument tuple in `multi` on two worker processes.
if __name__ == '__main__':
    with multiprocessing.Pool(processes=2) as pool:
        # starmap unpacks each tuple into the function's positional arguments.
        results = pool.starmap(fragments_multi, multi)
    # One entry per tuple in `multi`, holding whatever fragments_multi returned.
    print(results)

In [None]:
def fragments_speedup(prot_seq, obs_mass, mass_diffs, tolerance):
    """Find slices of ``prot_seq`` whose average mass matches ``obs_mass``.

    Variant of the single-mass search in which the shortest slice length
    tried is given directly by ``mass_diffs``. For every start position,
    candidate slices of increasing length are tried. A candidate is kept
    when its average mass (rounded to one decimal place) is within
    ``tolerance`` of ``obs_mass``. Each distinct matching sequence is
    reported once. The full-length sequence itself is never tested; only
    shorter slices are.

    Args:
        prot_seq: Sequence string; its slices are passed to
            ``mass.calculate_mass``.
        obs_mass: The single observed mass to match.
        mass_diffs: Integer used as the minimum slice length.
            NOTE(review): the notebook passes the result of ``mass_diff``
            here; confirm that value is meant as a lower bound on length.
        tolerance: Absolute tolerance passed to ``math.isclose``.

    Returns:
        list: Flat list alternating a matched sequence and its rounded mass.
        The list is also printed. Returning it lets callers that collect
        results (such as ``Pool.starmap``) receive the matches rather than
        ``None``.
    """
    found = []
    matched = set()  # sequences already reported, for O(1) duplicate checks
    seq_len = len(prot_seq)
    min_len = mass_diffs

    for start in range(seq_len):
        # Same window as a [min_len, seq_len) range shifted right by `start`.
        for end in range(start + min_len, start + seq_len):
            candidate = prot_seq[start:end]
            if candidate not in matched:
                # Compute the mass once and reuse it for the check and the result.
                candidate_mass = round(mass.calculate_mass(candidate, average=True), 1)
                if math.isclose(candidate_mass, obs_mass, abs_tol=tolerance):
                    matched.add(candidate)
                    found.append(candidate)
                    found.append(candidate_mass)
            if end >= seq_len:
                # Any larger `end` slices to the same suffix; stop re-testing it.
                break

    print(found)
    return found
    
pool = multiprocessing.Pool(processes=2)
whole_prot_mass = mass_cal(vclh_seq)
observed_masses = import_obs_masses(file)
mass_differences = mass_diff(whole_prot_mass, observed_masses)

input_list = []

part_list = []
for mass in observed_masses[0:2]:
    part_list = (vclh_seq, mass, mass_differences, 0.5)
    input_list.append(part_list)
#print(input_list)

%%time
pool.starmap(fragments_speedup, input_list)
pool.close()