In [20]:
#Watershed on combined image region

import numpy as np
import cv2 as cv
from matplotlib import pyplot as plt

img = cv.imread('/home/rcardiff/combined_img-4.png')
gray = cv.cvtColor(img, cv.COLOR_BGR2GRAY)
gray_inv = np.invert(gray)

kernel1 = np.array([[-1,-1, -1], [0, 0, 0], [1, 1, 1]], np.uint8)
opening = cv.morphologyEx(gray_inv,cv.MORPH_OPEN,kernel1, iterations = 3)
#opening = dilation. turns pixel to 1 if a pixel under the kernel is 1, increasing size of object and 
#helping connect broken objects

kernel2 = np.ones((3,3),np.uint8)
grad = cv.morphologyEx(opening,cv.MORPH_GRADIENT, kernel2, iterations =1)


# Marker labelling
ret, markers = cv.connectedComponents(grad)
# Add one to all labels so that sure background is not 0, but 1
markers = markers+1
markers = cv.watershed(img,markers)

from skimage.measure import regionprops
print(len(regionprops(markers)))

196678


In [62]:
from pyteomics import mzxml, auxiliary
import matplotlib.patches as mpatches
import os
from statistics import median
from tqdm import tqdm_notebook as tqdm
import numpy as np

feature_data = [x.bbox for x in regionprops(markers)]
feature_list = [{} for x in regionprops(markers)]

min_mzs = np.array([x[0] for x in feature_data])
min_rts = np.array([x[1] for x in feature_data])
max_mzs = np.array([x[2] for x in feature_data])
max_rts = np.array([x[3] for x in feature_data])

min_mzs = np.round(150.0 + (min_mzs * .001), decimals = 4)
min_rts = np.round(min_rts * .02, decimals = 4)
max_mzs = np.round(150.0 + (max_mzs * .001), decimals = 4)
max_rts = np.round(max_rts * .02, decimals = 4)


num_features = len(min_mzs)

directory = "/home/rcardiff/ryan/ryan/meyer_raw_data/C18_neg/CL/"
in_files = [os.path.join(directory, x) for x in os.listdir(directory) if x.endswith('.mzXML')]
feature_list = [{} for r in regionprops(markers)]

#2D lists with (number files) x (number features) dimensions
running_feature_list_mz = [[[] for f in in_files] for r in regionprops(markers)]
running_feature_list_rt = [[[] for f in in_files] for r in regionprops(markers)]
running_feature_list_i = [[[] for f in in_files] for r in regionprops(markers)]



In [63]:
for fname in in_files:

    file_index = in_files.index(fname)
    f_data = mzxml.read(fname)

    all_data =[(x['m/z array'], x['intensity array'], [float(x['retentionTime']) for y in x['m/z array']]) for x in f_data]


    mzs = np.concatenate([x[0] for x in all_data])
    intensities = np.concatenate([x[1] for x in all_data])
    rts = np.concatenate([x[2] for x in all_data])
    
    c = 0
    for i_mz, mz in enumerate(tqdm(mzs)):
        c+=1
        rt = rts[i_mz]
        intensity = intensities[i_mz]

        minSub = mz - min_mzs
        minSub = (minSub > 0) * minSub
        maxSub = max_mzs - mz
        maxSub = (maxSub > 0) * maxSub
        mzCheck = np.multiply(minSub, maxSub)

        if any(mzCheck):
            minSub = rt - min_rts
            minSub = (minSub > 0) * minSub
            maxSub = max_rts - rt
            maxSub = (maxSub > 0) * maxSub
            rtCheck = np.multiply(minSub, maxSub)
            allCheck = np.multiply(mzCheck, rtCheck)

            if any(allCheck):
                for index in np.nonzero(allCheck)[0]:
                    running_feature_list_mz[index][file_index].append(mz)
                    running_feature_list_rt[index][file_index].append(rt)
                    running_feature_list_i[index][file_index].append(intensity)

            if c >= 10000:
                break

HBox(children=(IntProgress(value=0, max=1687980), HTML(value='')))

KeyboardInterrupt: 

In [61]:
#second version
for fname in in_files:

    file_index = in_files.index(fname)
    f_data = mzxml.read(fname)

    all_data =[(x['m/z array'], x['intensity array'], [float(x['retentionTime']) for y in x['m/z array']]) for x in f_data]


    mzs = np.concatenate([x[0] for x in all_data])
    intensities = np.concatenate([x[1] for x in all_data])
    rts = np.concatenate([x[2] for x in all_data])
    
    c = 0
    for i, min_mz in enumerate(tqdm(min_mzs)):
        c+=1
        min_rt = min_rts[i]
        
        minSub = mzs - min_mz
        minSub = (minSub > 0) * minSub
        maxSub = max_mzs[i] - mzs
        maxSub = (maxSub > 0) * maxSub
        #mzCheck = minSub * maxSub
        mzCheck = np.multiply(minSub, maxSub)
        
        #minSub = np.subtract(mzs, min_mz)
        #minSub = np.multiply(minSub>0,minSub)
        #maxSub = np.subtract(max_mzs[i],mzs)
        #maxSub = np.multiply(maxSub>0,maxSub)
        #mzCheck = np.multiply(minSub, maxSub)
        
        if any(mzCheck):
            minSub = rts - min_rt
            minSub = (minSub > 0) * minSub
            maxSub = max_rts[i] - rts
            maxSub = (maxSub > 0) * maxSub
            rtCheck = np.multiply(minSub, maxSub)
            allCheck = np.multiply(mzCheck, rtCheck)

            if any(allCheck):
                for index in np.nonzero(allCheck)[0]:

                    running_feature_list_mz[i][file_index].append(mzs[index])
                    running_feature_list_rt[i][file_index].append(rts[index])
                    running_feature_list_i[i][file_index].append(intensities[index])

            if c >= 10000:
                break

HBox(children=(IntProgress(value=0, max=196678), HTML(value='')))

KeyboardInterrupt: 

In [5]:
from statistics import mean,median

for i,ft in tqdm(enumerate(running_feature_list_mz)): #for every feature
    min_mz, min_rt, max_mz, max_rt = min_mzs[i],min_rts[i], max_mzs[i], max_rts[i]
    if max_rt - min_rt >= 1499 or max_mz - min_mz >= 99990:
        continue
    sample_list = []
    
    for j,file in enumerate(ft): #for each file contributing to feature
        sample_val = []
        if file: #if that file contributed to the feature, get values
            sample_min_mz, sample_max_mz, sample_med_mz = min(file), max(file), median(file)
            rt_list = running_feature_list_rt[i][j]
            i_list = running_feature_list_i[i][j]
            sample_min_rt, sample_max_rt, sample_med_rt = min(rt_list), max(rt_list), median(rt_list)
            sample_tot_i, sample_mean_i = sum(i_list), mean(i_list)
        
            sample_val = [sample_min_mz, sample_max_mz, sample_med_mz, sample_min_rt, 
                        sample_max_rt, sample_med_rt, sample_tot_i, sample_mean_i] #compute values for sample
    
        sample_list.append(sample_val)
        
    feature_list[i] = {"Feature ID": "FT"+str(i), "FT med mz":median([min_mz, max_mz]), "FT min mz": min_mz, "FT max mz": max_mz,
                                       "FT med rt": median([min_rt,max_rt]), "FT min rt": min_rt, "FT max rt": max_rt, 
                                       "CL1":sample_list[0],
                                      "CL2": sample_list[1], "CL3":sample_list[2],"CL5":sample_list[3],
                                      "CL6":sample_list[4],"CL7": sample_list[5],"CL8":sample_list[6],
                                      "CL9":sample_list[7],"CL10":sample_list[8],"CL11":sample_list[9],
                                      "CL14":sample_list[10],"CL16":sample_list[11]}
    

HBox(children=(IntProgress(value=1, bar_style='info', max=1), HTML(value='')))

In [9]:
import csv
directory = "/home/rcardiff/"
os.chdir(directory)

summary = {"Feature ID", "FT med mz", "FT min mz", "FT max mz", "FT med rt", "FT min rt", "FT max rt"}
#subList = { key:value for key,value in feature_list[2].items() if key in summary}
#print(subList)

with open('summary_test.csv', 'w') as f:
    dict_writer = csv.DictWriter(f, summary)
    dict_writer.writeheader()
    subList = ( { key:value for key,value in x.items() if key in summary} for x in feature_list[2:] )
    dict_writer.writerows(subList)

In [10]:
import pickle

#Write to pickle
filename = 'pickle_test'
outfile = open(filename, 'wb')
pickle.dump(feature_list,outfile)
outfile.close()




In [11]:
#Open pickle
infile = open('pickle_test','rb')
new_dict = pickle.load(infile)
infile.close()

keys = new_dict[2].keys()

with open('feature_full_test.csv', 'w') as f:
    dict_writer = csv.DictWriter(f, keys)
    dict_writer.writeheader()
    dict_writer.writerows(new_dict[2:])