This notebook will take a tab delimited text file that contains peak intensities in a DESI experiment exported from the Waters HDI software. An image will be constructed for each feature in the datafile. The generated images (as tab delimited text files) are added to a zip file for download. 

Enter the filename of the uploaded file in the cell below. You must also enter the desired resolution in the nrows (number of rows) and ncols (number of columns) fields in the cell below. Only downsampling is permitted: if either nrows or ncols is larger than the corresponding dimension of the data, no resizing will occur and the images are written at their original resolution. After entering this information, run the cell by clicking into it and pressing shift+enter, or by clicking the play button in the top left-hand corner of the cell. 


In [1]:
# name of the uploaded tab delimited export to convert; the output zip name is derived from it
filename = "20200228_mouse_liver_13C_nontumor6 Analyte 3SRD15.txt"
# requested image size (rows x columns) that each feature image is passed to scale() with
nrows = 60
ncols = 60

After entering the information in the cell above, run the cell below. After it has completed you can download the zip archive from the panel on the left. This may take a few minutes if the input data has lots of features.

In [None]:
#load necessary packages
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
from zipfile import ZipFile
import os

#simple image scaling to (nR x nC) size
def scale(im, nR, nC):
    """Downsample a 2-D array to (nR x nC) by summing non-overlapping windows.

    Each output cell holds the sum of a ceil(rows/nR) x ceil(cols/nC) window
    of ``im``. Windows that run past the edge of ``im`` are truncated, and
    windows that start beyond the edge are empty and therefore sum to 0.

    If ``im`` has fewer rows than ``nR`` or fewer columns than ``nC``, a
    message is printed and ``im`` is returned unchanged.
    """
    srcRows = len(im)
    srcCols = len(im[0])
    rowStep = int(np.ceil(srcRows / nR))
    colStep = int(np.ceil(srcCols / nC))

    # guard clause: upsampling in either dimension is not supported
    if srcRows < nR or srcCols < nC:
        print("image can only be downsampled")
        return im

    pooled = np.zeros((nR, nC))
    for outRow in range(nR):
        # clamp the window so it never extends past the last source row
        top = rowStep * outRow
        bottom = min(rowStep * (outRow + 1), srcRows)
        for outCol in range(nC):
            left = colStep * outCol
            right = min(colStep * (outCol + 1), srcCols)
            # total signal inside the window
            pooled[outRow, outCol] = np.sum(im[top:bottom, left:right])
    return pooled

#load datafile
# Read the export inside a context manager so the file handle is closed as
# soon as the lines are in memory. The first three lines are skipped; the next
# line supplies the feature labels and every following non-empty line holds a
# label, two coordinates and then one intensity per feature.
with open(filename, "r") as infile:
    rawRows = [line.strip().split() for line in infile.readlines()[3:]]
featureLabels = rawRows[0] if rawRows else []
data = {
    (x[0], float(x[1]), float(x[2])): {mz: float(i) for mz, i in zip(featureLabels, x[3:])}
    for x in rawRows[1:]
    if len(x) > 0
}
data = pd.DataFrame.from_dict(data, orient="index")

#get mz indices
cols = data.columns.values
mzs = cols

# The pixel layout is the same for every feature, so resolve it once here
# instead of walking the whole table again for each feature.
# Keys are (third index field, second index field); when several table rows
# share a key, the last one wins.
lastRowForPixel = {}
for position, index in enumerate(data.index):
    lastRowForPixel[(float(index[2]), float(index[1]))] = position

#extract unique coordinates, sorted, and map each to its pixel position
xcords = sorted({key[0] for key in lastRowForPixel})
ycords = sorted({key[1] for key in lastRowForPixel})
xcordMap = {x: i for i, x in enumerate(xcords)}
ycordMap = {y: i for i, y in enumerate(ycords)}

# parallel arrays: image row, image column and source table row of each pixel
pixelRows = np.array([xcordMap[key[0]] for key in lastRowForPixel], dtype=int)
pixelCols = np.array([ycordMap[key[1]] for key in lastRowForPixel], dtype=int)
sourceRows = np.array(list(lastRowForPixel.values()), dtype=int)

# Derive the archive name from the file stem. A plain str.replace(".txt", ...)
# returns the input path unchanged when the name has no ".txt", which would
# make the archive overwrite the input file.
zipPath = os.path.splitext(filename)[0] + "_featureImages.zip"

# create a ZipFile object; the with-block closes it even if a feature fails
with ZipFile(zipPath, 'w') as zipObj:

    #iterate over each feature
    for mz in mzs:

        #place every pixel's intensity into the ordered image
        intensities = data[mz].values
        outarray = np.zeros((len(xcords), len(ycords)))
        outarray[pixelRows, pixelCols] = intensities[sourceRows]

        #rescale for desired resolution
        outarray = scale(outarray, nrows, ncols)

        #optional plotting of the images (too slow for lots of features)
        #plt.figure()
        #plt.imshow(outarray)
        #plt.title(mz)

        #build the tab delimited text (each value is followed by a tab,
        #each row ends with a newline)
        text = "".join(
            "".join(str(c) + "\t" for c in r) + "\n"
            for r in outarray
        )

        # Write straight into the archive: no temporary <mz>.txt is created in
        # the working directory, so an existing file of that name can never be
        # overwritten or deleted. Line endings in the archive are always "\n".
        zipObj.writestr(mz + ".txt", text)

    
        