### LDH Script
This script calculates cell nuclei locations based on tissue boundaries and determines the number of live and dead cells based on those boundaries. The script also bins the cells based on distance from the boundary.
The input data comes from an image processing algorithm that distinguishes between live and dead cells and calculates their x/y coordinates on a histological slice.

In [None]:
import os
import re
import time
import matplotlib.pyplot as plt
import matplotlib as mpl
import numpy as np
import pandas as pd
import scipy as sio
import multiprocessing as mp

from scipy import interpolate
from scipy import signal
from joblib import Parallel, delayed

print("import of packages successful")

In [None]:
# Analysis parameters shared by the cells below
split = 50        # bin width handed to CalcPlot / CalcPlotMulti
runAverage = 5    # forwarded to the CalcPlot* functions
threshhold = 180  # nuclei whose Value exceeds this are counted as dead
figPath = '/mnt/nfs/python_docs/projects/Microscopy LDH ETH/Figures/'
outpuPath = '/mnt/nfs/python_docs/projects/Microscopy LDH ETH/Output/'

# Samples maps a folder number to its settings. Each table row is
# (folder, regions, tissue type, cut direction); the regions are copied into
# two independent lists stored under 'Bound' and 'Data'.
Samples = {
    sampleId: {
        'Bound': list(regions),
        'Data': list(regions),
        'Type': tissueType,
        'Direction': direction,
    }
    for sampleId, regions, tissueType, direction in (
        (14, ('Top', 'Bottom'), 'Intact', 'Trans'),  # done
        (15, ('Top', 'Bottom'), 'Intact', 'Trans'),  # done
        (17, ('Top', 'Bottom'), 'Intact', 'Trans'),  # done
        (92, ('Top', 'Bottom'), 'Intact', 'Horz'),  # done
        (94, ('Top', 'Bottom'), 'Intact', 'Horz'),  # done
        (97, ('Top', 'Bottom'), 'Intact', 'Horz'),  # done
        (32, ('Top', 'Bottom'), 'Damaged', 'Trans'),  # done
        (35, ('Top', 'Bottom', 'Injury'), 'Damaged', 'Trans'),  # done
        (38, ('Top', 'Bottom'), 'Damaged', 'Trans'),  # done
        (72, ('Top', 'Bottom'), 'Damaged', 'Horz'),  # done
        (74, ('Top', 'Bottom'), 'Damaged', 'Horz'),  # done
        (76, ('Top', 'Bottom'), 'Damaged', 'Horz'),  # done
        (57, ('Top', 'Bottom', 'Injury'), 'Repaired', 'Trans'),  # done
        (82, ('Injury', 'Other'), 'Repaired', 'Horz'),  # needs more work
        (83, ('Top', 'Bottom'), 'Repaired', 'Horz'),  # done
        (86, ('Top', 'Bottom'), 'Repaired', 'Horz'),  # done (check again, too much positive on the edge)
    )
}
print('parameters defined')

In [None]:
# Batch run: process every region listed under 'Data' for every folder in Samples
print("Number of processors: ", mp.cpu_count())
start = time.time()
for folder, sampleInfo in Samples.items():
    # the input files are opened by relative name, so move into the sample folder
    os.chdir('/mnt/nfs/python_docs/projects/Microscopy LDH ETH/'+str(folder))
    os.getcwd()

    for Samp in sampleInfo['Data']:
        SampleStart = time.time()
        # sample name = "<folder>_<region>"
        Sample = '_'.join((str(folder), Samp))
        print(Sample)

        # load -> build edge points -> nearest-edge distance -> bin statistics
        nuclei, bounds = readCSV(Sample)
        # NOTE: `edge` has to stay a module-level name; CalcDistF reads it as a global
        bounds, edge = CalcBounds(bounds)
        output = CalcDistMulti(nuclei, edge)
        plotData = CalcPlotMulti(output, split, threshhold, runAverage)

        # write the per-bin table
        plotData.to_csv(outpuPath + Sample + "_Output.csv", index=False, header=True)

        # write the figures
        plotGraph(plotData, Sample, figPath)
        plotMap(edge, output, Sample, figPath)

        SampleEnd = time.time()
        print(f'done: {SampleEnd - SampleStart} sec')
end = time.time()
print(f'total run time: {end - start} sec')

In [None]:
# Single-sample run: process only the 'Bottom' region of one folder (no figures)
Num = '86'
# the input files are opened by relative name, so move into the sample folder
os.chdir('/mnt/nfs/python_docs/projects/Microscopy LDH ETH/' + Num)
os.getcwd()

start = time.time()
Sample = f'{Num}_Bottom'

# load -> build edge points -> nearest-edge distance -> bin statistics
nuclei, bounds = readCSV(Sample)
# NOTE: `edge` has to stay a module-level name; CalcDistF reads it as a global
bounds, edge = CalcBounds(bounds)
output = CalcDistMulti(nuclei, edge)
plotData = CalcPlotMulti(output, split, threshhold, runAverage)

# write the per-bin table
plotData.to_csv(outpuPath + Sample + "_Output.csv", index=False, header=True)

end = time.time()
print(f'done: {end - start} sec')

In [None]:
def readCSV(Sample):
    """Load the two input tables belonging to one sample.

    Args:
        Sample: Path prefix of the sample. ``<Sample>.txt`` is read as a
            tab-separated table and ``<Sample>_edge.csv`` as a CSV table.

    Returns:
        Tuple ``(nuclei, bounds)`` holding the two DataFrames in that order.
    """
    nucleiFile = Sample + '.txt'
    boundsFile = Sample + '_edge.csv'

    nuclei = pd.read_csv(nucleiFile, sep='\t')
    bounds = pd.read_csv(boundsFile)

    print(f'import done: nuclei: {len(nuclei)}, bounds: {len(bounds)}')
    return nuclei, bounds

def CalcBounds(bounds):
    """Derive boundary columns and expand every boundary line into points.

    The input frame is modified in place: ``CX``, ``CY`` and ``pointN`` are
    added and the values in column 7 are rewritten. Columns are addressed by
    position exactly as before:

    * column 0 is used as a 1-based row number,
    * columns 1 and 2 are the x/y position the points are spread around,
    * column 7 is an angle in degrees,
    * column 11 is the point budget of the segment (this is ``pointN`` when the
      incoming table has nine columns -- TODO confirm against the edge CSV).

    Args:
        bounds: DataFrame with at least the named columns ``BX``, ``BY``,
            ``Width``, ``Height`` and ``Length`` plus the positional columns
            listed above.

    Returns:
        Tuple ``(bounds, edge)``. ``bounds`` is the same object that was
        passed in. ``edge`` has the columns ``x``, ``y`` and ``Segment``
        (1-based segment number) and a 1-based index, with the points of each
        segment ordered +1 step, -1 step, +2 steps, -2 steps, ...
    """
    step = 5  # spacing between neighbouring edge points along a segment

    # Calculate end values
    bounds['CX'] = bounds.BX + bounds.Width
    bounds['CY'] = bounds.BY + bounds.Height
    bounds['pointN'] = round((bounds.Length / step) - 1)

    # Adjust the angle values: even-numbered rows are turned by 180 degrees,
    # then every angle is negated.
    for ind in bounds.iloc[:, 0]:
        row = ind - 1
        angle = bounds.iloc[row, 7]
        if (ind % 2) == 0:
            angle = angle - 180
        bounds.iloc[row, 7] = -angle
    print('bounds values calculated')

    # Make points out of lines. Each segment is generated as one array and the
    # frame is built once at the end; growing a DataFrame row by row
    # re-allocates it on every insert.
    segmentBlocks = []
    segmentRows = zip(bounds.iloc[:, 1], bounds.iloc[:, 2], bounds.iloc[:, 7], bounds.iloc[:, 11])
    for segment, (x0, y0, angle, length) in enumerate(segmentRows, start=1):
        half = int(length / 2)  # number of steps taken in each direction
        if half <= 0:
            continue
        steps = np.arange(1, half + 1)
        theta = np.deg2rad(angle)
        dx = step * np.cos(theta) * steps
        dy = step * np.sin(theta) * steps

        block = np.empty((2 * half, 3))
        block[0::2, 0] = x0 + dx  # forward point of every step
        block[0::2, 1] = y0 + dy
        block[1::2, 0] = x0 - dx  # mirrored point of every step
        block[1::2, 1] = y0 - dy
        block[:, 2] = segment
        segmentBlocks.append(block)

    points = np.vstack(segmentBlocks) if segmentBlocks else np.empty((0, 3))
    edge = pd.DataFrame(points, columns=['x', 'y', 'Segment'])
    edge.index = pd.RangeIndex(1, len(edge) + 1)  # keep the original 1-based labels
    print('points interpolated; edge length: ' + str(len(edge)))
    return bounds, edge

def CalcDist(nuclei,edge):
    """Compute, for every nucleus, the distance to the closest edge point.

    Serial counterpart of ``CalcDistMulti``. The distances are computed from
    the ``edge`` argument itself, and one output row is produced per nucleus
    (the previous version never advanced its row counter, so every nucleus
    overwrote row 0).

    Args:
        nuclei: DataFrame whose columns at positions 4 and 5 hold the x and y
            coordinate of each nucleus and which has a column named
            ``'Nucleus: Channel 2 mean'``.
        edge: DataFrame with the columns ``x`` and ``y``.

    Returns:
        DataFrame with the columns ``X``, ``Y``, ``Distance`` and ``Value``,
        one row per nucleus in input order.

    Raises:
        ValueError: If ``edge`` has no rows while ``nuclei`` has at least one.
    """
    # pull the edge coordinates out once; they are the same for every nucleus
    edgeX = edge.x.to_numpy(dtype=float)
    edgeY = edge.y.to_numpy(dtype=float)

    rows = []
    for xn, yn, nucInt in zip(nuclei.iloc[:, 4], nuclei.iloc[:, 5], nuclei.loc[:, 'Nucleus: Channel 2 mean']):
        nearest = (((edgeX - xn)**2 + (edgeY - yn)**2)**0.5).min()
        rows.append([xn, yn, float(nearest), nucInt])

    output = pd.DataFrame(rows, columns=['X', 'Y', 'Distance', 'Value'])
    print('distances calculated')
    return output

def _setWorkerEdge(edgePoints):
    """Pool initializer: publish the edge table as the global CalcDistF reads."""
    global edge
    edge = edgePoints


def CalcDistMulti(nuclei,edge):
    """Compute nearest-edge distances for all nuclei on a process pool.

    ``CalcDistF`` looks the edge table up as a module-level ``edge``. Every
    worker is therefore initialised with the ``edge`` passed to this function,
    so the result no longer depends on whatever global happened to exist when
    the pool was created. The pool is used as a context manager so that its
    workers are shut down even if a task raises.

    Args:
        nuclei: DataFrame whose columns at positions 4 and 5 hold the x and y
            coordinate of each nucleus and which has a column named
            ``'Nucleus: Channel 2 mean'``.
        edge: DataFrame with the columns ``x`` and ``y``.

    Returns:
        DataFrame with the columns ``X``, ``Y``, ``Distance`` and ``Value``,
        one row per nucleus in input order.
    """
    # one (x, y, intensity) task per nucleus
    tasks = list(zip(nuclei.iloc[:, 4], nuclei.iloc[:, 5], nuclei.loc[:, 'Nucleus: Channel 2 mean']))

    with mp.Pool(mp.cpu_count(), initializer=_setWorkerEdge, initargs=(edge,)) as pool:
        table = pool.starmap(CalcDistF, tasks)

    output = pd.DataFrame(table, columns=['X', 'Y', 'Distance', 'Value'])
    print('distances calculated; points n: ' + str(len(output)))
    return output

def CalcDistF(xn,yn,nucInt,edge_points=None):
    """Return one output row for a single nucleus.

    Args:
        xn: x coordinate of the nucleus.
        yn: y coordinate of the nucleus.
        nucInt: Intensity value carried through unchanged.
        edge_points: Optional DataFrame with the columns ``x`` and ``y``.
            When omitted, the module-level ``edge`` is used, which is what
            existing three-argument callers rely on.

    Returns:
        List ``[xn, yn, distance, nucInt]`` where ``distance`` is the smallest
        Euclidean distance from the nucleus to any edge point.

    Raises:
        ValueError: If the edge table has no rows.
    """
    points = edge if edge_points is None else edge_points
    # take the minimum on the NumPy array instead of iterating the Series
    # element by element with the builtin min()
    distances = (((points.x - xn)**2 + (points.y - yn)**2)**0.5).to_numpy(dtype=float)
    out = [xn, yn, float(distances.min()), nucInt]
    return out

def CalcPlot(output,split,threshhold,runAverage):
    """Bin nuclei by distance and count dead and alive cells per bin.

    Serial counterpart of ``CalcPlotMulti``. Changes from the previous version:

    * the bin count is rounded up, so nuclei beyond the last full bin are no
      longer dropped;
    * bin ``n`` covers ``n*split < Distance <= (n+1)*split`` and the first bin
      also includes ``Distance == 0``, so a nucleus exactly on a bin edge is
      counted exactly once;
    * a nucleus with ``Value == threshhold`` is counted as alive, matching
      ``plotMap``;
    * an empty bin reports ``NaN`` survivability instead of dividing 0 by 0,
      and an empty ``output`` returns an empty table instead of raising.

    Args:
        output: DataFrame with the columns ``Distance`` and ``Value``.
        split: Bin width, in the units of ``Distance``.
        threshhold: Nuclei with ``Value`` above this are counted as dead.
        runAverage: Accepted for interface compatibility; currently unused.

    Returns:
        DataFrame with the columns ``Distance`` (upper bin edge), ``Dead``,
        ``Alive`` and ``Survivability`` (percent alive), one row per bin.
    """
    columns = ['Distance', 'Dead', 'Alive', 'Survivability']
    if len(output) == 0:
        print('plots calculated')
        return pd.DataFrame(columns=columns)

    dist = output.Distance
    isDead = output.Value > threshhold
    isAlive = output.Value <= threshhold

    # always at least one bin, so nuclei lying exactly on the boundary are kept
    binCount = max(1, int(np.ceil(dist.max() / split)))

    rows = []
    for n in range(binCount):
        lower = n * split
        upper = (n + 1) * split
        aboveLower = (dist >= lower) if n == 0 else (dist > lower)
        inBin = aboveLower & (dist <= upper)

        dead = int((inBin & isDead).sum())
        alive = int((inBin & isAlive).sum())
        total = dead + alive
        survived = alive / total * 100 if total else float('nan')
        rows.append([upper, dead, alive, survived])

    plotData = pd.DataFrame(rows, columns=columns)
    print('plots calculated')
    return plotData

def CalcPlotMulti(output,split,threshhold,runAverage):
    """Bin nuclei by distance on a process pool, one ``CalcPlotF`` task per bin.

    The bin count is rounded up (at least one bin for non-empty input), so
    nuclei beyond the last full bin are no longer dropped. An empty ``output``
    returns an empty table instead of raising. The pool is used as a context
    manager so that its workers are shut down even if a task raises.

    Args:
        output: DataFrame with the columns ``Distance`` and ``Value``.
        split: Bin width, in the units of ``Distance``.
        threshhold: Passed through to ``CalcPlotF``.
        runAverage: Passed through to ``CalcPlotF``.

    Returns:
        DataFrame with the columns ``Distance``, ``Dead``, ``Alive`` and
        ``Survivability``, one row per bin in increasing bin order.
    """
    columns = ['Distance', 'Dead', 'Alive', 'Survivability']

    if len(output) == 0:
        # nothing to bin; skip the pool entirely
        plotData = pd.DataFrame(columns=columns)
    else:
        binCount = max(1, int(np.ceil(output.Distance.max() / split)))
        tasks = [(output, split, threshhold, runAverage, n) for n in range(binCount)]
        with mp.Pool(mp.cpu_count()) as pool:
            table = pool.starmap(CalcPlotF, tasks)
        plotData = pd.DataFrame(table, columns=columns)

    print('plots calculated; bin n: ' + str(len(plotData)))
    return plotData

def CalcPlotF(output,split,threshhold,runAverage,n):
    """Count dead and alive nuclei in distance bin ``n``.

    Bin ``n`` covers ``n*split < Distance <= (n+1)*split``; bin 0 also
    includes ``Distance == 0``. A nucleus exactly on a bin edge is therefore
    counted exactly once (strict inequalities on both sides used to drop it).
    A nucleus with ``Value == threshhold`` is counted as alive, matching
    ``plotMap``.

    Args:
        output: DataFrame with the columns ``Distance`` and ``Value``.
        split: Bin width, in the units of ``Distance``.
        threshhold: Nuclei with ``Value`` above this are counted as dead.
        runAverage: Accepted for interface compatibility; currently unused.
        n: Zero-based bin number.

    Returns:
        List ``[distance, dead, alive, survived]`` where ``distance`` is the
        upper bin edge and ``survived`` is the percentage of alive nuclei, or
        ``NaN`` when the bin is empty.
    """
    lower = n * split
    upper = (n + 1) * split

    dist = output.Distance
    aboveLower = (dist >= lower) if n == 0 else (dist > lower)
    inBin = aboveLower & (dist <= upper)

    dead = int((inBin & (output.Value > threshhold)).sum())
    alive = int((inBin & (output.Value <= threshhold)).sum())

    total = dead + alive
    # an empty bin has no defined survivability; report NaN explicitly
    survived = alive / total * 100 if total else float('nan')
    out = [upper, dead, alive, survived]
    return out

def plotGraph(plotData,Sample,figPath):
    """Save a scatter plot of survivability against distance.

    Args:
        plotData: Table with the columns ``Distance`` and ``Survivability``.
        Sample: Sample name; used as the plot title and in the file name.
        figPath: Prefix prepended to the file name; the figure is written to
            ``figPath + Sample + '_graph.png'``.
    """
    fig = plt.figure()
    axes = fig.add_subplot(1, 1, 1)
    axes.scatter(plotData.Distance, plotData.Survivability)
    axes.set_title(Sample)

    target = figPath + Sample + '_graph.png'
    fig.savefig(target)
    plt.close(fig)  # release the figure so repeated calls do not accumulate figures
    print('graph plotted')

def plotMap(edge,output,Sample,figPath):
    """Save a point map of alive nuclei, dead nuclei and the edge points.

    Nuclei are split on the module-level ``threshhold``: ``Value`` above it is
    drawn as dead, everything else as alive. The y axis is negated when
    plotting.

    Args:
        edge: Table with the columns ``x`` and ``y``.
        output: Table with the columns ``X``, ``Y`` and ``Value``.
        Sample: Sample name; used as the plot title and in the file name.
        figPath: Prefix prepended to the file name; the figure is written to
            ``figPath + Sample + '_map.png'``.
    """
    isDead = output.Value > threshhold
    isLive = output.Value <= threshhold

    # (legend label, colour, x values, y values), in drawing order
    layers = [
        ("Alive", "green", output.X[isLive], output.Y[isLive]),
        ("Dead", "red", output.X[isDead], output.Y[isDead]),
        ("Border", "blue", edge.x, edge.y),
    ]

    fig = plt.figure()
    ax = fig.add_subplot(1, 1, 1)
    for label, colour, xs, ys in layers:
        ax.scatter(xs, -ys, alpha=0.8, c=colour, edgecolors='none', s=30, label=label)

    ax.set_title(Sample)
    ax.legend(loc=4)
    fig.savefig(figPath + Sample + '_map.png')
    plt.close(fig)  # release the figure so repeated calls do not accumulate figures
    print('map plotted')
    
print('functions defined')