Date: 
14/9/2022

Author: 
David Elliott (Hotchkiss Brain Institute - Advanced Microscopy Platform)

Goal:
Collate Mean signal intensity from multiple channels for each Cell.
Cell objects have been created using the Imaris Cell Module and all statistics have been exported (in csv format). The following script will prompt the User to provide the directory location of this statistics folder, then automatically open the Mean signal intensity file for each channel, extract values and combine into a Summary csv/spreadsheet. In step 8, the User has the option to rename each channel. 


In [8]:
# STEP 1:
# Import modules

import numpy as np
import pandas as pd

import os.path
from os import path

import csv





# STEP 2:
# Prompt User for location of the folder containing csv files for Cell objects, exported from Imaris using Export All. Then ask for the outputFolder location.
# Alternatively, we could simply save a 'summary' csv file to the original source folder? 
# Example:
# C:\Users\david\Desktop\Rajiv\WM edge peri\WM peri 1_Statistics
# C:\Users\david\Desktop\Rajiv\WM edge peri\Test_output

# Use the function 'checkDir' to handle the input, check that it is valid and return the correct directory.
# Store these directories into: sourceFolder and outputFolder


def checkDir(data):
    '''
    Prompt the User until they enter the location of an existing folder.

    Parameters:
        data: Short description of the folder being requested (e.g. "source folder").
              It is inserted into the prompt shown to the User.

    Returns:
        The text entered by the User, unchanged, once it refers to an existing directory.
    '''
    while True:
        location = input("Please enter the location of the " + data + " :")

        # isdir (rather than exists) so that the path of a regular file is rejected too:
        # the value returned here is always used as a folder afterwards.
        if path.isdir(location):
            return location

        print('Sorry, was unable to locate that folder. Please try again.')
             
# Ask for both folders up front (source first, then output); each call keeps
# prompting until checkDir accepts the entered location.
sourceFolder = checkDir("source folder")
outputFolder = checkDir("output folder")

#print(sourceFolder)
#print(outputFolder)





# STEP 3:
# Define the exact directory location and filename prefix shared by all of the files (i.e., the '_Statistics' suffix of the folder name must be removed, as it is not part of each filename)

def userFolderInput(dataFolder):
    '''
    Build the path prefix shared by every csv file inside a statistics folder.

    The prefix is the folder path, a backslash, and the folder's own name with a
    trailing '_Statistics' removed (the filenames do not contain that suffix).

    Parameters:
        dataFolder: Backslash separated path of the statistics folder,
                    e.g. '...<name>_Statistics'.

    Returns:
        dataFolder + backslash + <name>, ready to have the rest of a filename appended.
    '''
    suffix = '_Statistics'

    # Last section of the path, i.e. the name of the folder itself
    folderName = dataFolder.split('\\')[-1]

    # Cut the exact suffix. str.strip('_Statistics') must not be used here: it removes any
    # run of the characters _, S, t, a, i, s and c from both ends, so a folder such as
    # 'cats_Statistics' would be reduced to an empty name.
    if folderName.endswith(suffix):
        folderName = folderName[:-len(suffix)]

    return dataFolder + "\\" + folderName

# Folder path + filename prefix; the per-channel filenames are built by appending to this
filePrefix = userFolderInput(sourceFolder)
# print(filePrefix)





# STEP 4:
# Check number of channels

def checkNumberOfChannels(testFolder):
    '''
    Count the consecutive 'Cell_Intensity_Mean' channel files that exist for a file prefix.

    Channels are probed in order (Ch=1, Ch=2, ...) and counting stops at the first
    channel whose file cannot be opened.

    Parameters:
        testFolder: Folder path plus filename prefix; '_Cell_Intensity_Mean_Ch=<n>_Img=1.csv'
                    is appended to it to form each filename.

    Returns:
        The number of channel files found (0 when the file for channel 1 cannot be opened).
    '''
    channelNumber = 0

    while True:
        candidate = testFolder + "_Cell_Intensity_Mean_Ch=" + str(channelNumber + 1) + "_Img=1.csv"
        try:
            # Opening (and immediately closing) the file is the existence check
            with open(candidate):
                pass
        except OSError:
            # Catch OSError only (missing/unreadable file): a bare 'except' would also
            # swallow KeyboardInterrupt and genuine programming errors.
            return channelNumber

        channelNumber += 1

# Probe the exported files once and report how many channels were found
numberOfChannels = checkNumberOfChannels(filePrefix)
print(f'Number of Channels: {numberOfChannels}')





# STEP 5:
# Use a loop to create a DataFrame for each 'Cell_Intensity_Mean_Ch..' csv file and store it in a Dictionary (could also use a List).

# One DataFrame per channel, keyed by channel number (1, 2, ...).
# header=2: the column names are taken from row 2 (0-based, as counted by pandas) of each file.
dfDict = {
    channel: pd.read_csv(f'{filePrefix}_Cell_Intensity_Mean_Ch={channel}_Img=1.csv', header=2)
    for channel in range(1, numberOfChannels + 1)
}
    
    

    
    
# STEP 6:
# Create a masterDf.
# This will be built from the df for ch1. All columns other than 'ID' and 'Ch1'/Cell Int Mean will be dropped, 'ID' will become the first column 
# and then, in the next step, Channel signal data from all the other channels will be imported into new columns. 

# Start from the channel 1 table, relabel its intensity column as 'Ch1' and keep only
# 'ID' (first) and 'Ch1'.
# The two columns are selected by name; dropping the unused columns by position is not
# needed and fails (or drops the wrong data) when a file has a different column layout.
# .copy() gives masterDf its own data, so adding the other channels as new columns
# later on is not treated by pandas as writing to a slice of another DataFrame.
masterDf = dfDict[1].rename(columns={'Cell Intensity Mean': 'Ch1'})[['ID', 'Ch1']].copy()





# STEP 7:
# Add the 'Cell Intensity Mean' for all of the other channels to masterDf:

for channel in range(2, numberOfChannels + 1):
    # Values are matched to masterDf rows by DataFrame index, not by 'ID' --
    # assumes every channel file lists the cells in the same order (TODO confirm).
    channelMeans = dfDict[channel]['Cell Intensity Mean']
    masterDf[f'Ch{channel}'] = channelMeans
    
# masterDf

Please enter the location of the source folder : C:\Users\david\Desktop\Rajiv\WM edge peri\WM peri 2_Statistics
Please enter the location of the output folder : C:\Users\david\Desktop\Rajiv\Histoflow paper\Scripts used\Test_output


Number of Channels: 12


In [9]:
# STEP 8 (OPTIONAL):
# Rename channel numbers (Ch1, Ch2, etc.) to the names of the markers.
# Example code:
# df = df.rename(columns={'oldName1': 'newName1', 'oldName2': 'newName2'})

# Adjust number of channels and column names as appropriate.
# The code below is adjusted for the current project (Jain et al.,) where there are consistently either 10 or 12 channels in each image dataset.

# Marker names for each supported channel layout, keyed by the number of channels
markerNames = {
    12: {
        'Ch1' : 'Iba1',
        'Ch2' : 'Nuclear Yellow',
        'Ch3' : 'CD4',
        'Ch4' : 'B220',
        'Ch5' : 'Ly6G',
        'Ch6' : 'CD3',
        'Ch7' : 'Ly6C',
        'Ch8' : 'CD45',
        'Ch9' : 'Not Used',
        'Ch10' : 'Not Used',
        'Ch11' : 'Outside CNS',
        'Ch12' : 'Not Used',
    },
    10: {
        'Ch1' : 'Laminin',
        'Ch2' : 'Nuclear Yellow',
        'Ch3' : 'CD4',
        'Ch4' : 'B220',
        'Ch5' : 'CD68',
        'Ch6' : 'CD45',
        'Ch7' : 'Cells',
        'Ch8' : 'Nuclei',
        'Ch9' : 'CNS Barriers',
        'Ch10' : 'Outside CNS',
    },
}

# Datasets with any other number of channels keep their 'Ch<n>' column names
if numberOfChannels in markerNames:
    masterDf = masterDf.rename(columns=markerNames[numberOfChannels])


In [10]:
# Bare expression: shows the collated table as the notebook cell output
masterDf

Unnamed: 0,ID,Iba1,Nuclear Yellow,CD4,B220,Ly6G,CD3,Ly6C,CD45,Not Used,Not Used.1,Outside CNS,Not Used.2
0,0,2.98442,13.61690,51.4844,11.60260,7.77143,85.00520,0.585714,235.423,0.0,162.536,0.0,0.000000
1,1,1.11885,16.76950,57.6447,4.00120,2.38295,17.61700,2.811520,244.605,0.0,157.347,0.0,0.000000
2,2,1.18415,17.58850,42.0676,6.85164,1.50901,1.48284,6.486050,217.041,0.0,226.793,0.0,0.000000
3,3,2.90244,30.55950,49.3704,39.47410,2.10518,5.26982,44.862800,181.739,0.0,255.000,0.0,2.714940
4,4,1.48429,8.96662,51.1852,5.23822,2.54712,16.63420,0.316099,246.919,0.0,0.000,0.0,0.000000
...,...,...,...,...,...,...,...,...,...,...,...,...,...
2601,2601,1.27085,7.50772,40.6195,3.09131,1.50412,7.58634,0.531239,236.353,0.0,0.000,0.0,0.000000
2602,2602,1.48299,8.34694,39.1644,2.43764,1.72562,6.80385,2.531750,231.603,0.0,0.000,0.0,2.887760
2603,2603,3.25404,7.21333,40.5498,17.19550,54.28070,193.06200,1.036690,183.100,0.0,255.000,0.0,0.000000
2604,2604,3.49593,6.32745,33.3230,23.14800,26.79730,146.43700,0.087468,165.963,0.0,255.000,0.0,0.000000


In [11]:
# STEP 9:
# Save to csv file in outputFolder. 

# Split filePrefix on backslashes; the last section is the filename prefix of the dataset
cellFileList = filePrefix.split('\\')

# The summary is saved in outputFolder as '<filename prefix>_summary.csv'.
# Change the '_summary.csv' text below to use a different ending.
# index=False leaves the DataFrame index out of the saved file.
summaryPath = '\\'.join([outputFolder, cellFileList[-1] + '_summary.csv'])
masterDf.to_csv(summaryPath, index=False)
              