In [1]:
import numpy as np
import pandas as pd

#get all the files from your experiment
import glob

#deal with the .fcs file format
import fcsparser

#for writing info back to excel ID sheet
import openpyxl

## Use glob to get all the fcs files you want to deal with

### functions

In [2]:
def write_file_assoc_to_xlsx (ids_frame_w_fnames, ids_xlsx_path):

    #the check to see if the ids xlsx file already has filenames is done in the function that calls this one [assoc_fname_well]
    
    #open the workbook
    book = openpyxl.load_workbook(ids_xlsx_path)

    #get the worksheet you want to edit by name. Assumes standard sheet naming where single sheet in book is called 'Sheet1'
    sheet = book["Sheet1"]

    #initialize row counter (because lazy)
    r = 1
    #set the file column name in row 1
    sheet.cell(row=r, column=4).value = "file"

    #loop over file dataframe column entries and assign to the xlsx in successive rows (r) in same column
    for filename in ids_frame_w_fnames['file']:
        #go to the next row
        r += 1

        #assign the values of successive cells in the col
        sheet.cell(row=r, column=4).value = filename

    #save it to same place so its overwritten with new good file
    book.save(ids_xlsx_path)
    
    print ("filename associations are written to the ids xlsx file")
    
    return None

In [3]:
def assoc_fname_well (ids_frame, fcs_filename_list, ids_xlsx_path):

    #make copy so you aren't editing the original id frame in the function
    ids = ids_frame.copy()

    #if the ids dataframe loaded from xlsx doesn't already have a "file" column with associated filenames
    if 'file' not in ids.columns:

        #add the filename to the IDs dataframe so you can look for well and get filename for analysis

        #get the index and the fcs file num as a tuple.
        #I'm pretty sure if the index was noncontinuous, this would still associate correct index to fcs num entry
        for idx, num in zip(ids['fcs num'].index, ids['fcs num']):


            #get the list of filenames that contain the fcs num you're looking for
            #this list should ideally be len = 1
            fname = [f for f in fcs_filename_list if num in f]

            #this list that will be added to the 'file' column is sometimes just a string, sometimes a list with one element
            #I want just the value inside the list
            if len(fname) == 1:
                add = fname[0]
            elif len(fname) == 0:
                add = 'NO MATCH'
            elif len(fname) > 1:
                add = '> 1 MATCH'


            ids.loc[idx, 'file'] = add

        
        #check how the filename assignment went
        
        #if the value of these statements are TRUE, then there's a non-assignment or a multiple assignment of filenames
        if any(ids['file'] == 'NO MATCH'):
            print("there's a non-assignment of at least one well and filename!!!")

        elif any(ids['file'] == '> 1 MATCH'):
            print("there's an assignment of multiple filenames to at least one well!!!")

        else:
            print("assignment completed without issue, all wells have a single filaname assignment")

            
        #write the new IDs frame with file associations to the original ids xlsx file
        write_file_assoc_to_xlsx(ids, ids_xlsx_path)    
        
        #return the frame with the new filename associations
        return ids

    #if the ids dataframe already has the filename associations
    else:
        #do nothing
        print ("the id dataframe and its parent xlsx file already contain a column called \
               'file' that has the fcs filename associations, the id dataframe and its parent file have not been modified")
        #return it as is
        return ids

### work

In [40]:
dir_with_fcs_files_path = '../Local Data/20181009 top 4 A B cell vars A=B flow samples/23hr'

#get all the .fcs files in the directory (doesn't walk down directories to aggregate all further along dir tree)
allfcs = glob.glob(dir_with_fcs_files_path + '/*.fcs')

In [41]:
allfcs[0:9]

['../Local Data/20181009 top 4 A B cell vars A=B flow samples/23hr\\23hr-bfpfinal-RDM2018-10-25.0001.fcs',
 '../Local Data/20181009 top 4 A B cell vars A=B flow samples/23hr\\23hr-blankfinal-RDM2018-10-25.0001.fcs',
 '../Local Data/20181009 top 4 A B cell vars A=B flow samples/23hr\\23hr-DI-RDM2018-10-25.0001.fcs',
 '../Local Data/20181009 top 4 A B cell vars A=B flow samples/23hr\\23hr-RDM2018-10-25.0001.fcs',
 '../Local Data/20181009 top 4 A B cell vars A=B flow samples/23hr\\23hr-RDM2018-10-25.0002.fcs',
 '../Local Data/20181009 top 4 A B cell vars A=B flow samples/23hr\\23hr-RDM2018-10-25.0003.fcs',
 '../Local Data/20181009 top 4 A B cell vars A=B flow samples/23hr\\23hr-RDM2018-10-25.0004.fcs',
 '../Local Data/20181009 top 4 A B cell vars A=B flow samples/23hr\\23hr-RDM2018-10-25.0005.fcs',
 '../Local Data/20181009 top 4 A B cell vars A=B flow samples/23hr\\23hr-RDM2018-10-25.0006.fcs']

In [42]:
#ugly nested list comprehension to get all the non control files

#using list comprehensionn to exclude files that have ctrl information in them
expfcs = [c for c in 
 [b for b in 
  [a for a in 
   [s for s in allfcs if 'bfp' not in s]
  if 'yfp' not in a]
 if 'DI' not in b]
if 'blank' not in c]

#nicer way to use previous list to get ctrls out of allfcs

ctrlfcs = [d for d in allfcs if d not in expfcs]

finalctrlfcs = [f for f in ctrlfcs if 'final' in f]

In [43]:
#I created a csv file that correlates fcs file number to well and volume flowed
ids_xlsx_path = dir_with_fcs_files_path + '/23hr-fcs num to well ID.xlsx'

ids = pd.read_excel(ids_xlsx_path, dtype=str)

In [44]:
ids.head(11)

Unnamed: 0,fcs num,well,vol,file
0,1,A1,30,../Local Data/20181009 top 4 A B cell vars A=B...
1,2,B1,30,../Local Data/20181009 top 4 A B cell vars A=B...
2,3,C1,20,../Local Data/20181009 top 4 A B cell vars A=B...
3,4,D1,20,../Local Data/20181009 top 4 A B cell vars A=B...
4,5,E1,20,../Local Data/20181009 top 4 A B cell vars A=B...
5,6,F1,25,../Local Data/20181009 top 4 A B cell vars A=B...
6,7,G1,25,../Local Data/20181009 top 4 A B cell vars A=B...
7,8,H1,25,../Local Data/20181009 top 4 A B cell vars A=B...
8,9,BAD-h1,40,../Local Data/20181009 top 4 A B cell vars A=B...
9,10,A2,25,../Local Data/20181009 top 4 A B cell vars A=B...


In [45]:
ids = assoc_fname_well (ids, expfcs, ids_xlsx_path)

the id dataframe and its parent xlsx file already contain a column called                'file' that has the fcs filename associations, the id dataframe and its parent file have not been modified


### Now move to the notebook for gating and plotting to do your analysis