In [3]:
import fcsparser
import pandas as pd
import glob
import openpyxl
import os

In [4]:
def parse_fcs_file(fcs_file_path):
    try:
        meta_data, raw_data = fcsparser.parse(fcs_file_path, reformat_meta=True)
        return raw_data, meta_data

    except Exception as e:
        print("Error while parsing the FCS file:", e)
        return None, None

## Manually Parse Single File in FACS Data

In [5]:
# Replace "path/to/your/fcs_file.fcs" with the actual path to your FCS file
file_path1 = "/Users/andylenguyen/Documents/Genomics-Metadata-Multiplexing/Data/FACS Data/Index files/14Jun23_INX_NKC 084_LCE662.fcs"
file_path2 = "/Users/andylenguyen/Documents/Genomics-Metadata-Multiplexing/Data/FACS Data/Index files/14Jun23_INX_NKC 084_LCE663.fcs"
file_path3 = "/Users/andylenguyen/Documents/Genomics-Metadata-Multiplexing/Data/FACS Data/Index files/14Jun23_INX_NKC 084_LCE664.fcs"
filepath4 = "/Users/andylenguyen/Documents/Genomics-Metadata-Multiplexing/Data/FACS Data/Index files/14Jun23_INX_Ref Ctrl_LCE662.fcs"
filepath5 = "/Users/andylenguyen/Documents/Genomics-Metadata-Multiplexing/Data/FACS Data/Index files/14Jun23_INX_Ref Ctrl_LCE663.fcs"
filepath6 = "/Users/andylenguyen/Documents/Genomics-Metadata-Multiplexing/Data/FACS Data/Index files/14Jun23_INX_Ref Ctrl_LCE664.fcs"

raw_data, meta_data = parse_fcs_file(file_path1)

# Checking the parsed data
file_no = 1 
output_excel_file = "/Users/andylenguyen/Documents/Genomics-Metadata-Multiplexing/parsed_data.xlsx"

if raw_data is not None:
    
    print("Raw data:")
    print(raw_data.head())
    print("\nMeta data:")
    print(meta_data)


Raw data:
           FSC-A    FSC-H          SSC-A    SSC-H    CD16 FITC       CD56 PE  \
0  115089.296875  69562.0   75108.632812  44791.0   129.710007  37108.621094   
1  127671.296875  83696.0   64815.761719  43674.0   112.270004  17709.240234   
2  102016.796875  74176.0   50605.429688  35155.0   105.730003  25967.160156   
3  123290.093750  71681.0  100057.640625  55735.0   553.720032     66.299995   
4  110112.296875  68001.0   86295.304688  55742.0  1148.859985    149.940002   

         DAPI         Time  
0   61.410000  1698.400024  
1   52.509998  1865.300049  
2   41.829998  2052.899902  
3  203.809998  2330.300049  
4   97.900002  2758.300049  

Meta data:
{'__header__': {'FCS format': b'FCS3.0', 'text start': 256, 'text end': 4597, 'data start': 4603, 'data end': 15994, 'analysis start': 0, 'analysis end': 0}, '$BEGINANALYSIS': '0', '$ENDANALYSIS': '0', '$BEGINSTEXT': '0', '$ENDSTEXT': '0', '$BEGINDATA': '4603', '$ENDDATA': '0000000000000015994', '$FIL': '14Jun23_INX_NKC 0

## Automatically Loop and Parse Files in FACS Data 

### <u> Index Files </u>

#### Note
Note that metadata cannot be merged as it is not data number
* Think of a solution to transform and merge metadata fields (Preferred)
* If not necessary, we can just ignore it

In [6]:
# Replace your directory path for index files
directory_path = "/Users/andylenguyen/Documents/Genomics-Metadata-Multiplexing/Data/FACS Data/Index files"

# List all files in the directory
files_in_directory = os.listdir(directory_path)

# Filter files with the ".fcs" extension
fcs_files = [file for file in files_in_directory if file.endswith(".fcs")]

# Initialise dataframe
raw_df = pd.DataFrame()
#metadata_df = pd.DataFrame()

# Loop through each FCS file and parse the data
for fcs_file in fcs_files:

    print(fcs_file)
    fcs_file_path = os.path.join(directory_path, fcs_file)
    raw_data, meta_data = parse_fcs_file(fcs_file_path)
    
    # Checking the parsed data
    if raw_data is not None:
        print("File:", fcs_file_path)
        print("Raw data:")
        print(raw_data.head())
        print("\nMeta data:")
        print(meta_data)
        print("\n")

        # Append the data of each file to the combined dataframes
        raw_df = pd.concat([raw_df, raw_data])
        #metadata_df = pd.concat([metadata_df, meta_data])

14Jun23_INX_Ref Ctrl_LCE662.fcs
File: /Users/andylenguyen/Documents/Genomics-Metadata-Multiplexing/Data/FACS Data/Index files/14Jun23_INX_Ref Ctrl_LCE662.fcs
Raw data:
           FSC-A    FSC-H         SSC-A    SSC-H   CD16 FITC       CD56 PE  \
0  103059.000000  75474.0  50749.312500  33419.0  197.290009  59648.578125   
1   76914.000000  64132.0  20557.400391  15926.0   65.400002  17639.878906   
2   72203.398438  58039.0  22769.009766  16914.0   59.950001  11540.279297   
3   64366.199219  54005.0  28015.181641  24125.0   59.950001   7833.599609   
4   79505.101562  63489.0  35856.640625  26323.0   37.060001  15918.120117   

         DAPI         Time  
0  116.589996  1673.800049  
1   45.389999  1864.000000  
2   30.260000  2106.300049  
3   -3.560000  2295.199951  
4  -24.029999  2463.899902  

Meta data:
{'__header__': {'FCS format': b'FCS3.0', 'text start': 256, 'text end': 2600, 'data start': 2606, 'data end': 3245, 'analysis start': 0, 'analysis end': 0}, '$BEGINANALYSIS': '0

In [8]:
# Save the combined data into one Excel file
combined_excel_file = "/Users/andylenguyen/Documents/Genomics-Metadata-Multiplexing/Data Merging/merged_index_files.xlsx"  # replace with your directory path
with pd.ExcelWriter(combined_excel_file) as writer:
    raw_df.to_excel(writer, sheet_name="Raw_Data", index=False)
    #combined_meta_data.to_excel(writer, sheet_name="Meta_Data", index=False)

print("Data has been combined and saved to Excel:", combined_excel_file)

Data has been combined and saved to Excel: /Users/andylenguyen/Documents/Genomics-Metadata-Multiplexing/Data Merging/merged_index_files.xlsx


### <u> Index Files 2022 </u>

#### Note
Note that metadata cannot be merged as it is not data number
* Think of a solution to transform and merge metadata fields (Preferred)
* If not necessary, we can just ignore it

In [9]:
# Replace your directory path for index files 2022
directory_path = "/Users/andylenguyen/Documents/Genomics-Metadata-Multiplexing/Data/FACS Data/Index files 2022"

# List all files in the directory
files_in_directory = os.listdir(directory_path)

# Filter files with the ".fcs" extension
fcs_files = [file for file in files_in_directory if file.endswith(".fcs")]

# Initialise dataframe
raw_2022_df = pd.DataFrame()
#metadata_2022_df = pd.DataFrame()

# Loop through each FCS file and parse the data
for fcs_file in fcs_files:

    print(fcs_file)
    fcs_file_path = os.path.join(directory_path, fcs_file)
    raw_data, meta_data = parse_fcs_file(fcs_file_path)
    
    # Checking the parsed data
    if raw_data is not None:
        print("File:", fcs_file_path)
        print("Raw data:")
        print(raw_data.head())
        print("\nMeta data:")
        print(meta_data)
        print("\n")

        # Append the data of each file to the combined dataframes
        raw_2002_df = pd.concat([raw_2022_df, raw_data])
        #metadata_2022_df = pd.concat([metadata_2022_df, meta_data])

18Nov22_INX_Sample 2 Enriched_001.fcs
File: /Users/andylenguyen/Documents/Genomics-Metadata-Multiplexing/Data/FACS Data/Index files 2022/18Nov22_INX_Sample 2 Enriched_001.fcs
Raw data:
          FSC-A    FSC-H    SSC-A   BV421 MR1    BV650 CD8    BV711 Vd2  \
0  70336.632812  60293.0  15443.0  467.099976   996.299988   817.199951   
1  68224.523438  57219.0  31645.0  631.799988  1074.599976   490.500000   
2  82145.703125  67553.0  24865.0  535.500000  1386.899902  3900.599854   
3  84343.351562  67864.0  27565.0  738.899963  1430.099976   677.700012   
4  88578.492188  69112.0  43958.0  162.899994  1566.899902  2534.399902   

   FITC GD TcR  PerCP CD45ra   PE Tetramer  PE Cy5 Tetramer  PE Cy7 Tetramer  \
0         83.0        2320.0  21053.851562      1665.489990       445.230011   
1         52.0         449.0   6725.979980       490.820007       262.869995   
2         98.0       11688.0  21243.970703      3265.989990      1452.090088   
3         85.0         803.0  16073.870117  

In [11]:
# Save the combined data into one Excel file
combined_excel_file = "/Users/andylenguyen/Documents/Genomics-Metadata-Multiplexing/Data Merging/merged_index_files_2022.xlsx"  # replace with your directory path
with pd.ExcelWriter(combined_excel_file) as writer:
    raw_df.to_excel(writer, sheet_name="Raw_Data", index=False)
    #combined_meta_data.to_excel(writer, sheet_name="Meta_Data", index=False)

print("Data has been combined and saved to Excel:", combined_excel_file)

Data has been combined and saved to Excel: /Users/andylenguyen/Documents/Genomics-Metadata-Multiplexing/Data Merging/merged_index_files_2022.xlsx
