# Generate platemap files from original position files

In this notebook, position files are used to generate platemap files, which are used downstream to save max projection images and to annotate single-cell outputs. Each platemap file contains exactly one row per well, holding the relevant perturbation and cell line information.

Intermediate CSV files are also created in a directory called `cellprofiler_csvs`; these are used for outputting TIFFs from the nd2 file in the next module (1.max_projection).

## Import libraries

In [1]:
import pathlib
import pandas as pd

## Generate intermediate CellProfiler CSV files for metadata module

In [2]:
# Dir path for output of the intermediate CellProfiler CSV files
cp_csv_dir = pathlib.Path("./cellprofiler_csvs")
cp_csv_dir.mkdir(parents=True, exist_ok=True)


def write_cellprofiler_csv(position_file, output_dir):
    """Convert one position file into a CellProfiler metadata CSV.

    The position file is read as tab-delimited, UTF-16 encoded text. Any
    leading '#' characters are stripped from the 'Point Name' column and the
    'Image' column is decremented by one. The result is written to
    ``<output_dir>/<position file stem>.csv`` without the DataFrame index.

    Parameters
    ----------
    position_file : str or pathlib.Path
        Path to the position file to convert.
    output_dir : str or pathlib.Path
        Existing directory that receives the CSV file.

    Returns
    -------
    pandas.DataFrame
        The processed table that was written to disk.
    """
    position_file = pathlib.Path(position_file)
    positions = pd.read_csv(position_file, delimiter='\t', encoding='utf-16')

    # Remove '#' prefix from 'Point Name' column (lstrip drops every leading '#')
    positions['Point Name'] = positions['Point Name'].str.lstrip('#')

    # Zero-index the 'Image' column
    # NOTE(review): assumes 'Image' is 1-indexed in the position files -- confirm
    positions['Image'] = positions['Image'] - 1

    # Save the processed DataFrame to the output directory
    output_file = pathlib.Path(output_dir) / f"{position_file.stem}.csv"
    positions.to_csv(output_file, index=False)

    return positions


# Find all position txt files in the current directory starting with "slide".
# glob yields paths in filesystem-dependent order, so sort them to make the
# processing order (and the printed output below) reproducible. Sorting also
# turns the one-shot generator into a list that can be inspected after the loop.
position_files = sorted(pathlib.Path().resolve().glob('slide*'))

# Instantiate an empty list to append cellprofiler csvs to
cp_csv_dfs = []

# Iterate through each file to update "Point Name" and "Image" columns
for file in position_files:
    cp_csv_dfs.append(write_cellprofiler_csv(file, cp_csv_dir))

# Print the first rows of each dataframe to verify that the process worked
for df in cp_csv_dfs:
    print(df.head())

  Point Name  X Pos[µm]  Y Pos[µm]  Image Well CellLine Condition
0          2    12065.8    -8723.4      0   A1     786O       NTC
1          4    10465.8    -8735.5      1   A1     786O       NTC
2          6     8866.0    -8747.5      2   A1     786O       NTC
3          9     7259.9    -7959.6      3   A1     786O       NTC
4         10     8060.0    -7953.5      4   A1     786O       NTC
  Point Name  X Pos[µm]  Y Pos[µm]  Image Well CellLine Condition
0          1    12865.9    -8717.4      0   A1     786O       NTC
1          4    10466.0    -8735.5      1   A1     786O       NTC
2          6     8865.9    -8747.5      2   A1     786O       NTC
3          7     8065.9    -8753.5      3   A1     786O       NTC
4         10     8060.1    -7953.5      4   A1     786O       NTC
  Point Name  X Pos[µm]  Y Pos[µm]  Image Well CellLine Condition
0         14    11260.1    -7929.7      0   A1     786O       NTC
1         15    12059.8    -7923.8      1   A1     786O       NTC
2         

## Generate platemap files

In [5]:
# Dir path for output of platemap CSV files
platemap_dir = pathlib.Path("./platemaps")
platemap_dir.mkdir(parents=True, exist_ok=True)


def write_platemap(position_file, output_dir):
    """Reduce one position file to a one-row-per-well platemap CSV.

    The position file is read as tab-delimited, UTF-16 encoded text. Only the
    'Well', 'CellLine' and 'Condition' columns are kept, and only the first
    row seen for each well is retained. The result is written to
    ``<output_dir>/<name>_platemap.csv`` without the DataFrame index, where
    ``<name>`` is the position file name up to its first '.'.

    Parameters
    ----------
    position_file : str or pathlib.Path
        Path to the position file to reduce.
    output_dir : str or pathlib.Path
        Existing directory that receives the platemap CSV file.

    Returns
    -------
    pandas.DataFrame
        The platemap that was written to disk. It keeps the row labels of the
        first occurrence of each well in the position file.
    """
    position_file = pathlib.Path(position_file)
    positions = pd.read_csv(position_file, delimiter='\t', encoding='utf-16')

    # Only keep relevant columns to perturbation and cell line
    platemap = positions[['Well', 'CellLine', 'Condition']]

    # Reduce rows down to one per well (the first row of each well is kept)
    platemap = platemap.drop_duplicates(subset='Well')

    # Save the processed DataFrame to the output directory
    slide_name = position_file.stem.split('.')[0]
    output_file = pathlib.Path(output_dir) / f"{slide_name}_platemap.csv"
    platemap.to_csv(output_file, index=False)

    return platemap


# Find all position txt files in the current directory starting with "slide".
# glob yields paths in filesystem-dependent order, so sort them to make the
# processing order (and the printed output below) reproducible. Sorting also
# turns the one-shot generator into a list that can be inspected after the loop.
position_files = sorted(pathlib.Path().resolve().glob('slide*'))

# Instantiate an empty list to append platemaps to
platemap_dfs = []

# Iterate through each file to update and reduce the rows to one per well
for file in position_files:
    platemap_dfs.append(write_platemap(file, platemap_dir))

# Print the list of dataframes to verify that the process worked
for df in platemap_dfs:
    print(df)

    Well CellLine  Condition
0     A1     786O        NTC
22    B1     786O    ALY kd5
49    A2     786O    ALY kd8
93    B2     786O  SART1 kd4
153   A3     786O  SART1 kd6
188   B3     786O   FIBP kd6
211   A4     786O   FIBP kd7
261   B4     293T  untreated
    Well CellLine    Condition
0     A1     786O          NTC
38    B1     786O   DDX39A kd1
71    A2     786O   DDX39A kd4
122   B2     786O    SARNP kd3
168   A3     786O    SARNP kd9
231   B3     786O  TMEM259 kd5
290   A4     786O  TMEM259 kd6
343   B4     293T    untreated
    Well CellLine  Condition
0     A1     786O        NTC
24    B1     786O    ALY kd5
65    A2     786O    ALY kd8
128   B2     786O  SART1 kd4
186   A3     786O  SART1 kd6
225   B3     786O   FIBP kd6
265   A4     786O   FIBP kd7
318   B4     293T  untreated
    Well CellLine    Condition
0     A1     786O          NTC
33    B1     786O   DDX39A kd1
60    A2     786O   DDX39A kd4
100   B2     786O    SARNP kd3
124   A3     786O    SARNP kd9
167   B3     