# Code for the Sam labeling tool. Takes the combined CSV file (the output of the CSV combiner tool) as input.

In [57]:
import pandas as pd
import numpy as np
from IPython.display import FileLink
import os

# Load the combined CSV. The path is assembled with os.path.join so it
# resolves on any operating system; a literal with backslash separators is
# only understood as a nested path on Windows.
input_path = os.path.join('maltez_lab_tools', 'combined_csvs', 'final_output.csv')
test = pd.read_csv(input_path)

# Keep only the position columns and the marker columns used below.
test = test[['POSITION X', 'POSITION Y', 'POSITION Z', 'CD45', "MDSC", 'CD68']]
test

Unnamed: 0,POSITION X,POSITION Y,POSITION Z,CD45,MDSC,CD68
0,51257.254,42903.758,26,0,0,0
1,51619.285,42545.438,12,1,0,1
2,51489.516,42677.035,14,0,0,0
3,51618.191,42549.020,16,1,1,0
4,51434.113,42733.695,17,1,0,1
...,...,...,...,...,...,...
115110,48725.254,40357.273,24,0,0,0
115111,48741.340,40339.059,24,0,0,0
115112,48718.805,40368.750,25,0,0,0
115113,48733.887,40358.613,24,0,0,0


In [58]:
# Work on an independent copy: `test` keeps its original columns, and the new
# columns are not written into a frame derived from a column selection.
df = test.copy()

num_rows = len(df)
# Half of the rows get a 1. At least one row is marked when the frame is
# non-empty, and never more rows than exist (sampling 1 index from an empty
# frame would raise).
num_ones = min(num_rows, max(1, int(0.5 * num_rows)))

new_columns = ["CD11B", "CD3", "LY6G", "LY6C", "H3CIT"]

# Seeded generator so the synthetic marker columns are identical on every
# run of this cell.
rng = np.random.default_rng(42)

for col in new_columns:
    # Start with all zeros
    data = np.zeros(num_rows, dtype=int)

    # Pick `num_ones` distinct row positions and flip them to 1
    ones_indices = rng.choice(num_rows, size=num_ones, replace=False)
    data[ones_indices] = 1

    # Assign to DataFrame
    df[col] = data


In [59]:
# Display the frame.
df

Unnamed: 0,POSITION X,POSITION Y,POSITION Z,CD45,MDSC,CD68,CD11B,CD3,LY6G,LY6C,H3CIT
0,51257.254,42903.758,26,0,0,0,0,0,0,1,0
1,51619.285,42545.438,12,1,0,1,0,0,1,1,1
2,51489.516,42677.035,14,0,0,0,0,1,0,0,0
3,51618.191,42549.020,16,1,1,0,0,0,0,1,0
4,51434.113,42733.695,17,1,0,1,1,0,0,0,1
...,...,...,...,...,...,...,...,...,...,...,...
115110,48725.254,40357.273,24,0,0,0,1,0,0,1,1
115111,48741.340,40339.059,24,0,0,0,1,1,1,1,0
115112,48718.805,40368.750,25,0,0,0,1,1,0,1,0
115113,48733.887,40358.613,24,0,0,0,0,1,1,1,0


In [60]:
def classify_cell(row):
    """Map one row of 0/1 marker flags to a cell-type label.

    Decision order:
      * CD11B == 0: "t-cell" if CD3 == 1, otherwise "DC or other cell".
      * CD11B != 0 and CD68 == 1: "macrophage" (a missing CD68 counts as 0).
      * otherwise LY6G == 0: "other myeloid cell".
      * otherwise LY6G == 1:
          LY6C == 1 -> "MDSC netting" if H3CIT == 1 else "MDSC"
          LY6C == 0 -> "neutrophil netting" if H3CIT == 1
                       else "neutrophil (not netting)"

    Args:
        row: mapping-like object (e.g. a DataFrame row) supporting
            ``row[key]`` and ``row.get(key, default)``.

    Returns:
        The label string, or None when LY6G or LY6C is neither 0 nor 1 on
        the CD11B-positive, CD68-negative path.
    """
    # CD11B-negative branch: only CD3 matters.
    if row['CD11B'] == 0:
        return "t-cell" if row['CD3'] == 1 else "DC or other cell"

    # CD11B-positive from here on.
    if row.get('CD68', 0) == 1:
        return "macrophage"

    ly6g = row['LY6G']
    if ly6g == 0:
        return "other myeloid cell"

    if ly6g == 1:
        ly6c = row['LY6C']
        if ly6c == 1:
            return "MDSC netting" if row['H3CIT'] == 1 else "MDSC"
        if ly6c == 0:
            return "neutrophil netting" if row['H3CIT'] == 1 else "neutrophil (not netting)"

    # Unexpected LY6G / LY6C value: no label.
    return None


In [61]:
# Label every row by running classify_cell across it (axis=1 passes each row).
# The bare `df.columns` expression that used to sit here was removed: it was
# not the last expression of the cell, so it never displayed anything.
df['CELL_TYPE'] = df.apply(classify_cell, axis=1)
df


Unnamed: 0,POSITION X,POSITION Y,POSITION Z,CD45,MDSC,CD68,CD11B,CD3,LY6G,LY6C,H3CIT,CELL_TYPE
0,51257.254,42903.758,26,0,0,0,0,0,0,1,0,DC or other cell
1,51619.285,42545.438,12,1,0,1,0,0,1,1,1,DC or other cell
2,51489.516,42677.035,14,0,0,0,0,1,0,0,0,t-cell
3,51618.191,42549.020,16,1,1,0,0,0,0,1,0,DC or other cell
4,51434.113,42733.695,17,1,0,1,1,0,0,0,1,macrophage
...,...,...,...,...,...,...,...,...,...,...,...,...
115110,48725.254,40357.273,24,0,0,0,1,0,0,1,1,other myeloid cell
115111,48741.340,40339.059,24,0,0,0,1,1,1,1,0,MDSC
115112,48718.805,40368.750,25,0,0,0,1,1,0,1,0,other myeloid cell
115113,48733.887,40358.613,24,0,0,0,0,1,1,1,0,t-cell


In [62]:
# Count how many rows received each CELL_TYPE label.
df['CELL_TYPE'].value_counts()

CELL_TYPE
t-cell                      28906
DC or other cell            28652
other myeloid cell          24357
macrophage                   8794
neutrophil (not netting)     6212
MDSC netting                 6112
MDSC                         6042
neutrophil netting           6040
Name: count, dtype: int64

In [63]:
# --- Export settings ---------------------------------------------------------
output_dir = "sam_csvs"
# Edit the name below to change the output file (keep the .csv extension).
file_name = "sam_test1.csv"

# --- Write the labelled table ------------------------------------------------
# Create the target folder if it is not there yet, then save without the index.
os.makedirs(output_dir, exist_ok=True)
output_path = os.path.join(output_dir, file_name)
df.to_csv(output_path, index=False)