In [40]:
import xml.etree.ElementTree as ET
import pandas as pd
import numpy as np

#### Read in CSV file from google sheets, that you manually added (2 or 1) for (cell , non-cell) respectivally. in a column titled Manual

In [41]:
# Read the CSV file
csv_file = '/Users/grant/Desktop/G21/G21_gfp_cellClassificaion_450_cells.csv'
df = pd.read_csv(csv_file)


# Convert the Manual column to numeric values
df['Manual'] = pd.to_numeric(df['Manual'], errors='coerce')

# Filter rows based on the Manual column value
manual_1_df = df[df['Manual'] == 1]
manual_2_df = df[df['Manual'] == 2]

# find length of data frams
num_rows_1  = manual_1_df.shape[0]
num_rows_2  = manual_2_df.shape[0]
total_cells = num_rows_1 + num_rows_2
print('total non-cells: ' +  str(num_rows_1) + "percent: " + str((num_rows_1 /total_cells)*100))
print('total cells: ' +  str(num_rows_2)+  "percent: " + str((num_rows_2 /total_cells)*100))

# Print the filtered data frames
print("--------- NOT CELLS --------------")
print(manual_1_df.head())

print("---------- CELLS -----------------")
print(manual_2_df.head())

total non-cells: 274percent: 56.84647302904564
total cells: 208percent: 43.15352697095436
--------- NOT CELLS --------------
    Image_Filename  Type  MarkerZ  MarkerY  MarkerX  Manual rfp_cell notes
0  placeholder.tif     2       13     5123     2880     1.0      NaN   NaN
1  placeholder.tif     2       16     5127     2914     1.0      NaN   NaN
2  placeholder.tif     2       17     5152     2609     1.0      NaN   NaN
3  placeholder.tif     2       17     6612     3396     1.0      NaN   NaN
4  placeholder.tif     2       20     6801     2712     1.0      NaN   NaN
---------- CELLS -----------------
     Image_Filename  Type  MarkerZ  MarkerY  MarkerX  Manual rfp_cell notes
11  placeholder.tif     2       29     4938     4367     2.0        0   NaN
29  placeholder.tif     2       40     5097     5026     2.0        0   NaN
41  placeholder.tif     2       43     5067     4831     2.0        1   NaN
68  placeholder.tif     2       54     5307     4615     2.0        1   NaN
81  placeh

### drop the extra columns from the data frames, to match the layout of the cell_classification.xml files from cellfinder

In [42]:
# -------- non-Cells df -------------
# Drop the unnecessary columns
manual_1_df = manual_1_df.drop(columns=['Manual', 'rfp_cell', 'notes'])

# Update the 'Type' column to have values 1 and 2
# You will need to adjust the condition as needed depending on how you want to distinguish between Type 1 and 2
# manual_1_df['Type'] = np.where(manual_1_df['MarkerZ'] <= 12, 1, 2)

# Reorder the columns
manual_1_df = manual_1_df[['Image_Filename', 'Type', 'MarkerX', 'MarkerY', 'MarkerZ']]
manual_1_df['Type'] = 1

# -------- Cells df -------------
# Drop the unnecessary columns
manual_2_df = manual_2_df.drop(columns=['Manual', 'rfp_cell', 'notes'])

# Reorder the columns
manual_2_df = manual_2_df[['Image_Filename', 'Type', 'MarkerX', 'MarkerY', 'MarkerZ']]

print('------------ non-cells -------------')
print(manual_1_df)

print('------------ cells -------------')
print(manual_2_df)

------------ non-cells -------------
       Image_Filename  Type  MarkerX  MarkerY  MarkerZ
0     placeholder.tif     1     2880     5123       13
1     placeholder.tif     1     2914     5127       16
2     placeholder.tif     1     2609     5152       17
3     placeholder.tif     1     3396     6612       17
4     placeholder.tif     1     2712     6801       20
...               ...   ...      ...      ...      ...
5073  placeholder.tif     1     3646      923     1623
5074  placeholder.tif     1     3568      818     1623
5075  placeholder.tif     1     3585     1046     1625
5076  placeholder.tif     1     3568      822     1625
5077  placeholder.tif     1     5036     1966     1626

[274 rows x 5 columns]
------------ cells -------------
       Image_Filename  Type  MarkerX  MarkerY  MarkerZ
11    placeholder.tif     2     4367     4938       29
29    placeholder.tif     2     5026     5097       40
41    placeholder.tif     2     4831     5067       43
68    placeholder.tif     

### Combine the two manually curiated data frames

In [43]:
combined_df = pd.concat([manual_1_df , manual_2_df ])

# Display the first few rows of the combined DataFrame
print(combined_df.head(-5))

print(len(combined_df))

       Image_Filename  Type  MarkerX  MarkerY  MarkerZ
0     placeholder.tif     1     2880     5123       13
1     placeholder.tif     1     2914     5127       16
2     placeholder.tif     1     2609     5152       17
3     placeholder.tif     1     3396     6612       17
4     placeholder.tif     1     2712     6801       20
...               ...   ...      ...      ...      ...
4356  placeholder.tif     2     4554     1281     1222
4361  placeholder.tif     2     4788     1124     1223
4362  placeholder.tif     2      941     5629     1224
4365  placeholder.tif     2     2787     2037     1226
4367  placeholder.tif     2     4608     1422     1227

[477 rows x 5 columns]
482


### Save out the CSV file and XML file of this new data frame

In [44]:
# Assuming you have a DataFrame named 'df'
# Save DataFrame as a CSV file
combined_df.to_csv("/Users/grant/Desktop/G21/combined_cell_cordinates_output.csv", index=False)

# Create an XML file from the DataFrame
root = ET.Element("root")

for _, row in combined_df.iterrows():
    record = ET.SubElement(root, "record")
    for col in combined_df.columns:
        col_element = ET.SubElement(record, col)
        col_element.text = str(row[col])

# Save the XML data to a file
tree = ET.ElementTree(root)
tree.write("/Users/grant/Desktop/G21/manual_cell_classification.xml", encoding="utf-8", xml_declaration=True)

In [38]:
input_file = '/Users/grant/Desktop/G21/manual_cell_classification.xml'


with open(input_file, 'r', encoding='utf-8') as file:
    input_xml = file.read()

# Parse the input XML
root = ET.fromstring(input_xml)

# Create the output XML structure
output_root = ET.Element("CellCounter_Marker_File")
image_properties = ET.SubElement(output_root, "Image_Properties")
filename = ET.SubElement(image_properties, "Image_Filename")
marker_data = ET.SubElement(output_root, "Marker_Data")
current_type = ET.SubElement(marker_data, "Current_Type")
marker_type = ET.SubElement(marker_data, "Marker_Type")
type_elem = ET.SubElement(marker_type, "Type")

# Set the filename and type from the input XML
filename.text = root.find("record/Image_Filename").text
type_elem.text = root.find("record/Type").text
current_type.text = root.find("record/Type").text

# Iterate through the input XML records and add the markers to the output XML
for record in root.findall("record"):
    marker = ET.SubElement(marker_type, "Marker")
    marker_x = ET.SubElement(marker, "MarkerX")
    marker_y = ET.SubElement(marker, "MarkerY")
    marker_z = ET.SubElement(marker, "MarkerZ")

    marker_x.text = record.find("MarkerX").text
    marker_y.text = record.find("MarkerY").text
    marker_z.text = record.find("MarkerZ").text

# Convert the output XML tree to a string
output_xml = ET.tostring(output_root, encoding="UTF-8", method="xml").decode("UTF-8")

print(output_xml)

<CellCounter_Marker_File><Image_Properties><Image_Filename>placeholder.tif</Image_Filename></Image_Properties><Marker_Data><Current_Type>1</Current_Type><Marker_Type><Type>1</Type><Marker><MarkerX>2880</MarkerX><MarkerY>5123</MarkerY><MarkerZ>13</MarkerZ></Marker><Marker><MarkerX>2914</MarkerX><MarkerY>5127</MarkerY><MarkerZ>16</MarkerZ></Marker><Marker><MarkerX>2609</MarkerX><MarkerY>5152</MarkerY><MarkerZ>17</MarkerZ></Marker><Marker><MarkerX>3396</MarkerX><MarkerY>6612</MarkerY><MarkerZ>17</MarkerZ></Marker><Marker><MarkerX>2712</MarkerX><MarkerY>6801</MarkerY><MarkerZ>20</MarkerZ></Marker><Marker><MarkerX>4912</MarkerX><MarkerY>5094</MarkerY><MarkerZ>23</MarkerZ></Marker><Marker><MarkerX>3772</MarkerX><MarkerY>6873</MarkerY><MarkerZ>26</MarkerZ></Marker><Marker><MarkerX>4477</MarkerX><MarkerY>5073</MarkerY><MarkerZ>26</MarkerZ></Marker><Marker><MarkerX>2755</MarkerX><MarkerY>6686</MarkerY><MarkerZ>26</MarkerZ></Marker><Marker><MarkerX>4404</MarkerX><MarkerY>4792</MarkerY><MarkerZ>2

In [39]:
output_filename = "/Users/grant/Desktop/G21/cell_classification.xml"

with open(output_filename, "w", encoding="utf-8") as output_file:
    output_file.write(output_xml)

print(f"Output XML saved to {output_filename}")

Output XML saved to /Users/grant/Desktop/G21/cell_classification.xml


In [45]:
import xml.etree.ElementTree as ET

input_file = '/Users/grant/Desktop/G21/manual_cell_classification.xml'

with open(input_file, 'r', encoding='utf-8') as file:
    input_xml = file.read()

root = ET.fromstring(input_xml)

output_root = ET.Element("CellCounter_Marker_File")
image_properties = ET.SubElement(output_root, "Image_Properties")
filename = ET.SubElement(image_properties, "Image_Filename")
marker_data = ET.SubElement(output_root, "Marker_Data")

filename.text = root.find("record/Image_Filename").text

# Group markers by type
markers_by_type = {}

for record in root.findall("record"):
    marker_type = record.find("Type").text
    marker_x = record.find("MarkerX").text
    marker_y = record.find("MarkerY").text
    marker_z = record.find("MarkerZ").text

    if marker_type not in markers_by_type:
        markers_by_type[marker_type] = []

    markers_by_type[marker_type].append((marker_x, marker_y, marker_z))

# Add Marker_Type elements for each distinct type
for marker_type, markers in markers_by_type.items():
    type_element = ET.SubElement(marker_data, "Marker_Type")
    type_elem = ET.SubElement(type_element, "Type")
    type_elem.text = marker_type

    for marker_coords in markers:
        marker = ET.SubElement(type_element, "Marker")
        marker_x = ET.SubElement(marker, "MarkerX")
        marker_y = ET.SubElement(marker, "MarkerY")
        marker_z = ET.SubElement(marker, "MarkerZ")

        marker_x.text, marker_y.text, marker_z.text = marker_coords

# Convert the output XML tree to a string
output_xml = ET.tostring(output_root, encoding="UTF-8", method="xml").decode("UTF-8")

print(output_xml)

<CellCounter_Marker_File><Image_Properties><Image_Filename>placeholder.tif</Image_Filename></Image_Properties><Marker_Data><Marker_Type><Type>1</Type><Marker><MarkerX>2880</MarkerX><MarkerY>5123</MarkerY><MarkerZ>13</MarkerZ></Marker><Marker><MarkerX>2914</MarkerX><MarkerY>5127</MarkerY><MarkerZ>16</MarkerZ></Marker><Marker><MarkerX>2609</MarkerX><MarkerY>5152</MarkerY><MarkerZ>17</MarkerZ></Marker><Marker><MarkerX>3396</MarkerX><MarkerY>6612</MarkerY><MarkerZ>17</MarkerZ></Marker><Marker><MarkerX>2712</MarkerX><MarkerY>6801</MarkerY><MarkerZ>20</MarkerZ></Marker><Marker><MarkerX>4912</MarkerX><MarkerY>5094</MarkerY><MarkerZ>23</MarkerZ></Marker><Marker><MarkerX>3772</MarkerX><MarkerY>6873</MarkerY><MarkerZ>26</MarkerZ></Marker><Marker><MarkerX>4477</MarkerX><MarkerY>5073</MarkerY><MarkerZ>26</MarkerZ></Marker><Marker><MarkerX>2755</MarkerX><MarkerY>6686</MarkerY><MarkerZ>26</MarkerZ></Marker><Marker><MarkerX>4404</MarkerX><MarkerY>4792</MarkerY><MarkerZ>28</MarkerZ></Marker><Marker><M

In [46]:
output_filename = "/Users/grant/Desktop/G21/cell_classification.xml"

with open(output_filename, "w", encoding="utf-8") as output_file:
    output_file.write(output_xml)

print(f"Output XML saved to {output_filename}")

Output XML saved to /Users/grant/Desktop/G21/cell_classification.xml
