# Parse Results

This notebook currently iterates through an experiment's directory, parses all of the annotation result files,
and appends them into one CSV file per experiment.

### Roadmap:
1. Parse a **manual** results file and generate a dataframe from it
2. Iterate an experiment directory, parse each **manual** result file and append to an experiment dataframe
3. **DETOUR** Fix how we create and populate **automated annotations** for easier storage and reading
4. Add in functionality to read in **automated annotations**
5. Add in functionality to populate Google Sheets with results with **PROPER CHECKING FOR DUPLICATES**
6. Get away from Google Sheets and use a proper database and visualization solution

In [2]:
import os
import pandas as pd

In [12]:
# Experiment directories on the mounted annotation volume.
single_cell_dir = "/Volumes/the_box/CURRENT ANNOTATIONS/10-22 Jurkat TNFa varying on rates"
triple_cell_dir = "/Volumes/the_box/CURRENT ANNOTATIONS/10-21 varying on JTIC16"

# Experiment currently being parsed.
experiment_dir = triple_cell_dir

# os.listdir returns bare entry names, so each one has to be joined with
# experiment_dir before the isdir check; a bare name would be resolved against
# the current working directory instead of the experiment directory.
lane_dirs = [
    entry
    for entry in os.listdir(experiment_dir)
    if os.path.isdir(os.path.join(experiment_dir, entry))
]

# Single lane used for trying out the parsing functions below.
test_lane = "LN1_2"
manual_dir = "manual/results"
current_dir = f"{experiment_dir}/{test_lane}/{manual_dir}"

In [35]:
def read_results(results_dir):
    """Load the first ``.csv`` file listed in ``results_dir`` and parse it.

    Prints the directory being read, reads the CSV with pandas, works out
    the columns of interest and returns the result of ``parse_results``.

    Raises ``IndexError`` if the directory contains no ``.csv`` file.
    """
    # TODO: could chdir into results_dir rather than building full paths, but
    # os.chdir is process-wide and is not undone when the function returns.
    print(f"Reading from: {results_dir}")
    csv_names = [name for name in os.listdir(results_dir) if name.endswith(".csv")]
    first_csv = csv_names[0]
    raw = pd.read_csv(f"{results_dir}/{first_csv}")
    wanted_columns = get_columns_of_interest(raw)
    return parse_results(raw, wanted_columns)

def get_columns_of_interest(results):
    """Return the column names to keep, located relative to the "C-pos" column.

    The returned list is, in order:
      * every column from the second one up to (not including) the column
        just before "C-pos" -- treated as the cell-count columns,
      * the column immediately before "C-pos" -- the channel column,
      * the column immediately after "C-pos" -- the position column.

    Raises ``ValueError`` if there is no "C-pos" column.
    """
    names = list(results.columns)
    anchor = names.index("C-pos")
    channel_col = names[anchor - 1]
    position_col = names[anchor + 1]
    # The first column is skipped (presumably a saved row index -- confirm).
    cell_cols = names[1:anchor - 1]
    return [*cell_cols, channel_col, position_col]

def parse_results(results, coi=None):
    """Reduce a raw results frame to the columns of interest with tidy names.

    Args:
        results: Raw results DataFrame as read from the CSV.
        coi: Column names to keep, ordered so that the channel column is
            second-to-last. When omitted, they are derived with
            ``get_columns_of_interest(results)``.

    Returns:
        A new DataFrame containing only ``coi``, with the second-to-last
        column renamed to "Channel" and "Z-pos" renamed to "Position".
        ``results`` itself is not modified.
    """
    # Honour the caller's selection; previously the argument was silently
    # overwritten by a recomputation, so it was ignored and the work was
    # done twice.
    if coi is None:
        coi = get_columns_of_interest(results)

    filtered = results[list(coi)]
    channel_col = list(filtered.columns)[-2]
    return filtered.rename(columns={channel_col: "Channel", "Z-pos": "Position"})

def get_cell_pos_counts(cell_name, results):
    """Return the per-position totals of the ``cell_name`` column.

    ``results`` is grouped by its "Position" column and the ``cell_name``
    values in each group are summed. The totals are returned as a list,
    one entry per group, in the order the groupby yields them.
    """
    subset = results[[cell_name, "Position"]]
    totals = []
    for _position, group in subset.groupby("Position"):
        totals.append(sum(group[cell_name]))
    return totals

# Need to output csv rows of the form
# Date Name Lane LagBinder ICD CellName CountsList Sum
# 10-21 varying on rates L16 ICAM  L16 ICAM  32 22 32 86
def summarize(parsed):
    """Placeholder: not implemented yet; does nothing and returns ``None``.

    Intended to turn ``parsed`` results into CSV rows of the form sketched
    in the comment above this function. NOTE(review): ``parsed`` is
    presumably the DataFrame returned by ``parse_results`` -- confirm when
    implementing.
    """
    pass


In [38]:
# Parse the manual results for the test lane; the bare `test` expression on
# the last line makes the notebook display the resulting DataFrame.
test = read_results(current_dir)
test

Reading from: /Volumes/the_box/CURRENT ANNOTATIONS/10-21 varying on JTIC16/LN1_2/manual/results


Unnamed: 0,Jurkats,PSGL1 Tether,ICAM L16,Channel,Position
0,0,0,0,1.0,1.0
1,0,0,32,2.0,1.0
2,0,8,0,3.0,1.0
3,0,0,0,4.0,1.0
4,0,0,0,1.0,2.0
5,0,0,0,2.0,2.0
6,0,10,22,3.0,2.0
7,0,0,0,4.0,2.0
8,0,0,0,1.0,3.0
9,0,0,0,2.0,3.0


In [70]:


# Per-position sums of the "ICAM L16" column in the parsed test-lane results.
get_cell_pos_counts("ICAM L16", test)

[32, 22, 32]