### Import libraries

In [1]:
import pandas as pd
import pathlib
import re
import numpy as np

import sys
sys.path.append("../utils/")
from locate_utils import get_uncompiled_training_locations, get_final_training_locations, get_control_locations

### Specify paths

In [2]:
# Metadata files handed to the locate_utils helpers in the cells below
feature_samples_path = pathlib.Path("../mitocheck_metadata/features.samples.txt")
annotations_path = pathlib.Path("../mitocheck_metadata/idr0013-screenA-annotation.csv.gz")

# Directory that receives the generated location tables; created (with any
# missing parents) if absent, and left as-is when it already exists
locations_dir = pathlib.Path("locations/")
locations_dir.mkdir(parents=True, exist_ok=True)

### Find/save training locations

In [3]:
# Destination of the training-location table inside the output directory.
# Joined with the pathlib "/" operator rather than by formatting the Path into
# a string and re-parsing it.
training_save_path = locations_dir / "training_locations.tsv"

# Two-step lookup through locate_utils: collect the uncompiled locations from
# the feature samples file and the annotations, then derive the final table.
uncompiled_training_locations = get_uncompiled_training_locations(feature_samples_path, annotations_path)
training_data_locations = get_final_training_locations(uncompiled_training_locations)

# Tab-separated output; to_csv keeps its default index=True, so the row index
# is written as the first (unnamed) column of the file.
training_data_locations.to_csv(training_save_path, sep="\t")

# Bare last expression so the notebook renders the table
training_data_locations

Unnamed: 0,Plate,Well,Well Number,Frames,Original Gene Target,Plate_Map_Name,Gene_Replicate,Site,DNA
0,LT0010_27,H5,173,83,RAB21,LT0010_27_173,1,1,LT0010_27/LT0010_27_173_83.tif
1,LT0013_38,B18,42,75,KIF14,LT0013_38_42,1,1,LT0013_38/LT0013_38_42_75.tif
2,LT0013_38,B18,42,95,KIF14,LT0013_38_42,1,1,LT0013_38/LT0013_38_42_95.tif
3,LT0017_19,P5,365,65,KIF20A,LT0017_19_365,1,1,LT0017_19/LT0017_19_365_65.tif
4,LT0019_06,F20,140,70,DDOST,LT0019_06_140,1,1,LT0019_06/LT0019_06_140_70.tif
...,...,...,...,...,...,...,...,...,...
268,LT0038_27,K10,250,38,BUB1B,LT0038_27_250,1,1,LT0038_27/LT0038_27_250_38.tif
269,LT0078_05,A4,4,33,failed QC,LT0078_05_4,1,1,LT0078_05/LT0078_05_4_33.tif
270,LT0153_01,A13,13,81,failed QC,LT0153_01_13,1,1,LT0153_01/LT0153_01_13_81.tif
271,LT0601_01,J1,217,49,ABCB8,LT0601_01_217,1,1,LT0601_01/LT0601_01_217_49.tif


### Find/save negative control locations

In [4]:
# Destination of the negative-control table inside the output directory.
# Joined with the pathlib "/" operator rather than by formatting the Path into
# a string and re-parsing it.
negative_control_save_path = locations_dir / "negative_control_locations.tsv"

# NOTE(review): the meaning of the positional arguments "negative" and 0 is
# defined in locate_utils.get_control_locations, which is not visible here —
# presumably the control type and a seed/selector; TODO confirm.
negative_control_locations = get_control_locations(annotations_path, "negative", 0)

# Tab-separated output; to_csv keeps its default index=True, so the row index
# is written as the first (unnamed) column of the file.
negative_control_locations.to_csv(negative_control_save_path, sep="\t")

# Bare last expression so the notebook renders the table
negative_control_locations

Unnamed: 0,Plate,Well,Well Number,Original Gene Target,Frames,Plate_Map_Name,Gene_Replicate,Site,DNA
0,LT0001_02,A15,15,negative control,43,LT0001_02_15,1,1,LT0001_02/LT0001_02_15_43.tif
1,LT0001_02,B2,26,negative control,46,LT0001_02_26,1,1,LT0001_02/LT0001_02_26_46.tif
2,LT0001_02,C15,63,negative control,52,LT0001_02_63,1,1,LT0001_02/LT0001_02_63_52.tif
3,LT0001_02,D2,74,negative control,31,LT0001_02_74,1,1,LT0001_02/LT0001_02_74_31.tif
4,LT0001_02,M16,304,negative control,34,LT0001_02_304,1,1,LT0001_02/LT0001_02_304_34.tif
...,...,...,...,...,...,...,...,...,...
3848,LT0603_06,D2,74,negative control,51,LT0603_06_74,1,1,LT0603_06/LT0603_06_74_51.tif
3849,LT0603_06,M16,304,negative control,52,LT0603_06_304,1,1,LT0603_06/LT0603_06_304_52.tif
3850,LT0603_06,N3,315,negative control,49,LT0603_06_315,1,1,LT0603_06/LT0603_06_315_49.tif
3851,LT0603_06,O16,352,negative control,47,LT0603_06_352,1,1,LT0603_06/LT0603_06_352_47.tif


### Find/save positive control locations

In [5]:
# Destination of the positive-control table inside the output directory.
# Joined with the pathlib "/" operator rather than by formatting the Path into
# a string and re-parsing it.
positive_control_save_path = locations_dir / "positive_control_locations.tsv"

# NOTE(review): the meaning of the positional arguments "positive" and 1 is
# defined in locate_utils.get_control_locations, which is not visible here —
# presumably the control type and a seed/selector; TODO confirm.
positive_control_locations = get_control_locations(annotations_path, "positive", 1)

# Tab-separated output; to_csv keeps its default index=True, so the row index
# is written as the first (unnamed) column of the file.
positive_control_locations.to_csv(positive_control_save_path, sep="\t")

# Bare last expression so the notebook renders the table
positive_control_locations

Unnamed: 0,Plate,Well,Well Number,Original Gene Target,Frames,Plate_Map_Name,Gene_Replicate,Site,DNA
0,LT0001_02,A4,4,ENSG00000149503,36,LT0001_02_4,1,1,LT0001_02/LT0001_02_4_36.tif
1,LT0001_02,C1,49,ENSG00000149503,42,LT0001_02_49,1,1,LT0001_02/LT0001_02_49_42.tif
2,LT0001_02,C4,52,ENSG00000149503,43,LT0001_02_52,1,1,LT0001_02/LT0001_02_52_43.tif
3,LT0001_09,A4,4,ENSG00000149503,39,LT0001_09_4,1,1,LT0001_09/LT0001_09_4_39.tif
4,LT0001_09,C1,49,ENSG00000149503,62,LT0001_09_49,1,1,LT0001_09/LT0001_09_49_62.tif
...,...,...,...,...,...,...,...,...,...
2885,LT0603_06,N24,336,COPB,33,LT0603_06_336,1,1,LT0603_06/LT0603_06_336_33.tif
2886,LT0603_06,O2,338,KIF11,62,LT0603_06_338,1,1,LT0603_06/LT0603_06_338_62.tif
2887,LT0603_06,O13,349,KIF11,33,LT0603_06_349,1,1,LT0603_06/LT0603_06_349_33.tif
2888,LT0603_06,P21,381,COPB,60,LT0603_06_381,1,1,LT0603_06/LT0603_06_381_60.tif
