### Import libraries

In [1]:
import pandas as pd
import pathlib
import re
import numpy as np

import sys
sys.path.append("../utils/")
from locate_utils import get_uncompiled_training_locations, get_final_training_locations, get_control_locations

### Specify paths

In [2]:
# Output directory for the location tables written by the cells below.
# Created up front (including any missing parents); a no-op if it already exists.
locations_dir = pathlib.Path("locations/")
locations_dir.mkdir(parents=True, exist_ok=True)

# Input files, both located under ../mitocheck_metadata/.
annotations_path = pathlib.Path("../mitocheck_metadata/idr0013-screenA-annotation.csv.gz")
feature_samples_path = pathlib.Path("../mitocheck_metadata/features.samples.txt")

### Find/save training locations

In [3]:
# Destination of the training-location table inside the locations directory.
# Joined with pathlib's `/` operator rather than formatting the Path into an
# f-string and re-wrapping it; the resulting path is the same.
training_save_path = locations_dir / "training_locations.tsv"

# Two-step lookup through locate_utils: the uncompiled locations are derived
# from the feature-samples and annotations files, then handed to
# get_final_training_locations to produce the table that is saved.
uncompile_training_locations = get_uncompiled_training_locations(feature_samples_path, annotations_path)
training_data_locations = get_final_training_locations(uncompile_training_locations)

# Saved tab-separated. NOTE(review): `index` is left at its default, so
# (assuming a pandas DataFrame) the index is written as an unnamed first
# column -- confirm downstream readers expect that.
training_data_locations.to_csv(training_save_path, sep="\t")

# Last expression of the cell: render the table as the cell output.
training_data_locations

Unnamed: 0,Plate,Well,Well Number,Frames,Original Gene Target
0,LT0010_27,H5,173,83,RAB21
1,LT0013_38,B18,42,759547,KIF14
2,LT0017_19,P5,365,658393,KIF20A
3,LT0019_06,F20,140,7083,DDOST
4,LT0027_44,M4,292,889592476580,CDK4
...,...,...,...,...,...
72,LT0029_05,C12,60,33,failed QC
73,LT0078_05,A4,4,33,failed QC
74,LT0153_01,A13,13,81,failed QC
75,LT0601_01,J1,217,49,ABCB8


### Find/save negative control locations

In [4]:
# Destination of the negative-control location table inside the locations
# directory, joined with pathlib's `/` operator instead of an f-string.
negative_control_save_path = locations_dir / "negative_control_locations.tsv"

# Look up control locations of type "negative" from the annotations file.
# NOTE(review): the third positional argument (0) is defined by
# locate_utils.get_control_locations; its meaning is not visible in this
# notebook -- confirm against the helper before changing it.
negative_control_locations = get_control_locations(annotations_path, "negative", 0)

# Saved tab-separated; `index` is left at its default.
negative_control_locations.to_csv(negative_control_save_path, sep="\t")

# Last expression of the cell: render the table as the cell output.
negative_control_locations

Unnamed: 0,Plate,Well,Well Number,Original Gene Target,Frames
0,LT0001_02,A15,15,negative control,43
1,LT0001_02,B2,26,negative control,46
2,LT0001_02,C15,63,negative control,52
3,LT0001_02,D2,74,negative control,31
4,LT0001_02,M16,304,negative control,34
...,...,...,...,...,...
3848,LT0603_06,D2,74,negative control,51
3849,LT0603_06,M16,304,negative control,52
3850,LT0603_06,N3,315,negative control,49
3851,LT0603_06,O16,352,negative control,47


### Find/save positive control locations

In [5]:
# Destination of the positive-control location table inside the locations
# directory, joined with pathlib's `/` operator instead of an f-string.
positive_control_save_path = locations_dir / "positive_control_locations.tsv"

# Look up control locations of type "positive" from the annotations file.
# NOTE(review): the third positional argument (1) is defined by
# locate_utils.get_control_locations; its meaning is not visible in this
# notebook -- confirm against the helper before changing it.
positive_control_locations = get_control_locations(annotations_path, "positive", 1)

# Saved tab-separated; `index` is left at its default.
positive_control_locations.to_csv(positive_control_save_path, sep="\t")

# Last expression of the cell: render the table as the cell output.
positive_control_locations

Unnamed: 0,Plate,Well,Well Number,Original Gene Target,Frames
0,LT0001_02,A4,4,ENSG00000149503,36
1,LT0001_02,C1,49,ENSG00000149503,42
2,LT0001_02,C4,52,ENSG00000149503,43
3,LT0001_09,A4,4,ENSG00000149503,39
4,LT0001_09,C1,49,ENSG00000149503,62
...,...,...,...,...,...
2885,LT0603_06,N24,336,COPB,33
2886,LT0603_06,O2,338,KIF11,62
2887,LT0603_06,O13,349,KIF11,33
2888,LT0603_06,P21,381,COPB,60
