### Import libraries

In [1]:
import pandas as pd
import pathlib
import re
import numpy as np

import sys

# Make the shared helper modules in ../utils/ importable from this notebook.
sys.path.append("../utils/")
# get_training_locations is defined locally further down, so it is deliberately
# not imported here. get_control_locations is still called by the control-location
# cells and has no local definition, so it must be imported.
from locate_utils import get_control_locations

### Specify paths

In [2]:
# Input files read by the cells below (all under ../mitocheck_metadata/).
features_path = pathlib.Path("../mitocheck_metadata/trainingset_2007_06_21.dat")
feature_samples_path = pathlib.Path("../mitocheck_metadata/features.samples.txt")
annotations_path = pathlib.Path("../mitocheck_metadata/idr0013-screenA-annotation.csv.gz")

# Output directory for the location tables. Create it (and any missing parents)
# up front; an already-existing directory is not an error.
locations_dir = pathlib.Path("locations/")
locations_dir.mkdir(parents=True, exist_ok=True)

### Find/save training locations

In [13]:
def get_frame_metadata(frame_details: str) -> int:
    """
    get the frame number from a features samples movie details string

    The string also carries a plate name (the text before the first "--") and
    a well field (the second "___"-separated field), but only the frame
    number is returned.

    Parameters
    ----------
    frame_details : str
        string from one line of features samples file
        ex: PLLT0010_27--ex2005_05_13--sp2005_03_23--tt17--c5___P00173_01___T00082___X0397___Y0618

    Returns
    -------
    int
        integer read from the third "___"-separated field (ex: T00082 -> 82)
        plus 1, so the example above gives 83

    Raises
    ------
    IndexError
        if frame_details has fewer than three "___"-separated fields
    ValueError
        if the characters after the field's leading letter are not an integer
    """

    # third "___"-separated field, ex: "T00082"
    frame_field = frame_details.split("___")[2]

    # drop the one-letter prefix and read the (up to) five digits after it;
    # the +1 presumably converts a 0-based time index to a 1-based frame
    # number -- TODO confirm
    return int(frame_field[1:6]) + 1

def get_training_locations(feature_samples_path: pathlib.Path, annotations_path: pathlib.Path) -> pd.DataFrame:
    """
    print the frame number of every entry in the features samples file

    Work in progress: the frame numbers are only printed, the returned
    DataFrame is always empty, and annotations_path is not used yet.

    Parameters
    ----------
    feature_samples_path : pathlib.Path
        path to a tab-separated text file whose second column holds the movie
        details string understood by get_frame_metadata
    annotations_path : pathlib.Path
        currently unused

    Returns
    -------
    pd.DataFrame
        empty DataFrame (placeholder for the training data locations)
    """
    training_data_locations = pd.DataFrame()
    with open(feature_samples_path) as labels_file:
        for line in labels_file:
            line = line.strip()
            # skip blank lines (ex: a trailing newline at the end of the file);
            # they have no second column and would raise IndexError below
            if not line:
                continue
            frame_details = line.split("\t")[1]
            print(get_frame_metadata(frame_details))

    return training_data_locations

# Try the locally defined get_training_locations on the features samples file.
# The bare name on the last line makes the notebook display the returned DataFrame.
test = get_training_locations(
    feature_samples_path=feature_samples_path,
    annotations_path=annotations_path,
)
test

83
83
83
75
75
95
95
95
65
70
88
95
8
36
50
50
28
28
28
28
44
44
44
44
44
47
52
71
71
71
71
79
79
84
84
84
72
72
72
49
72
77
37
37
84
93
93
93
93
69
69
81
81
81
81
81
81
81
81
81
81
81
81
81
81
81
81
81
81
81
81
81
81
81
2
17
17
17
61
61
23
23
23
23
23
28
28
28
28
39
39
39
39
39
39
52
52
52
52
52
52
52
63
63
63
63
63
63
63
63
63
63
63
63
76
76
76
76
76
76
76
76
76
76
76
8
46
46
4
4
4
4
4
4
21
21
21
21
21
21
21
21
21
21
21
21
40
40
40
40
40
40
40
40
40
40
40
40
47
55
68
68
68
75
75
75
84
84
89
89
89
89
89
94
94
94
94
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
13
13
13
13
13
13
13
13
13
13
13
13
13
13
13
13
13
13
29
29
29
29
29
29
29
29
29
29
29
29
29
29
29
29
29
29
29
29
29
29
29
56
56
56
56
56
56
56
56
56
56
56
56
56
56
56
56
56
56
56
75
75
22
22
22
22
22
22
22
22
22
27
27
27
27
27
27
27
27
27
27
54
54
54
54
54
54
1
1
9
9
9
14
14
14
14
14
14
14
14
14
14
14
14
14
14
14
22
22
22
22
22
22
22
22
22
22
22
22
22
22
22
22
22
22
22
22
22
48
48
48
66
61
66
66
74
25
25
25
25
25
25
25
69
69
69
69
69
69
69
6

In [None]:
training_save_path = locations_dir / "training_locations.tsv"

# get_training_locations reads the features samples file (its first parameter is
# feature_samples_path), so pass that file rather than the .dat features file.
training_data_locations = get_training_locations(feature_samples_path, annotations_path)
# Tab-separated output; the DataFrame index is written as the first column.
training_data_locations.to_csv(training_save_path, sep="\t")

print(training_data_locations.shape)
training_data_locations.head()

### Find/save negative control locations

In [None]:
negative_control_save_path = locations_dir / "negative_control_locations.tsv"

# get_control_locations is expected to come from locate_utils (../utils/).
# NOTE(review): the meaning of the trailing 0 is defined by that helper and is
# not visible in this notebook -- confirm against locate_utils.
negative_control_locations = get_control_locations(annotations_path, "negative", 0)
# Tab-separated output; the DataFrame index is written as the first column.
negative_control_locations.to_csv(negative_control_save_path, sep="\t")

print(negative_control_locations.shape)
negative_control_locations.head()

### Find/save positive control locations

In [None]:
positive_control_save_path = locations_dir / "positive_control_locations.tsv"

# get_control_locations is expected to come from locate_utils (../utils/).
# NOTE(review): the meaning of the trailing 1 is defined by that helper and is
# not visible in this notebook -- confirm against locate_utils.
positive_control_locations = get_control_locations(annotations_path, "positive", 1)
# Tab-separated output; the DataFrame index is written as the first column.
positive_control_locations.to_csv(positive_control_save_path, sep="\t")

print(positive_control_locations.shape)
positive_control_locations.head()