### Import libraries

In [1]:
import pathlib
import pandas as pd

## Specify mitocheck_data types and paths

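We use two versions of the labeled mitocheck training data for model training/testing, keyed `ic` and `no_ic`. Each key maps to the location of a gzip-compressed CSV (`training_data.csv.gz`).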

In [2]:
# "ic" dataset: pinned to a specific mitocheck_data commit so the download is reproducible.
labeled_data_hash__ic = "20369033f579dca1334cb2c58a1c6d532322f93e"
labeled_data_url__ic = (
    "https://raw.github.com/WayScience/mitocheck_data/"
    f"{labeled_data_hash__ic}"
    "/3.normalize_data/normalized_data/training_data.csv.gz"
)

# "no_ic" dataset: currently read from a local clone of mitocheck_data.
# TODO: replace this absolute local path with a commit-pinned GitHub URL
# (as done for "ic") once the no_ic data is available there.
labeled_data_url__no_ic = (
    "/home/roshankern/Desktop/Github/mitocheck_data"
    "/3.normalize_data/normalized_data__no_ic/training_data.csv.gz"
)

# Maps each data type to the location its labeled data is loaded from.
labeled_data_paths = dict(
    ic=labeled_data_url__ic,
    no_ic=labeled_data_url__no_ic,
)

### Load and save labeled data

In [3]:
# Ensure the output directory exists before any file is written.
labeled_data_save_dir = pathlib.Path("data/")
labeled_data_save_dir.mkdir(parents=True, exist_ok=True)

# Read every labeled dataset from its source location and store a local,
# gzip-compressed copy named after its data type.
# Note: `labeled_data` is reassigned on each iteration, so after the loop it
# holds only the last dataset that was loaded.
for data_type, labeled_data_load_path in labeled_data_paths.items():
    # The first CSV column is used as the DataFrame index.
    labeled_data = pd.read_csv(
        labeled_data_load_path,
        compression="gzip",
        index_col=0,
    )

    labeled_data_save_path = labeled_data_save_dir / f"labeled_data__{data_type}.csv.gz"
    labeled_data.to_csv(labeled_data_save_path, compression="gzip")

## Preview data

In [4]:
# Display the labeled data. `labeled_data` is overwritten on every iteration of
# the loading loop, so this shows only the last data type that was loaded.
labeled_data

Unnamed: 0,Mitocheck_Phenotypic_Class,Cell_UUID,Location_Center_X,Location_Center_Y,Metadata_Plate,Metadata_Well,Metadata_Frame,Metadata_Site,Metadata_Plate_Map_Name,Metadata_DNA,...,DP__efficientnet_1270,DP__efficientnet_1271,DP__efficientnet_1272,DP__efficientnet_1273,DP__efficientnet_1274,DP__efficientnet_1275,DP__efficientnet_1276,DP__efficientnet_1277,DP__efficientnet_1278,DP__efficientnet_1279
0,Large,3ba3aae1-489a-4eae-87f5-f3c31c9c91e9,397,618,LT0010_27,173,83,1,LT0010_27_173,LT0010_27/LT0010_27_173_83.tif,...,0.219501,-0.416624,-0.602064,-1.149988,-0.520968,1.292410,11.614993,0.535486,2.875145,-0.837069
1,Large,c16d0e78-70db-49ff-bc48-027342fe00da,359,584,LT0010_27,173,83,1,LT0010_27_173,LT0010_27/LT0010_27_173_83.tif,...,-0.664476,-1.397901,-0.645742,-1.067267,-0.007235,2.299296,1.372604,1.280672,-0.541297,-1.152779
2,Large,4a6e4da9-941d-4a6c-968e-cb1932839313,383,685,LT0010_27,173,83,1,LT0010_27_173,LT0010_27/LT0010_27_173_83.tif,...,0.636564,1.402440,-0.552524,0.266396,-0.329313,0.126110,-0.322944,0.813079,-0.510780,-0.057089
3,Large,bd03dd73-f4fb-4a25-93ce-1720e55de69b,932,532,LT0013_38,42,75,1,LT0013_38_42,LT0013_38/LT0013_38_42_75.tif,...,-0.919341,-0.865719,-0.520659,0.222127,0.371482,2.289868,-0.133843,1.201244,-1.771245,-0.510236
4,Large,9721f66a-6a76-4a22-a257-e021a5da9e01,479,110,LT0013_38,42,75,1,LT0013_38_42,LT0013_38/LT0013_38_42_75.tif,...,0.501800,-0.410976,-0.310428,-1.285884,-0.546935,2.795317,2.514185,0.361808,-0.322569,-0.583547
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2889,OutOfFocus,898c4f3a-08ab-4447-9307-ee4e56c02b53,383,219,LT0601_01,217,49,1,LT0601_01_217,LT0601_01/LT0601_01_217_49.tif,...,0.864102,7.876805,0.277454,-1.392754,-0.385585,-1.372417,-0.006438,-2.641339,0.591877,2.428400
2890,OutOfFocus,777fb679-c278-4895-90a0-bf3f42873f58,975,293,LT0603_03,2,49,1,LT0603_03_2,LT0603_03/LT0603_03_2_49.tif,...,0.446173,6.153245,0.315240,-0.943492,-0.672695,-0.996821,-1.218016,-0.848513,0.826606,3.255578
2891,OutOfFocus,122e3161-b1b8-4ef3-908c-0dc898ce031f,899,302,LT0603_03,2,49,1,LT0603_03_2,LT0603_03/LT0603_03_2_49.tif,...,0.252932,9.818994,0.471850,-1.108161,-0.962377,-1.281038,-0.899536,-0.492517,0.646061,0.867249
2892,OutOfFocus,8bed4ad7-4634-41f5-a7eb-85da98a97e88,946,281,LT0603_03,2,49,1,LT0603_03_2,LT0603_03/LT0603_03_2_49.tif,...,-0.157890,9.674776,0.401878,-0.968502,-0.952200,-1.040677,-2.199418,-2.137904,0.776055,2.449826


### View counts of each class in the labeled dataset

In [5]:
# Number of rows per phenotypic class (value_counts sorts by count, descending).
# Uses the same `labeled_data` frame as the preview, i.e. the last dataset loaded.
labeled_data["Mitocheck_Phenotypic_Class"].value_counts()

Interphase            419
Polylobed             361
Prometaphase          345
OutOfFocus            304
Apoptosis             276
MetaphaseAlignment    175
Binuclear             174
SmallIrregular        164
Hole                  114
Elongated             107
ADCCM                  89
Anaphase               84
Grape                  78
Large                  77
Metaphase              74
Folded                 53
Name: Mitocheck_Phenotypic_Class, dtype: int64