In [1]:
import warnings

def custom_warn(message, category, filename, lineno, file=None, line=None):
    print(f'[{category.__name__}] {filename}:{lineno} --> {message}', flush=True)
warnings.showwarning = custom_warn


### Porting to Google Colab
The following cell enables this notebook to run on Google Colab as well as in an IDE on your local machine.<br>
To relocate the project, change `all_projects_path` and/or `project_path` so that they point to the directory (in your Google Drive or on your local disk) that contains this notebook, together with the `imgs` and `datasets` sub-directories and the rest of the files.<br>

In [2]:
import sys
import os
# Probe for Google Colab: the import only succeeds inside a Colab runtime.
try:
    from google.colab import drive as google_drive
except ImportError:
    # Not on Google Colab --> fall back to the local machine.
    # Only ImportError is caught so that unrelated failures (and Ctrl-C)
    # are not silently mistaken for "running locally".
    google_drive = None

# Choose the root that holds all projects: the local disk when Colab is
# unavailable, otherwise the Google Drive mounted into the Colab runtime.
if google_drive is None:
    all_projects_path = os.path.join('d:\\', 'projects')
else:
    google_drive_directory = os.path.join('/', 'content', 'gdrive')
    google_drive.mount(google_drive_directory)
    all_projects_path = os.path.join(
        google_drive_directory, 'Othercomputers', 'My Laptop', 'projects'
    )

# Locate this project; stop early when it is missing.
project_path = os.path.join(all_projects_path, 'BioTAU', 'Huntington')
assert os.path.exists(project_path), f'Project path {project_path} not found!'
sys.path.append(project_path)  # lets the notebook import the project's python files
os.chdir(project_path)         # lets relative reads of images/data files resolve

# Input data must already exist.
datasets_path = os.path.join(project_path, 'datasets')
assert os.path.exists(datasets_path), f'Datasets path {datasets_path} not found!'

# The output directory is created on demand.
output_path = os.path.join(project_path, 'output')
os.makedirs(output_path, exist_ok=True)
assert os.path.exists(output_path), f'Output path {output_path} not found!'

# Report the resolved locations, one per line.
print(
    f'Current working directory: {os.getcwd()}',
    f'Datasets path: {datasets_path}',
    f'Output path: {output_path}',
    sep='\n',
)

Current working directory: d:\projects\BioTAU\Huntington
Datasets path: d:\projects\BioTAU\Huntington\datasets
Output path: d:\projects\BioTAU\Huntington\output


In [3]:
from python.Mitochondrial_Data import Protein_Data, Morphology_Data, All_Mitochondrial_Data

In [4]:
# Full paths of the CSV inputs; all of them live under <datasets_path>/Mitochondrial.
(
    file_name_mitochondrial_data,
    file_name_drp1_df5,
    file_name_mfn1_df5,
    file_name_mfn2_df5,
    file_name_vat1_df5,
) = (
    os.path.join(datasets_path, 'Mitochondrial', csv_name)
    for csv_name in (
        'HD Mitochondrial Morphology Analysis Dataset (Before normalization).csv',
        'drp1_df5.csv',
        'mfn1_df5.csv',
        'mfn2_df5.csv',
        'vat1_df5.csv',
    )
)

# Read Mitochondrial Data

In [5]:
# Load the mitochondrial morphology CSV. With verbose=1 the saved output shows
# the file being read and a per-patient Num_Wells summary.
mitochondrial_data = Morphology_Data(file_name_mitochondrial_data, verbose=1)

Reading Mitochondrial Morphology data: d:\projects\BioTAU\Huntington\datasets\Mitochondrial\HD Mitochondrial Morphology Analysis Dataset (Before normalization).csv
['group_with_id', 'group_with_pc']


Patient_ID
GM00305    15
GM01650    44
GM01653    24
GM02165     9
GM04287    23
GM04476     5
GM04689    15
GM04693     7
GM04715    28
GM04717    21
GM04719    28
GM04721     1
GM04799     3
GM04819    14
GM04837    15
GM04847    13
GM04887    13
NA0143     41
NA0495     22
NA0561     14
NA0633     11
NA0730     29
NA0848     35
NA0951      8
NA0971     41
NA0981     14
NA1016      8
NA1170     38
Name: Num_Wells, dtype: int64

In [6]:
# Load the drp1 protein CSV. With verbose=1 the saved output shows the frame
# shape, a preview of the rows and a per-patient Num_Wells summary.
drp1_df5 = Protein_Data(file_name_drp1_df5, verbose=1)
# XGBoost-based feature evaluation. Slow: the saved run shows 4642 iterations in 10:51.
# NOTE(review): the saved run reports 100.00% accuracy with all 5 features
# selected -- worth confirming how the evaluation splits the data (TODO verify).
drp1_df5.evaluate_features_by_xgboost_to_classify_patients_HD_without_HGPS()

Reading Mitochondrial protein data: drp1_df5
DataFrame of drp1_df5 has shape: (153, 6)


Unnamed: 0,drp1_intensity,drp1_area,Local_Outlier_Factor_8,Local_Outlier_Factor_10,Local_Outlier_Factor_30,Patient_ID
0,1.6731,32.1386,-1.1152,-1.1127,-1.0062,HGADFN0143
1,1.5762,49.3063,-0.9045,-0.9472,-0.9944,GM04287
2,2.1034,38.9581,-1.3123,-1.4213,-1.0267,GM01653
3,1.8997,52.6443,-1.0271,-1.0691,-1.0015,GM04287
4,1.6509,24.6237,-1.0216,-0.9725,-0.9968,GM04715


Patient_ID
GM00305        6
GM01650       12
GM01653       23
GM02165        4
GM04212        6
GM04287        5
GM04476        6
GM04687        6
GM04689        6
GM04709        5
GM04715        6
GM04717        6
GM04719       11
GM04721        6
GM04799        4
GM04807        2
GM04837        5
GM04887        1
HGADFN0143    12
NA0495         6
NA0730         5
NA1016         5
NA1170         5
Name: Num_Wells, dtype: int64

Evaluating features using XGBoost: 100%|██████████| 4642/4642 [10:51<00:00,  7.13it/s]

Selected model has 5 features with final accuracy: 100.00%
Feature[ 1] = Local_Outlier_Factor_30
Feature[ 2] = drp1_area
Feature[ 3] = drp1_intensity
Feature[ 4] = Local_Outlier_Factor_8
Feature[ 5] = Local_Outlier_Factor_10





In [7]:
# Load the mfn1 protein CSV. With verbose=1 the saved output shows the frame
# shape, a preview of the rows and a per-patient Num_Wells summary.
mfn1_df5 = Protein_Data(file_name_mfn1_df5, verbose=1)
# XGBoost-based feature evaluation. Slow: the saved run shows 4642 iterations in 11:54.
# NOTE(review): the saved run reports 100.00% accuracy with all 5 features
# selected -- worth confirming how the evaluation splits the data (TODO verify).
mfn1_df5.evaluate_features_by_xgboost_to_classify_patients_HD_without_HGPS()

Reading Mitochondrial protein data: mfn1_df5
DataFrame of mfn1_df5 has shape: (155, 6)


Unnamed: 0,mfn1_intensity,mfn1_area,Local_Outlier_Factor_8,Local_Outlier_Factor_10,Local_Outlier_Factor_30,Patient_ID
0,94.934,583.9132,-1.0374,-0.9571,-1.2348,GM01653
1,93.6412,463.3063,-1.0039,-0.997,-0.9959,GM04689
2,93.2016,399.7425,-1.3267,-1.3388,-1.4355,GM01650
3,99.8635,654.8509,-1.3877,-1.4657,-1.0083,GM04887
4,87.9849,463.1998,-0.9754,-0.9904,-0.9959,GM04837


Patient_ID
GM00305        6
GM01650       17
GM01653       24
GM02165        1
GM04212        5
GM04287        6
GM04476        4
GM04687        6
GM04689        5
GM04709        6
GM04715        6
GM04717        6
GM04719       11
GM04721        4
GM04799        6
GM04807        5
GM04819        6
GM04837        5
GM04887        3
HGADFN0143    10
NA0495         5
NA0951         3
NA1016         5
Name: Num_Wells, dtype: int64

Evaluating features using XGBoost: 100%|██████████| 4642/4642 [11:54<00:00,  6.50it/s]

Selected model has 5 features with final accuracy: 100.00%
Feature[ 1] = Local_Outlier_Factor_30
Feature[ 2] = mfn1_area
Feature[ 3] = Local_Outlier_Factor_8
Feature[ 4] = mfn1_intensity
Feature[ 5] = Local_Outlier_Factor_10





In [8]:
# Load the mfn2 protein CSV. With verbose=1 the saved output shows the frame
# shape, a preview of the rows and a per-patient Num_Wells summary.
mfn2_df5 = Protein_Data(file_name_mfn2_df5, verbose=1)
# XGBoost-based feature evaluation. Slow: the saved run shows 4642 iterations in 11:43.
# NOTE(review): the saved run reports 100.00% accuracy with all 5 features
# selected -- worth confirming how the evaluation splits the data (TODO verify).
mfn2_df5.evaluate_features_by_xgboost_to_classify_patients_HD_without_HGPS()

Reading Mitochondrial protein data: mfn2_df5
DataFrame of mfn2_df5 has shape: (145, 6)


Unnamed: 0,mfn2_intensity,mfn2_area,Local_Outlier_Factor_8,Local_Outlier_Factor_10,Local_Outlier_Factor_30,Patient_ID
0,143.6252,740.4908,-0.98,-0.9712,-1.0401,NA0730
1,127.3654,607.2048,-1.0172,-1.017,-0.9821,NA1016
2,128.1437,690.5424,-0.9502,-1.0122,-0.9893,GM04287
3,130.2045,653.6719,-1.0378,-1.062,-0.9497,NA1016
4,151.8085,733.8424,-1.002,-0.9931,-1.0309,NA0730


Patient_ID
GM00305        6
GM01650       16
GM01653       29
GM02165        1
GM04212        5
GM04287        6
GM04476        2
GM04687        6
GM04689        3
GM04709        6
GM04715        4
GM04717        6
GM04719        9
GM04799        5
GM04807        1
GM04819        6
GM04837        6
GM04887        3
HGADFN0143     6
NA0495         6
NA0730         6
NA0951         1
NA1016         6
Name: Num_Wells, dtype: int64

Evaluating features using XGBoost: 100%|██████████| 4642/4642 [11:43<00:00,  6.60it/s]

Selected model has 5 features with final accuracy: 100.00%
Feature[ 1] = Local_Outlier_Factor_30
Feature[ 2] = mfn2_intensity
Feature[ 3] = Local_Outlier_Factor_8
Feature[ 4] = Local_Outlier_Factor_10
Feature[ 5] = mfn2_area





In [9]:
# Load the vat1 protein CSV. With verbose=1 the saved output shows the frame
# shape, a preview of the rows and a per-patient Num_Wells summary.
vat1_df5 = Protein_Data(file_name_vat1_df5, verbose=1)
# XGBoost-based feature evaluation. Slow: the saved run shows 4642 iterations in 11:08.
# NOTE(review): the saved run reports 100.00% accuracy with all 5 features
# selected -- worth confirming how the evaluation splits the data (TODO verify).
vat1_df5.evaluate_features_by_xgboost_to_classify_patients_HD_without_HGPS()

Reading Mitochondrial protein data: vat1_df5
DataFrame of vat1_df5 has shape: (147, 6)


Unnamed: 0,vat1_intensity,vat1_area,Local_Outlier_Factor_8,Local_Outlier_Factor_10,Local_Outlier_Factor_30,Patient_ID
0,12.3094,128.9186,-1.1489,-1.209,-0.9917,GM04719
1,15.4491,216.6347,-1.0859,-1.0539,-1.0057,GM04476
2,12.4574,175.537,-0.9775,-0.9573,-0.9966,GM04709
3,14.0197,164.9995,-0.9893,-0.9574,-1.0007,GM04689
4,12.7367,127.5125,-1.1489,-1.2208,-0.9914,GM04689


Patient_ID
GM00305        6
GM01650       14
GM01653       23
GM02165        3
GM04212        6
GM04287        1
GM04476        3
GM04687        6
GM04689        6
GM04709        6
GM04715        6
GM04717        6
GM04719       11
GM04721        3
GM04799        6
GM04807        3
GM04819        5
GM04837        5
GM04887        5
HGADFN0143     5
NA0495         1
NA0730        11
NA1016         6
Name: Num_Wells, dtype: int64

Evaluating features using XGBoost: 100%|██████████| 4642/4642 [11:08<00:00,  6.94it/s]

Selected model has 5 features with final accuracy: 100.00%
Feature[ 1] = Local_Outlier_Factor_30
Feature[ 2] = vat1_area
Feature[ 3] = vat1_intensity
Feature[ 4] = Local_Outlier_Factor_8
Feature[ 5] = Local_Outlier_Factor_10





In [10]:
# Build the combined object from the morphology CSV plus the four protein CSVs
# (drp1, mfn1, mfn2, vat1). Judging by the saved output this presumably yields a
# per-patient overview with one "Mito ..." count column per dataset -- TODO confirm
# against All_Mitochondrial_Data.
all_mitochondrial_data = All_Mitochondrial_Data(file_name_mitochondrial_data, [file_name_drp1_df5,file_name_mfn1_df5,file_name_mfn2_df5,file_name_vat1_df5], verbose=1)

['group_with_id', 'group_with_pc']


Unnamed: 0_level_0,Disease_Status,Age,Gender,CAG_Repeats,CAP_Score,Count_Experiments,Mito morphology,Mito drp1_df5,Mito mfn1_df5,Mito mfn2_df5,Mito vat1_df5
Patient_ID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1
AG0015,Healthy,1.5,M,,,0,,,,,
AG16146,Healthy,31.0,M,,,0,,,,,
GM00305,HD_Severe,56.0,F,45.0,129.43,5,15.0,6.0,6.0,6.0,6.0
GM00726,Healthy,26.0,F,,,0,,,,,
GM01650,Healthy,37.0,F,,,5,44.0,12.0,17.0,16.0,14.0
GM01653,Healthy,37.0,M,,,5,24.0,23.0,24.0,29.0,23.0
GM02147,HD_Severe,55.0,M,44.0,118.64,0,,,,,
GM02165,HD_Severe,57.0,M,46.0,140.52,5,9.0,4.0,1.0,1.0,3.0
GM04196,HD_Mild,51.0,F,44.0,110.02,0,,,,,
GM04200,HD_Severe,53.0,M,46.0,130.66,0,,,,,
