In [3]:
# /*==========================================================================================*\
# **                        _           _ _   _     _  _         _                            **
# **                       | |__  _   _/ | |_| |__ | || |  _ __ | |__                         **
# **                       | '_ \| | | | | __| '_ \| || |_| '_ \| '_ \                        **
# **                       | |_) | |_| | | |_| | | |__   _| | | | | | |                       **
# **                       |_.__/ \__,_|_|\__|_| |_|  |_| |_| |_|_| |_|                       **
# \*==========================================================================================*/


# -----------------------------------------------------------------------------------------------
# Author: Bùi Tiến Thành (@bu1th4nh)
# Title: playground_classification.ipynb
# Date: 2024/10/03 15:27:39
# Description: Loads hyperparameter-search runs from MongoDB and ranks each config/classifier pair by mean AUROC.
# 
# (c) bu1th4nh. All rights reserved
# -----------------------------------------------------------------------------------------------
import os
import json
import mlflow
import random
import pymongo
import logging
import multiprocessing

import numpy as np
import pandas as pd
from tqdm import tqdm
from datetime import datetime
from typing import List, Dict, Any, Tuple, Union, Literal

# -----------------------------------------------------------------------------------------------
# MongoDB
# -----------------------------------------------------------------------------------------------
# Connection settings are read from the environment so that credentials are never
# stored in the notebook. Export MONGO_USERNAME / MONGO_PASSWORD (and optionally
# MONGO_HOST / MONGO_PORT) before starting the kernel; host and port fall back to
# the local defaults used previously.
# NOTE(review): the password that was previously committed here should be rotated.
mongo = pymongo.MongoClient(
    host=os.environ.get('MONGO_HOST', 'mongodb://localhost'),
    port=int(os.environ.get('MONGO_PORT', '27017')),
    username=os.environ.get('MONGO_USERNAME'),
    password=os.environ.get('MONGO_PASSWORD'),
)
# Database and collection holding the hyperparameter-search results.
mongo_db = mongo['SimilarSampleCrossOmicNMF']
hparams_runs = mongo_db['HPARAMS_OPTS']

In [7]:
dataset_id = 'BRCA'
target_id = 'ER'


# Load target data: fetch every run document matching the selected dataset and
# target, projecting only the fields used below (and dropping Mongo's `_id`).
run_records = list(
    hparams_runs.find(
        {
            "dataset": dataset_id,
            "target_id": target_id,
        },
        {
            "_id": 0,
            "test_id": 1,
            "config": 1,
            "classifier": 1,
            "AUROC": 1,
        },
    )
)
# An empty result would otherwise surface as an opaque KeyError on the column
# selection below, so fail early with a message that names the filter.
if not run_records:
    raise ValueError(
        f"No runs found for dataset={dataset_id!r}, target_id={target_id!r}"
    )

Ariel = pd.DataFrame.from_records(run_records)
display(Ariel)

# Build the "<config>|<classifier>" key with vectorised string concatenation
# instead of a row-wise apply; `.assign` returns a new frame rather than
# overwriting the column in place. Values are coerced to str so non-string
# entries are formatted instead of raising on `+`.
Ariel = Ariel.assign(
    config=Ariel['config'].astype(str) + '|' + Ariel['classifier'].astype(str)
)
display(Ariel)

# Mean AUROC per "<config>|<classifier>" key; the result is indexed by `config`
# with a single `AUROC` column.
Ariel = Ariel[['config', 'AUROC']].groupby('config').mean()
display(Ariel)

Unnamed: 0,AUROC,test_id,config,classifier
0,0.944575,Test000,k-10-alpha-0-beta-0.01-gamma-overridden,Logistic Regression
1,0.945514,Test000,k-10-alpha-0-beta-0.01-gamma-overridden,Random Forest
2,0.944508,Test000,k-10-alpha-0-beta-0.1-gamma-overridden,Logistic Regression
3,0.943496,Test000,k-10-alpha-0-beta-0.1-gamma-overridden,Random Forest
4,0.943876,Test000,k-10-alpha-0-beta-1-gamma-overridden,Logistic Regression
...,...,...,...,...
48395,0.933463,Test199,k-50-alpha-1000-beta-10-gamma-overridden,Random Forest
48396,0.902854,Test199,k-50-alpha-10000-beta-1-gamma-overridden,Logistic Regression
48397,0.943589,Test199,k-50-alpha-10000-beta-1-gamma-overridden,Random Forest
48398,0.903100,Test199,k-50-alpha-10000-beta-10-gamma-overridden,Logistic Regression


Unnamed: 0,AUROC,test_id,config,classifier
0,0.944575,Test000,k-10-alpha-0-beta-0.01-gamma-overridden|Logist...,Logistic Regression
1,0.945514,Test000,k-10-alpha-0-beta-0.01-gamma-overridden|Random...,Random Forest
2,0.944508,Test000,k-10-alpha-0-beta-0.1-gamma-overridden|Logisti...,Logistic Regression
3,0.943496,Test000,k-10-alpha-0-beta-0.1-gamma-overridden|Random ...,Random Forest
4,0.943876,Test000,k-10-alpha-0-beta-1-gamma-overridden|Logistic ...,Logistic Regression
...,...,...,...,...
48395,0.933463,Test199,k-50-alpha-1000-beta-10-gamma-overridden|Rando...,Random Forest
48396,0.902854,Test199,k-50-alpha-10000-beta-1-gamma-overridden|Logis...,Logistic Regression
48397,0.943589,Test199,k-50-alpha-10000-beta-1-gamma-overridden|Rando...,Random Forest
48398,0.903100,Test199,k-50-alpha-10000-beta-10-gamma-overridden|Logi...,Logistic Regression


Unnamed: 0_level_0,AUROC
config,Unnamed: 1_level_1
k-10-alpha-0-beta-0.01-gamma-overridden|Logistic Regression,0.945728
k-10-alpha-0-beta-0.01-gamma-overridden|Random Forest,0.935426
k-10-alpha-0-beta-0.1-gamma-overridden|Logistic Regression,0.944370
k-10-alpha-0-beta-0.1-gamma-overridden|Random Forest,0.937150
k-10-alpha-0-beta-1-gamma-overridden|Logistic Regression,0.943077
...,...
k-50-alpha-1000-beta-10-gamma-overridden|Random Forest,0.944977
k-50-alpha-10000-beta-1-gamma-overridden|Logistic Regression,0.916524
k-50-alpha-10000-beta-1-gamma-overridden|Random Forest,0.942905
k-50-alpha-10000-beta-10-gamma-overridden|Logistic Regression,0.924667


In [11]:
# Rank the aggregated keys from highest to lowest mean AUROC. The frame is
# rebound rather than sorted in place.
if Ariel.empty:
    raise ValueError("No aggregated AUROC rows available to rank")
Ariel = Ariel.sort_values(by='AUROC', ascending=False)
display(Ariel)

# The best configuration is the index label of the top-ranked row. It is assigned
# here so this cell does not rely on a `best_cfg` left over from another cell.
best_cfg = Ariel.index[0]
best_cfg

Unnamed: 0_level_0,AUROC
config,Unnamed: 1_level_1
k-100-alpha-0.01-beta-0.01-gamma-overridden|Logistic Regression,0.954292
k-200-alpha-0-beta-10-gamma-overridden|Random Forest,0.949058
k-25-alpha-0.01-beta-0.01-gamma-overridden|Random Forest,0.946964
k-25-alpha-0-beta-0.1-gamma-overridden|Random Forest,0.946506
k-10-alpha-0-beta-0.01-gamma-overridden|Logistic Regression,0.945728
...,...
k-100-alpha-0.001-beta-1-gamma-overridden|Logistic Regression,0.901148
k-200-alpha-1000-beta-0.1-gamma-overridden|Logistic Regression,0.898426
k-25-alpha-10000-beta-0.01-gamma-overridden|Logistic Regression,0.898292
k-50-alpha-0-beta-1-gamma-overridden|Logistic Regression,0.895611


'k-100-alpha-0.01-beta-0.01-gamma-overridden|Logistic Regression'