In [1]:
import argparse

import numpy as np
import pandas as pd
import torch
from joblib import Parallel, delayed
from sklearn.model_selection import KFold
from tqdm import tqdm

In [2]:
%load_ext autoreload
%autoreload 2
    
from model import Optimizer, nihgcn
from myutils import *
from load_data import load_data
from sampler import NewSampler

In [3]:
class Args:
    """Run configuration shared by data loading, the model and the optimizer.

    Every setting is a keyword argument whose default reproduces the value
    that used to be hard-coded, so ``Args()`` behaves exactly as before while
    ``Args(lr=1e-4, epochs=200)`` allows a quick experiment without editing
    the class.

    Parameters
    ----------
    data : str
        Dataset identifier; the whole object is handed to ``load_data``.
    lr, wd : float
        Forwarded to ``Optimizer`` as ``lr`` / ``wd`` (presumably learning
        rate and weight decay -- confirm against ``model.Optimizer``).
    layer_size : sequence of int, optional
        Forwarded to ``nihgcn`` as ``layer_size``. Defaults to ``[1024, 1024]``.
    alpha, gamma : number
        Forwarded to ``nihgcn`` as ``alpha`` / ``gamma``.
    epochs : int
        Forwarded to ``Optimizer`` as ``epochs``.
    device : str, optional
        Torch device string. When omitted, ``"cuda"`` is chosen if
        ``torch.cuda.is_available()`` reports a GPU, otherwise ``"cpu"``.
    """

    def __init__(
        self,
        data="nci",
        lr=0.001,
        wd=1e-5,
        layer_size=None,
        alpha=0.25,
        gamma=8,
        epochs=1000,
        device=None,
    ):
        if device is None:
            device = "cuda" if torch.cuda.is_available() else "cpu"
        self.device = device
        self.data = data
        self.lr = lr
        self.wd = wd
        # Build a fresh list for every instance so that mutating one
        # configuration can never leak into another one.
        self.layer_size = [1024, 1024] if layer_size is None else list(layer_size)
        self.alpha = alpha
        self.gamma = gamma
        self.epochs = epochs

    def __repr__(self):
        fields = ", ".join(f"{key}={value!r}" for key, value in vars(self).items())
        return f"Args({fields})"


args = Args()

In [4]:
def nihgcn_new(
    cell_exprs,
    drug_finger,
    res_mat,
    null_mask,
    target_dim,
    target_index,
    evaluate_fun,
    args,
):
    """Train and evaluate one ``nihgcn`` model for a single held-out target.

    Parameters
    ----------
    cell_exprs, drug_finger
        Feature inputs forwarded unchanged to ``nihgcn``.
    res_mat
        Response matrix used to build the train/test split. May be a pandas
        object (anything exposing ``to_numpy()``) or an array-like.
    null_mask
        Forwarded unchanged to ``NewSampler``.
    target_dim, target_index
        Forwarded to ``NewSampler`` to select the held-out target.
    evaluate_fun
        Metric callable forwarded to ``Optimizer``.
    args
        Configuration object providing ``layer_size``, ``alpha``, ``gamma``,
        ``device``, ``lr``, ``wd`` and ``epochs``.

    Returns
    -------
    tuple
        ``(true_data, predict_data)`` as returned by calling the ``Optimizer``
        instance, or ``(None, None)`` when the held-out labels contain fewer
        than two distinct values and the target is skipped.
    """
    # Use the matrix that was passed in. The previous code read the notebook
    # global ``res`` here, which silently ignored ``res_mat`` and raised
    # NameError whenever that global was not defined.
    raw = res_mat.to_numpy() if hasattr(res_mat, "to_numpy") else res_mat
    # NOTE(review): the recorded run of this notebook ended with
    # "RuntimeError: Found dtype Double but expected Float". Casting the
    # response matrix to float32 is the presumed remedy (pandas commonly
    # yields float64, torch modules default to float32) -- confirm against
    # NewSampler / Optimizer, whose sources are not visible from here.
    response = np.asarray(raw, dtype=np.float32)

    sampler = NewSampler(response, null_mask, target_dim, target_index)

    # A metric cannot be evaluated when the held-out entries hold one class.
    val_labels = sampler.test_data[sampler.test_mask]
    if len(np.unique(val_labels)) < 2:
        print(f"Target {target_index} skipped: Validation set has only one class.")
        return None, None

    model = nihgcn(
        sampler.train_data,
        cell_exprs=cell_exprs,
        drug_finger=drug_finger,
        layer_size=args.layer_size,
        alpha=args.alpha,
        gamma=args.gamma,
        device=args.device,
    )
    opt = Optimizer(
        model,
        sampler.train_data,
        sampler.test_data,
        sampler.test_mask,
        sampler.train_mask,
        evaluate_fun,
        lr=args.lr,
        wd=args.wd,
        epochs=args.epochs,
        device=args.device,
    )

    # Calling the optimizer runs training and yields (true, predicted).
    true_data, predict_data = opt()
    return true_data, predict_data

In [5]:
def process_iteration(dim, target_index, args, min_sum=10):
    """Run one leave-target-out experiment and return its results as NumPy arrays.

    Reads the notebook globals ``drug_sum``, ``cell_sum``, ``exprs``,
    ``drug_finger``, ``res``, ``null_mask`` and ``roc_auc``; they must be
    defined before this function is called.

    Parameters
    ----------
    dim
        Target dimension. A truthy value checks ``drug_sum`` for the target,
        a falsy value checks ``cell_sum``. Also forwarded to ``nihgcn_new``
        as ``target_dim``.
    target_index : int
        Positional index of the target along ``dim``.
    args
        Configuration object forwarded to ``nihgcn_new``.
    min_sum : number, optional
        Targets whose summed response is below this value are skipped.
        Defaults to 10, the threshold that was previously hard-coded.

    Returns
    -------
    list or None
        ``[true, predicted]`` as NumPy arrays, or ``None`` when the target is
        skipped -- either because its summed response is below ``min_sum`` or
        because ``nihgcn_new`` reported ``(None, None)``.
    """
    totals = drug_sum if dim else cell_sum
    if totals.iloc[target_index] < min_sum:
        return None

    true_data, predict_data = nihgcn_new(
        cell_exprs=exprs,
        drug_finger=drug_finger,
        res_mat=res,
        null_mask=null_mask,
        target_dim=dim,
        target_index=target_index,
        evaluate_fun=roc_auc,
        args=args,
    )

    # nihgcn_new returns (None, None) when the validation set holds a single
    # class; calling .detach() on None used to raise AttributeError here.
    if true_data is None or predict_data is None:
        return None

    return [true_data.detach().cpu().numpy(), predict_data.detach().cpu().numpy()]

In [6]:
# Driver cell: load the dataset, derive the per-target totals and run
# `process_iteration` once per target in parallel.
#
# Serial alternative kept for debugging (processes only the first target,
# then stops because of the `break`):
# results = []
# for target_index in tqdm(range(samples)):
#     results.append(process_iteration(
#         target_dim, target_index, args
#     ))
#     break

args = Args()

# Axis of `res` whose entries are held out one at a time.
target_dim = 0  # Cell
# target_dim = 1  # Drug

# Load data
# The names assigned below (`res`, `drug_finger`, `exprs`, `null_mask`,
# `cell_sum`, `drug_sum`) are read as globals inside `process_iteration`,
# so they must keep exactly these names.
res, drug_finger, exprs, null_mask = load_data(args)
samples = res.shape[target_dim]  # number of targets along the chosen axis
exprs = exprs.copy()
# Totals along each axis of `res`; `process_iteration` indexes them with
# `.iloc`, so `res` is presumably a pandas DataFrame -- TODO confirm.
cell_sum = np.sum(res, axis=1)
drug_sum = np.sum(res, axis=0)

# One task per target index. Entries of `results` are None for targets that
# `process_iteration` skips.
# NOTE(review): the saved output of this cell ends with
# "RuntimeError: Found dtype Double but expected Float".
results = Parallel(n_jobs=-1)(  # n_jobs=-1 uses all available cores
    delayed(process_iteration)(target_dim, i, args)
    for i in tqdm(range(samples))
)

load nci


  0%|          | 0/59 [00:00<?, ?it/s]

RuntimeError: Found dtype Double but expected Float