In [1]:
import numpy as np
import pandas as pd
import sys,os 
import random

# Environment flags are set before the project code is imported below.
# NOTE(review): presumably disables XLA/JAX up-front GPU memory preallocation — confirm against the JAX docs.
os.environ['XLA_PYTHON_CLIENT_PREALLOCATE'] = 'false'
# NOTE(review): presumably restricts CUDA to the device with index 1 — confirm this matches the target machine.
os.environ['CUDA_VISIBLE_DEVICES'] = '1'

from sklearn.metrics import mean_absolute_percentage_error as mape
from sklearn.metrics import mean_squared_error as mse
from sklearn.metrics import roc_auc_score

# Put the `magic` directory (two levels above the current working directory) first on the
# import path; presumably this is where the `run_magic` module imported below lives.
sys.path.insert(0, os.path.join(os.path.abspath(''), '../../', 'magic'))

from tqdm import tqdm
from run_magic import run_magic_birw

In [2]:
def get_data_for_i(i, data_dir='../../data/mid_simulation'):
    """Load the reference matrix and the ``drp_{i}0`` matrix and rebuild the evaluation mask.

    Parameters
    ----------
    i : int
        Level index. Selects the file ``drp_{i}0.csv.gz`` and fixes the number of
        masked cells at ``int(n_cells / 10 * i)`` (before column filtering).
        Presumably ``i * 10`` is the simulated dropout percentage; verify against
        the script that generated the files.
    data_dir : str, optional
        Directory containing ``data.csv.gz`` and the ``drp_*0.csv.gz`` files.
        Defaults to the path this notebook originally hard-coded.

    Returns
    -------
    df : pandas.DataFrame
        ``log(x + 1)`` of the ``drp_{i}0`` matrix, restricted to the kept columns.
    mask : list of tuple(int, int)
        ``(row label, column label)`` pairs usable with ``DataFrame.loc`` on
        ``df`` / ``original``. Cells in dropped columns are excluded.
    original : pandas.DataFrame
        ``log(x + 1)`` of the reference matrix with the same labels and columns as ``df``.
    """
    original_ = pd.read_csv(os.path.join(data_dir, 'data.csv.gz'), index_col=0)
    df_ = pd.read_csv(os.path.join(data_dir, 'drp_{}0.csv.gz'.format(i)), index_col=0)

    # Use integer labels on both axes and force the reference matrix onto the
    # very same labels so the two frames can be addressed interchangeably.
    df_.index = [int(label) for label in df_.index]
    df_.columns = [int(label) for label in df_.columns]
    original_.columns = df_.columns
    original_.index = df_.index
    row_labels = df_.index
    col_labels = df_.columns

    # Deterministic permutation (fixed seed 42) of every (row, col) position;
    # the first i tenths of it form the mask. The arithmetic is kept exactly as
    # before so the selected cells do not change.
    positions = list(np.ndindex(original_.shape))
    random.Random(42).shuffle(positions)
    positions = positions[:int(len(positions) / 10 * i)]

    # Keep only columns whose sign-sum is positive. The axis is spelled out
    # because np.sum(DataFrame) depends on an implicit column-wise reduction.
    keep = (np.sign(df_).sum(axis=0) > 0).values

    original = np.log(original_.loc[:, keep] + 1)
    df = np.log(df_.loc[:, keep] + 1)

    # Drop masked cells that fall in removed columns and translate positions to
    # labels, since callers index with .loc. When labels are 0..n-1 this yields
    # the same tuples as the positional version.
    mask = [
        (int(row_labels[r]), int(col_labels[c]))
        for r, c in positions
        if keep[c]
    ]

    return df, mask, original

In [3]:
# Keyword arguments unpacked into run_magic_birw(...) by the evaluation loop.
# NOTE(review): the meaning of each option is defined by run_magic_birw, not in this notebook.
kuang_params = dict(
    is_preprocess=False,
    is_pca=False,
    Iter=100,
    alpha=0.1,
    kernel='jp',
    # method='cartesian',  # left disabled, as in the original configuration
)

In [4]:
# Metric stores, all keyed by the loop counter i (0..8); get_data_for_i is called with i + 1.
mses = {}      # MSE over every masked cell
corrs = {}     # Pearson correlation over every masked cell
mses_ = {}     # MSE over masked cells whose reference value is nonzero
corrs_ = {}    # Pearson correlation over masked cells whose reference value is nonzero
mses__ = {}    # MSE over masked cells whose reference value is zero
corrs__ = {}   # declared for symmetry; never filled in this cell
aucs = {}      # ROC AUC of the prediction rank against sign(reference)
method = 'BiRW'

for i in range(9):
    print(i)
    df, mask, original = get_data_for_i(i + 1)

    pred = run_magic_birw(
        df,
        **kuang_params
    )
    pred = pd.DataFrame(pred, columns=df.columns, index=df.index)

    # Reference and predicted values at the masked cells, in mask order.
    origin = np.array([original.loc[cell] for cell in mask])
    predict = np.array([pred.loc[cell] for cell in mask])

    # Compute the zero / nonzero split of the reference values once.
    nonzero = origin != 0
    zero = origin == 0

    mses[i] = mse(origin, predict)
    corrs[i] = np.corrcoef(origin, predict)[0][1]
    mses_[i] = mse(origin[nonzero], predict[nonzero])
    corrs_[i] = np.corrcoef(origin[nonzero], predict[nonzero])[0][1]
    mses__[i] = mse(origin[zero], predict[zero])

    # Rank the predictions and score them against sign(origin). A Series is used
    # instead of a scratch DataFrame so that `df` keeps referring to the input
    # matrix and no frame is assigned into a single column.
    aucs[i] = roc_auc_score(np.sign(origin), pd.Series(predict).rank())

0
Running ...
     Time to compute cell-cell affinity matrix:  6.0065696239471436
     Time to compute gene-gene affinity matrix:  6.175086498260498
Method: Tensor product


100%|██████████| 100/100 [00:05<00:00, 18.75it/s]


     Time to perform bi-random walk:  8.028366327285767
Finish running, Time:  20.434778928756714
1
Running ...
     Time to compute cell-cell affinity matrix:  9.078516483306885
     Time to compute gene-gene affinity matrix:  8.725187540054321
Method: Tensor product


100%|██████████| 100/100 [00:04<00:00, 21.81it/s]


     Time to perform bi-random walk:  4.715755224227905
Finish running, Time:  22.77175521850586
2
Running ...
     Time to compute cell-cell affinity matrix:  5.943708419799805
     Time to compute gene-gene affinity matrix:  6.38700532913208
Method: Tensor product


100%|██████████| 100/100 [00:04<00:00, 21.68it/s]


     Time to perform bi-random walk:  4.7259745597839355
Finish running, Time:  17.295109272003174
3
Running ...
     Time to compute cell-cell affinity matrix:  5.905175685882568
     Time to compute gene-gene affinity matrix:  6.274543285369873
Method: Tensor product


100%|██████████| 100/100 [00:04<00:00, 21.87it/s]


     Time to perform bi-random walk:  4.686960220336914
Finish running, Time:  17.096076011657715
4
Running ...
     Time to compute cell-cell affinity matrix:  6.039364814758301
     Time to compute gene-gene affinity matrix:  6.083848714828491
Method: Tensor product


100%|██████████| 100/100 [00:04<00:00, 21.90it/s]


     Time to perform bi-random walk:  4.69262957572937
Finish running, Time:  17.040567636489868
5
Running ...
     Time to compute cell-cell affinity matrix:  5.764601469039917
     Time to compute gene-gene affinity matrix:  6.0293378829956055
Method: Tensor product


100%|██████████| 100/100 [00:04<00:00, 21.89it/s]


     Time to perform bi-random walk:  4.678162097930908
Finish running, Time:  16.69696307182312
6
Running ...
     Time to compute cell-cell affinity matrix:  6.524371147155762
     Time to compute gene-gene affinity matrix:  6.629716396331787
Method: Tensor product


100%|██████████| 100/100 [00:04<00:00, 21.81it/s]


     Time to perform bi-random walk:  4.698444128036499
Finish running, Time:  18.087451457977295
7
Running ...
     Time to compute cell-cell affinity matrix:  5.8022685050964355
     Time to compute gene-gene affinity matrix:  5.656184911727905
Method: Tensor product


100%|██████████| 100/100 [00:06<00:00, 15.14it/s]


     Time to perform bi-random walk:  6.794634819030762
Finish running, Time:  18.480599880218506
8
Running ...
     Time to compute cell-cell affinity matrix:  5.427140951156616
     Time to compute gene-gene affinity matrix:  5.4734275341033936
Method: Tensor product


100%|██████████| 100/100 [00:04<00:00, 24.03it/s]


     Time to perform bi-random walk:  4.272366762161255
Finish running, Time:  15.400683879852295


In [5]:
# One row per metric, one column per entry of the metric dicts (in insertion order).
summary_rows = {
    'mse': mses,
    'mse (nonzero)': mses_,
    'mse (zero)': mses__,
    'corr': corrs,
    'corrs (nonzero)': corrs_,
    'auc': aucs,
}
pd.DataFrame(
    [list(scores.values()) for scores in summary_rows.values()],
    index=list(summary_rows),
)

Unnamed: 0,0,1,2,3,4,5,6,7,8
mse,0.387162,0.455228,0.529921,0.595692,0.646766,0.682156,0.707129,0.72457,0.73862
mse (nonzero),1.059956,1.288456,1.530444,1.739592,1.901502,2.012743,2.090712,2.144413,2.187605
mse (zero),0.043714,0.030315,0.020179,0.013009,0.008008,0.004656,0.002404,0.001072,0.000282
corr,0.663269,0.650212,0.633009,0.61005,0.580752,0.544698,0.483956,0.290488,0.136265
corrs (nonzero),0.665213,0.647129,0.623078,0.594282,0.560733,0.527518,0.47866,0.332365,0.150641
auc,0.742257,0.733271,0.719888,0.705111,0.694649,0.682732,0.688773,0.639132,0.573125
