In [12]:
import numpy as np
import pandas as pd
import scanpy as sc
import random

from sklearn.metrics import adjusted_rand_score, adjusted_mutual_info_score, normalized_mutual_info_score
from sklearn.cluster import KMeans, SpectralClustering
from sklearn.cluster import SpectralClustering

from sklearn.decomposition import PCA, SparsePCA, KernelPCA
from sklearn.manifold import TSNE

from rpy2.robjects import r, pandas2ri
from rpy2.robjects.vectors import StrVector

pandas2ri.activate()

# import magic
import scprep

%matplotlib inline

# from sklearnex import patch_sklearn
# patch_sklearn()

import warnings

from sklearn.cluster import KMeans
from tqdm import tqdm
from sklearn.metrics import mean_squared_error as mse

import os 

# XLA_PYTHON_CLIENT_PREALLOCATE is the JAX/XLA switch for up-front GPU memory
# preallocation; 'false' asks for on-demand allocation instead.
# NOTE(review): no JAX import is visible in this notebook - confirm the setting is still needed.
os.environ['XLA_PYTHON_CLIENT_PREALLOCATE'] = 'false'
# Expose only CUDA device 1 to libraries that initialise CUDA after this point.
# Both variables are set before the `dca` import that follows.
os.environ['CUDA_VISIBLE_DEVICES'] = '1'

from dca.api import dca

In [6]:
def get_data_for_i(i):
    """Load the reference counts and the dropout-corrupted counts for level ``i``.

    Parameters
    ----------
    i : int
        Dropout level. Selects the file ``drp_{i}0.csv`` and sets the mask
        length to ``int(n_entries / 10 * i)``.

    Returns
    -------
    df_ : pandas.DataFrame
        Corrupted counts (not log-transformed), restricted to the columns
        that still contain at least one positive value.
    mask : list of tuple
        ``(row, column)`` pairs taken from a seeded shuffle of every matrix
        entry, minus the pairs whose column was removed.
    original : pandas.DataFrame
        ``log(x + 1)`` of the reference counts, restricted to the same columns.

    Notes
    -----
    The pairs come from ``np.ndindex`` (positions) but are filtered here by
    column *label*, so this assumes the integer labels in the CSV equal the
    positions 0..n-1 - TODO confirm against the data files.
    """
    original_ = pd.read_csv('../../data/1k_cell//data.csv', index_col=0)
    df_ = pd.read_csv('../../data/1k_cell/drp_{}0.csv'.format(i), index_col=0)
    df_.index = [int(label) for label in df_.index]
    df_.columns = [int(label) for label in df_.columns]

    # Give the reference frame the same integer labels as the corrupted frame.
    original_.columns = df_.columns
    original_.index = df_.index

    # Fixed seed: every call shuffles the entries into the same order, so the
    # mask for level i is a prefix of the mask for level i + 1.
    entries = list(np.ndindex(original_.shape))
    random.Random(42).shuffle(entries)
    mask = entries[:int(len(entries) / 10 * i)]

    # Keep a column only if the corrupted data has a positive value in it.
    # The axis is explicit because np.sum(DataFrame) forwards axis=None, which
    # pandas deprecates for DataFrames.
    keep = (np.sign(df_).sum(axis=0) > 0).to_numpy()
    dropped_columns = set(int(label) for label in df_.columns[~keep])

    original_ = original_.loc[:, keep]
    df_ = df_.loc[:, keep]

    original = np.log(original_ + 1)

    # Set membership keeps this filter linear in the mask length.
    mask = [entry for entry in mask if entry[1] not in dropped_columns]

    return df_, mask, original

In [7]:
def get_cos_sim(vector1, vector2):
    """Return the cosine similarity between ``vector1`` and ``vector2``.

    The result is ``dot(vector1, vector2)`` divided by the product of the two
    norms from ``np.linalg.norm`` (default order). There is no guard against
    a zero norm.
    """
    # Product of the magnitudes (norms) of the two vectors.
    magnitude_product = np.linalg.norm(vector1) * np.linalg.norm(vector2)

    # Cosine similarity: dot product scaled by the magnitudes.
    return np.dot(vector1, vector2) / magnitude_product

In [8]:
def get_l1norm(vector1, vector2):
    """Return the L1 distance: the sum of absolute element-wise differences."""
    abs_diff = np.abs(vector1 - vector2)
    return np.sum(abs_diff)

In [None]:
def _values_at_mask(frame, mask):
    """Return the values of ``frame`` at the ``(row_label, column_label)`` pairs in ``mask``.

    Gives the same values as calling ``frame.loc[pair]`` for every pair, but
    resolves all labels to positions once and gathers them in a single
    vectorised indexing step instead of one scalar lookup per pair.

    Raises
    ------
    KeyError
        If any label in ``mask`` is missing from ``frame``.
    """
    if len(mask) == 0:
        return np.array([])
    pairs = np.asarray(mask)
    row_pos = frame.index.get_indexer(pairs[:, 0])
    col_pos = frame.columns.get_indexer(pairs[:, 1])
    # get_indexer marks unknown labels with -1, which would otherwise silently
    # select the last row or column.
    if (row_pos < 0).any() or (col_pos < 0).any():
        raise KeyError('mask contains labels that are not present in the frame')
    return frame.to_numpy()[row_pos, col_pos]


# Per-level metrics, keyed by the loop index (0..8); the dropout level passed
# to get_data_for_i is the index plus one.
mses = {}
corrs = {}
coss = {}
l1 = {}

for i in (range(9)):
    print(i)
    df_, mask, original = get_data_for_i(i+1)

    # dca is run on the un-logged corrupted counts and adata.X is read back
    # afterwards (presumably dca writes its reconstruction into adata.X - confirm).
    adata = sc.AnnData(df_)
    dca(adata, verbose=False)

    # Same log(x + 1) transform that get_data_for_i applies to the reference.
    pred = np.log(adata.X+1)
    pred = pd.DataFrame(pred, index=df_.index, columns=df_.columns)

    # Compare reference and prediction on the masked entries only.
    origin = _values_at_mask(original, mask)
    predict = _values_at_mask(pred, mask)

    mses[i] = mse(origin, predict)
    corrs[i] = np.corrcoef(origin, predict)[0][1]
    coss[i] = get_cos_sim(origin, predict)
    l1[i] = get_l1norm(origin, predict)

0




dca: Successfully preprocessed 12345 genes and 1000 cells.




dca: Calculating reconstructions...
1




dca: Successfully preprocessed 12333 genes and 1000 cells.




dca: Calculating reconstructions...
2




dca: Successfully preprocessed 12308 genes and 1000 cells.




dca: Calculating reconstructions...
3




dca: Successfully preprocessed 12279 genes and 1000 cells.




dca: Calculating reconstructions...
4




dca: Successfully preprocessed 12237 genes and 1000 cells.




dca: Calculating reconstructions...
5




dca: Successfully preprocessed 12182 genes and 1000 cells.




dca: Calculating reconstructions...
6




dca: Successfully preprocessed 12122 genes and 1000 cells.




dca: Calculating reconstructions...
7




dca: Successfully preprocessed 12029 genes and 1000 cells.




dca: Calculating reconstructions...
8




dca: Successfully preprocessed 11882 genes and 1000 cells.




dca: Calculating reconstructions...


In [None]:
# One-row table of the values stored in `mses`, in insertion order.
pd.DataFrame(list(mses.values())).T

In [None]:
# One-row table of the values stored in `corrs`, in insertion order.
pd.DataFrame(list(corrs.values())).T

In [None]:
# One-row table of the values stored in `coss`, in insertion order.
pd.DataFrame(list(coss.values())).T

In [None]:
# One-row table of the values stored in `l1`, in insertion order.
pd.DataFrame(list(l1.values())).T