# Example 

In [1]:
import cupy as cp
import numpy as np
import pandas as pd
from sklearn.metrics.cluster import adjusted_rand_score

from sc_jnmf import sc_JNMF

# Seed NumPy's and CuPy's global random generators with the same value so that
# repeated runs of the notebook start from the same random state.
SEED = 1
np.random.seed(SEED)
cp.random.seed(SEED)

### import the data (gene expression profiles quantified by different methods)

In [2]:
# Read both expression tables; the first CSV column is used as the row index.
df1, df2 = (
    pd.read_csv(csv_path, index_col=0)
    for csv_path in ("../test_data/Pollen_default.csv",
                     "../test_data/Pollen_reprocess.csv")
)

# Keep only the text before the first underscore of each df1 column name and
# use it as the column label of BOTH tables.
# NOTE(review): this assumes df2's columns are in the same order as df1's — confirm.
label = [column_name.split('_')[0] for column_name in df1.columns]
df1.columns = df2.columns = label

In [3]:
# Preview the first rows of df1 to check the relabelled table.
df1.head()

Unnamed: 0,HL60,GW21+2,HL60.1,GW16,Kera,BJ,GW21+2.1,GW21,GW16.1,2339,...,GW16.2,GW16.3,Kera.1,BJ.1,iPS,Kera.2,2338,HL60.2,Kera.3,GW16.4
A1BG,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.77,...,0.0,0.0,0.0,1.63,1.61,0.0,0.0,0.0,0.0,0.0
A1BG-AS1,0.0,0.0,0.0,0.0,0.0,0.0,123.61,0.0,0.0,0.0,...,0.0,0.0,0.0,13.34,0.0,0.0,3.47,0.0,0.0,0.0
A1CF,0.0,0.0,0.0,0.12,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
A2LD1,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.33
A2M,0.0,0.0,0.0,0.0,0.0,0.0,6.67,5.07,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [4]:
# Preview the first rows of df2 to check the relabelled table.
df2.head()

Unnamed: 0,HL60,GW21+2,HL60.1,GW16,Kera,BJ,GW21+2.1,GW21,GW16.1,2339,...,GW16.2,GW16.3,Kera.1,BJ.1,iPS,Kera.2,2338,HL60.2,Kera.3,GW16.4
MT-TT,65.5678,0.0,52.6264,26.5533,65.4145,0.0,0.0,206.43,92.1051,36.4242,...,270.39,0.0,266.197,0.0,255.784,0.0,13.5307,166.095,349.491,142.056
MT-TS2,0.0,0.0,64.3211,0.0,0.0,0.0,0.0,0.0,0.0,21.8545,...,0.0,0.0,177.465,98.1744,95.9191,33.2187,31.9817,0.0,522.079,0.0
MT-TH,120.208,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,26.6197,75.5187,47.9595,27.6822,0.0,0.0,0.0,0.0
MT-ND4,8225.55,336.607,966.527,380.442,1655.2,2399.68,1334.09,968.274,1293.63,2400.1,...,6880.02,0.475512,2690.39,4087.5,3345.8,1961.17,4200.92,9153.46,2579.02,2332.8
MT-ND3,2440.61,80.1797,1001.29,593.292,226.187,27.8786,139.886,271.953,0.0,514.627,...,31.4221,0.0,587.483,362.313,766.554,395.091,245.592,1734.98,475.759,438.01


### pass the data to sc_JNMF class

In [5]:
# Build the joint-NMF object from the two expression tables.
# lambda1 is the ratio of the row counts of df1 and df2.
# NOTE(review): rank=8 and lambda4=10 are hard-coded; their exact meaning is
# defined by sc_JNMF — confirm against the library documentation.
sc_jnmf = sc_JNMF(
    df1,
    df2,
    rank=8,
    lambda1=len(df1) / len(df2),
    lambda4=10,
)

### perform gene selection as preprocessing

In [6]:
# Run the gene-selection step with its default arguments (none are passed).
# NOTE(review): the return value is discarded, so this presumably updates
# sc_jnmf in place — confirm.
sc_jnmf.gene_selection()

### transform the data to log scale

In [7]:
# Apply the log-scale step with its default arguments (none are passed).
# NOTE(review): presumably transforms the data held by sc_jnmf in place — confirm.
sc_jnmf.log_scale()

### normalize the data

In [8]:
# Apply the normalization step with its default arguments (none are passed).
# NOTE(review): presumably normalizes the data held by sc_jnmf in place — confirm.
sc_jnmf.normalize()

### perform joint matrix factorization

In [9]:
# Run the joint matrix factorization with its default arguments; the call
# prints a start message and a finish message (shown in the output below).
sc_jnmf.factorize()

start matrix factorization ......
finished!!


### perform cell clustering

In [10]:
# Ask for as many clusters as there are distinct labels among the column names.
n_cell_types = np.unique(label).size
sc_jnmf.clustering(cluster_num=n_cell_types)

### confirm the clustering result

In [11]:
# Display the cluster assignments stored on the object after clustering
# (rendered below as an integer array).
sc_jnmf.cluster

array([10,  3, 10,  4,  9,  8,  3,  3,  2,  6, 10, 10, 10,  2,  8,  8,  7,
       10,  9,  8,  3,  1,  4,  8,  3,  8,  7, 10,  9, 10, 10,  1,  6,  9,
        6,  3, 10,  3,  5, 10,  9,  9,  3,  8,  9, 10,  6,  8,  6,  5,  6,
        1,  1,  9,  4, 10,  7,  8, 10,  9,  9,  5,  4,  5,  7,  8,  6,  7,
        8,  2,  3,  9,  8,  9, 10, 10,  1,  9,  6, 10, 10,  9,  8, 10,  7,
        9,  3,  8,  9,  4,  9,  1,  1, 10,  9,  9,  3,  3,  8, 10,  7,  9,
        9,  7,  8,  6,  6, 10,  2, 10,  9,  8,  7, 10,  4, 10,  4,  9,  8,
        8,  8, 10, 10,  2,  2,  1,  9,  2, 10,  2,  9, 10,  6,  9,  1,  7,
        2, 10, 10,  5,  6,  5,  1,  8, 10,  2,  7,  7,  1,  1,  4,  5,  3,
        1,  1,  9,  3,  5,  6, 10,  1,  1,  7,  8,  6,  1,  9,  7,  7, 10,
        4,  8, 10,  7,  5, 10,  9,  3,  2,  8, 10, 10, 10,  9,  5,  2,  4,
        8,  8,  8, 10,  5,  2,  1, 10,  8, 10,  8, 10,  4,  6,  1, 10,  5,
       10, 10, 10,  9,  2,  3,  8, 10, 10,  8,  1,  2,  7,  6,  7,  8,  8,
        1,  7, 10, 10,  9

### compare the result to the true label

In [12]:
# Compare the predicted clusters with the labels parsed from the column names
# using the adjusted Rand index (adjusted_rand_score is imported in the first cell).
ari = adjusted_rand_score(label, sc_jnmf.cluster)
print("ARI:", ari)

ARI: 0.9434870224766149
