In [1]:
import tracemalloc
import time
import scanpy as sc
import pandas as pd
import scipy

import cupy as cp
import cupyx
import numpy as np
from tqdm import tqdm

import sys

from icecream import ic

from SEACells.core_copy import SEACells

findfont: Font family ['Raleway'] not found. Falling back to DejaVu Sans.
findfont: Font family ['Lato'] not found. Falling back to DejaVu Sans.


In [2]:
num_cells = 1000

n_SEACells = num_cells // 75
build_kernel_on = "X_pca"  # key in ad.obsm to use for computing metacells

n_waypoint_eigs = (10)

In [3]:
ad = sc.read("/home/aparna/DATA/aparnakumar/150000_cells/mouse_marioni_150k.h5ad")
ad = ad[:num_cells]

In [4]:
model2 = SEACells(
    ad,
    use_gpu=False,
    use_sparse=True,
    build_kernel_on=build_kernel_on,
    n_SEACells=n_SEACells,
    n_waypoint_eigs=n_waypoint_eigs,
    convergence_epsilon=1e-5,
)

SPARSE AND NOT GPU
TRYING SEACellsCPU
Welcome to SEACells!


In [5]:
model4 = SEACells( 
    ad,
    use_gpu=True,
    use_sparse=True,
    build_kernel_on=build_kernel_on,
    n_SEACells=n_SEACells,
    n_waypoint_eigs=n_waypoint_eigs,
    convergence_epsilon=1e-5,
)

SPARSE AND GPU
TRYING SEACellsGPU
Welcome to SEACells GPU!


In [6]:
model2.construct_kernel_matrix()

Computing kNN graph using scanpy NN ...
Computing radius for adaptive bandwidth kernel...


HBox(children=(FloatProgress(value=0.0, max=1000.0), HTML(value='')))


Making graph symmetric...
Parameter graph_construction = union being used to build KNN graph...
Computing RBF kernel...


HBox(children=(FloatProgress(value=0.0, max=1000.0), HTML(value='')))


Building similarity LIL matrix...


HBox(children=(FloatProgress(value=0.0, max=1000.0), HTML(value='')))


Constructing CSR matrix...


In [7]:
K2 = model2.K

In [8]:
kernel_matrix = model2.kernel_matrix
cupyx_kernel_matrix = cupyx.scipy.sparse.csr_matrix(kernel_matrix)

kernel_matrix_T = kernel_matrix.T 
cupyx_kernel_matrix_T = cupyx_kernel_matrix.T


In [9]:
# Check if kernel_matrix and cupyx_kernel_matrix are equal 
print(np.allclose(kernel_matrix.todense(), cupyx_kernel_matrix.get().todense()))

True


In [10]:
# Check if kernel_matrix_T and cupyx_kernel_matrix_T are equal 
print(np.allclose(kernel_matrix_T.todense(), cupyx_kernel_matrix_T.get().todense()))

True


In [11]:
K = kernel_matrix @ kernel_matrix_T 
K_cupy = cupyx_kernel_matrix @ cupyx_kernel_matrix_T

In [12]:
# Check if K and K_cupy are equal 
print(np.allclose(K.todense(), K_cupy.get().todense()))

True


In [13]:
diff = (K_cupy.get().todense() - K.todense()).flatten().tolist()[0]

# Get the frequency of the elements in diff  
freq = {}
for item in diff:
    if (item in freq):
        freq[item] += 1
    else:
        freq[item] = 1

print(freq)

{8.881784197001252e-16: 225, 0.0: 979791, 1.1102230246251565e-16: 2528, -4.440892098500626e-16: 1993, -5.551115123125783e-17: 1971, -2.220446049250313e-16: 2997, 5.551115123125783e-17: 2078, 4.440892098500626e-16: 1862, -1.1102230246251565e-16: 2511, 2.220446049250313e-16: 2953, -2.7755575615628914e-17: 340, -8.881784197001252e-16: 262, 2.7755575615628914e-17: 340, 6.661338147750939e-16: 15, 1.3877787807814457e-17: 25, -1.3322676295501878e-15: 13, -6.661338147750939e-16: 27, 6.938893903907228e-18: 11, -1.3877787807814457e-17: 14, -6.938893903907228e-18: 6, -3.469446951953614e-18: 3, 1.7763568394002505e-15: 7, 1.3322676295501878e-15: 19, -3.3306690738754696e-16: 1, 3.3306690738754696e-16: 1, -1.7763568394002505e-15: 7}


In [14]:
model4.add_precomputed_kernel_matrix(cupyx_kernel_matrix)

In [15]:
K4 = model4.K

In [16]:
# Check if K and K4 are equal 
print(np.allclose(K.todense(), K4.get().todense()))

True


In [17]:
model2.initialize()

Building kernel on X_pca
Computing diffusion components from X_pca for waypoint initialization ... 
Determing nearest neighbor graph...


100%|██████████| 14/14 [00:00<00:00, 709.23it/s]

Done.
Sampling waypoints ...
Done.
Selecting 9 cells from waypoint initialization.
Initializing residual matrix using greedy column selection
Initializing f and g...
Selecting 4 cells from greedy initialization.
Randomly initialized A matrix.





Setting convergence threshold at 0.00055


In [18]:
A0 = model2.A0
B0 = model2.B0

archetypes = model2.archetypes 

In [19]:
k = len(archetypes)
n = K4.shape[0]
cols = cp.arange(k)
rows = cp.array(archetypes)
shape = (n, k)

In [20]:
B0_4 = cupyx.scipy.sparse.csr_matrix((cp.ones(len(rows)), (rows, cols)), shape=shape)

In [21]:
# Compare B0 and B0_4 
print(np.allclose(B0.todense(), B0_4.get().todense()))

True


In [22]:
A0_4 = cupyx.scipy.sparse.csr_matrix(A0)
B0_4 = cupyx.scipy.sparse.csr_matrix(B0) 

# Compare A0 and A0_4
print(np.allclose(A0.todense(), A0_4.get().todense()))

# Compare B0 and B0_4
print(np.allclose(B0.todense(), B0_4.get().todense()))


# Compute the sparsity of A0 and A0_4 
print("Sparsity of A0: ", A0.nnz / (A0.shape[0] * A0.shape[1]))
print("Sparsity of A0_4: ", A0_4.nnz / (A0_4.shape[0] * A0_4.shape[1]))

True
True
Sparsity of A0:  0.21346153846153845
Sparsity of A0_4:  0.21346153846153845


In [23]:
new_A_4 = model4._updateA(B0_4, A0_4)

print(new_A_4)

  (0, 0)	0.36862745098039224
  (0, 1)	0.03686274509803921
  (0, 2)	0.014901960784313736
  (0, 3)	0.03764705882352941
  (0, 4)	0.01647058823529412
  (0, 5)	0.014901960784313736
  (0, 6)	0.02901960784313725
  (0, 7)	0.025882352941176467
  (0, 8)	0.0015686274509803925
  (0, 9)	0.026666666666666672
  (0, 10)	0.014901960784313736
  (0, 11)	0.026666666666666672
  (0, 12)	0.02352941176470589
  (0, 13)	0.02117647058823529
  (0, 14)	0.02352941176470589
  (0, 15)	0.02901960784313725
  (0, 16)	0.025882352941176467
  (0, 17)	0.03215686274509803
  (0, 18)	0.3443137254901962
  (0, 19)	0.014901960784313736
  (0, 20)	0.027450980392156862
  (0, 21)	0.014901960784313736
  (0, 22)	0.03372549019607842
  (0, 24)	0.03215686274509803
  (0, 25)	0.020392156862745106
  :	:
  (12, 975)	0.06431372549019608
  (12, 976)	0.4635294117647058
  (12, 977)	0.06431372549019608
  (12, 978)	0.04549019607843137
  (12, 979)	0.06509803921568627
  (12, 980)	0.03529411764705882
  (12, 981)	0.03372549019607843
  (12, 982)	0.06980

In [24]:
new_A_2 = model2._updateA(B0, A0)

print(new_A_2)

  (0, 0)	0.36862745098039224
  (1, 0)	0.07843137254901959
  (2, 0)	0.08549019607843138
  (4, 0)	0.07921568627450981
  (5, 0)	0.05254901960784313
  (6, 0)	0.054901960784313725
  (7, 0)	0.0611764705882353
  (8, 0)	0.07372549019607842
  (9, 0)	0.029803921568627458
  (10, 0)	0.03607843137254901
  (11, 0)	0.035294117647058816
  (12, 0)	0.04470588235294117
  (0, 1)	0.03686274509803921
  (1, 1)	0.018823529411764704
  (2, 1)	0.06431372549019608
  (3, 1)	0.0203921568627451
  (4, 1)	0.04313725490196079
  (5, 1)	0.03058823529411765
  (6, 1)	0.03058823529411765
  (7, 1)	0.035294117647058816
  (8, 1)	0.05411764705882353
  (9, 1)	0.563921568627451
  (10, 1)	0.04156862745098039
  (11, 1)	0.045490196078431376
  (12, 1)	0.014901960784313736
  :	:
  (1, 998)	0.05019607843137255
  (2, 998)	0.043921568627450974
  (3, 998)	0.04313725490196079
  (4, 998)	0.07137254901960785
  (5, 998)	0.0596078431372549
  (6, 998)	0.05568627450980392
  (7, 998)	0.06431372549019608
  (8, 998)	0.04313725490196079
  (9, 998)	0

In [25]:
# compare new_A_2 and new_A_4 

# print(np.allclose(new_A_2.todense(), new_A_4.get().todense()))

In [26]:
# diff = (new_A_2.todense() - new_A_4.get().todense()).flatten().tolist()[0] 

# # Get the frequency of the elements in diff
# freq = {}
# for item in diff:
#     if (item in freq):
#         freq[item] += 1
#     else:
#         freq[item] = 1

# print(freq)

In [27]:
# Set model2.A_ to new_A_2 
model2.A_ = new_A_2

# Set model4.A_ to new_A_4 
model4.A_ = new_A_4

# Compare model2.A_ and model4.A_ 
# print(np.allclose(model2.A_.todense(), model4.A_.get().todense()))

# Check the shapes of model2.A_ and model4.A_
print(model2.A_.shape)
print(model4.A_.shape)


(13, 1000)
(13, 1000)


In [28]:
# Set model2.B_ to B0 
model2.B_ = B0

# Set model4.B_ to B0_4
model4.B_ = B0_4

# Compare model2.B_ and model4.B_
print(np.allclose(model2.B_.todense(), model4.B_.get().todense()))

# Check the shapes of model2.B_ and model4.B_ 
print(model2.B_.shape)
print(model4.B_.get().shape)

True
(1000, 13)
(1000, 13)


In [29]:
RSS = model2.compute_RSS(model2.A_, model2.B_)
RSS_4 = model4.compute_RSS(model4.A_, model4.B_) 

# Compare RSS and RSS_4
print(np.allclose(RSS, RSS_4))

print(RSS)
print(RSS_4)

# Append RSS_4 to model4.RSS_iters 
model4.RSS_iters.append(RSS_4)

True
55.15852947820507
55.15852947820507


In [30]:
model2.step() 
model4.step() 

# Compare model2.A_ and model4.A_
print(np.allclose(model2.A_.todense(), model4.A_.get().todense()))

# Compare model2.B_ and model4.B_
print(np.allclose(model2.B_.todense(), model4.B_.get().todense()))

# print out RSS_iters for model2 and model4 
print(model2.RSS_iters)
print(model4.RSS_iters)

# Compare model2.RSS_iters and model4.RSS_iters
print(np.allclose(model2.RSS_iters, model4.RSS_iters))

True
True
[55.15852947820507, 53.34160510198556]
[55.15852947820507, 53.34160510198556]
True


In [31]:
# Check the convergence threshold for model2 and model4 
print(model2.convergence_threshold)
print(model4.convergence_threshold)

# Set model4.convergence_threshold to model2.convergence_threshold 
model4.convergence_threshold = model2.convergence_threshold 

# Check the convergence threshold for model2 and model4
print(model2.convergence_threshold)
print(model4.convergence_threshold)

0.0005515852947820507
None
0.0005515852947820507
0.0005515852947820507


In [32]:
model2.fit() 

model4.fit()

Randomly initialized A matrix.
Starting iteration 1.
Completed iteration 1.
Starting iteration 10.
Completed iteration 10.
Starting iteration 20.
Completed iteration 20.
Starting iteration 30.
Completed iteration 30.
Starting iteration 40.
Completed iteration 40.
Converged after 47 iterations.
Building kernel on X_pca
Computing diffusion components from X_pca for waypoint initialization ... 
Determing nearest neighbor graph...
Done.
Sampling waypoints ...
Done.
Selecting 9 cells from waypoint initialization.
Initializing residual matrix using greedy column selection
Initializing f and g...


100%|██████████| 14/14 [00:00<00:00, 75.08it/s]


Selecting 4 cells from greedy initialization.
Randomly initialized A matrix.
Completed iteration 1.
Completed iteration 10.
Completed iteration 20.
Converged after 23 iterations.


In [33]:
# Compare model2.A_ and model4.A_ 
print(np.allclose(model2.A_.todense(), model4.A_.get().todense()))

# Compare model2.B_ and model4.B_
print(np.allclose(model2.B_.todense(), model4.B_.get().todense()))

# Print out model2.RSS_iters and model4.RSS_iters 
print(model2.RSS_iters)
print(model4.RSS_iters)

False
False
[55.15852947820507, 53.34160510198556, 55.15880655288771, 53.33861168560896, 51.70316967646472, 51.423617424024755, 51.292417051740074, 51.17318367925014, 51.08942467739385, 51.02930614746798, 50.98444081506254, 50.953372467798765, 50.919866029247515, 50.888704437842364, 50.86447932257699, 50.8521844084894, 50.83332671073067, 50.81685259194617, 50.80799759612869, 50.805087205682064, 50.79621723634045, 50.78826895267555, 50.78532104341745, 50.78382911694094, 50.78218190404367, 50.7792477491674, 50.778371627869376, 50.772092340790074, 50.7700736912371, 50.76475818787328, 50.76326769691456, 50.75927542831291, 50.753273696088584, 50.75106849805012, 50.74626665923086, 50.74813163372398, 50.74018916744118, 50.734715383867226, 50.72694086648114, 50.72093869293146, 50.710355557288395, 50.70946326195266, 50.71230540185919, 50.71125874364981, 50.70809503188179, 50.71323575076684, 50.7045145806058, 50.70958250601241, 50.70789023773558, 50.70828821283085]
[55.15852947820507, 53.3416051

### Update A

In [34]:
def updateA_compare(B, A_prev):
    n, k = B.shape
    A = A_prev

    Ag = cupyx.scipy.sparse.csc_matrix(A) 
    Bg = cupyx.scipy.sparse.csr_matrix(B)

    K = model2.K
    Kg = model4.K

    t = 0  # current iteration (determine multiplicative update)

    # precompute some gradient terms
    t2 = (K @ B).T
    t1 = t2 @ B

    # precompute the gradient terms for cupy 
    t2g = (Kg.dot(Bg)).T
    t1g = t2g.dot(Bg)

    # Check if t1 and t1g are equal 
    print(np.allclose(t1.todense(), t1g.get().todense())) 

    # Check if t2 and t2g are equal 
    print(np.allclose(t2.todense(), t2g.get().todense()))

    # Create a dataframe that will store the iteration t, is_close_Gg, is_close_amins, is_close_eg, is_close_Ag
    df = pd.DataFrame(columns=["t", "is_close_Gg", "is_close_amins", "is_close_eg", "is_close_Ag", "e", "eg", "G", "Gg"])

    # update rows of A for given number of iterations
    while t < 50:
        # compute gradient (must convert matrix to ndarray)
        G = 2.0 * np.array(t1 @ A - t2)
        # get argmins - shape 1 x n
        amins = np.argmin(G, axis=0)
        amins = np.array(amins).reshape(-1)
        # # loop free implementation
        e = scipy.sparse.csr_matrix((np.ones(len(amins)), (amins, np.arange(n))), shape=A.shape)
        A += 2.0 / (t + 2.0) * (e - A)

        # compute gradient
        Gg = 2.0 * (t1g.dot(Ag) - t2g)

        is_close_Gg = np.allclose(Gg.get().todense(), G.todense())

        aminsG = cp.argmin(Gg.todense(), axis=0)

        is_close_amins = np.allclose(amins, aminsG) 

        # loop free implementaton
        eg = cupyx.scipy.sparse.csr_matrix((cp.ones(len(aminsG)), (aminsG, cp.arange(n))), shape=Ag.shape)
        # eg = cp.zeros((k, n))
        # eg[amins, cp.arange(n)] = 1.0
        # eg = cupyx.scipy.sparse.csr_matrix(eg)

        is_close_eg = np.allclose(eg.get().todense(), e.todense())

        Ag += 2.0/(t+2.0) * (eg - Ag)

        is_close_Ag = np.allclose(Ag.get().todense(), A.todense())

        # Add the iteration t and the is_close_Gg, is_close_amins, is_close_eg, is_close_Ag to the dataframe using df.concat
        df = pd.concat([df, pd.DataFrame([[t, is_close_Gg, is_close_amins, is_close_eg, is_close_Ag, e, eg, G, Gg]], columns=df.columns)], ignore_index=True)

        # # print the iteration t and the is_close_Gg, is_close_amins, is_close_eg, is_close_Ag 
        # print(t, is_close_Gg, is_close_amins, is_close_eg, is_close_Ag)

        t += 1

    return df

In [35]:
A = model2.A0 
B = model2.B0 

df = updateA_compare(B, A)

True
True


  df = pd.concat([df, pd.DataFrame([[t, is_close_Gg, is_close_amins, is_close_eg, is_close_Ag, e, eg, G, Gg]], columns=df.columns)], ignore_index=True)


In [36]:
df

Unnamed: 0,t,is_close_Gg,is_close_amins,is_close_eg,is_close_Ag,e,eg,G,Gg
0,0,True,True,True,True,"(0, 0)\t1.0\n (0, 2)\t1.0\n (0, 18)\t1.0\n...","(0, 0)\t1.0\n (0, 2)\t1.0\n (0, 18)\t1.0\n...","(0, 0)\t-5.664302954085622\n (1, 0)\t1.5075...","(0, 0)\t-5.664302954085623\n (0, 3)\t7.0230..."
1,1,True,True,True,True,"(0, 1)\t1.0\n (0, 3)\t1.0\n (0, 5)\t1.0\n ...","(0, 1)\t1.0\n (0, 3)\t1.0\n (0, 5)\t1.0\n ...","(0, 0)\t11.996006957347323\n (1, 0)\t1.6710...","(0, 0)\t11.996006957347323\n (0, 2)\t18.881..."
2,2,True,True,True,True,"(0, 0)\t1.0\n (0, 39)\t1.0\n (0, 50)\t1.0\...","(0, 0)\t1.0\n (0, 39)\t1.0\n (0, 50)\t1.0\...","(0, 0)\t-0.5917221360067799\n (1, 0)\t-0.24...","(0, 0)\t-0.5917221360067808\n (0, 1)\t12.58..."
3,3,True,True,True,True,"(0, 18)\t1.0\n (0, 27)\t1.0\n (0, 29)\t1.0...","(0, 18)\t1.0\n (0, 27)\t1.0\n (0, 29)\t1.0...","(0, 0)\t5.702142410670272\n (1, 0)\t0.71075...","(0, 0)\t5.702142410670271\n (0, 1)\t6.29386..."
4,4,True,True,True,True,"(0, 39)\t1.0\n (0, 92)\t1.0\n (0, 106)\t1....","(0, 39)\t1.0\n (0, 92)\t1.0\n (0, 106)\t1....","(0, 0)\t0.6670507733286302\n (1, 0)\t-0.057...","(0, 0)\t0.6670507733286293\n (0, 1)\t3.7763..."
5,5,True,True,True,True,"(0, 0)\t1.0\n (0, 56)\t1.0\n (0, 63)\t1.0\...","(0, 0)\t1.0\n (0, 56)\t1.0\n (0, 63)\t1.0\...","(0, 0)\t-0.8901920331362501\n (1, 0)\t1.695...","(0, 0)\t-0.890192033136251\n (0, 1)\t2.5175..."
6,6,True,True,True,True,"(0, 18)\t1.0\n (0, 27)\t1.0\n (0, 53)\t1.0...","(0, 18)\t1.0\n (0, 27)\t1.0\n (0, 53)\t1.0...","(0, 0)\t2.7915791070019127\n (1, 0)\t1.6882...","(0, 0)\t2.791579107001912\n (0, 1)\t1.79824..."
7,7,True,True,True,True,"(0, 39)\t1.0\n (0, 105)\t1.0\n (0, 121)\t1...","(0, 39)\t1.0\n (0, 105)\t1.0\n (0, 121)\t1...","(0, 0)\t0.3722876595804774\n (1, 0)\t0.9636...","(0, 0)\t0.3722876595804765\n (0, 1)\t1.3486..."
8,8,True,True,True,True,"(0, 0)\t1.0\n (0, 27)\t1.0\n (0, 92)\t1.0\...","(0, 0)\t1.0\n (0, 27)\t1.0\n (0, 92)\t1.0\...","(0, 0)\t-1.240573305367147\n (1, 0)\t0.4806...","(0, 0)\t-1.240573305367148\n (0, 1)\t1.0489..."
9,9,True,True,True,True,"(0, 18)\t1.0\n (0, 28)\t1.0\n (0, 39)\t1.0...","(0, 18)\t1.0\n (0, 28)\t1.0\n (0, 39)\t1.0...","(0, 0)\t1.4067427471757474\n (1, 0)\t0.7187...","(0, 0)\t1.4067427471757465\n (0, 1)\t0.8391..."


In [37]:
# Get e and eg from row 1 of df 
e = df.iloc[1]["e"] 
eg = df.iloc[1]["eg"] 

# Compare e and eg 
print(np.allclose(e.todense(), eg.get().todense()))


True


In [38]:
# Compute the sparsity of e and eg 

print("Sparsity of e: ", 1 - (np.count_nonzero(e.todense()) / (e.shape[0] * e.shape[1]))) 
print("Sparsity of eg: ", 1 - (np.count_nonzero(eg.get().todense()) / (eg.shape[0] * eg.shape[1]))) 



Sparsity of e:  0.9230769230769231
Sparsity of eg:  0.9230769230769231


In [39]:
diff = (e.todense() - eg.get().todense()).flatten().tolist()[0]

# Get the frequency of the elements in diff 
freq = {} 
for item in diff:
    if (item in freq):
        freq[item] += 1 
    else:
        freq[item] = 1

print(freq)


{0.0: 13000}


In [40]:
# Get G and Gg from row 1 of df 
G = df.iloc[1]["G"]
Gg = df.iloc[1]["Gg"]

# Compare G and Gg
print(np.allclose(G.todense(), Gg.get().todense()))

True


In [41]:
# compute the sparsity of G and Gg 
print("Sparsity of G: ", 1 - (np.count_nonzero(G.todense()) / (G.shape[0] * G.shape[1])))
print("Sparsity of Gg: ", 1 - (np.count_nonzero(Gg.get().todense()) / (Gg.shape[0] * Gg.shape[1])))

Sparsity of G:  0.8004615384615384
Sparsity of Gg:  0.8004615384615384


In [42]:
diff = (G.todense() - Gg.get().todense()).flatten().tolist()[0]

# Get the frequency of the elements in diff
freq = {}
for item in diff:
    if (item in freq):
        freq[item] += 1
    else:
        freq[item] = 1

print(freq)

{0.0: 11888, -4.440892098500626e-16: 70, -1.7763568394002505e-15: 273, 1.7763568394002505e-15: 54, 3.552713678800501e-15: 11, 2.220446049250313e-16: 186, 4.440892098500626e-16: 17, -5.551115123125783e-17: 3, -8.881784197001252e-16: 55, -6.661338147750939e-16: 1, -2.220446049250313e-16: 209, -3.552713678800501e-15: 45, 8.881784197001252e-16: 80, -1.3322676295501878e-15: 6, -1.1102230246251565e-16: 11, 1.1102230246251565e-16: 49, 5.551115123125783e-17: 1, 3.3306690738754696e-16: 2, -2.6645352591003757e-15: 19, -3.3306690738754696e-16: 2, 6.661338147750939e-16: 1, -2.220446049250313e-15: 2, 2.6645352591003757e-15: 3, -4.440892098500626e-15: 7, -5.329070518200751e-15: 4, 1.3322676295501878e-15: 1}


In [43]:
# aminsG = Gg.argmin(axis=0).ravel() 

# # Calculate aminsG so that we get the lowest possible index for the lowest value in each column of Gg 
# # aminsG = np.argmin(Gg.get().todense(), axis=0).ravel()


# # Compute all the indices of the lowest value in each column of Gg
# minsG = Gg.min(axis=0)
# print(minsG)


aminsG = cp.argmin(Gg.todense(), axis=0)
print(aminsG.shape)

(1000,)


In [44]:
amins = np.argmin(Gg.get(), axis=0)
amins = np.array(amins).reshape(-1)

print(amins)

[ 2  0  2  0  4  0  0  2 11  0  0  0  5  0  1  0  0  0  3  0  0  2  0  1
  0 12  0  0  2  5 12  2  0  0  0  2  0  0  0  1  0  0  0  0  0  0  0  0
  6  0  6  0  0  2  7  2  0  7  0  8  0  7  2  1  5  0  0  8  0  0  1  0
  0  1  2  2  2  0  0  2  0 12  5  5  0  0  0  8  2  0  2  0  2  0  7  2
  2  0  0  5  0  2  2  0  0  2  2  0  0  2  9  0  0  0  0  0  0  2  0  0
  2  0  5  0  2  1  0  2  0  0  8  0  0  2  3  3  0  0  2  1  0  0  0  0
  0  0  0  0  0  0  2 11  2  0  1  0  2  0  5  0  9  3  2  1  0  6  0  0
  0  0  2  0  3  2  0  0  0  0  5  9  0  0  0  5  2  2  8  0  0  0  0 10
  2  8  8  2  0  0  0  0  0  0  0  0  3  6  0  9  0  0  0  2  0 11  0  0
  0  0  0  2 10  0  0  0  2  6  0  0  8  4  4  0  0  0  0  0  0  2  2  0
  2  0  0  2  0  0  0  0  0  0  0  2  0  2  0  0  5  0  0  0  7  2  2  0
  0  2  0  0  0  0  4  8  0  8  0  2  0  0  0  0  1  8  0  9  0  0  2  2
  2  0  0  5 10  0  7  8  0  0  0  0  0  0  8  0  0  2  2 12  0  2  0  5
  0  5  0  0  0  2  1  2  0  0  2  0  2  9 11  0  0

In [45]:
# Compare amins and aminsG 
print(np.allclose(amins, aminsG))

True


In [46]:
Gg.get().todense()[:, 0]

matrix([[11.99600696],
        [ 1.67105834],
        [ 0.        ],
        [ 1.76573645],
        [ 0.10323059],
        [ 0.        ],
        [ 0.        ],
        [ 0.        ],
        [ 0.        ],
        [ 0.        ],
        [ 0.        ],
        [ 0.        ],
        [ 0.        ]])

In [47]:
# Find the row numbers of every row in aminsG that is not equal to the corresponding row in amins
row_nums = np.where(amins != aminsG.get())[0]
# print(row_nums)

# # Let's make a dataframe that will store the row numbers and the corresponding columns in Gg and G that are not equal. In addition, let's store the amins and aminsG for each row number. 
# df = pd.DataFrame(columns=["row_num", "aminsG", "amins", "Gg"]) 

# for row_num in row_nums:
#     df = pd.concat([df, pd.DataFrame([[row_num,
#                                         aminsG.get()[row_num], 
#                                         amins[row_num], 
#                                         Gg.get().todense()[:, row_num]]], columns=df.columns)], ignore_index=True)

# display(df)

In [48]:
# # Print out the values in the second row of df 
# index = 5
# print("aminsG: ", df.iloc[index]["aminsG"])
# print("amins: ", df.iloc[index]["amins"])
# print("Gg: ", df.iloc[index]["Gg"])

In [49]:
# Where is the behavior of amins and aminsG the same?
# row_nums_same = np.where(amins == aminsG.get())[0] 

# # Make a dataframe that will store the row numbers and the corresponding columns in Gg and G that are equal. In addition, let's store the amins and aminsG for each row number. 
# df_same = pd.DataFrame(columns=["row_num", "aminsG", "amins", "Gg"])

# for row_num in row_nums_same:
#     df_same = pd.concat([df_same, pd.DataFrame([[row_num,
#                                         aminsG.get()[row_num],
#                                         amins[row_num],
#                                         Gg.get().todense()[:, row_num]]], columns=df_same.columns)], ignore_index=True)
    
# display(df_same)

### Fit

In [50]:
# Set model 4 A_ to model 2 A_ 

model4.A_ = cupyx.scipy.sparse.csr_matrix(model2.A_) 

# Set model 4 B_ to model 2 B_ 
model4.B_ = cupyx.scipy.sparse.csr_matrix(model2.B_)

In [51]:
def fit_compare(
        model2, 
        model4, 
        max_iter: int = 50,
        min_iter: int = 10,
        initial_archetypes=None,
        initial_assignments=None,
    ):
        model2.initialize(
            initial_archetypes=initial_archetypes,
            initial_assignments=initial_assignments,
        )

        model4.initialize(
            initial_archetypes=initial_archetypes,
            initial_assignments=initial_assignments,
        ) 

        # Set the model4.A_ to model2.A_ 
        model4.A_ = cupyx.scipy.sparse.csc_matrix(model2.A_) 

        # Set the model4.B_ to model2.B_ 
        model4.B_ = cupyx.scipy.sparse.csr_matrix(model2.B_)

        # Check if model2.A_ and model4.A_ are equal
        print(np.allclose(model2.A_.todense(), model4.A_.get().todense()))

        # Check if model2.B_ and model4.B_ are equal
        print(np.allclose(model2.B_.todense(), model4.B_.get().todense()))

        # Check if model2.K and model4.K are equal 
        print(np.allclose(model2.K.todense(), model4.K.get().todense()))

        # Create a dataframe that will store the iteration n_iter, is_close_A, is_close_B, is_close_K, is_close_RSS
        df = pd.DataFrame(columns=["n_iter", "is_close_A", "is_close_B", "is_close_K", "is_close_RSS", "sparsity_2", "sparsity_4"])

        converged = False
        n_iter = 0

        converged1 = False 
        converged2 = False 

        
        while (not converged and n_iter < max_iter) or n_iter < min_iter:
            n_iter += 1
            if n_iter == 1 or (n_iter) % 10 == 0:
                print(f"Starting iteration {n_iter}.")
            model2.step()
            model4.step() 

            is_close_A = np.allclose(model2.A_.todense(), model4.A_.get().todense()) 
            is_close_B = np.allclose(model2.B_.todense(), model4.B_.get().todense()) 
            is_close_K = np.allclose(model2.K.todense(), model4.K.get().todense()) 
            is_close_RSS = np.allclose(model2.RSS_iters[-1], model4.RSS_iters[-1]) 

            # Compute the sparsity of model2.A_ and model4.A_
            sparsity_2 = model2.A_.count_nonzero()
            sparsity_4 = model4.A_.count_nonzero()

            diff = (model2.A_.todense() - model4.A_.get().todense()).flatten().tolist()[0] 

            # Get the frequency of the elements in diff 
            freq = {}
            for item in diff:
                if (item in freq):
                    freq[item] += 1 
                else:
                    freq[item] = 1

            print(freq)

            # Add the iteration n_iter and the is_close_A, is_close_B, is_close_K, is_close_RSS to the dataframe using df.concat
            df = pd.concat([df, pd.DataFrame([[n_iter, is_close_A, is_close_B, is_close_K, is_close_RSS, sparsity_2, sparsity_4]], columns=df.columns)], ignore_index=True)

            if n_iter == 1 or (n_iter) % 10 == 0:
                print(f"Completed iteration {n_iter}.")

            # Check for convergence
            if (
                cp.abs(model2.RSS_iters[-2] - model2.RSS_iters[-1])
                < model2.convergence_threshold
            ):
                print(f"Converged after {n_iter} iterations.")
                converged1 = True

            if (cp.abs(model4.RSS_iters[-2] - model4.RSS_iters[-1]) < model4.convergence_threshold):
                print(f"Converged after {n_iter} iterations.")
                converged2 = True

            if converged1 and converged2: 
                converged = True

        # Print out the types of model2.A_ and model4.A_ 
        print(type(model2.A_))
        print(type(model4.A_)) 

        # Print out the shapes of model2.A_ and model4.A_ 
        print(model2.A_.shape) 
        print(model4.A_.shape)

        model2.Z_ = model2.B_.T @ model2.K

        # Label cells by SEACells assignment
        labels = model2.get_hard_assignments()
        model2.ad.obs["SEACell"] = labels["SEACell"]

        model4.Z_ = model4.B_.T @ model4.K

        # Label cells by SEACells assignment
        labels = model4.get_hard_assignments()
        model4.ad.obs["SEACell"] = labels["SEACell"]

        if not converged:
            raise RuntimeWarning(
                "Warning: Algorithm has not converged - you may need to increase the maximum number of iterations"
            )
        return df

In [52]:
fit_compare(model2, model4)

Randomly initialized A matrix.
Randomly initialized A matrix.
True
True
True
Starting iteration 1.
{0.0: 13000}
Completed iteration 1.
{0.0: 13000}


  df = pd.concat([df, pd.DataFrame([[n_iter, is_close_A, is_close_B, is_close_K, is_close_RSS, sparsity_2, sparsity_4]], columns=df.columns)], ignore_index=True)


{0.0: 13000}


  df = pd.concat([df, pd.DataFrame([[n_iter, is_close_A, is_close_B, is_close_K, is_close_RSS, sparsity_2, sparsity_4]], columns=df.columns)], ignore_index=True)


{0.0: 13000}


  df = pd.concat([df, pd.DataFrame([[n_iter, is_close_A, is_close_B, is_close_K, is_close_RSS, sparsity_2, sparsity_4]], columns=df.columns)], ignore_index=True)


{0.0: 13000}


  df = pd.concat([df, pd.DataFrame([[n_iter, is_close_A, is_close_B, is_close_K, is_close_RSS, sparsity_2, sparsity_4]], columns=df.columns)], ignore_index=True)


{0.0: 13000}


  df = pd.concat([df, pd.DataFrame([[n_iter, is_close_A, is_close_B, is_close_K, is_close_RSS, sparsity_2, sparsity_4]], columns=df.columns)], ignore_index=True)


{0.0: 13000}


  df = pd.concat([df, pd.DataFrame([[n_iter, is_close_A, is_close_B, is_close_K, is_close_RSS, sparsity_2, sparsity_4]], columns=df.columns)], ignore_index=True)


{0.0: 13000}


  df = pd.concat([df, pd.DataFrame([[n_iter, is_close_A, is_close_B, is_close_K, is_close_RSS, sparsity_2, sparsity_4]], columns=df.columns)], ignore_index=True)


{0.0: 13000}
Starting iteration 10.


  df = pd.concat([df, pd.DataFrame([[n_iter, is_close_A, is_close_B, is_close_K, is_close_RSS, sparsity_2, sparsity_4]], columns=df.columns)], ignore_index=True)


{0.0: 13000}
Completed iteration 10.


  df = pd.concat([df, pd.DataFrame([[n_iter, is_close_A, is_close_B, is_close_K, is_close_RSS, sparsity_2, sparsity_4]], columns=df.columns)], ignore_index=True)


{0.0: 13000}


  df = pd.concat([df, pd.DataFrame([[n_iter, is_close_A, is_close_B, is_close_K, is_close_RSS, sparsity_2, sparsity_4]], columns=df.columns)], ignore_index=True)


{0.0: 13000}


  df = pd.concat([df, pd.DataFrame([[n_iter, is_close_A, is_close_B, is_close_K, is_close_RSS, sparsity_2, sparsity_4]], columns=df.columns)], ignore_index=True)


{0.0: 13000}


  df = pd.concat([df, pd.DataFrame([[n_iter, is_close_A, is_close_B, is_close_K, is_close_RSS, sparsity_2, sparsity_4]], columns=df.columns)], ignore_index=True)


{0.0: 13000}


  df = pd.concat([df, pd.DataFrame([[n_iter, is_close_A, is_close_B, is_close_K, is_close_RSS, sparsity_2, sparsity_4]], columns=df.columns)], ignore_index=True)


{0.0: 13000}


  df = pd.concat([df, pd.DataFrame([[n_iter, is_close_A, is_close_B, is_close_K, is_close_RSS, sparsity_2, sparsity_4]], columns=df.columns)], ignore_index=True)


{0.0: 13000}


  df = pd.concat([df, pd.DataFrame([[n_iter, is_close_A, is_close_B, is_close_K, is_close_RSS, sparsity_2, sparsity_4]], columns=df.columns)], ignore_index=True)


{0.0: 13000}


  df = pd.concat([df, pd.DataFrame([[n_iter, is_close_A, is_close_B, is_close_K, is_close_RSS, sparsity_2, sparsity_4]], columns=df.columns)], ignore_index=True)


{0.0: 13000}


  df = pd.concat([df, pd.DataFrame([[n_iter, is_close_A, is_close_B, is_close_K, is_close_RSS, sparsity_2, sparsity_4]], columns=df.columns)], ignore_index=True)


{0.0: 13000}
Starting iteration 20.


  df = pd.concat([df, pd.DataFrame([[n_iter, is_close_A, is_close_B, is_close_K, is_close_RSS, sparsity_2, sparsity_4]], columns=df.columns)], ignore_index=True)


{0.0: 13000}
Completed iteration 20.


  df = pd.concat([df, pd.DataFrame([[n_iter, is_close_A, is_close_B, is_close_K, is_close_RSS, sparsity_2, sparsity_4]], columns=df.columns)], ignore_index=True)


{0.0: 13000}


  df = pd.concat([df, pd.DataFrame([[n_iter, is_close_A, is_close_B, is_close_K, is_close_RSS, sparsity_2, sparsity_4]], columns=df.columns)], ignore_index=True)


{0.0: 13000}


  df = pd.concat([df, pd.DataFrame([[n_iter, is_close_A, is_close_B, is_close_K, is_close_RSS, sparsity_2, sparsity_4]], columns=df.columns)], ignore_index=True)


{0.0: 13000}


  df = pd.concat([df, pd.DataFrame([[n_iter, is_close_A, is_close_B, is_close_K, is_close_RSS, sparsity_2, sparsity_4]], columns=df.columns)], ignore_index=True)


{0.0: 13000}


  df = pd.concat([df, pd.DataFrame([[n_iter, is_close_A, is_close_B, is_close_K, is_close_RSS, sparsity_2, sparsity_4]], columns=df.columns)], ignore_index=True)


{0.0: 13000}


  df = pd.concat([df, pd.DataFrame([[n_iter, is_close_A, is_close_B, is_close_K, is_close_RSS, sparsity_2, sparsity_4]], columns=df.columns)], ignore_index=True)


{0.0: 13000}


  df = pd.concat([df, pd.DataFrame([[n_iter, is_close_A, is_close_B, is_close_K, is_close_RSS, sparsity_2, sparsity_4]], columns=df.columns)], ignore_index=True)


{0.0: 13000}


  df = pd.concat([df, pd.DataFrame([[n_iter, is_close_A, is_close_B, is_close_K, is_close_RSS, sparsity_2, sparsity_4]], columns=df.columns)], ignore_index=True)


{0.0: 13000}


  df = pd.concat([df, pd.DataFrame([[n_iter, is_close_A, is_close_B, is_close_K, is_close_RSS, sparsity_2, sparsity_4]], columns=df.columns)], ignore_index=True)


{0.0: 13000}
Starting iteration 30.


  df = pd.concat([df, pd.DataFrame([[n_iter, is_close_A, is_close_B, is_close_K, is_close_RSS, sparsity_2, sparsity_4]], columns=df.columns)], ignore_index=True)


{0.0: 13000}
Completed iteration 30.


  df = pd.concat([df, pd.DataFrame([[n_iter, is_close_A, is_close_B, is_close_K, is_close_RSS, sparsity_2, sparsity_4]], columns=df.columns)], ignore_index=True)


{0.0: 13000}


  df = pd.concat([df, pd.DataFrame([[n_iter, is_close_A, is_close_B, is_close_K, is_close_RSS, sparsity_2, sparsity_4]], columns=df.columns)], ignore_index=True)


{0.0: 13000}
Converged after 32 iterations.
Converged after 32 iterations.
<class 'scipy.sparse.csc.csc_matrix'>
<class 'cupyx.scipy.sparse._csc.csc_matrix'>
(13, 1000)
(13, 1000)


  df = pd.concat([df, pd.DataFrame([[n_iter, is_close_A, is_close_B, is_close_K, is_close_RSS, sparsity_2, sparsity_4]], columns=df.columns)], ignore_index=True)


Unnamed: 0,n_iter,is_close_A,is_close_B,is_close_K,is_close_RSS,sparsity_2,sparsity_4
0,1,True,True,True,True,12655,12655
1,2,True,True,True,True,10104,10104
2,3,True,True,True,True,9461,9461
3,4,True,True,True,True,8972,8972
4,5,True,True,True,True,8789,8789
5,6,True,True,True,True,8585,8585
6,7,True,True,True,True,8587,8587
7,8,True,True,True,True,8581,8581
8,9,True,True,True,True,8723,8723
9,10,True,True,True,True,8744,8744


In [53]:
A = model4.A_ 

In [54]:
# Compute the sparsity of A 
print("Sparsity of A: ", A.getnnz() / (A.shape[0] * A.shape[1])) 

# Compute the sparsity of A using count_nonzero 
print("Sparsity of A: ", A.count_nonzero() / (A.shape[0] * A.shape[1]))

Sparsity of A:  0.6673076923076923
Sparsity of A:  0.6673076923076923


In [55]:
A_new = A.eliminate_zeros()

print(A)

  (0, 0)	0.9050980392156863
  (6, 0)	0.004705882352941178
  (8, 0)	0.01098039215686275
  (9, 0)	0.018039215686274503
  (10, 0)	0.0007843137254901963
  (11, 0)	0.025882352941176467
  (12, 0)	0.03450980392156863
  (0, 1)	0.0015686274509803925
  (2, 1)	0.00627450980392157
  (8, 1)	0.01254901960784314
  (9, 1)	0.8972549019607843
  (10, 1)	0.027450980392156862
  (11, 1)	0.019607843137254905
  (12, 1)	0.03529411764705883
  (0, 2)	0.027450980392156862
  (1, 2)	0.04941176470588236
  (2, 2)	0.05333333333333333
  (3, 2)	0.05333333333333333
  (4, 2)	0.10352941176470587
  (5, 2)	0.02509803921568628
  (6, 2)	0.010196078431372548
  (7, 2)	0.29647058823529415
  (8, 2)	0.25960784313725493
  (9, 2)	0.03215686274509804
  (10, 2)	0.03529411764705882
  :	:
  (12, 996)	0.017254901960784316
  (0, 997)	0.05803921568627451
  (1, 997)	0.04862745098039216
  (2, 997)	0.0776470588235294
  (3, 997)	0.039215686274509796
  (4, 997)	0.0611764705882353
  (5, 997)	0.04784313725490197
  (6, 997)	0.02588235294117646
  (7

In [56]:
# Calculate the sparsity again 

print("Sparsity of A:", A.getnnz() / (A.shape[0] * A.shape[1]))
print("Sparsity of A:", A.count_nonzero() / (A.shape[0] * A.shape[1]))

Sparsity of A: 0.6673076923076923
Sparsity of A: 0.6673076923076923
