In [6]:
import scanpy as sc
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import scipy.stats as st
import cna
import multianndata as mad

# For Aparna's Import
import scipy.io

# For reproducibility
np.random.seed(0) 

In [7]:
def run_cna(variable, cna_obj, covs = None):
    if covs is not None:
        cna_res = cna.tl._association.association(cna_obj, #dataset 
                                                  cna_obj.samplem[variable], #phenotype
            #                                       batches=d.samplem.processing_batch, #batches
                                                  covs = cna_obj.samplem[covs],
                                                  Nnull=10000, # number of null permutations to do (defaults to only 1e3)
                                                  ks=[1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20] # I asked the method to consider up to 10 PCs because
                                                                            #it chose the max number of PCs it considered the default set of [1,2,3,4]
                                                 )
    else:
        cna_res = cna.tl._association.association(cna_obj, #dataset 
                                                  cna_obj.samplem[variable], #phenotype
            #                                       batches=d.samplem.processing_batch, #batches
                                                 # covs = d.samplem[covs]
                                                  Nnull=10000, # number of null permutations to do (defaults to only 1e3)
                                                  ks=[1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20] # I asked the method to consider up to 10 PCs because
                                                                            #it chose the max number of PCs it considered the default set of [1,2,3,4]
                                                 )
        
    return cna_res
def cna_test(meta_df, harmony_df, umap_df, variable, covars = None):
    print(meta_df.shape)
    print(harmony_df.shape)
    print(umap_df.shape)
    cna_object = mad.MultiAnnData(X=harmony_df, obs=meta_df, sampleid="sample")
    if covars is not None:   
        cna_object.obs_to_sample(covars + [variable])
        umap_df.index = cna_object.obs.index
        cna_object.obsm['X_umap'] = umap_df
        cna.pp.knn(cna_object)
        np.random.seed(0) 
        final_res = run_cna(variable, cna_object, covs = covars)
    else:
        cna_object.obs_to_sample([variable])
        umap_df.index = cna_object.obs.index
        cna_object.obsm['X_umap'] = umap_df
        cna.pp.knn(cna_object)
        np.random.seed(0) 
        final_res = run_cna(variable, cna_object, covs = None)
    print(final_res.k)
    print(final_res.ks)
    print('p =', final_res.p, ',', final_res.k, 'PCs used')
    print('total r^2 between top {} NAM PCs and outcome is {:.2f}'.format(final_res.k, final_res.r2))
    return final_res

In [109]:
univariate_res = pd.DataFrame(columns = ['Variable', 'Technology', 'global_pvalue'])

# CASE/CONTROL

## Single Cell

In [37]:
meta = pd.read_csv('/data/srlab/ssg34/SLE_kidney_v2/data/cna_new/downsampled_tissue/case_control/sc_meta.csv')
print(meta.shape)

(79025, 41)


In [39]:
harmony = pd.read_csv('/data/srlab/ssg34/SLE_kidney_v2/data/cna_new/downsampled_tissue/case_control/sc_harmony.csv')
print(harmony.shape)

(79025, 20)


In [40]:
umap = pd.read_csv('/data/srlab/ssg34/SLE_kidney_v2/data/cna_new/downsampled_tissue/case_control/sc_umap.csv')
print(umap.shape)

(79025, 2)


In [41]:
res = cna_test(meta, harmony, umap, 'Case.Control')

(79025, 41)
(79025, 20)
(79025, 2)
['cell' 'sample' 'cell.type' 'Responder.Status' 'Race' 'Final_ISN' 'Type'
 'Final_Site']
consider casting to numeric types where appropriate, and
consider re-coding text-valued columns with pandas.get_dummies
computing default knn graph


  super().__init__(*args, **kwargs)


qcd NAM not found; computing and saving
	taking step 1
	median kurtosis: 51.397159576416016
	20th percentile R2(t,t-1): nan
	taking step 2
	median kurtosis: 33.884281158447266
	20th percentile R2(t,t-1): 0.7785238027572632
	taking step 3
	median kurtosis: 22.908390045166016
	20th percentile R2(t,t-1): 0.9216914176940918
	taking step 4
	median kurtosis: 17.108659744262695
	20th percentile R2(t,t-1): 0.9576030373573303
	taking step 5
	median kurtosis: 13.87435531616211
	20th percentile R2(t,t-1): 0.9738996624946594
	taking step 6
	median kurtosis: 11.900806427001953
	20th percentile R2(t,t-1): 0.9830936789512634
stopping after 6 steps
covariate-adjusted NAM not found; computing and saving




computing SVD
performing association test



KeyboardInterrupt



In [10]:
np.savetxt("/data/srlab/ssg34/SLE_kidney_v2/data/cna_new/downsampled_tissue/case_control/sc_ncorr.csv", 
               res.ncorrs, delimiter=",")
np.savetxt("/data/srlab/ssg34/SLE_kidney_v2/data/cna_new/downsampled_tissue/case_control/sc_fdrs.csv", 
               res.fdrs, delimiter=",")

## Single Nuclear

In [14]:
meta = pd.read_csv('/data/srlab/ssg34/SLE_kidney_v2/data/cna_new/downsampled_tissue/case_control/sn_meta.csv')
print(meta.shape)

(20975, 35)


In [15]:
harmony = pd.read_csv('/data/srlab/ssg34/SLE_kidney_v2/data/cna_new/downsampled_tissue/case_control/sn_harmony.csv')
print(harmony.shape)

(20975, 20)


In [16]:
umap = pd.read_csv('/data/srlab/ssg34/SLE_kidney_v2/data/cna_new/downsampled_tissue/case_control/sn_umap.csv')
print(umap.shape)

(20975, 2)


In [17]:
res = cna_test(meta, harmony, umap, 'Case.Control')

(20975, 35)
(20975, 20)
(20975, 2)
['cell' 'sample' 'cell.type' 'Responder.Status' 'Race' 'Final_ISN' 'Type'
 'Final_Site']
consider casting to numeric types where appropriate, and
consider re-coding text-valued columns with pandas.get_dummies
computing default knn graph


  super().__init__(*args, **kwargs)


qcd NAM not found; computing and saving
	taking step 1
	median kurtosis: 13.905879020690918
	20th percentile R2(t,t-1): nan
	taking step 2
	median kurtosis: 9.825618743896484
	20th percentile R2(t,t-1): 0.7822014451026916
	taking step 3
	median kurtosis: 7.507389545440674
	20th percentile R2(t,t-1): 0.9253754258155823
stopping after 3 steps
covariate-adjusted NAM not found; computing and saving
computing SVD




performing association test




computing neighborhood-level FDRs
16
[1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20]
p = 9.999000099990002e-05 , 16 PCs used
total r^2 between top 16 NAM PCs and outcome is 0.81


In [18]:
np.savetxt("/data/srlab/ssg34/SLE_kidney_v2/data/cna_new/downsampled_tissue/case_control/sn_ncorr.csv", 
               res.ncorrs, delimiter=",")
np.savetxt("/data/srlab/ssg34/SLE_kidney_v2/data/cna_new/downsampled_tissue/case_control/sn_fdrs.csv", 
               res.fdrs, delimiter=",")

# AGE

## Single Cell

In [110]:
meta = pd.read_csv('/data/srlab/ssg34/SLE_kidney_v2/data/cna_new/downsampled_tissue/age/sc_meta.csv')
print(meta.shape)

(71120, 39)


In [111]:
harmony = pd.read_csv('/data/srlab/ssg34/SLE_kidney_v2/data/cna_new/downsampled_tissue/age/sc_harmony.csv')
print(harmony.shape)

(71120, 20)


In [112]:
umap = pd.read_csv('/data/srlab/ssg34/SLE_kidney_v2/data/cna_new/downsampled_tissue/age/sc_umap.csv')
print(umap.shape)

(71120, 2)


In [113]:
res = cna_test(meta, harmony, umap, 'Age')

(71120, 39)
(71120, 20)
(71120, 2)
['cell' 'sample' 'cell.type' 'Responder.Status' 'Race' 'Type' 'Final_ISN'
 'Final_Site']
consider casting to numeric types where appropriate, and
consider re-coding text-valued columns with pandas.get_dummies
computing default knn graph


  super().__init__(*args, **kwargs)


qcd NAM not found; computing and saving
	taking step 1
	median kurtosis: 42.821128845214844
	20th percentile R2(t,t-1): nan
	taking step 2
	median kurtosis: 28.430946350097656
	20th percentile R2(t,t-1): 0.7792342305183411
	taking step 3
	median kurtosis: 19.353656768798828
	20th percentile R2(t,t-1): 0.9228097796440125
	taking step 4
	median kurtosis: 14.69758415222168
	20th percentile R2(t,t-1): 0.9582582116127014
	taking step 5
	median kurtosis: 12.140487670898438
	20th percentile R2(t,t-1): 0.9747512340545654
stopping after 5 steps
covariate-adjusted NAM not found; computing and saving




computing SVD
performing association test
computing neighborhood-level FDRs
8
[1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20]
p = 0.0010998900109989002 , 8 PCs used
total r^2 between top 8 NAM PCs and outcome is 0.19


In [114]:
np.savetxt("/data/srlab/ssg34/SLE_kidney_v2/data/cna_new/downsampled_tissue/age/sc_ncorr.csv", 
               res.ncorrs, delimiter=",")
np.savetxt("/data/srlab/ssg34/SLE_kidney_v2/data/cna_new/downsampled_tissue/age/sc_fdrs.csv", 
               res.fdrs, delimiter=",")

In [115]:
univariate_res = pd.concat([univariate_res,
                            pd.DataFrame(['Age', 'scRNAseq', res.p],
                            index = ['Variable', 'Technology', 'global_pvalue']).T])

## Single Nuclear

In [116]:
meta = pd.read_csv('/data/srlab/ssg34/SLE_kidney_v2/data/cna_new/downsampled_tissue/age/sn_meta.csv')
print(meta.shape)

(18660, 33)


In [117]:
harmony = pd.read_csv('/data/srlab/ssg34/SLE_kidney_v2/data/cna_new/downsampled_tissue/age/sn_harmony.csv')
print(harmony.shape)

(18660, 20)


In [118]:
umap = pd.read_csv('/data/srlab/ssg34/SLE_kidney_v2/data/cna_new/downsampled_tissue/age/sn_umap.csv')
print(umap.shape)

(18660, 2)


In [119]:
res = cna_test(meta, harmony, umap, 'Age')

(18660, 33)
(18660, 20)
(18660, 2)
['cell' 'sample' 'cell.type' 'Responder.Status' 'Race' 'Type' 'Final_ISN'
 'Final_Site']
consider casting to numeric types where appropriate, and
consider re-coding text-valued columns with pandas.get_dummies
computing default knn graph


  super().__init__(*args, **kwargs)


qcd NAM not found; computing and saving
	taking step 1
	median kurtosis: 11.043727199233018
	20th percentile R2(t,t-1): nan
	taking step 2
	median kurtosis: 8.146373564762346
	20th percentile R2(t,t-1): 0.7939578413963317
	taking step 3
	median kurtosis: 6.589556547746421
	20th percentile R2(t,t-1): 0.9295116901397705
stopping after 3 steps
covariate-adjusted NAM not found; computing and saving
computing SVD
performing association test




computing neighborhood-level FDRs
13
[1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20]
p = 0.2953704629537046 , 13 PCs used
total r^2 between top 13 NAM PCs and outcome is 0.49


In [120]:
np.savetxt("/data/srlab/ssg34/SLE_kidney_v2/data/cna_new/downsampled_tissue/age/sn_ncorr.csv", 
               res.ncorrs, delimiter=",")
np.savetxt("/data/srlab/ssg34/SLE_kidney_v2/data/cna_new/downsampled_tissue/age/sn_fdrs.csv", 
               res.fdrs, delimiter=",")

In [121]:
univariate_res = pd.concat([univariate_res,
                            pd.DataFrame(['Age', 'snRNAseq', res.p],
                            index = ['Variable', 'Technology', 'global_pvalue']).T])

# RESPONSE

## Single Cell

In [122]:
meta = pd.read_csv('/data/srlab/ssg34/SLE_kidney_v2/data/cna_new/downsampled_tissue/response/sc_meta.csv')
print(meta.shape)

(57635, 36)


In [123]:
harmony = pd.read_csv('/data/srlab/ssg34/SLE_kidney_v2/data/cna_new/downsampled_tissue/response/sc_harmony.csv')
print(harmony.shape)

(57635, 20)


In [124]:
umap = pd.read_csv('/data/srlab/ssg34/SLE_kidney_v2/data/cna_new/downsampled_tissue/response/sc_umap.csv')
print(umap.shape)

(57635, 2)


In [125]:
res = cna_test(meta, harmony, umap, 'Responder_Status')

(57635, 36)
(57635, 20)
(57635, 2)
['cell' 'sample' 'cell.type' 'Responder.Status' 'Race' 'Type' 'Final_ISN'
 'Final_Site']
consider casting to numeric types where appropriate, and
consider re-coding text-valued columns with pandas.get_dummies
computing default knn graph


  super().__init__(*args, **kwargs)


qcd NAM not found; computing and saving
	taking step 1
	median kurtosis: 34.72321701049805
	20th percentile R2(t,t-1): nan
	taking step 2
	median kurtosis: 23.200876235961914
	20th percentile R2(t,t-1): 0.7833223342895508
	taking step 3
	median kurtosis: 16.03927230834961
	20th percentile R2(t,t-1): 0.9246415495872498
	taking step 4
	median kurtosis: 12.493077278137207
	20th percentile R2(t,t-1): 0.960143506526947
	taking step 5
	median kurtosis: 10.576120376586914
	20th percentile R2(t,t-1): 0.9760511517524719
stopping after 5 steps
covariate-adjusted NAM not found; computing and saving




computing SVD
performing association test
computing neighborhood-level FDRs
4
[1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20]
p = 0.05879412058794121 , 4 PCs used
total r^2 between top 4 NAM PCs and outcome is 0.10


In [126]:
np.savetxt("/data/srlab/ssg34/SLE_kidney_v2/data/cna_new/downsampled_tissue/response/sc_ncorr.csv", 
               res.ncorrs, delimiter=",")
np.savetxt("/data/srlab/ssg34/SLE_kidney_v2/data/cna_new/downsampled_tissue/response/sc_fdrs.csv", 
               res.fdrs, delimiter=",")

In [127]:
univariate_res = pd.concat([univariate_res,
                            pd.DataFrame(['Responder Status', 'scRNAseq', res.p],
                            index = ['Variable', 'Technology', 'global_pvalue']).T])

## Single Nuclear 

In [128]:
meta = pd.read_csv('/data/srlab/ssg34/SLE_kidney_v2/data/cna_new/downsampled_tissue/response/sn_meta.csv')
print(meta.shape)

(18191, 32)


In [129]:
harmony = pd.read_csv('/data/srlab/ssg34/SLE_kidney_v2/data/cna_new/downsampled_tissue/response/sn_harmony.csv')
print(harmony.shape)

(18191, 20)


In [130]:
umap = pd.read_csv('/data/srlab/ssg34/SLE_kidney_v2/data/cna_new/downsampled_tissue/response/sn_umap.csv')
print(umap.shape)

(18191, 2)


In [131]:
res = cna_test(meta, harmony, umap, 'Responder_Status')

(18191, 32)
(18191, 20)
(18191, 2)
['cell' 'sample' 'cell.type' 'Responder.Status' 'Race' 'Type' 'Final_ISN'
 'Final_Site']
consider casting to numeric types where appropriate, and
consider re-coding text-valued columns with pandas.get_dummies
computing default knn graph


  super().__init__(*args, **kwargs)


qcd NAM not found; computing and saving
	taking step 1
	median kurtosis: 10.796865549205963
	20th percentile R2(t,t-1): nan
	taking step 2
	median kurtosis: 7.998422585490272
	20th percentile R2(t,t-1): 0.7938765406608581
	taking step 3
	median kurtosis: 6.50522350648204
	20th percentile R2(t,t-1): 0.9306313276290894
stopping after 3 steps
covariate-adjusted NAM not found; computing and saving
computing SVD
performing association test




computing neighborhood-level FDRs
15
[1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20]
p = 0.5541445855414459 , 15 PCs used
total r^2 between top 15 NAM PCs and outcome is 0.46


In [132]:
np.savetxt("/data/srlab/ssg34/SLE_kidney_v2/data/cna_new/downsampled_tissue/response/sn_ncorr.csv", 
               res.ncorrs, delimiter=",")
np.savetxt("/data/srlab/ssg34/SLE_kidney_v2/data/cna_new/downsampled_tissue/response/sn_fdrs.csv", 
               res.fdrs, delimiter=",")

In [133]:
univariate_res = pd.concat([univariate_res,
                            pd.DataFrame(['Responder Status', 'snRNAseq', res.p],
                            index = ['Variable', 'Technology', 'global_pvalue']).T])

# RACE

## Single Cell

In [134]:
meta = pd.read_csv('/data/srlab/ssg34/SLE_kidney_v2/data/cna_new/downsampled_tissue/race/sc_meta.csv')
print(meta.shape)

(71120, 39)


In [135]:
harmony = pd.read_csv('/data/srlab/ssg34/SLE_kidney_v2/data/cna_new/downsampled_tissue/race/sc_harmony.csv')
print(harmony.shape)

(71120, 20)


In [136]:
umap = pd.read_csv('/data/srlab/ssg34/SLE_kidney_v2/data/cna_new/downsampled_tissue/race/sc_umap.csv')
print(umap.shape)

(71120, 2)


In [137]:
res = cna_test(meta, harmony, umap, 'Race_[A]')

(71120, 39)
(71120, 20)
(71120, 2)
['cell' 'sample' 'cell.type' 'Responder.Status' 'Race' 'Type' 'Final_ISN'
 'Final_Site']
consider casting to numeric types where appropriate, and
consider re-coding text-valued columns with pandas.get_dummies
computing default knn graph


  super().__init__(*args, **kwargs)


qcd NAM not found; computing and saving
	taking step 1
	median kurtosis: 42.821128845214844
	20th percentile R2(t,t-1): nan
	taking step 2
	median kurtosis: 28.430946350097656
	20th percentile R2(t,t-1): 0.7792342305183411
	taking step 3
	median kurtosis: 19.353656768798828
	20th percentile R2(t,t-1): 0.9228097796440125
	taking step 4
	median kurtosis: 14.69758415222168
	20th percentile R2(t,t-1): 0.9582582116127014
	taking step 5
	median kurtosis: 12.140487670898438
	20th percentile R2(t,t-1): 0.9747512340545654
stopping after 5 steps
covariate-adjusted NAM not found; computing and saving




computing SVD
performing association test
computing neighborhood-level FDRs
2
[1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20]
p = 0.5637436256374363 , 2 PCs used
total r^2 between top 2 NAM PCs and outcome is 0.02


In [138]:
univariate_res = pd.concat([univariate_res,
                            pd.DataFrame(['Race_[A]', 'scRNAseq', res.p],
                            index = ['Variable', 'Technology', 'global_pvalue']).T])

In [139]:
res = cna_test(meta, harmony, umap, 'Race_[A][B]')

(71120, 39)
(71120, 20)
(71120, 2)
['cell' 'sample' 'cell.type' 'Responder.Status' 'Race' 'Type' 'Final_ISN'
 'Final_Site']
consider casting to numeric types where appropriate, and
consider re-coding text-valued columns with pandas.get_dummies
computing default knn graph


  super().__init__(*args, **kwargs)


qcd NAM not found; computing and saving
	taking step 1
	median kurtosis: 42.821128845214844
	20th percentile R2(t,t-1): nan
	taking step 2
	median kurtosis: 28.430946350097656
	20th percentile R2(t,t-1): 0.7792342305183411
	taking step 3
	median kurtosis: 19.353656768798828
	20th percentile R2(t,t-1): 0.9228097796440125
	taking step 4
	median kurtosis: 14.69758415222168
	20th percentile R2(t,t-1): 0.9582582116127014
	taking step 5
	median kurtosis: 12.140487670898438
	20th percentile R2(t,t-1): 0.9747512340545654
stopping after 5 steps
covariate-adjusted NAM not found; computing and saving




computing SVD
performing association test
computing neighborhood-level FDRs
16
[1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20]
p = 0.390960903909609 , 16 PCs used
total r^2 between top 16 NAM PCs and outcome is 0.13


In [140]:
univariate_res = pd.concat([univariate_res,
                            pd.DataFrame(['Race_[A][B]', 'scRNAseq', res.p],
                            index = ['Variable', 'Technology', 'global_pvalue']).T])

In [141]:
res = cna_test(meta, harmony, umap, 'Race_[B]')

(71120, 39)
(71120, 20)
(71120, 2)
['cell' 'sample' 'cell.type' 'Responder.Status' 'Race' 'Type' 'Final_ISN'
 'Final_Site']
consider casting to numeric types where appropriate, and
consider re-coding text-valued columns with pandas.get_dummies
computing default knn graph


  super().__init__(*args, **kwargs)


qcd NAM not found; computing and saving
	taking step 1
	median kurtosis: 42.821128845214844
	20th percentile R2(t,t-1): nan
	taking step 2
	median kurtosis: 28.430946350097656
	20th percentile R2(t,t-1): 0.7792342305183411
	taking step 3
	median kurtosis: 19.353656768798828
	20th percentile R2(t,t-1): 0.9228097796440125
	taking step 4
	median kurtosis: 14.69758415222168
	20th percentile R2(t,t-1): 0.9582582116127014
	taking step 5
	median kurtosis: 12.140487670898438
	20th percentile R2(t,t-1): 0.9747512340545654
stopping after 5 steps
covariate-adjusted NAM not found; computing and saving




computing SVD
performing association test
computing neighborhood-level FDRs
3
[1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20]
p = 0.6483351664833517 , 3 PCs used
total r^2 between top 3 NAM PCs and outcome is 0.03


In [142]:
univariate_res = pd.concat([univariate_res,
                            pd.DataFrame(['Race_[B]', 'scRNAseq', res.p],
                            index = ['Variable', 'Technology', 'global_pvalue']).T])

In [143]:
res = cna_test(meta, harmony, umap, 'Race_[B][AI]')

(71120, 39)
(71120, 20)
(71120, 2)
['cell' 'sample' 'cell.type' 'Responder.Status' 'Race' 'Type' 'Final_ISN'
 'Final_Site']
consider casting to numeric types where appropriate, and
consider re-coding text-valued columns with pandas.get_dummies
computing default knn graph


  super().__init__(*args, **kwargs)


qcd NAM not found; computing and saving
	taking step 1
	median kurtosis: 42.821128845214844
	20th percentile R2(t,t-1): nan
	taking step 2
	median kurtosis: 28.430946350097656
	20th percentile R2(t,t-1): 0.7792342305183411
	taking step 3
	median kurtosis: 19.353656768798828
	20th percentile R2(t,t-1): 0.9228097796440125
	taking step 4
	median kurtosis: 14.69758415222168
	20th percentile R2(t,t-1): 0.9582582116127014
	taking step 5
	median kurtosis: 12.140487670898438
	20th percentile R2(t,t-1): 0.9747512340545654
stopping after 5 steps
covariate-adjusted NAM not found; computing and saving




computing SVD
performing association test
computing neighborhood-level FDRs
3
[1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20]
p = 0.618938106189381 , 3 PCs used
total r^2 between top 3 NAM PCs and outcome is 0.02


In [144]:
univariate_res = pd.concat([univariate_res,
                            pd.DataFrame(['Race_[B][AI]', 'scRNAseq', res.p],
                            index = ['Variable', 'Technology', 'global_pvalue']).T])

In [145]:
res = cna_test(meta, harmony, umap, 'Race_[B][W]')

(71120, 39)
(71120, 20)
(71120, 2)
['cell' 'sample' 'cell.type' 'Responder.Status' 'Race' 'Type' 'Final_ISN'
 'Final_Site']
consider casting to numeric types where appropriate, and
consider re-coding text-valued columns with pandas.get_dummies
computing default knn graph


  super().__init__(*args, **kwargs)


qcd NAM not found; computing and saving
	taking step 1
	median kurtosis: 42.821128845214844
	20th percentile R2(t,t-1): nan
	taking step 2
	median kurtosis: 28.430946350097656
	20th percentile R2(t,t-1): 0.7792342305183411
	taking step 3
	median kurtosis: 19.353656768798828
	20th percentile R2(t,t-1): 0.9228097796440125
	taking step 4
	median kurtosis: 14.69758415222168
	20th percentile R2(t,t-1): 0.9582582116127014
	taking step 5
	median kurtosis: 12.140487670898438
	20th percentile R2(t,t-1): 0.9747512340545654
stopping after 5 steps
covariate-adjusted NAM not found; computing and saving




computing SVD
performing association test
computing neighborhood-level FDRs
1
[1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20]
p = 0.5257474252574742 , 1 PCs used
total r^2 between top 1 NAM PCs and outcome is 0.01


In [146]:
univariate_res = pd.concat([univariate_res,
                            pd.DataFrame(['Race_[B][W]', 'scRNAseq', res.p],
                            index = ['Variable', 'Technology', 'global_pvalue']).T])

In [147]:
res = cna_test(meta, harmony, umap, 'Race_[U]')

(71120, 39)
(71120, 20)
(71120, 2)
['cell' 'sample' 'cell.type' 'Responder.Status' 'Race' 'Type' 'Final_ISN'
 'Final_Site']
consider casting to numeric types where appropriate, and
consider re-coding text-valued columns with pandas.get_dummies
computing default knn graph


  super().__init__(*args, **kwargs)


qcd NAM not found; computing and saving
	taking step 1
	median kurtosis: 42.821128845214844
	20th percentile R2(t,t-1): nan
	taking step 2
	median kurtosis: 28.430946350097656
	20th percentile R2(t,t-1): 0.7792342305183411
	taking step 3
	median kurtosis: 19.353656768798828
	20th percentile R2(t,t-1): 0.9228097796440125
	taking step 4
	median kurtosis: 14.69758415222168
	20th percentile R2(t,t-1): 0.9582582116127014
	taking step 5
	median kurtosis: 12.140487670898438
	20th percentile R2(t,t-1): 0.9747512340545654
stopping after 5 steps
covariate-adjusted NAM not found; computing and saving




computing SVD
performing association test




computing neighborhood-level FDRs
20
[1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20]
p = 0.05779422057794221 , 20 PCs used
total r^2 between top 20 NAM PCs and outcome is 0.23


In [148]:
univariate_res = pd.concat([univariate_res,
                            pd.DataFrame(['Race_[U]', 'scRNAseq', res.p],
                            index = ['Variable', 'Technology', 'global_pvalue']).T])

In [149]:
res = cna_test(meta, harmony, umap, 'Race_[W]')

(71120, 39)
(71120, 20)
(71120, 2)
['cell' 'sample' 'cell.type' 'Responder.Status' 'Race' 'Type' 'Final_ISN'
 'Final_Site']
consider casting to numeric types where appropriate, and
consider re-coding text-valued columns with pandas.get_dummies
computing default knn graph


  super().__init__(*args, **kwargs)


qcd NAM not found; computing and saving
	taking step 1
	median kurtosis: 42.821128845214844
	20th percentile R2(t,t-1): nan
	taking step 2
	median kurtosis: 28.430946350097656
	20th percentile R2(t,t-1): 0.7792342305183411
	taking step 3
	median kurtosis: 19.353656768798828
	20th percentile R2(t,t-1): 0.9228097796440125
	taking step 4
	median kurtosis: 14.69758415222168
	20th percentile R2(t,t-1): 0.9582582116127014
	taking step 5
	median kurtosis: 12.140487670898438
	20th percentile R2(t,t-1): 0.9747512340545654
stopping after 5 steps
covariate-adjusted NAM not found; computing and saving




computing SVD
performing association test
computing neighborhood-level FDRs
3
[1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20]
p = 0.056394360563943605 , 3 PCs used
total r^2 between top 3 NAM PCs and outcome is 0.07


In [150]:
univariate_res = pd.concat([univariate_res,
                            pd.DataFrame(['Race_[W]', 'scRNAseq', res.p],
                            index = ['Variable', 'Technology', 'global_pvalue']).T])

## Single Nuclear

In [152]:
meta = pd.read_csv('/data/srlab/ssg34/SLE_kidney_v2/data/cna_new/downsampled_tissue/race/sn_meta.csv')
print(meta.shape)

(18660, 33)


In [153]:
harmony = pd.read_csv('/data/srlab/ssg34/SLE_kidney_v2/data/cna_new/downsampled_tissue/race/sn_harmony.csv')
print(harmony.shape)

(18660, 20)


In [154]:
umap = pd.read_csv('/data/srlab/ssg34/SLE_kidney_v2/data/cna_new/downsampled_tissue/race/sn_umap.csv')
print(umap.shape)

(18660, 2)


In [156]:
res = cna_test(meta, harmony, umap, 'Race_[A]')
univariate_res = pd.concat([univariate_res,
                            pd.DataFrame(['Race_[A]', 'snRNAseq', res.p],
                            index = ['Variable', 'Technology', 'global_pvalue']).T])

(18660, 33)
(18660, 20)
(18660, 2)
['cell' 'sample' 'cell.type' 'Responder.Status' 'Race' 'Type' 'Final_ISN'
 'Final_Site']
consider casting to numeric types where appropriate, and
consider re-coding text-valued columns with pandas.get_dummies
computing default knn graph


  super().__init__(*args, **kwargs)


qcd NAM not found; computing and saving
	taking step 1
	median kurtosis: 11.043727199233018
	20th percentile R2(t,t-1): nan
	taking step 2
	median kurtosis: 8.146373564762346
	20th percentile R2(t,t-1): 0.7939578413963317
	taking step 3
	median kurtosis: 6.589556547746421
	20th percentile R2(t,t-1): 0.9295116901397705
stopping after 3 steps
covariate-adjusted NAM not found; computing and saving
computing SVD
performing association test




computing neighborhood-level FDRs
3
[1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20]
p = 0.6703329667033296 , 3 PCs used
total r^2 between top 3 NAM PCs and outcome is 0.16


In [160]:
res = cna_test(meta, harmony, umap, 'Race_[B]')
univariate_res = pd.concat([univariate_res,
                            pd.DataFrame(['Race_[B]', 'snRNAseq', res.p],
                            index = ['Variable', 'Technology', 'global_pvalue']).T])

(18660, 33)
(18660, 20)
(18660, 2)
['cell' 'sample' 'cell.type' 'Responder.Status' 'Race' 'Type' 'Final_ISN'
 'Final_Site']
consider casting to numeric types where appropriate, and
consider re-coding text-valued columns with pandas.get_dummies
computing default knn graph


  super().__init__(*args, **kwargs)


qcd NAM not found; computing and saving
	taking step 1
	median kurtosis: 11.043727199233018
	20th percentile R2(t,t-1): nan
	taking step 2
	median kurtosis: 8.146373564762346
	20th percentile R2(t,t-1): 0.7939578413963317
	taking step 3
	median kurtosis: 6.589556547746421
	20th percentile R2(t,t-1): 0.9295116901397705
stopping after 3 steps
covariate-adjusted NAM not found; computing and saving
computing SVD
performing association test




computing neighborhood-level FDRs
16
[1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20]
p = 0.11728827117288271 , 16 PCs used
total r^2 between top 16 NAM PCs and outcome is 0.59


In [161]:
res = cna_test(meta, harmony, umap, 'Race_[U]')
univariate_res = pd.concat([univariate_res,
                            pd.DataFrame(['Race_[U]', 'snRNAseq', res.p],
                            index = ['Variable', 'Technology', 'global_pvalue']).T])

(18660, 33)
(18660, 20)
(18660, 2)
['cell' 'sample' 'cell.type' 'Responder.Status' 'Race' 'Type' 'Final_ISN'
 'Final_Site']
consider casting to numeric types where appropriate, and
consider re-coding text-valued columns with pandas.get_dummies
computing default knn graph


  super().__init__(*args, **kwargs)


qcd NAM not found; computing and saving
	taking step 1
	median kurtosis: 11.043727199233018
	20th percentile R2(t,t-1): nan
	taking step 2
	median kurtosis: 8.146373564762346
	20th percentile R2(t,t-1): 0.7939578413963317
	taking step 3
	median kurtosis: 6.589556547746421
	20th percentile R2(t,t-1): 0.9295116901397705
stopping after 3 steps
covariate-adjusted NAM not found; computing and saving
computing SVD
performing association test




computing neighborhood-level FDRs
15
[1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20]
p = 0.5904409559044096 , 15 PCs used
total r^2 between top 15 NAM PCs and outcome is 0.44


In [162]:
res = cna_test(meta, harmony, umap, 'Race_[W]')
univariate_res = pd.concat([univariate_res,
                            pd.DataFrame(['Race_[W]', 'snRNAseq', res.p],
                            index = ['Variable', 'Technology', 'global_pvalue']).T])

(18660, 33)
(18660, 20)
(18660, 2)
['cell' 'sample' 'cell.type' 'Responder.Status' 'Race' 'Type' 'Final_ISN'
 'Final_Site']
consider casting to numeric types where appropriate, and
consider re-coding text-valued columns with pandas.get_dummies
computing default knn graph


  super().__init__(*args, **kwargs)


qcd NAM not found; computing and saving
	taking step 1
	median kurtosis: 11.043727199233018
	20th percentile R2(t,t-1): nan
	taking step 2
	median kurtosis: 8.146373564762346
	20th percentile R2(t,t-1): 0.7939578413963317
	taking step 3
	median kurtosis: 6.589556547746421
	20th percentile R2(t,t-1): 0.9295116901397705
stopping after 3 steps
covariate-adjusted NAM not found; computing and saving
computing SVD
performing association test




computing neighborhood-level FDRs
3
[1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20]
p = 0.0024997500249975004 , 3 PCs used
total r^2 between top 3 NAM PCs and outcome is 0.47


# SITE

## Single Cell

In [163]:
meta = pd.read_csv('/data/srlab/ssg34/SLE_kidney_v2/data/cna_new/downsampled_tissue/site/sc_meta.csv')
print(meta.shape)

(71120, 39)


In [164]:
harmony = pd.read_csv('/data/srlab/ssg34/SLE_kidney_v2/data/cna_new/downsampled_tissue/site/sc_harmony.csv')
print(harmony.shape)

(71120, 20)


In [165]:
umap = pd.read_csv('/data/srlab/ssg34/SLE_kidney_v2/data/cna_new/downsampled_tissue/site/sc_umap.csv')
print(umap.shape)

(71120, 2)


In [167]:
meta.columns

Index(['cell', 'Sex', 'sample', 'cell.type', 'Responder.Status', 'Age', 'Race',
       'Type', 'Final_ISN', 'Final_Chronicity', 'Final_Activity', 'Final_Site',
       'First_biop', 'Pred_use', 'Responder_Status', 'Race_[A]', 'Race_[A][B]',
       'Race_[B]', 'Race_[B][AI]', 'Race_[B][W]', 'Race_[U]', 'Race_[W]',
       'Final_ISN_[III]', 'Final_ISN_[III][V]', 'Final_ISN_[IV]',
       'Final_ISN_[IV][V]', 'Final_ISN_[V]', 'Final_Site_Cincinnati',
       'Final_Site_Einstein', 'Final_Site_JHU', 'Final_Site_Michigan',
       'Final_Site_MUSC', 'Final_Site_Northwell', 'Final_Site_NYU',
       'Final_Site_Rochester', 'Final_Site_Texas Tech', 'Final_Site_UCLA',
       'Final_Site_UCSD', 'Final_Site_UCSF'],
      dtype='object')

In [169]:
res = cna_test(meta, harmony, umap, 'Final_Site_Cincinnati')
univariate_res = pd.concat([univariate_res,
                            pd.DataFrame(['Site Cincinnati', 'scRNAseq', res.p],
                            index = ['Variable', 'Technology', 'global_pvalue']).T])

(71120, 39)
(71120, 20)
(71120, 2)
['cell' 'sample' 'cell.type' 'Responder.Status' 'Race' 'Type' 'Final_ISN'
 'Final_Site']
consider casting to numeric types where appropriate, and
consider re-coding text-valued columns with pandas.get_dummies
computing default knn graph


  super().__init__(*args, **kwargs)


qcd NAM not found; computing and saving
	taking step 1
	median kurtosis: 42.821128845214844
	20th percentile R2(t,t-1): nan
	taking step 2
	median kurtosis: 28.430946350097656
	20th percentile R2(t,t-1): 0.7792342305183411
	taking step 3
	median kurtosis: 19.353656768798828
	20th percentile R2(t,t-1): 0.9228097796440125
	taking step 4
	median kurtosis: 14.69758415222168
	20th percentile R2(t,t-1): 0.9582582116127014
	taking step 5
	median kurtosis: 12.140487670898438
	20th percentile R2(t,t-1): 0.9747512340545654
stopping after 5 steps
covariate-adjusted NAM not found; computing and saving




computing SVD
performing association test
computing neighborhood-level FDRs
2
[1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20]
p = 0.49035096490350966 , 2 PCs used
total r^2 between top 2 NAM PCs and outcome is 0.02


In [170]:
res = cna_test(meta, harmony, umap, 'Final_Site_Einstein')
univariate_res = pd.concat([univariate_res,
                            pd.DataFrame(['Site Einstein', 'scRNAseq', res.p],
                            index = ['Variable', 'Technology', 'global_pvalue']).T])

(71120, 39)
(71120, 20)
(71120, 2)
['cell' 'sample' 'cell.type' 'Responder.Status' 'Race' 'Type' 'Final_ISN'
 'Final_Site']
consider casting to numeric types where appropriate, and
consider re-coding text-valued columns with pandas.get_dummies
computing default knn graph


  super().__init__(*args, **kwargs)


qcd NAM not found; computing and saving
	taking step 1
	median kurtosis: 42.821128845214844
	20th percentile R2(t,t-1): nan
	taking step 2
	median kurtosis: 28.430946350097656
	20th percentile R2(t,t-1): 0.7792342305183411
	taking step 3
	median kurtosis: 19.353656768798828
	20th percentile R2(t,t-1): 0.9228097796440125
	taking step 4
	median kurtosis: 14.69758415222168
	20th percentile R2(t,t-1): 0.9582582116127014
	taking step 5
	median kurtosis: 12.140487670898438
	20th percentile R2(t,t-1): 0.9747512340545654
stopping after 5 steps
covariate-adjusted NAM not found; computing and saving




computing SVD
performing association test
computing neighborhood-level FDRs
16
[1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20]
p = 0.00029997000299970003 , 16 PCs used
total r^2 between top 16 NAM PCs and outcome is 0.38


In [171]:
res = cna_test(meta, harmony, umap, 'Final_Site_JHU')
univariate_res = pd.concat([univariate_res,
                            pd.DataFrame(['Site JHU', 'scRNAseq', res.p],
                            index = ['Variable', 'Technology', 'global_pvalue']).T])

(71120, 39)
(71120, 20)
(71120, 2)
['cell' 'sample' 'cell.type' 'Responder.Status' 'Race' 'Type' 'Final_ISN'
 'Final_Site']
consider casting to numeric types where appropriate, and
consider re-coding text-valued columns with pandas.get_dummies
computing default knn graph


  super().__init__(*args, **kwargs)


qcd NAM not found; computing and saving
	taking step 1
	median kurtosis: 42.821128845214844
	20th percentile R2(t,t-1): nan
	taking step 2
	median kurtosis: 28.430946350097656
	20th percentile R2(t,t-1): 0.7792342305183411
	taking step 3
	median kurtosis: 19.353656768798828
	20th percentile R2(t,t-1): 0.9228097796440125
	taking step 4
	median kurtosis: 14.69758415222168
	20th percentile R2(t,t-1): 0.9582582116127014
	taking step 5
	median kurtosis: 12.140487670898438
	20th percentile R2(t,t-1): 0.9747512340545654
stopping after 5 steps
covariate-adjusted NAM not found; computing and saving




computing SVD
performing association test
computing neighborhood-level FDRs
19
[1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20]
p = 0.014698530146985302 , 19 PCs used
total r^2 between top 19 NAM PCs and outcome is 0.23


In [172]:
res = cna_test(meta, harmony, umap, 'Final_Site_Michigan')
univariate_res = pd.concat([univariate_res,
                            pd.DataFrame(['Site Michigan', 'scRNAseq', res.p],
                            index = ['Variable', 'Technology', 'global_pvalue']).T])

(71120, 39)
(71120, 20)
(71120, 2)
['cell' 'sample' 'cell.type' 'Responder.Status' 'Race' 'Type' 'Final_ISN'
 'Final_Site']
consider casting to numeric types where appropriate, and
consider re-coding text-valued columns with pandas.get_dummies
computing default knn graph


  super().__init__(*args, **kwargs)


qcd NAM not found; computing and saving
	taking step 1
	median kurtosis: 42.821128845214844
	20th percentile R2(t,t-1): nan
	taking step 2
	median kurtosis: 28.430946350097656
	20th percentile R2(t,t-1): 0.7792342305183411
	taking step 3
	median kurtosis: 19.353656768798828
	20th percentile R2(t,t-1): 0.9228097796440125
	taking step 4
	median kurtosis: 14.69758415222168
	20th percentile R2(t,t-1): 0.9582582116127014
	taking step 5
	median kurtosis: 12.140487670898438
	20th percentile R2(t,t-1): 0.9747512340545654
stopping after 5 steps
covariate-adjusted NAM not found; computing and saving




computing SVD
performing association test
computing neighborhood-level FDRs
5
[1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20]
p = 0.5487451254874512 , 5 PCs used
total r^2 between top 5 NAM PCs and outcome is 0.04


In [173]:
res = cna_test(meta, harmony, umap, 'Final_Site_MUSC')
univariate_res = pd.concat([univariate_res,
                            pd.DataFrame(['Site MUSC', 'scRNAseq', res.p],
                            index = ['Variable', 'Technology', 'global_pvalue']).T])

(71120, 39)
(71120, 20)
(71120, 2)
['cell' 'sample' 'cell.type' 'Responder.Status' 'Race' 'Type' 'Final_ISN'
 'Final_Site']
consider casting to numeric types where appropriate, and
consider re-coding text-valued columns with pandas.get_dummies
computing default knn graph


  super().__init__(*args, **kwargs)


qcd NAM not found; computing and saving
	taking step 1
	median kurtosis: 42.821128845214844
	20th percentile R2(t,t-1): nan
	taking step 2
	median kurtosis: 28.430946350097656
	20th percentile R2(t,t-1): 0.7792342305183411
	taking step 3
	median kurtosis: 19.353656768798828
	20th percentile R2(t,t-1): 0.9228097796440125
	taking step 4
	median kurtosis: 14.69758415222168
	20th percentile R2(t,t-1): 0.9582582116127014
	taking step 5
	median kurtosis: 12.140487670898438
	20th percentile R2(t,t-1): 0.9747512340545654
stopping after 5 steps
covariate-adjusted NAM not found; computing and saving




computing SVD
performing association test
computing neighborhood-level FDRs
4
[1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20]
p = 0.18548145185481452 , 4 PCs used
total r^2 between top 4 NAM PCs and outcome is 0.08


In [174]:
res = cna_test(meta, harmony, umap, 'Final_Site_Northwell')
univariate_res = pd.concat([univariate_res,
                            pd.DataFrame(['Site Northwell', 'scRNAseq', res.p],
                            index = ['Variable', 'Technology', 'global_pvalue']).T])

(71120, 39)
(71120, 20)
(71120, 2)
['cell' 'sample' 'cell.type' 'Responder.Status' 'Race' 'Type' 'Final_ISN'
 'Final_Site']
consider casting to numeric types where appropriate, and
consider re-coding text-valued columns with pandas.get_dummies
computing default knn graph


  super().__init__(*args, **kwargs)


qcd NAM not found; computing and saving
	taking step 1
	median kurtosis: 42.821128845214844
	20th percentile R2(t,t-1): nan
	taking step 2
	median kurtosis: 28.430946350097656
	20th percentile R2(t,t-1): 0.7792342305183411
	taking step 3
	median kurtosis: 19.353656768798828
	20th percentile R2(t,t-1): 0.9228097796440125
	taking step 4
	median kurtosis: 14.69758415222168
	20th percentile R2(t,t-1): 0.9582582116127014
	taking step 5
	median kurtosis: 12.140487670898438
	20th percentile R2(t,t-1): 0.9747512340545654
stopping after 5 steps
covariate-adjusted NAM not found; computing and saving




computing SVD
performing association test
computing neighborhood-level FDRs
5
[1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20]
p = 0.7468253174682532 , 5 PCs used
total r^2 between top 5 NAM PCs and outcome is 0.04


In [175]:
res = cna_test(meta, harmony, umap, 'Final_Site_NYU')
univariate_res = pd.concat([univariate_res,
                            pd.DataFrame(['Site NYU', 'scRNAseq', res.p],
                            index = ['Variable', 'Technology', 'global_pvalue']).T])

(71120, 39)
(71120, 20)
(71120, 2)
['cell' 'sample' 'cell.type' 'Responder.Status' 'Race' 'Type' 'Final_ISN'
 'Final_Site']
consider casting to numeric types where appropriate, and
consider re-coding text-valued columns with pandas.get_dummies
computing default knn graph


  super().__init__(*args, **kwargs)


qcd NAM not found; computing and saving
	taking step 1
	median kurtosis: 42.821128845214844
	20th percentile R2(t,t-1): nan
	taking step 2
	median kurtosis: 28.430946350097656
	20th percentile R2(t,t-1): 0.7792342305183411
	taking step 3
	median kurtosis: 19.353656768798828
	20th percentile R2(t,t-1): 0.9228097796440125
	taking step 4
	median kurtosis: 14.69758415222168
	20th percentile R2(t,t-1): 0.9582582116127014
	taking step 5
	median kurtosis: 12.140487670898438
	20th percentile R2(t,t-1): 0.9747512340545654
stopping after 5 steps
covariate-adjusted NAM not found; computing and saving




computing SVD
performing association test
computing neighborhood-level FDRs
12
[1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20]
p = 0.0010998900109989002 , 12 PCs used
total r^2 between top 12 NAM PCs and outcome is 0.22


In [176]:
res = cna_test(meta, harmony, umap, 'Final_Site_Rochester')
univariate_res = pd.concat([univariate_res,
                            pd.DataFrame(['Site Rochester', 'scRNAseq', res.p],
                            index = ['Variable', 'Technology', 'global_pvalue']).T])

(71120, 39)
(71120, 20)
(71120, 2)
['cell' 'sample' 'cell.type' 'Responder.Status' 'Race' 'Type' 'Final_ISN'
 'Final_Site']
consider casting to numeric types where appropriate, and
consider re-coding text-valued columns with pandas.get_dummies
computing default knn graph


  super().__init__(*args, **kwargs)


qcd NAM not found; computing and saving
	taking step 1
	median kurtosis: 42.821128845214844
	20th percentile R2(t,t-1): nan
	taking step 2
	median kurtosis: 28.430946350097656
	20th percentile R2(t,t-1): 0.7792342305183411
	taking step 3
	median kurtosis: 19.353656768798828
	20th percentile R2(t,t-1): 0.9228097796440125
	taking step 4
	median kurtosis: 14.69758415222168
	20th percentile R2(t,t-1): 0.9582582116127014
	taking step 5
	median kurtosis: 12.140487670898438
	20th percentile R2(t,t-1): 0.9747512340545654
stopping after 5 steps
covariate-adjusted NAM not found; computing and saving




computing SVD
performing association test
computing neighborhood-level FDRs
11
[1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20]
p = 0.4754524547545245 , 11 PCs used
total r^2 between top 11 NAM PCs and outcome is 0.10


In [177]:
res = cna_test(meta, harmony, umap, 'Final_Site_Texas Tech')
univariate_res = pd.concat([univariate_res,
                            pd.DataFrame(['Site Texas Tech', 'scRNAseq', res.p],
                            index = ['Variable', 'Technology', 'global_pvalue']).T])

(71120, 39)
(71120, 20)
(71120, 2)
['cell' 'sample' 'cell.type' 'Responder.Status' 'Race' 'Type' 'Final_ISN'
 'Final_Site']
consider casting to numeric types where appropriate, and
consider re-coding text-valued columns with pandas.get_dummies
computing default knn graph


  super().__init__(*args, **kwargs)


qcd NAM not found; computing and saving
	taking step 1
	median kurtosis: 42.821128845214844
	20th percentile R2(t,t-1): nan
	taking step 2
	median kurtosis: 28.430946350097656
	20th percentile R2(t,t-1): 0.7792342305183411
	taking step 3
	median kurtosis: 19.353656768798828
	20th percentile R2(t,t-1): 0.9228097796440125
	taking step 4
	median kurtosis: 14.69758415222168
	20th percentile R2(t,t-1): 0.9582582116127014
	taking step 5
	median kurtosis: 12.140487670898438
	20th percentile R2(t,t-1): 0.9747512340545654
stopping after 5 steps
covariate-adjusted NAM not found; computing and saving




computing SVD
performing association test
computing neighborhood-level FDRs
1
[1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20]
p = 0.140985901409859 , 1 PCs used
total r^2 between top 1 NAM PCs and outcome is 0.06


In [178]:
res = cna_test(meta, harmony, umap, 'Final_Site_UCLA')
univariate_res = pd.concat([univariate_res,
                            pd.DataFrame(['Site UCLA', 'scRNAseq', res.p],
                            index = ['Variable', 'Technology', 'global_pvalue']).T])

(71120, 39)
(71120, 20)
(71120, 2)
['cell' 'sample' 'cell.type' 'Responder.Status' 'Race' 'Type' 'Final_ISN'
 'Final_Site']
consider casting to numeric types where appropriate, and
consider re-coding text-valued columns with pandas.get_dummies
computing default knn graph


  super().__init__(*args, **kwargs)


qcd NAM not found; computing and saving
	taking step 1
	median kurtosis: 42.821128845214844
	20th percentile R2(t,t-1): nan
	taking step 2
	median kurtosis: 28.430946350097656
	20th percentile R2(t,t-1): 0.7792342305183411
	taking step 3
	median kurtosis: 19.353656768798828
	20th percentile R2(t,t-1): 0.9228097796440125
	taking step 4
	median kurtosis: 14.69758415222168
	20th percentile R2(t,t-1): 0.9582582116127014
	taking step 5
	median kurtosis: 12.140487670898438
	20th percentile R2(t,t-1): 0.9747512340545654
stopping after 5 steps
covariate-adjusted NAM not found; computing and saving




computing SVD
performing association test
computing neighborhood-level FDRs
4
[1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20]
p = 0.1024897510248975 , 4 PCs used
total r^2 between top 4 NAM PCs and outcome is 0.12


In [180]:
res = cna_test(meta, harmony, umap, 'Final_Site_UCSD')
univariate_res = pd.concat([univariate_res,
                            pd.DataFrame(['Site UCSD', 'scRNAseq', res.p],
                            index = ['Variable', 'Technology', 'global_pvalue']).T])

(71120, 39)
(71120, 20)
(71120, 2)
['cell' 'sample' 'cell.type' 'Responder.Status' 'Race' 'Type' 'Final_ISN'
 'Final_Site']
consider casting to numeric types where appropriate, and
consider re-coding text-valued columns with pandas.get_dummies
computing default knn graph


  super().__init__(*args, **kwargs)


qcd NAM not found; computing and saving
	taking step 1
	median kurtosis: 42.821128845214844
	20th percentile R2(t,t-1): nan
	taking step 2
	median kurtosis: 28.430946350097656
	20th percentile R2(t,t-1): 0.7792342305183411
	taking step 3
	median kurtosis: 19.353656768798828
	20th percentile R2(t,t-1): 0.9228097796440125
	taking step 4
	median kurtosis: 14.69758415222168
	20th percentile R2(t,t-1): 0.9582582116127014
	taking step 5
	median kurtosis: 12.140487670898438
	20th percentile R2(t,t-1): 0.9747512340545654
stopping after 5 steps
covariate-adjusted NAM not found; computing and saving




computing SVD
performing association test
computing neighborhood-level FDRs
1
[1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20]
p = 0.5049495050494951 , 1 PCs used
total r^2 between top 1 NAM PCs and outcome is 0.01


In [181]:
res = cna_test(meta, harmony, umap, 'Final_Site_UCSF')
univariate_res = pd.concat([univariate_res,
                            pd.DataFrame(['Site UCSF', 'scRNAseq', res.p],
                            index = ['Variable', 'Technology', 'global_pvalue']).T])

(71120, 39)
(71120, 20)
(71120, 2)
['cell' 'sample' 'cell.type' 'Responder.Status' 'Race' 'Type' 'Final_ISN'
 'Final_Site']
consider casting to numeric types where appropriate, and
consider re-coding text-valued columns with pandas.get_dummies
computing default knn graph


  super().__init__(*args, **kwargs)


qcd NAM not found; computing and saving
	taking step 1
	median kurtosis: 42.821128845214844
	20th percentile R2(t,t-1): nan
	taking step 2
	median kurtosis: 28.430946350097656
	20th percentile R2(t,t-1): 0.7792342305183411
	taking step 3
	median kurtosis: 19.353656768798828
	20th percentile R2(t,t-1): 0.9228097796440125
	taking step 4
	median kurtosis: 14.69758415222168
	20th percentile R2(t,t-1): 0.9582582116127014
	taking step 5
	median kurtosis: 12.140487670898438
	20th percentile R2(t,t-1): 0.9747512340545654
stopping after 5 steps
covariate-adjusted NAM not found; computing and saving




computing SVD
performing association test
computing neighborhood-level FDRs
1
[1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20]
p = 0.973002699730027 , 1 PCs used
total r^2 between top 1 NAM PCs and outcome is 0.00


## Single Nuclear

In [182]:
meta = pd.read_csv('/data/srlab/ssg34/SLE_kidney_v2/data/cna_new/downsampled_tissue/site/sn_meta.csv')
print(meta.shape)

(18660, 33)


In [183]:
harmony = pd.read_csv('/data/srlab/ssg34/SLE_kidney_v2/data/cna_new/downsampled_tissue/site/sn_harmony.csv')
print(harmony.shape)

(18660, 20)


In [184]:
umap = pd.read_csv('/data/srlab/ssg34/SLE_kidney_v2/data/cna_new/downsampled_tissue/site/sn_umap.csv')
print(umap.shape)

(18660, 2)


In [185]:
meta.columns

Index(['cell', 'Sex', 'sample', 'cell.type', 'Responder.Status', 'Age', 'Race',
       'Type', 'Final_ISN', 'Final_Chronicity', 'Final_Activity', 'Final_Site',
       'First_biop', 'Pred_use', 'Responder_Status', 'Race_[A]', 'Race_[B]',
       'Race_[U]', 'Race_[W]', 'Final_ISN_[III]', 'Final_ISN_[III][V]',
       'Final_ISN_[IV]', 'Final_ISN_[IV][V]', 'Final_ISN_[V]',
       'Final_Site_Cincinnati', 'Final_Site_JHU', 'Final_Site_Michigan',
       'Final_Site_MUSC', 'Final_Site_Northwell', 'Final_Site_NYU',
       'Final_Site_Rochester', 'Final_Site_Texas Tech', 'Final_Site_UCSF'],
      dtype='object')

In [186]:
res = cna_test(meta, harmony, umap, 'Final_Site_Cincinnati')
univariate_res = pd.concat([univariate_res,
                            pd.DataFrame(['Site Cincinnati', 'snRNAseq', res.p],
                            index = ['Variable', 'Technology', 'global_pvalue']).T])

(18660, 33)
(18660, 20)
(18660, 2)
['cell' 'sample' 'cell.type' 'Responder.Status' 'Race' 'Type' 'Final_ISN'
 'Final_Site']
consider casting to numeric types where appropriate, and
consider re-coding text-valued columns with pandas.get_dummies
computing default knn graph


  super().__init__(*args, **kwargs)


qcd NAM not found; computing and saving
	taking step 1
	median kurtosis: 11.043727199233018
	20th percentile R2(t,t-1): nan
	taking step 2
	median kurtosis: 8.146373564762346
	20th percentile R2(t,t-1): 0.7939578413963317
	taking step 3
	median kurtosis: 6.589556547746421
	20th percentile R2(t,t-1): 0.9295116901397705
stopping after 3 steps
covariate-adjusted NAM not found; computing and saving
computing SVD
performing association test




computing neighborhood-level FDRs
20
[1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20]
p = 0.1962803719628037 , 20 PCs used
total r^2 between top 20 NAM PCs and outcome is 0.77


In [187]:
res = cna_test(meta, harmony, umap, 'Final_Site_JHU')
univariate_res = pd.concat([univariate_res,
                            pd.DataFrame(['Site JHU', 'snRNAseq', res.p],
                            index = ['Variable', 'Technology', 'global_pvalue']).T])

(18660, 33)
(18660, 20)
(18660, 2)
['cell' 'sample' 'cell.type' 'Responder.Status' 'Race' 'Type' 'Final_ISN'
 'Final_Site']
consider casting to numeric types where appropriate, and
consider re-coding text-valued columns with pandas.get_dummies
computing default knn graph


  super().__init__(*args, **kwargs)


qcd NAM not found; computing and saving
	taking step 1
	median kurtosis: 11.043727199233018
	20th percentile R2(t,t-1): nan
	taking step 2
	median kurtosis: 8.146373564762346
	20th percentile R2(t,t-1): 0.7939578413963317
	taking step 3
	median kurtosis: 6.589556547746421
	20th percentile R2(t,t-1): 0.9295116901397705
stopping after 3 steps
covariate-adjusted NAM not found; computing and saving
computing SVD
performing association test




computing neighborhood-level FDRs
1
[1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20]
p = 0.9132086791320868 , 1 PCs used
total r^2 between top 1 NAM PCs and outcome is 0.03


In [188]:
res = cna_test(meta, harmony, umap, 'Final_Site_Michigan')
univariate_res = pd.concat([univariate_res,
                            pd.DataFrame(['Site Michigan', 'snRNAseq', res.p],
                            index = ['Variable', 'Technology', 'global_pvalue']).T])

(18660, 33)
(18660, 20)
(18660, 2)
['cell' 'sample' 'cell.type' 'Responder.Status' 'Race' 'Type' 'Final_ISN'
 'Final_Site']
consider casting to numeric types where appropriate, and
consider re-coding text-valued columns with pandas.get_dummies
computing default knn graph


  super().__init__(*args, **kwargs)


qcd NAM not found; computing and saving
	taking step 1
	median kurtosis: 11.043727199233018
	20th percentile R2(t,t-1): nan
	taking step 2
	median kurtosis: 8.146373564762346
	20th percentile R2(t,t-1): 0.7939578413963317
	taking step 3
	median kurtosis: 6.589556547746421
	20th percentile R2(t,t-1): 0.9295116901397705
stopping after 3 steps
covariate-adjusted NAM not found; computing and saving
computing SVD
performing association test




computing neighborhood-level FDRs
20
[1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20]
p = 0.7827217278272173 , 20 PCs used
total r^2 between top 20 NAM PCs and outcome is 0.39


In [189]:
res = cna_test(meta, harmony, umap, 'Final_Site_MUSC')
univariate_res = pd.concat([univariate_res,
                            pd.DataFrame(['Site MUSC', 'snRNAseq', res.p],
                            index = ['Variable', 'Technology', 'global_pvalue']).T])

(18660, 33)
(18660, 20)
(18660, 2)
['cell' 'sample' 'cell.type' 'Responder.Status' 'Race' 'Type' 'Final_ISN'
 'Final_Site']
consider casting to numeric types where appropriate, and
consider re-coding text-valued columns with pandas.get_dummies
computing default knn graph


  super().__init__(*args, **kwargs)


qcd NAM not found; computing and saving
	taking step 1
	median kurtosis: 11.043727199233018
	20th percentile R2(t,t-1): nan
	taking step 2
	median kurtosis: 8.146373564762346
	20th percentile R2(t,t-1): 0.7939578413963317
	taking step 3
	median kurtosis: 6.589556547746421
	20th percentile R2(t,t-1): 0.9295116901397705
stopping after 3 steps
covariate-adjusted NAM not found; computing and saving
computing SVD
performing association test




computing neighborhood-level FDRs
18
[1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20]
p = 0.12088791120887911 , 18 PCs used
total r^2 between top 18 NAM PCs and outcome is 0.88


In [190]:
res = cna_test(meta, harmony, umap, 'Final_Site_Northwell')
univariate_res = pd.concat([univariate_res,
                            pd.DataFrame(['Site Northwell', 'snRNAseq', res.p],
                            index = ['Variable', 'Technology', 'global_pvalue']).T])

(18660, 33)
(18660, 20)
(18660, 2)
['cell' 'sample' 'cell.type' 'Responder.Status' 'Race' 'Type' 'Final_ISN'
 'Final_Site']
consider casting to numeric types where appropriate, and
consider re-coding text-valued columns with pandas.get_dummies
computing default knn graph


  super().__init__(*args, **kwargs)


qcd NAM not found; computing and saving
	taking step 1
	median kurtosis: 11.043727199233018
	20th percentile R2(t,t-1): nan
	taking step 2
	median kurtosis: 8.146373564762346
	20th percentile R2(t,t-1): 0.7939578413963317
	taking step 3
	median kurtosis: 6.589556547746421
	20th percentile R2(t,t-1): 0.9295116901397705
stopping after 3 steps
covariate-adjusted NAM not found; computing and saving
computing SVD
performing association test




computing neighborhood-level FDRs
3
[1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20]
p = 0.46615338466153383 , 3 PCs used
total r^2 between top 3 NAM PCs and outcome is 0.20


In [191]:
res = cna_test(meta, harmony, umap, 'Final_Site_NYU')
univariate_res = pd.concat([univariate_res,
                            pd.DataFrame(['Site NYU', 'snRNAseq', res.p],
                            index = ['Variable', 'Technology', 'global_pvalue']).T])

(18660, 33)
(18660, 20)
(18660, 2)
['cell' 'sample' 'cell.type' 'Responder.Status' 'Race' 'Type' 'Final_ISN'
 'Final_Site']
consider casting to numeric types where appropriate, and
consider re-coding text-valued columns with pandas.get_dummies
computing default knn graph


  super().__init__(*args, **kwargs)


qcd NAM not found; computing and saving
	taking step 1
	median kurtosis: 11.043727199233018
	20th percentile R2(t,t-1): nan
	taking step 2
	median kurtosis: 8.146373564762346
	20th percentile R2(t,t-1): 0.7939578413963317
	taking step 3
	median kurtosis: 6.589556547746421
	20th percentile R2(t,t-1): 0.9295116901397705
stopping after 3 steps
covariate-adjusted NAM not found; computing and saving
computing SVD
performing association test




computing neighborhood-level FDRs
18
[1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20]
p = 0.6214378562143785 , 18 PCs used
total r^2 between top 18 NAM PCs and outcome is 0.48


In [192]:
res = cna_test(meta, harmony, umap, 'Final_Site_Rochester')
univariate_res = pd.concat([univariate_res,
                            pd.DataFrame(['Site Rochester', 'snRNAseq', res.p],
                            index = ['Variable', 'Technology', 'global_pvalue']).T])

(18660, 33)
(18660, 20)
(18660, 2)
['cell' 'sample' 'cell.type' 'Responder.Status' 'Race' 'Type' 'Final_ISN'
 'Final_Site']
consider casting to numeric types where appropriate, and
consider re-coding text-valued columns with pandas.get_dummies
computing default knn graph


  super().__init__(*args, **kwargs)


qcd NAM not found; computing and saving
	taking step 1
	median kurtosis: 11.043727199233018
	20th percentile R2(t,t-1): nan
	taking step 2
	median kurtosis: 8.146373564762346
	20th percentile R2(t,t-1): 0.7939578413963317
	taking step 3
	median kurtosis: 6.589556547746421
	20th percentile R2(t,t-1): 0.9295116901397705
stopping after 3 steps
covariate-adjusted NAM not found; computing and saving
computing SVD
performing association test




computing neighborhood-level FDRs
15
[1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20]
p = 0.29787021297870214 , 15 PCs used
total r^2 between top 15 NAM PCs and outcome is 0.53


In [193]:
res = cna_test(meta, harmony, umap, 'Final_Site_Texas Tech')
univariate_res = pd.concat([univariate_res,
                            pd.DataFrame(['Site Texas Tech', 'snRNAseq', res.p],
                            index = ['Variable', 'Technology', 'global_pvalue']).T])

(18660, 33)
(18660, 20)
(18660, 2)
['cell' 'sample' 'cell.type' 'Responder.Status' 'Race' 'Type' 'Final_ISN'
 'Final_Site']
consider casting to numeric types where appropriate, and
consider re-coding text-valued columns with pandas.get_dummies
computing default knn graph


  super().__init__(*args, **kwargs)


qcd NAM not found; computing and saving
	taking step 1
	median kurtosis: 11.043727199233018
	20th percentile R2(t,t-1): nan
	taking step 2
	median kurtosis: 8.146373564762346
	20th percentile R2(t,t-1): 0.7939578413963317
	taking step 3
	median kurtosis: 6.589556547746421
	20th percentile R2(t,t-1): 0.9295116901397705
stopping after 3 steps
covariate-adjusted NAM not found; computing and saving
computing SVD
performing association test




computing neighborhood-level FDRs
20
[1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20]
p = 0.24997500249975002 , 20 PCs used
total r^2 between top 20 NAM PCs and outcome is 0.75


In [194]:
res = cna_test(meta, harmony, umap, 'Final_Site_UCSF')
univariate_res = pd.concat([univariate_res,
                            pd.DataFrame(['Site UCSF', 'snRNAseq', res.p],
                            index = ['Variable', 'Technology', 'global_pvalue']).T])

(18660, 33)
(18660, 20)
(18660, 2)
['cell' 'sample' 'cell.type' 'Responder.Status' 'Race' 'Type' 'Final_ISN'
 'Final_Site']
consider casting to numeric types where appropriate, and
consider re-coding text-valued columns with pandas.get_dummies
computing default knn graph


  super().__init__(*args, **kwargs)


qcd NAM not found; computing and saving
	taking step 1
	median kurtosis: 11.043727199233018
	20th percentile R2(t,t-1): nan
	taking step 2
	median kurtosis: 8.146373564762346
	20th percentile R2(t,t-1): 0.7939578413963317
	taking step 3
	median kurtosis: 6.589556547746421
	20th percentile R2(t,t-1): 0.9295116901397705
stopping after 3 steps
covariate-adjusted NAM not found; computing and saving
computing SVD




performing association test
computing neighborhood-level FDRs
18
[1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20]
p = 0.33666633336666335 , 18 PCs used
total r^2 between top 18 NAM PCs and outcome is 0.60


# CHRONICITY

## Single Cell

In [3]:
meta = pd.read_csv('/data/srlab/ssg34/SLE_kidney_v2/data/cna_new/downsampled_tissue/chronicity/sc_meta.csv')
print(meta.shape)

(65570, 39)


In [4]:
harmony = pd.read_csv('/data/srlab/ssg34/SLE_kidney_v2/data/cna_new/downsampled_tissue/chronicity/sc_harmony.csv')
print(harmony.shape)

(65570, 20)


In [5]:
umap = pd.read_csv('/data/srlab/ssg34/SLE_kidney_v2/data/cna_new/downsampled_tissue/chronicity/sc_umap.csv')
print(umap.shape)

(65570, 2)


In [198]:
res = cna_test(meta, harmony, umap, 'Final_Chronicity')
univariate_res = pd.concat([univariate_res,
                            pd.DataFrame(['Chronicity', 'scRNAseq', res.p],
                            index = ['Variable', 'Technology', 'global_pvalue']).T])

(65570, 39)
(65570, 20)
(65570, 2)
['cell' 'sample' 'cell.type' 'Responder.Status' 'Race' 'Type' 'Final_ISN'
 'Final_Site']
consider casting to numeric types where appropriate, and
consider re-coding text-valued columns with pandas.get_dummies
computing default knn graph


  super().__init__(*args, **kwargs)


qcd NAM not found; computing and saving
	taking step 1
	median kurtosis: 39.45832061767578
	20th percentile R2(t,t-1): nan
	taking step 2
	median kurtosis: 26.267791748046875
	20th percentile R2(t,t-1): 0.7805158376693726
	taking step 3
	median kurtosis: 18.224061012268066
	20th percentile R2(t,t-1): 0.9237080931663513
	taking step 4
	median kurtosis: 14.09362506866455
	20th percentile R2(t,t-1): 0.9593445897102356
	taking step 5
	median kurtosis: 11.765871047973633
	20th percentile R2(t,t-1): 0.9752609372138977
stopping after 5 steps
covariate-adjusted NAM not found; computing and saving




computing SVD
performing association test




computing neighborhood-level FDRs
4
[1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20]
p = 9.999000099990002e-05 , 4 PCs used
total r^2 between top 4 NAM PCs and outcome is 0.56


In [203]:
np.savetxt("/data/srlab/ssg34/SLE_kidney_v2/data/cna_new/downsampled_tissue/chronicity/sc_ncorr.csv", 
               res.ncorrs, delimiter=",")
np.savetxt("/data/srlab/ssg34/SLE_kidney_v2/data/cna_new/downsampled_tissue/chronicity/sc_fdrs.csv", 
               res.fdrs, delimiter=",")

In [11]:
def run_cna(variable, cna_obj, covs = None):
    if covs is not None:
        cna_res = cna.tl._association.association(cna_obj, #dataset 
                                                  cna_obj.samplem[variable], #phenotype
            #                                       batches=d.samplem.processing_batch, #batches
                                                  covs = covs,
                                                  Nnull=10000, # number of null permutations to do (defaults to only 1e3)
                                                  ks=[1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20] # I asked the method to consider up to 10 PCs because
                                                                            #it chose the max number of PCs it considered the default set of [1,2,3,4]
                                                 )
    else:
        cna_res = cna.tl._association.association(cna_obj, #dataset 
                                                  cna_obj.samplem[variable], #phenotype
            #                                       batches=d.samplem.processing_batch, #batches
                                                 # covs = d.samplem[covs]
                                                  Nnull=10000, # number of null permutations to do (defaults to only 1e3)
                                                  ks=[1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20] # I asked the method to consider up to 10 PCs because
                                                                            #it chose the max number of PCs it considered the default set of [1,2,3,4]
                                                 )
        
    return cna_res
def cna_test(meta_df, harmony_df, umap_df, variable, covars = None):
    print(meta_df.shape)
    print(harmony_df.shape)
    print(umap_df.shape)
    cna_object = mad.MultiAnnData(X=harmony_df, obs=meta_df, sampleid="sample")
    if covars is not None:   
        cna_object.obs_to_sample(covars + [variable])
        umap_df.index = cna_object.obs.index
        cna_object.obsm['X_umap'] = umap_df
        cna.pp.knn(cna_object)
        np.random.seed(0) 
        final_res = run_cna(variable, cna_object, covs = covars)
    else:
        cna_object.obs_to_sample([variable])
        umap_df.index = cna_object.obs.index
        cna_object.obsm['X_umap'] = umap_df
        cna.pp.knn(cna_object)
        np.random.seed(0) 
        final_res = run_cna(variable, cna_object, covs = None)
    print(final_res.k)
    print(final_res.ks)
    print('p =', final_res.p, ',', final_res.k, 'PCs used')
    print('total r^2 between top {} NAM PCs and outcome is {:.2f}'.format(final_res.k, final_res.r2))
    return final_res

In [17]:
covars = ['First_biop', 'Responder_Status']
variable = 'Final_Chronicity'

In [18]:
cna_object = mad.MultiAnnData(X=harmony, obs=meta, sampleid="sample")
cna_object.obs_to_sample(covars + [variable])
umap.index = cna_object.obs.index
cna_object.obsm['X_umap'] = umap
cna.pp.knn(cna_object)
np.random.seed(0) 

  super().__init__(*args, **kwargs)


['cell' 'sample' 'cell.type' 'Responder.Status' 'Race' 'Type' 'Final_ISN'
 'Final_Site']
consider casting to numeric types where appropriate, and
consider re-coding text-valued columns with pandas.get_dummies
computing default knn graph


In [22]:
cna_object.samplem

Unnamed: 0_level_0,First_biop,Responder_Status,Final_Chronicity
sample,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
AMPSLEkid_cells_1243,0.0,0.0,9.0
AMPSLEkid_cells_1257,0.0,0.0,7.0
AMPSLEkid_cells_1154,1.0,0.0,3.0
AMPSLEkid_cells_2800,0.0,0.0,7.0
AMPSLEkid_cells_1133,0.0,,3.0
...,...,...,...
AMPSLEkid_cells_2802,0.0,0.0,2.0
AMPSLEkid_cells_0373,0.0,2.0,5.0
AMPSLEkid_cells_1148,0.0,1.0,2.0
AMPSLEkid_cells_2749,0.0,1.0,6.0


In [24]:
cna_res = cna.tl._association.association(cna_object, #dataset 
                                          cna_object.samplem[variable], #phenotype
    #                                       batches=d.samplem.processing_batch, #batches
                                          covs = covars,
                                          Nnull=10000, # number of null permutations to do (defaults to only 1e3)
                                          ks=[1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20] # I asked the method to consider up to 10 PCs because
                                                                    #it chose the max number of PCs it considered the default set of [1,2,3,4]
                                         )

TypeError: ufunc 'isnan' not supported for the input types, and the inputs could not be safely coerced to any supported types according to the casting rule ''safe''

In [12]:
res = cna_test(meta, harmony, umap, 'Final_Chronicity', covars = ['First_biop', 'Responder_Status'])

(65570, 39)
(65570, 20)
(65570, 2)
['cell' 'sample' 'cell.type' 'Responder.Status' 'Race' 'Type' 'Final_ISN'
 'Final_Site']
consider casting to numeric types where appropriate, and
consider re-coding text-valued columns with pandas.get_dummies
computing default knn graph


  super().__init__(*args, **kwargs)


TypeError: ufunc 'isnan' not supported for the input types, and the inputs could not be safely coerced to any supported types according to the casting rule ''safe''

## Single Nuclear

In [204]:
meta = pd.read_csv('/data/srlab/ssg34/SLE_kidney_v2/data/cna_new/downsampled_tissue/chronicity/sn_meta.csv')
print(meta.shape)

(15657, 32)


In [205]:
harmony = pd.read_csv('/data/srlab/ssg34/SLE_kidney_v2/data/cna_new/downsampled_tissue/chronicity/sn_harmony.csv')
print(harmony.shape)

(15657, 20)


In [206]:
umap = pd.read_csv('/data/srlab/ssg34/SLE_kidney_v2/data/cna_new/downsampled_tissue/chronicity/sn_umap.csv')
print(umap.shape)

(15657, 2)


In [207]:
res = cna_test(meta, harmony, umap, 'Final_Chronicity')
univariate_res = pd.concat([univariate_res,
                            pd.DataFrame(['Chronicity', 'snRNAseq', res.p],
                            index = ['Variable', 'Technology', 'global_pvalue']).T])

(15657, 32)
(15657, 20)
(15657, 2)
['cell' 'sample' 'cell.type' 'Responder.Status' 'Race' 'Type' 'Final_ISN'
 'Final_Site']
consider casting to numeric types where appropriate, and
consider re-coding text-valued columns with pandas.get_dummies
computing default knn graph


  super().__init__(*args, **kwargs)


qcd NAM not found; computing and saving
	taking step 1
	median kurtosis: 9.805901874553381
	20th percentile R2(t,t-1): nan
	taking step 2
	median kurtosis: 7.405575861951666
	20th percentile R2(t,t-1): 0.7989590883255004
	taking step 3
	median kurtosis: 6.056229520037973
	20th percentile R2(t,t-1): 0.9304667115211487
stopping after 3 steps
covariate-adjusted NAM not found; computing and saving
computing SVD




performing association test
computing neighborhood-level FDRs
6
[1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20]
p = 0.00039996000399960006 , 6 PCs used
total r^2 between top 6 NAM PCs and outcome is 0.68


In [208]:
np.savetxt("/data/srlab/ssg34/SLE_kidney_v2/data/cna_new/downsampled_tissue/chronicity/sn_ncorr.csv", 
               res.ncorrs, delimiter=",")
np.savetxt("/data/srlab/ssg34/SLE_kidney_v2/data/cna_new/downsampled_tissue/chronicity/sn_fdrs.csv", 
               res.fdrs, delimiter=",")

# ACTIVITY

## Single Cell

In [209]:
meta = pd.read_csv('/data/srlab/ssg34/SLE_kidney_v2/data/cna_new/downsampled_tissue/activity/sc_meta.csv')
print(meta.shape)

(65570, 39)


In [210]:
harmony = pd.read_csv('/data/srlab/ssg34/SLE_kidney_v2/data/cna_new/downsampled_tissue/activity/sc_harmony.csv')
print(harmony.shape)

(65570, 20)


In [211]:
umap = pd.read_csv('/data/srlab/ssg34/SLE_kidney_v2/data/cna_new/downsampled_tissue/activity/sc_umap.csv')
print(umap.shape)

(65570, 2)


In [212]:
res = cna_test(meta, harmony, umap, 'Final_Activity')
univariate_res = pd.concat([univariate_res,
                            pd.DataFrame(['Activity', 'scRNAseq', res.p],
                            index = ['Variable', 'Technology', 'global_pvalue']).T])

(65570, 39)
(65570, 20)
(65570, 2)
['cell' 'sample' 'cell.type' 'Responder.Status' 'Race' 'Type' 'Final_ISN'
 'Final_Site']
consider casting to numeric types where appropriate, and
consider re-coding text-valued columns with pandas.get_dummies
computing default knn graph


  super().__init__(*args, **kwargs)


qcd NAM not found; computing and saving
	taking step 1
	median kurtosis: 39.45832061767578
	20th percentile R2(t,t-1): nan
	taking step 2
	median kurtosis: 26.267791748046875
	20th percentile R2(t,t-1): 0.7805158376693726
	taking step 3
	median kurtosis: 18.224061012268066
	20th percentile R2(t,t-1): 0.9237080931663513
	taking step 4
	median kurtosis: 14.09362506866455
	20th percentile R2(t,t-1): 0.9593445897102356
	taking step 5
	median kurtosis: 11.765871047973633
	20th percentile R2(t,t-1): 0.9752609372138977
stopping after 5 steps
covariate-adjusted NAM not found; computing and saving




computing SVD
performing association test
computing neighborhood-level FDRs
17
[1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20]
p = 0.00039996000399960006 , 17 PCs used
total r^2 between top 17 NAM PCs and outcome is 0.30


In [214]:
np.savetxt("/data/srlab/ssg34/SLE_kidney_v2/data/cna_new/downsampled_tissue/activity/sc_ncorr.csv", 
               res.ncorrs, delimiter=",")
np.savetxt("/data/srlab/ssg34/SLE_kidney_v2/data/cna_new/downsampled_tissue/activity/sc_fdrs.csv", 
               res.fdrs, delimiter=",")

## Single Nuclear

In [216]:
meta = pd.read_csv('/data/srlab/ssg34/SLE_kidney_v2/data/cna_new/downsampled_tissue/activity/sn_meta.csv')
print(meta.shape)

(15657, 32)


In [217]:
harmony = pd.read_csv('/data/srlab/ssg34/SLE_kidney_v2/data/cna_new/downsampled_tissue/activity/sn_harmony.csv')
print(harmony.shape)

(15657, 20)


In [218]:
umap = pd.read_csv('/data/srlab/ssg34/SLE_kidney_v2/data/cna_new/downsampled_tissue/activity/sn_umap.csv')
print(umap.shape)

(15657, 2)


In [221]:
res = cna_test(meta, harmony, umap, 'Final_Activity')
univariate_res = pd.concat([univariate_res,
                            pd.DataFrame(['Activity', 'snRNAseq', res.p],
                            index = ['Variable', 'Technology', 'global_pvalue']).T])

(15657, 32)
(15657, 20)
(15657, 2)
['cell' 'sample' 'cell.type' 'Responder.Status' 'Race' 'Type' 'Final_ISN'
 'Final_Site']
consider casting to numeric types where appropriate, and
consider re-coding text-valued columns with pandas.get_dummies
computing default knn graph


  super().__init__(*args, **kwargs)


qcd NAM not found; computing and saving
	taking step 1
	median kurtosis: 9.805901874553381
	20th percentile R2(t,t-1): nan
	taking step 2
	median kurtosis: 7.405575861951666
	20th percentile R2(t,t-1): 0.7989590883255004
	taking step 3
	median kurtosis: 6.056229520037973
	20th percentile R2(t,t-1): 0.9304667115211487
stopping after 3 steps
covariate-adjusted NAM not found; computing and saving
computing SVD
performing association test




computing neighborhood-level FDRs
1
[1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20]
p = 0.29357064293570645 , 1 PCs used
total r^2 between top 1 NAM PCs and outcome is 0.19


In [222]:
np.savetxt("/data/srlab/ssg34/SLE_kidney_v2/data/cna_new/downsampled_tissue/activity/sc_ncorr.csv", 
               res.ncorrs, delimiter=",")
np.savetxt("/data/srlab/ssg34/SLE_kidney_v2/data/cna_new/downsampled_tissue/activity/sc_fdrs.csv", 
               res.fdrs, delimiter=",")

# ISN Class

## Single Cell

In [223]:
meta = pd.read_csv('/data/srlab/ssg34/SLE_kidney_v2/data/cna_new/downsampled_tissue/isn/sc_meta.csv')
print(meta.shape)

(71120, 39)


In [224]:
harmony = pd.read_csv('/data/srlab/ssg34/SLE_kidney_v2/data/cna_new/downsampled_tissue/isn/sc_harmony.csv')
print(harmony.shape)

(71120, 20)


In [225]:
umap = pd.read_csv('/data/srlab/ssg34/SLE_kidney_v2/data/cna_new/downsampled_tissue/isn/sc_umap.csv')
print(umap.shape)

(71120, 2)


In [226]:
meta.columns

Index(['cell', 'Sex', 'sample', 'cell.type', 'Responder.Status', 'Age', 'Race',
       'Type', 'Final_ISN', 'Final_Chronicity', 'Final_Activity', 'Final_Site',
       'First_biop', 'Pred_use', 'Responder_Status', 'Race_[A]', 'Race_[A][B]',
       'Race_[B]', 'Race_[B][AI]', 'Race_[B][W]', 'Race_[U]', 'Race_[W]',
       'Final_ISN_[III]', 'Final_ISN_[III][V]', 'Final_ISN_[IV]',
       'Final_ISN_[IV][V]', 'Final_ISN_[V]', 'Final_Site_Cincinnati',
       'Final_Site_Einstein', 'Final_Site_JHU', 'Final_Site_Michigan',
       'Final_Site_MUSC', 'Final_Site_Northwell', 'Final_Site_NYU',
       'Final_Site_Rochester', 'Final_Site_Texas Tech', 'Final_Site_UCLA',
       'Final_Site_UCSD', 'Final_Site_UCSF'],
      dtype='object')

In [227]:
res = cna_test(meta, harmony, umap, 'Final_ISN_[III]')
univariate_res = pd.concat([univariate_res,
                            pd.DataFrame(['ISN Class III', 'scRNAseq', res.p],
                            index = ['Variable', 'Technology', 'global_pvalue']).T])

(71120, 39)
(71120, 20)
(71120, 2)
['cell' 'sample' 'cell.type' 'Responder.Status' 'Race' 'Type' 'Final_ISN'
 'Final_Site']
consider casting to numeric types where appropriate, and
consider re-coding text-valued columns with pandas.get_dummies
computing default knn graph


  super().__init__(*args, **kwargs)


qcd NAM not found; computing and saving
	taking step 1
	median kurtosis: 42.821128845214844
	20th percentile R2(t,t-1): nan
	taking step 2
	median kurtosis: 28.430946350097656
	20th percentile R2(t,t-1): 0.7792342305183411
	taking step 3
	median kurtosis: 19.353656768798828
	20th percentile R2(t,t-1): 0.9228097796440125
	taking step 4
	median kurtosis: 14.69758415222168
	20th percentile R2(t,t-1): 0.9582582116127014
	taking step 5
	median kurtosis: 12.140487670898438
	20th percentile R2(t,t-1): 0.9747512340545654
stopping after 5 steps
covariate-adjusted NAM not found; computing and saving




computing SVD
performing association test




computing neighborhood-level FDRs
20
[1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20]
p = 0.12048795120487951 , 20 PCs used
total r^2 between top 20 NAM PCs and outcome is 0.19


In [228]:
res = cna_test(meta, harmony, umap, 'Final_ISN_[III][V]')
univariate_res = pd.concat([univariate_res,
                            pd.DataFrame(['ISN Class III/V', 'scRNAseq', res.p],
                            index = ['Variable', 'Technology', 'global_pvalue']).T])

(71120, 39)
(71120, 20)
(71120, 2)
['cell' 'sample' 'cell.type' 'Responder.Status' 'Race' 'Type' 'Final_ISN'
 'Final_Site']
consider casting to numeric types where appropriate, and
consider re-coding text-valued columns with pandas.get_dummies
computing default knn graph


  super().__init__(*args, **kwargs)


qcd NAM not found; computing and saving
	taking step 1
	median kurtosis: 42.821128845214844
	20th percentile R2(t,t-1): nan
	taking step 2
	median kurtosis: 28.430946350097656
	20th percentile R2(t,t-1): 0.7792342305183411
	taking step 3
	median kurtosis: 19.353656768798828
	20th percentile R2(t,t-1): 0.9228097796440125
	taking step 4
	median kurtosis: 14.69758415222168
	20th percentile R2(t,t-1): 0.9582582116127014
	taking step 5
	median kurtosis: 12.140487670898438
	20th percentile R2(t,t-1): 0.9747512340545654
stopping after 5 steps
covariate-adjusted NAM not found; computing and saving




computing SVD
performing association test
computing neighborhood-level FDRs
13
[1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20]
p = 0.4674532546745325 , 13 PCs used
total r^2 between top 13 NAM PCs and outcome is 0.11


In [229]:
res = cna_test(meta, harmony, umap, 'Final_ISN_[IV]')
univariate_res = pd.concat([univariate_res,
                            pd.DataFrame(['ISN Class IV', 'scRNAseq', res.p],
                            index = ['Variable', 'Technology', 'global_pvalue']).T])

(71120, 39)
(71120, 20)
(71120, 2)
['cell' 'sample' 'cell.type' 'Responder.Status' 'Race' 'Type' 'Final_ISN'
 'Final_Site']
consider casting to numeric types where appropriate, and
consider re-coding text-valued columns with pandas.get_dummies
computing default knn graph


  super().__init__(*args, **kwargs)


qcd NAM not found; computing and saving
	taking step 1
	median kurtosis: 42.821128845214844
	20th percentile R2(t,t-1): nan
	taking step 2
	median kurtosis: 28.430946350097656
	20th percentile R2(t,t-1): 0.7792342305183411
	taking step 3
	median kurtosis: 19.353656768798828
	20th percentile R2(t,t-1): 0.9228097796440125
	taking step 4
	median kurtosis: 14.69758415222168
	20th percentile R2(t,t-1): 0.9582582116127014
	taking step 5
	median kurtosis: 12.140487670898438
	20th percentile R2(t,t-1): 0.9747512340545654
stopping after 5 steps
covariate-adjusted NAM not found; computing and saving




computing SVD
performing association test
computing neighborhood-level FDRs
2
[1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20]
p = 0.15828417158284172 , 2 PCs used
total r^2 between top 2 NAM PCs and outcome is 0.05


In [230]:
res = cna_test(meta, harmony, umap, 'Final_ISN_[IV][V]')
univariate_res = pd.concat([univariate_res,
                            pd.DataFrame(['ISN Class IV/V', 'scRNAseq', res.p],
                            index = ['Variable', 'Technology', 'global_pvalue']).T])

(71120, 39)
(71120, 20)
(71120, 2)
['cell' 'sample' 'cell.type' 'Responder.Status' 'Race' 'Type' 'Final_ISN'
 'Final_Site']
consider casting to numeric types where appropriate, and
consider re-coding text-valued columns with pandas.get_dummies
computing default knn graph


  super().__init__(*args, **kwargs)


qcd NAM not found; computing and saving
	taking step 1
	median kurtosis: 42.821128845214844
	20th percentile R2(t,t-1): nan
	taking step 2
	median kurtosis: 28.430946350097656
	20th percentile R2(t,t-1): 0.7792342305183411
	taking step 3
	median kurtosis: 19.353656768798828
	20th percentile R2(t,t-1): 0.9228097796440125
	taking step 4
	median kurtosis: 14.69758415222168
	20th percentile R2(t,t-1): 0.9582582116127014
	taking step 5
	median kurtosis: 12.140487670898438
	20th percentile R2(t,t-1): 0.9747512340545654
stopping after 5 steps
covariate-adjusted NAM not found; computing and saving




computing SVD
performing association test
computing neighborhood-level FDRs
7
[1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20]
p = 0.1581841815818418 , 7 PCs used
total r^2 between top 7 NAM PCs and outcome is 0.10


In [231]:
res = cna_test(meta, harmony, umap, 'Final_ISN_[V]')
univariate_res = pd.concat([univariate_res,
                            pd.DataFrame(['ISN Class V', 'scRNAseq', res.p],
                            index = ['Variable', 'Technology', 'global_pvalue']).T])

(71120, 39)
(71120, 20)
(71120, 2)
['cell' 'sample' 'cell.type' 'Responder.Status' 'Race' 'Type' 'Final_ISN'
 'Final_Site']
consider casting to numeric types where appropriate, and
consider re-coding text-valued columns with pandas.get_dummies
computing default knn graph


  super().__init__(*args, **kwargs)


qcd NAM not found; computing and saving
	taking step 1
	median kurtosis: 42.821128845214844
	20th percentile R2(t,t-1): nan
	taking step 2
	median kurtosis: 28.430946350097656
	20th percentile R2(t,t-1): 0.7792342305183411
	taking step 3
	median kurtosis: 19.353656768798828
	20th percentile R2(t,t-1): 0.9228097796440125
	taking step 4
	median kurtosis: 14.69758415222168
	20th percentile R2(t,t-1): 0.9582582116127014
	taking step 5
	median kurtosis: 12.140487670898438
	20th percentile R2(t,t-1): 0.9747512340545654
stopping after 5 steps
covariate-adjusted NAM not found; computing and saving




computing SVD
performing association test
computing neighborhood-level FDRs
14
[1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20]
p = 0.07879212078792121 , 14 PCs used
total r^2 between top 14 NAM PCs and outcome is 0.16


## Single Nuclear

In [232]:
meta = pd.read_csv('/data/srlab/ssg34/SLE_kidney_v2/data/cna_new/downsampled_tissue/isn/sn_meta.csv')
print(meta.shape)

(18660, 33)


In [234]:
harmony = pd.read_csv('/data/srlab/ssg34/SLE_kidney_v2/data/cna_new/downsampled_tissue/isn/sn_harmony.csv')
print(harmony.shape)

(18660, 20)


In [235]:
umap = pd.read_csv('/data/srlab/ssg34/SLE_kidney_v2/data/cna_new/downsampled_tissue/isn/sn_umap.csv')
print(umap.shape)

(18660, 2)


In [237]:
res = cna_test(meta, harmony, umap, 'Final_ISN_[III]')
univariate_res = pd.concat([univariate_res,
                            pd.DataFrame(['ISN Class III', 'snRNAseq', res.p],
                            index = ['Variable', 'Technology', 'global_pvalue']).T])

(18660, 33)
(18660, 20)
(18660, 2)
['cell' 'sample' 'cell.type' 'Responder.Status' 'Race' 'Type' 'Final_ISN'
 'Final_Site']
consider casting to numeric types where appropriate, and
consider re-coding text-valued columns with pandas.get_dummies
computing default knn graph


  super().__init__(*args, **kwargs)


qcd NAM not found; computing and saving
	taking step 1
	median kurtosis: 11.043727199233018
	20th percentile R2(t,t-1): nan
	taking step 2
	median kurtosis: 8.146373564762346
	20th percentile R2(t,t-1): 0.7939578413963317
	taking step 3
	median kurtosis: 6.589556547746421
	20th percentile R2(t,t-1): 0.9295116901397705
stopping after 3 steps
covariate-adjusted NAM not found; computing and saving
computing SVD
performing association test




computing neighborhood-level FDRs
16
[1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20]
p = 0.11458854114588542 , 16 PCs used
total r^2 between top 16 NAM PCs and outcome is 0.60


In [238]:
res = cna_test(meta, harmony, umap, 'Final_ISN_[III][V]')
univariate_res = pd.concat([univariate_res,
                            pd.DataFrame(['ISN Class III/V', 'snRNAseq', res.p],
                            index = ['Variable', 'Technology', 'global_pvalue']).T])

(18660, 33)
(18660, 20)
(18660, 2)
['cell' 'sample' 'cell.type' 'Responder.Status' 'Race' 'Type' 'Final_ISN'
 'Final_Site']
consider casting to numeric types where appropriate, and
consider re-coding text-valued columns with pandas.get_dummies
computing default knn graph


  super().__init__(*args, **kwargs)


qcd NAM not found; computing and saving
	taking step 1
	median kurtosis: 11.043727199233018
	20th percentile R2(t,t-1): nan
	taking step 2
	median kurtosis: 8.146373564762346
	20th percentile R2(t,t-1): 0.7939578413963317
	taking step 3
	median kurtosis: 6.589556547746421
	20th percentile R2(t,t-1): 0.9295116901397705
stopping after 3 steps
covariate-adjusted NAM not found; computing and saving
computing SVD
performing association test




computing neighborhood-level FDRs
13
[1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20]
p = 0.7538246175382461 , 13 PCs used
total r^2 between top 13 NAM PCs and outcome is 0.36


In [239]:
res = cna_test(meta, harmony, umap, 'Final_ISN_[IV]')
univariate_res = pd.concat([univariate_res,
                            pd.DataFrame(['ISN Class IV', 'snRNAseq', res.p],
                            index = ['Variable', 'Technology', 'global_pvalue']).T])

(18660, 33)
(18660, 20)
(18660, 2)
['cell' 'sample' 'cell.type' 'Responder.Status' 'Race' 'Type' 'Final_ISN'
 'Final_Site']
consider casting to numeric types where appropriate, and
consider re-coding text-valued columns with pandas.get_dummies
computing default knn graph


  super().__init__(*args, **kwargs)


qcd NAM not found; computing and saving
	taking step 1
	median kurtosis: 11.043727199233018
	20th percentile R2(t,t-1): nan
	taking step 2
	median kurtosis: 8.146373564762346
	20th percentile R2(t,t-1): 0.7939578413963317
	taking step 3
	median kurtosis: 6.589556547746421
	20th percentile R2(t,t-1): 0.9295116901397705
stopping after 3 steps
covariate-adjusted NAM not found; computing and saving
computing SVD
performing association test




computing neighborhood-level FDRs
20
[1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20]
p = 0.901009899010099 , 20 PCs used
total r^2 between top 20 NAM PCs and outcome is 0.41


In [240]:
res = cna_test(meta, harmony, umap, 'Final_ISN_[IV][V]')
univariate_res = pd.concat([univariate_res,
                            pd.DataFrame(['ISN Class IV/V', 'snRNAseq', res.p],
                            index = ['Variable', 'Technology', 'global_pvalue']).T])

(18660, 33)
(18660, 20)
(18660, 2)
['cell' 'sample' 'cell.type' 'Responder.Status' 'Race' 'Type' 'Final_ISN'
 'Final_Site']
consider casting to numeric types where appropriate, and
consider re-coding text-valued columns with pandas.get_dummies
computing default knn graph


  super().__init__(*args, **kwargs)


qcd NAM not found; computing and saving
	taking step 1
	median kurtosis: 11.043727199233018
	20th percentile R2(t,t-1): nan
	taking step 2
	median kurtosis: 8.146373564762346
	20th percentile R2(t,t-1): 0.7939578413963317
	taking step 3
	median kurtosis: 6.589556547746421
	20th percentile R2(t,t-1): 0.9295116901397705
stopping after 3 steps
covariate-adjusted NAM not found; computing and saving
computing SVD
performing association test




computing neighborhood-level FDRs
9
[1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20]
p = 0.5813418658134186 , 9 PCs used
total r^2 between top 9 NAM PCs and outcome is 0.33


In [241]:
res = cna_test(meta, harmony, umap, 'Final_ISN_[V]')
univariate_res = pd.concat([univariate_res,
                            pd.DataFrame(['ISN Class V', 'snRNAseq', res.p],
                            index = ['Variable', 'Technology', 'global_pvalue']).T])

(18660, 33)
(18660, 20)
(18660, 2)
['cell' 'sample' 'cell.type' 'Responder.Status' 'Race' 'Type' 'Final_ISN'
 'Final_Site']
consider casting to numeric types where appropriate, and
consider re-coding text-valued columns with pandas.get_dummies
computing default knn graph


  super().__init__(*args, **kwargs)


qcd NAM not found; computing and saving
	taking step 1
	median kurtosis: 11.043727199233018
	20th percentile R2(t,t-1): nan
	taking step 2
	median kurtosis: 8.146373564762346
	20th percentile R2(t,t-1): 0.7939578413963317
	taking step 3
	median kurtosis: 6.589556547746421
	20th percentile R2(t,t-1): 0.9295116901397705
stopping after 3 steps
covariate-adjusted NAM not found; computing and saving
computing SVD
performing association test




computing neighborhood-level FDRs
12
[1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20]
p = 0.0847915208479152 , 12 PCs used
total r^2 between top 12 NAM PCs and outcome is 0.57


In [242]:
univariate_res.to_csv("/data/srlab/ssg34/SLE_kidney_v2/data/cna_new/downsampled_tissue/univariate_results.csv",
                      index = False)

In [3]:
univariate_res = pd.read_csv("/data/srlab/ssg34/SLE_kidney_v2/data/cna_new/downsampled_tissue/univariate_results.csv")

# FIRST BIOPSY 

## Single Cell

In [5]:
meta = pd.read_csv('/data/srlab/ssg34/SLE_kidney_v2/data/cna_new/downsampled_tissue/first_biopsy/sc_meta.csv')
print(meta.shape)

(71120, 39)


In [6]:
harmony = pd.read_csv('/data/srlab/ssg34/SLE_kidney_v2/data/cna_new/downsampled_tissue/first_biopsy/sc_harmony.csv')
print(harmony.shape)

(71120, 20)


In [7]:
umap = pd.read_csv('/data/srlab/ssg34/SLE_kidney_v2/data/cna_new/downsampled_tissue/first_biopsy/sc_umap.csv')
print(umap.shape)

(71120, 2)


In [8]:
res = cna_test(meta, harmony, umap, 'First_biop')
univariate_res = pd.concat([univariate_res,
                            pd.DataFrame(['First Biopsy Status', 'scRNAseq', res.p],
                            index = ['Variable', 'Technology', 'global_pvalue']).T])

(71120, 39)
(71120, 20)
(71120, 2)
['cell' 'sample' 'cell.type' 'Responder.Status' 'Race' 'Type' 'Final_ISN'
 'Final_Site']
consider casting to numeric types where appropriate, and
consider re-coding text-valued columns with pandas.get_dummies
computing default knn graph


  super().__init__(*args, **kwargs)
  from .autonotebook import tqdm as notebook_tqdm


qcd NAM not found; computing and saving
	taking step 1
	median kurtosis: 42.821128845214844
	20th percentile R2(t,t-1): nan
	taking step 2
	median kurtosis: 28.430946350097656
	20th percentile R2(t,t-1): 0.7792342305183411
	taking step 3
	median kurtosis: 19.353656768798828
	20th percentile R2(t,t-1): 0.9228097796440125
	taking step 4
	median kurtosis: 14.69758415222168
	20th percentile R2(t,t-1): 0.9582582116127014
	taking step 5
	median kurtosis: 12.140487670898438
	20th percentile R2(t,t-1): 0.9747512340545654
stopping after 5 steps
covariate-adjusted NAM not found; computing and saving




computing SVD
performing association test




computing neighborhood-level FDRs
18
[1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20]
p = 9.999000099990002e-05 , 18 PCs used
total r^2 between top 18 NAM PCs and outcome is 0.32


## Single Nuclear

In [9]:
meta = pd.read_csv('/data/srlab/ssg34/SLE_kidney_v2/data/cna_new/downsampled_tissue/first_biopsy/sn_meta.csv')
print(meta.shape)

(18660, 33)


In [10]:
harmony = pd.read_csv('/data/srlab/ssg34/SLE_kidney_v2/data/cna_new/downsampled_tissue/first_biopsy/sn_harmony.csv')
print(harmony.shape)

(18660, 20)


In [11]:
umap = pd.read_csv('/data/srlab/ssg34/SLE_kidney_v2/data/cna_new/downsampled_tissue/first_biopsy/sn_umap.csv')
print(umap.shape)

(18660, 2)


In [12]:
res = cna_test(meta, harmony, umap, 'First_biop')
univariate_res = pd.concat([univariate_res,
                            pd.DataFrame(['First Biopsy Status', 'snRNAseq', res.p],
                            index = ['Variable', 'Technology', 'global_pvalue']).T])

(18660, 33)
(18660, 20)
(18660, 2)
['cell' 'sample' 'cell.type' 'Responder.Status' 'Race' 'Type' 'Final_ISN'
 'Final_Site']
consider casting to numeric types where appropriate, and
consider re-coding text-valued columns with pandas.get_dummies
computing default knn graph


  super().__init__(*args, **kwargs)


qcd NAM not found; computing and saving
	taking step 1
	median kurtosis: 11.043727199233018
	20th percentile R2(t,t-1): nan
	taking step 2
	median kurtosis: 8.146373564762346
	20th percentile R2(t,t-1): 0.7939578413963317
	taking step 3
	median kurtosis: 6.589556547746421
	20th percentile R2(t,t-1): 0.9295116901397705
stopping after 3 steps
covariate-adjusted NAM not found; computing and saving
computing SVD
performing association test




computing neighborhood-level FDRs
10
[1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20]
p = 0.0033996600339966003 , 10 PCs used
total r^2 between top 10 NAM PCs and outcome is 0.63


# PREDNISONE USE

## Single Cell

In [15]:
meta = pd.read_csv('/data/srlab/ssg34/SLE_kidney_v2/data/cna_new/downsampled_tissue/prednisone_use/sc_meta.csv')
print(meta.shape)

(71120, 39)


In [16]:
harmony = pd.read_csv('/data/srlab/ssg34/SLE_kidney_v2/data/cna_new/downsampled_tissue/prednisone_use/sc_harmony.csv')
print(harmony.shape)

(71120, 20)


In [17]:
umap = pd.read_csv('/data/srlab/ssg34/SLE_kidney_v2/data/cna_new/downsampled_tissue/prednisone_use/sc_umap.csv')
print(umap.shape)

(71120, 2)


In [19]:
res = cna_test(meta, harmony, umap, 'Pred_use')
univariate_res = pd.concat([univariate_res,
                            pd.DataFrame(['Prednisone Use', 'scRNAseq', res.p],
                            index = ['Variable', 'Technology', 'global_pvalue']).T])

(71120, 39)
(71120, 20)
(71120, 2)
['cell' 'sample' 'cell.type' 'Responder.Status' 'Race' 'Type' 'Final_ISN'
 'Final_Site']
consider casting to numeric types where appropriate, and
consider re-coding text-valued columns with pandas.get_dummies
computing default knn graph


  super().__init__(*args, **kwargs)


qcd NAM not found; computing and saving
	taking step 1
	median kurtosis: 42.821128845214844
	20th percentile R2(t,t-1): nan
	taking step 2
	median kurtosis: 28.430946350097656
	20th percentile R2(t,t-1): 0.7792342305183411
	taking step 3
	median kurtosis: 19.353656768798828
	20th percentile R2(t,t-1): 0.9228097796440125
	taking step 4
	median kurtosis: 14.69758415222168
	20th percentile R2(t,t-1): 0.9582582116127014
	taking step 5
	median kurtosis: 12.140487670898438
	20th percentile R2(t,t-1): 0.9747512340545654
stopping after 5 steps
covariate-adjusted NAM not found; computing and saving




computing SVD
performing association test
computing neighborhood-level FDRs
8
[1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20]
p = 0.20037996200379962 , 8 PCs used
total r^2 between top 8 NAM PCs and outcome is 0.10


## Single Nuclear

In [20]:
meta = pd.read_csv('/data/srlab/ssg34/SLE_kidney_v2/data/cna_new/downsampled_tissue/prednisone_use/sn_meta.csv')
print(meta.shape)

(18660, 33)


In [22]:
harmony = pd.read_csv('/data/srlab/ssg34/SLE_kidney_v2/data/cna_new/downsampled_tissue/prednisone_use/sn_harmony.csv')
print(harmony.shape)

(18660, 20)


In [23]:
umap = pd.read_csv('/data/srlab/ssg34/SLE_kidney_v2/data/cna_new/downsampled_tissue/prednisone_use/sn_umap.csv')
print(umap.shape)

(18660, 2)


In [24]:
res = cna_test(meta, harmony, umap, 'Pred_use')
univariate_res = pd.concat([univariate_res,
                            pd.DataFrame(['Prednisone Use', 'snRNAseq', res.p],
                            index = ['Variable', 'Technology', 'global_pvalue']).T])

(18660, 33)
(18660, 20)
(18660, 2)
['cell' 'sample' 'cell.type' 'Responder.Status' 'Race' 'Type' 'Final_ISN'
 'Final_Site']
consider casting to numeric types where appropriate, and
consider re-coding text-valued columns with pandas.get_dummies
computing default knn graph


  super().__init__(*args, **kwargs)


qcd NAM not found; computing and saving
	taking step 1
	median kurtosis: 11.043727199233018
	20th percentile R2(t,t-1): nan
	taking step 2
	median kurtosis: 8.146373564762346
	20th percentile R2(t,t-1): 0.7939578413963317
	taking step 3
	median kurtosis: 6.589556547746421
	20th percentile R2(t,t-1): 0.9295116901397705
stopping after 3 steps
covariate-adjusted NAM not found; computing and saving
computing SVD
performing association test




computing neighborhood-level FDRs
13
[1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20]
p = 0.7112288771122888 , 13 PCs used
total r^2 between top 13 NAM PCs and outcome is 0.37


In [25]:
univariate_res.to_csv("/data/srlab/ssg34/SLE_kidney_v2/data/cna_new/downsampled_tissue/univariate_results.csv",
                      index = False)