In [33]:
from scipy.io import mmread
import pandas as pd
import numpy as np

In [16]:
# Load a 10x filtered matrix (MatrixMarket format). This particular dataset
# was downloaded from 10x, but any of their filtered matrices in mtx format
# will work.
data = mmread('filtered_gene_bc_matrices/hg19/matrix.mtx')
genes = pd.read_table('filtered_gene_bc_matrices/hg19/genes.tsv', header=None)
barcodes = pd.read_table('filtered_gene_bc_matrices/hg19/barcodes.tsv', header=None)

# The matrix is transposed so that the barcodes index the rows and the
# second column of genes.tsv labels the columns.
df = pd.DataFrame(
    data.todense().T,
    index=barcodes.values.ravel(),
    columns=genes[1].values.ravel(),
)

In [50]:
# 10x has duplicate genes (repeated column labels), which my method does not
# support. Here we merge them by summing all columns that share a label.
# `groupby(..., axis=1)` is deprecated in pandas >= 2.1, so the frame is
# transposed, grouped by its index labels, summed, and transposed back; the
# result is the same as the old column-wise groupby.
df = df.T.groupby(level=0).sum().T

In [57]:
from rut.testing.generate import SyntheticTest

# Build the synthetic test object from the count matrix in `df`.
# NOTE(review): judging by the directory listing taken after this call, it
# appears to write `demo_ds_0.20.csv` and `demo_ds_0.20_labels.p` into the
# working directory (prefix from `save`, suffix from the downsampling rate).
test_obj = SyntheticTest.from_dataset(
    df,
    ['original', 'adjusted'],
    save='./demo',
    additional_downsampling=0.2,
)

In [60]:
# List the working directory to see which files are present.
# The explicit `%ls` magic is used instead of a bare `ls`: automagic only
# applies to single-line cells and breaks if a variable named `ls` exists.
%ls -l

total 378960
drwxr-xr-x  3 ajc  staff         96 Nov  7  2017 [1m[31manalysis[m[m/
-rw-r--r--  1 ajc  staff  132474726 Jun 14 22:52 demo_ds_0.20.csv
-rw-r--r--  1 ajc  staff     749076 Jun 14 22:52 demo_ds_0.20_labels.p
drwxr-xr-x  4 ajc  staff        128 Nov  7  2017 [1m[31mfiltered_gene_bc_matrices[m[m/
-rw-r--r--  1 ajc  staff      40960 Jun 14 22:16 hgmm_1k_analysis.tar
-rw-r--r--  1 ajc  staff   56780800 Jun 14 22:16 hgmm_1k_filtered_gene_bc_matrices.tar


In [79]:
# Preview the generated dataset. `nrows=5` parses only the first five data
# rows rather than loading the whole (~130 MB) file just to show its head;
# the displayed rows are the same ones `.head()` would return.
pd.read_csv('./demo_ds_0.20.csv', index_col=0, nrows=5)

Unnamed: 0,hg19_5S_rRNA,hg19_7SK,hg19_A1BG,hg19_A1BG-AS1,hg19_A1CF,hg19_A2M,hg19_A2M-AS1,hg19_A2ML1,hg19_A2ML1-AS1,hg19_A2ML1-AS2,...,hg19_ZZEF1,hg19_ZZZ3,hg19_hsa-mir-1199,hg19_hsa-mir-1253,hg19_hsa-mir-150,hg19_hsa-mir-335,hg19_hsa-mir-490,hg19_hsa-mir-7515,hg19_hsa-mir-8072,hg19_snoU13
original,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
original,0.0,0.0,1.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,2.0,0.0
original,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,1.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
original,0.0,0.0,3.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0
original,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,1.0,4.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [67]:
from rut.differential_expression import WelchsT
import warnings

In [80]:
# Show the help for `test_method`. The object is named `test_obj`; the
# previous `test_obj_obj` was a typo, so the lookup found nothing.
?test_obj.test_method

In [None]:
# Apply the `WelchsT` method to the synthetic test. RuntimeWarnings are
# silenced only inside this `with` block; the previous warning filters are
# restored when it exits.
with warnings.catch_warnings():
    warnings.simplefilter(action='ignore', category=RuntimeWarning)
    results = test_obj.test_method(WelchsT)

In [69]:
# Display the first few rows of the test results as the cell output.
results.head()

Unnamed: 0,t,t_low,t_high,p,q
hg19_MT-CO3,-31.946966,-35.55,-29.07,0.0,2.131822e-218
hg19_MT-ND4,-30.786483,-34.31,-27.88,0.0,7.11749e-203
hg19_MT-CYB,-30.526141,-33.71,-27.42,0.0,1.399826e-199
hg19_MT-ATP6,-30.296561,-33.77,-26.7,0.0,1.139124e-196
hg19_RPL35,-30.179328,-33.25,-28.15,0.0,3.168661e-195


In [70]:
# Load up the examples stored in the labels pickle.
# NOTE: unpickling can execute arbitrary code, so only load pickle files
# that you generated yourself or otherwise trust.
import pickle

with open('demo_ds_0.20_labels.p', 'rb') as labels_file:
    effects = pickle.load(labels_file)

In [77]:
# Print the keys of `effects`, then the first ten entries stored under the
# key 0.0, each on its own line.
print(effects.keys(), effects[0.0][:10], sep='\n')

dict_keys([0.0, 0.1, 0.2, 0.3, 0.4, 0.5, -0.1, -0.2, -0.3, -0.4, -0.5])
['hg19_5S_rRNA', 'hg19_RP11-1060J15.7', 'hg19_RP11-1060J15.4', 'hg19_RP11-1060G2.1', 'hg19_RP11-105N14.3', 'hg19_RP11-105N14.2', 'hg19_RP11-105C20.2', 'hg19_RP11-105C19.2', 'hg19_RP11-1069G10.2', 'hg19_RP11-105C19.1']
