In [1]:
import logging
import os
import gzip

import pandas
import easydict

import starapi.analysis
import starapi.conf

In [2]:
# Stop starapi's analysis logger from forwarding records to ancestor loggers,
# so only handlers attached directly to it receive them.
# (assumes `logger` is a standard `logging.Logger` -- TODO confirm)
starapi.analysis.logger.propagate = False
# NOTE(review): presumably points starapi at the local 'data' directory for
# its configuration/data -- confirm against starapi.conf.configure.
starapi.conf.configure('data')

In [3]:
# Load the tab-separated table of queries; each row is iterated over by the
# analysis loop further down.
query_df = pandas.read_csv('data/queries.tsv', sep='\t')
# Preview the first five rows as the cell output.
query_df.head(n=5)

Unnamed: 0,slim_id,slim_name,case_query,control_query
0,DOID:0050156,idiopathic pulmonary fibrosis,IPF == 'IPF',IPF_control == 'IPF_control'
1,DOID:0050741,alcohol dependence,alcoholism == 'alcoholism',alcoholism_control == 'alcoholism_control'
2,DOID:0050742,nicotine dependence,Smoker == 'Smoker',Nonsmoker == 'Nonsmoker'
3,DOID:1024,leprosy,borderline_leprosy == 'borderline_leprosy' or ...,leprosy_control == 'leprosy_control'
4,DOID:10283,prostate cancer,PC == 'PC' or PC_tissue_case == 'PC_tissue_case',PC_control == 'PC_control' or PC_tissue_contro...


In [4]:
# Run the case/control analysis for every row of `query_df` and write the
# resulting tables to data/doslim/<analysis_name>/. A failing analysis is
# recorded in `errors` (and printed) instead of aborting the whole batch.
errors = list()

for i, row in query_df.iterrows():
    # Single-argument print call: identical output on Python 2 and Python 3.
    print('%s %s %s' % (i, row.slim_id, row.slim_name))

    # ':' is replaced so the identifier can be used as a directory name.
    name = row.slim_id.replace(':', '_')
    params = easydict.EasyDict(
        analysis_name=name,
        case_query=row.case_query,
        control_query=row.control_query,
        modifier_query='',
        min_samples=3,
    )

    directory = os.path.join('data', 'doslim', params.analysis_name)
    if not os.path.isdir(directory):
        # makedirs (rather than mkdir) also creates data/doslim on a first run.
        os.makedirs(directory)

    # Per-analysis log file: attached to the starapi logger only for the
    # duration of this iteration.
    logfile = logging.FileHandler(os.path.join(directory, 'log.txt'))
    logfile.setLevel(logging.DEBUG)
    starapi.analysis.logger.addHandler(logfile)
    try:
        sample_df, fold_df, balanced_perm_df, perm_df = starapi.analysis.perform_analysis(params)
        # Turn the index into ordinary columns so it is kept when the frames
        # are written below with index=False.
        for df in fold_df, balanced_perm_df:
            if df is not None:
                df.reset_index(inplace=True)
    except Exception as e:
        sample_df, fold_df, balanced_perm_df, perm_df = None, None, None, None
        errors.append({'slim_id': row.slim_id, 'name': name, 'error': e})
        print(e)
    finally:
        # Always detach AND close the handler: removeHandler() alone leaves
        # the log file open, leaking one file descriptor per analysis, and an
        # interrupt would otherwise leave a stale handler on the logger.
        starapi.analysis.logger.removeHandler(logfile)
        logfile.close()

    # write files
    files = [
        (sample_df, 'samples.tsv', open),
        (fold_df, 'fold_change.tsv.gz', gzip.open),
        (balanced_perm_df, 'balanced_permutation.tsv.gz', gzip.open),
    ]
    for df, filename, opener in files:
        # Nothing to write for results that are missing (failed analysis).
        if df is None:
            continue
        path = os.path.join(directory, filename)
        with opener(path, 'wt') as write_file:
            df.to_csv(write_file, index=False, sep='\t', float_format='%.5g')


Started DOID_0050156 analysis
A value is trying to be set on a copy of a slice from a DataFrame

See the the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  self._setitem_with_indexer(indexer, value)
333.75 ms in get_analysis_df("IPF == 'IPF'", "IPF_control == 'IPF_c..., '')
334.60 ms in Loading dataframe for DOID_0050156
Matching sources: 12
Stats: 12 sources, 12 series, 8 platforms, 679 samples
Loading data and calculating fold change for DOID_0050156
Loading data for GSE10667, id = 179
  1.14 s in get_probes(10)
  4.69 s in load_gse( series_id platform_id..., 179, False)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  samples['subset'] = "NA"
649.64 ms in get_gene_fold_change(<Gse GSE10667>, False, 0, None)
Loading 

0 DOID:0050156 idiopathic pulmonary fibrosis
1

Started DOID_0050741 analysis
317.48 ms in get_analysis_df("alcoholism == 'alcoho..., "alcoholism_control ==..., '')
318.35 ms in Loading dataframe for DOID_0050741
Matching sources: 8
Stats: 8 sources, 8 series, 8 platforms, 435 samples
Loading data and calculating fold change for DOID_0050741
Loading data for GSE10356, id = 95
 31.17 ms in get_probes(36)
  3.37 s in load_gse( series_id platform_id..., 95, False)
 39.07 ms in get_gene_fold_change(<Gse GSE10356>, False, 0, None)
  3.44 s in Load/fold for DOID_0050741
  3.79 s in perform_analysis({'analysis_name': 'DOI...)


 DOID:0050741 alcohol dependence
zero-size array to reduction operation maximum which has no identity

Started DOID_0050742 analysis
423.09 ms in get_analysis_df("Smoker == 'Smoker'", "Nonsmoker == 'Nonsmok..., '')
423.96 ms in Loading dataframe for DOID_0050742
Matching sources: 21
Excluded 2 sources as single-class
Stats: 19 sources, 17 series, 6 platforms, 1423 samples
Loading data and calculating fold change for DOID_0050742
Loading data for GSE10006, id = 4
  1.16 s in get_probes(4)
  8.62 s in load_gse( series_id platform_id..., 4, False)
  1.32 s in get_gene_fold_change(<Gse GSE10006>, False, 0, None)
Loading data for GSE11784, id = 450
  1.28 s in get_probes(4)
 14.38 s in load_gse( series_id platform_id..., 450, False)
  2.21 s in get_gene_fold_change(<Gse GSE11784>, False, 0, None)
Loading data for GSE11906, id = 487
  1.33 s in get_probes(4)
 19.17 s in load_gse( series_id platform_id..., 487, False)
  3.00 s in get_gene_fold_change(<Gse GSE11906>, False, 0, None)
Loading data for GSE19407, id = 2430
  1.27 s in get_probes(4)
 11.86 s in load_gse( series_id platform_id..., 24


2 DOID:0050742 nicotine dependence
3

Started DOID_1024 analysis
230.88 ms in get_analysis_df("borderline_leprosy ==..., "leprosy_control == 'l..., '')
231.74 ms in Loading dataframe for DOID_1024
Matching sources: 1
Excluded 1 source by min samples
FAIL Can't perform meta-analysis on no data
252.40 ms in perform_analysis({'analysis_name': 'DOI...)


 DOID:1024 leprosy
4

Started DOID_10283 analysis
541.87 ms in get_analysis_df("PC == 'PC' or PC_tiss..., "PC_control == 'PC_con..., '')
542.75 ms in Loading dataframe for DOID_10283
Matching sources: 19
Excluded 5 sources as single-class
Excluded 2 sources by min samples
Stats: 12 sources, 8 series, 11 platforms, 1101 samples
Loading data and calculating fold change for DOID_10283
Loading data for GSE11682, id = 437
817.29 ms in get_probes(10)
  4.03 s in load_gse( series_id platform_id..., 437, False)
535.98 ms in get_gene_fold_change(<Gse GSE11682>, False, 0, None)
Loading data for GSE14857, id = 1267
 31.43 ms in get_probes(477)
662.22 ms in load_gse( series_id platform_id..., 1267, False)
 42.58 ms in get_gene_fold_change(<Gse GSE14857>, False, 0, None)
  5.36 s in Load/fold for DOID_10283
  5.94 s in perform_analysis({'analysis_name': 'DOI...)


 DOID:10283 prostate cancer
zero-size array to reduction operation maximum which has no identity

Started DOID_10534 analysis
171.53 ms in get_analysis_df("stomach_cancer_tissue..., "stomach_cancer_tissue..., '')
172.35 ms in Loading dataframe for DOID_10534
Matching sources: 2
Excluded 1 source as single-class
FAIL Can't perform meta-analysis on single source
191.55 ms in perform_analysis({'analysis_name': 'DOI...)



5 DOID:10534 stomach cancer
6

Started DOID_10608 analysis
157.66 ms in get_analysis_df("celiac == 'celiac'", "celiac_control == 'ce..., '')
158.46 ms in Loading dataframe for DOID_10608
Matching sources: 1
FAIL Can't perform meta-analysis on single source
177.41 ms in perform_analysis({'analysis_name': 'DOI...)


 DOID:10608 celiac disease
7

Started DOID_10652 analysis
288.91 ms in get_analysis_df("AD == 'AD'", "AD_Control == 'AD_Con..., '')
289.71 ms in Loading dataframe for DOID_10652
Matching sources: 4
Excluded 2 sources as single-class
Stats: 2 sources, 2 series, 2 platforms, 512 samples
Loading data and calculating fold change for DOID_10652
Loading data for GSE33000, id = 5836
874.28 ms in get_probes(297)
 47.10 s in load_gse( series_id platform_id..., 5836, False)
  3.33 s in get_gene_fold_change(<Gse GSE33000>, False, 0, None)
Loading data for GSE6613, id = 11004
919.61 ms in get_probes(2)
  3.95 s in load_gse( series_id platform_id..., 11004, False)
407.80 ms in get_gene_fold_change(<Gse GSE6613>, False, 0, None)
 54.90 s in Load/fold for DOID_10652
Meta-Analyzing DOID_10652
 27.73 s in meta analysis of real data for DOID_10652
 33.38 ms in meta analysis of permutations for DOID_10652
 21.93 mks in get_balanced_permutations({'analysis_name': 'DOI...,  C H H_lower H_upper I..., Empty DataFrame Column...)
 27.85 s 

 DOID:10652 Alzheimer's disease
8

Started DOID_10763 analysis
252.45 ms in get_analysis_df("PHT == 'PHT' or hyper..., "PHT_Control == 'PHT_C..., '')
253.26 ms in Loading dataframe for DOID_10763
Matching sources: 8
Excluded 1 source as single-class
Stats: 7 sources, 7 series, 6 platforms, 309 samples
Loading data and calculating fold change for DOID_10763
Loading data for GSE10767, id = 202
  1.25 s in get_probes(4)
  2.47 s in load_gse( series_id platform_id..., 202, False)
404.90 ms in get_gene_fold_change(<Gse GSE10767>, False, 0, None)
Loading data for GSE15197, id = 1359
  1.14 s in get_probes(53)
  3.46 s in load_gse( series_id platform_id..., 1359, False)
535.22 ms in get_gene_fold_change(<Gse GSE15197>, False, 0, None)
Loading data for GSE19617, id = 2473
  1.38 s in get_probes(53)
  3.35 s in load_gse( series_id platform_id..., 2473, False)
423.71 ms in get_gene_fold_change(<Gse GSE19617>, False, 0, None)
Loading data for GSE24988, id = 3871
892.63 ms in get_probes(283)
  5.98 s in load_gse( series_id platform

 DOID:10763 hypertension
9

Started DOID_10871 analysis
224.50 ms in get_analysis_df("AMD == 'AMD'", "AMD_control == 'AMD_c..., '')
225.27 ms in Loading dataframe for DOID_10871
Matching sources: 1
FAIL Can't perform meta-analysis on single source
244.95 ms in perform_analysis({'analysis_name': 'DOI...)


 DOID:10871 age related macular degeneration
10

Started DOID_1094 analysis
134.90 ms in get_analysis_df("ADHD == 'ADHD'", "ADHD_control == 'ADHD..., '')
135.68 ms in Loading dataframe for DOID_1094
Matching sources: 1
Excluded 1 source by min samples
FAIL Can't perform meta-analysis on no data
154.87 ms in perform_analysis({'analysis_name': 'DOI...)


 DOID:1094 attention deficit hyperactivity disorder
11

Started DOID_11054 analysis
255.80 ms in get_analysis_df("bladder_cancer == 'bl..., "bladder_cancer_contro..., '')
256.58 ms in Loading dataframe for DOID_11054
Matching sources: 0
265.18 ms in perform_analysis({'analysis_name': 'DOI...)


 DOID:11054 urinary bladder cancer
'Int64Index' object has no attribute 'labels'

Started DOID_11612 analysis
190.66 ms in get_analysis_df("PCOS == 'PCOS'", "PCOS_control == 'PCOS..., '')
191.44 ms in Loading dataframe for DOID_11612
Matching sources: 11
Stats: 11 sources, 10 series, 5 platforms, 215 samples
Loading data and calculating fold change for DOID_11612
Loading data for GSE10946, id = 254
  1.30 s in get_probes(4)
  3.94 s in load_gse( series_id platform_id..., 254, False)
563.12 ms in get_gene_fold_change(<Gse GSE10946>, False, 0, None)
Loading data for GSE1615, id = 1613
827.54 ms in get_probes(2)
791.13 ms in get_probes(50)
  2.98 s in load_gse( series_id platform_id..., 1613, False)
391.94 ms in get_gene_fold_change(<Gse GSE1615>, False, 0, None)
Loading data for GSE34526, id = 6193
  1.27 s in get_probes(4)
  2.82 s in load_gse( series_id platform_id..., 6193, False)
407.40 ms in get_gene_fold_change(<Gse GSE34526>, False, 0, None)
Loading data for GSE43264, id = 8243
858.81 ms in get_probes(1160)
  1.53 s in load_gse( series_id platform_id..., 8243, 


12 DOID:11612 polycystic ovary syndrome
13

Started DOID_11714 analysis
194.34 ms in get_analysis_df("GDM == 'GDM'", "GDM_control == 'GDM_c..., '')
195.23 ms in Loading dataframe for DOID_11714
Matching sources: 4
Excluded 2 sources by min samples
Stats: 2 sources, 2 series, 2 platforms, 12 samples
Loading data and calculating fold change for DOID_11714
Loading data for GSE49524, id = 9415
858.72 ms in get_probes(482)
  1.47 s in load_gse( series_id platform_id..., 9415, False)
199.38 ms in get_gene_fold_change(<Gse GSE49524>, False, 0, None)
Loading data for GSE65737, id = 13510
 31.55 ms in get_probes(1802)
  1.25 s in load_gse( series_id platform_id..., 13510, False)
 48.64 ms in get_gene_fold_change(<Gse GSE65737>, False, 0, None)
  2.99 s in Load/fold for DOID_11714
  3.21 s in perform_analysis({'analysis_name': 'DOI...)


 DOID:11714 gestational diabetes
zero-size array to reduction operation maximum which has no identity
14

Started DOID_11949 analysis
150.67 ms in get_analysis_df("CJD == 'CJD'", "CJD_control == 'CJD_c..., '')
151.47 ms in Loading dataframe for DOID_11949
Matching sources: 1
Excluded 1 source by min samples
FAIL Can't perform meta-analysis on no data
171.27 ms in perform_analysis({'analysis_name': 'DOI...)


 DOID:11949 Creutzfeldt-Jakob disease
15

Started DOID_12306 analysis
130.40 ms in get_analysis_df("vitiligo == 'vitiligo'", "vitiligo_control == '..., '')
131.30 ms in Loading dataframe for DOID_12306
Matching sources: 1
FAIL Can't perform meta-analysis on single source
151.70 ms in perform_analysis({'analysis_name': 'DOI...)


 DOID:12306 vitiligo
16

Started DOID_12365 analysis
282.88 ms in get_analysis_df("malaria == 'malaria'", "malaria_control == 'm..., '')
283.70 ms in Loading dataframe for DOID_12365
Matching sources: 8
Excluded 1 source as single-class
Excluded 1 source by min samples
Stats: 6 sources, 6 series, 5 platforms, 295 samples
Loading data and calculating fold change for DOID_12365
Loading data for GSE15221, id = 1372
  1.01 s in get_probes(17)
  3.72 s in load_gse( series_id platform_id..., 1372, False)
367.19 ms in get_gene_fold_change(<Gse GSE15221>, False, 0, None)
Loading data for GSE18323, id = 2164
736.88 ms in get_probes(9)
  8.02 s in load_gse( series_id platform_id..., 2164, False)
402.60 ms in get_gene_fold_change(<Gse GSE18323>, False, 0, None)
Loading data for GSE24849, id = 3840
  1.27 s in get_probes(4)
  4.25 s in load_gse( series_id platform_id..., 3840, False)
454.91 ms in get_gene_fold_change(<Gse GSE24849>, False, 0, None)
Loading data for GSE34404, id = 6155
  1.05 s in get_probes(674)
 12.99 s 

 DOID:12365 malaria
17

Started DOID_12849 analysis
479.89 ms in get_analysis_df("autism == 'autism'", "autism_control == 'au..., '')
480.80 ms in Loading dataframe for DOID_12849
Matching sources: 14
Excluded 1 source as single-class
Excluded 1 source by min samples
Stats: 12 sources, 11 series, 7 platforms, 1302 samples
Loading data and calculating fold change for DOID_12849
Loading data for GSE15402, id = 1425
805.33 ms in get_probes(497)
  8.09 s in load_gse( series_id platform_id..., 1425, False)
765.60 ms in get_gene_fold_change(<Gse GSE15402>, False, 0, None)
Loading data for GSE15451, id = 1434
865.72 ms in get_probes(497)
  3.38 s in load_gse( series_id platform_id..., 1434, False)
355.24 ms in get_gene_fold_change(<Gse GSE15451>, False, 0, None)
Loading data for GSE18123, id = 2111
  1.27 s in get_probes(4)
925.43 ms in get_probes(283)
 21.21 s in load_gse( series_id platform_id..., 2111, False)
  1.89 s in get_gene_fold_change(<Gse GSE18123>, False, 0, None)
Loading data for GSE25507, id = 4004
  1

 DOID:12849 autistic disorder
zero-size array to reduction operation maximum which has no identity

Started DOID_13223 analysis
214.30 ms in get_analysis_df("uterine_fibroid == 'u..., "uterine_fibroid_contr..., '')
215.12 ms in Loading dataframe for DOID_13223
Matching sources: 7
Excluded 1 source as single-class
Stats: 6 sources, 6 series, 5 platforms, 124 samples
Loading data and calculating fold change for DOID_13223
Loading data for GSE2724, id = 4435
916.67 ms in get_probes(2)
  1.72 s in load_gse( series_id platform_id..., 4435, False)
268.02 ms in get_gene_fold_change(<Gse GSE2724>, False, 0, None)
Loading data for GSE28945, id = 4848
121.61 ms in get_probes(929)
317.45 ms in load_gse( series_id platform_id..., 4848, False)
 61.94 ms in get_gene_fold_change(<Gse GSE28945>, False, 0, None)
Loading data for GSE41386, id = 7773
  1.21 s in get_probes(4)
  2.59 s in load_gse( series_id platform_id..., 7773, False)
393.54 ms in get_gene_fold_change(<Gse GSE41386>, False, 0, None)
Loading data for GSE593, id = 10753
919.70 ms in get_probes(2)
  1.52 s in load_gse( series_id platform


18 DOID:13223 uterine fibroid
19

Started DOID_1324 analysis
790.52 ms in get_analysis_df("NSCLC == 'NSCLC' or P..., "lung_cancer_control =..., '')
791.45 ms in Loading dataframe for DOID_1324
Matching sources: 51
Excluded 49 sources as single-class
Stats: 2 sources, 2 series, 2 platforms, 800 samples
Loading data and calculating fold change for DOID_1324
Loading data for GSE19804, id = 2533
  1.25 s in get_probes(4)
 11.28 s in load_gse( series_id platform_id..., 2533, False)
  1.54 s in get_gene_fold_change(<Gse GSE19804>, False, 0, None)
Loading data for GSE66499, id = 13592
941.27 ms in get_probes(283)
 41.14 s in load_gse( series_id platform_id..., 13592, False)
  3.99 s in get_gene_fold_change(<Gse GSE66499>, False, 0, None)
 58.10 s in Load/fold for DOID_1324
Meta-Analyzing DOID_1324
 43.43 s in meta analysis of real data for DOID_1324
 45.93 ms in meta analysis of permutations for DOID_1324
 20.03 mks in get_balanced_permutations({'analysis_name': 'DOI...,  C H H_lower H_upper I..., Empty DataFrame Column...)
 

 DOID:1324 lung cancer
20

Started DOID_13241 analysis
153.08 ms in get_analysis_df("Behcet == 'Behcet'", "Behcet_control == 'Be..., '')
153.95 ms in Loading dataframe for DOID_13241
Matching sources: 1
FAIL Can't perform meta-analysis on single source
172.57 ms in perform_analysis({'analysis_name': 'DOI...)


 DOID:13241 Behcet's disease
21

Started DOID_13378 analysis
136.88 ms in get_analysis_df("Kawasaki == 'Kawasaki'", "Kawasaki_control == '..., '')
137.71 ms in Loading dataframe for DOID_13378
Matching sources: 1
FAIL Can't perform meta-analysis on single source
156.01 ms in perform_analysis({'analysis_name': 'DOI...)


 DOID:13378 Kawasaki disease
22

Started DOID_14221 analysis
263.13 ms in get_analysis_df("MetS == 'MetS'", "MetS_Control == 'MetS..., '')
263.95 ms in Loading dataframe for DOID_14221
Matching sources: 4
Excluded 4 sources as single-class
275.11 ms in perform_analysis({'analysis_name': 'DOI...)


 DOID:14221 metabolic syndrome X
'Int64Index' object has no attribute 'labels'

Started DOID_14227 analysis
134.62 ms in get_analysis_df("azoospermia == 'azoos..., "azoospermia_control =..., '')
135.42 ms in Loading dataframe for DOID_14227
Matching sources: 2
Stats: 2 sources, 2 series, 1 platforms, 51 samples
Loading data and calculating fold change for DOID_14227
Loading data for GSE45885, id = 8763
980.42 ms in get_probes(283)
  2.96 s in load_gse( series_id platform_id..., 8763, False)
361.67 ms in get_gene_fold_change(<Gse GSE45885>, False, 0, None)
Loading data for GSE45887, id = 8764
948.58 ms in get_probes(283)
  2.44 s in load_gse( series_id platform_id..., 8764, False)
296.01 ms in get_gene_fold_change(<Gse GSE45887>, False, 0, None)
  6.16 s in Load/fold for DOID_14227
Meta-Analyzing DOID_14227
 46.86 s in meta analysis of real data for DOID_14227
 32.30 ms in meta analysis of permutations for DOID_14227
 20.03 mks in get_balanced_permutations({'analysis_name': 'DOI...,  C H H_lower H_upper I..., Empty DataFrame Column...)
 46.99 s in Meta analysis for


23 DOID:14227 azoospermia
24

Started DOID_14330 analysis
337.56 ms in get_analysis_df("PD == 'PD'", "PD_control == 'PD_con..., '')
338.38 ms in Loading dataframe for DOID_14330
Matching sources: 30
Excluded 2 sources as single-class
Excluded 1 source by min samples
Stats: 27 sources, 26 series, 12 platforms, 946 samples
Loading data and calculating fold change for DOID_14330
Loading data for GSE18838, id = 2292
  1.20 s in get_probes(280)
  2.32 s in load_gse( series_id platform_id..., 2292, False)
445.85 ms in get_gene_fold_change(<Gse GSE18838>, False, 0, None)
Loading data for GSE19587, id = 2467
892.50 ms in get_probes(9)
  1.92 s in load_gse( series_id platform_id..., 2467, False)
284.28 ms in get_gene_fold_change(<Gse GSE19587>, False, 0, None)
Loading data for GSE20141, id = 2616
  1.34 s in get_probes(4)
  3.45 s in load_gse( series_id platform_id..., 2616, False)
545.42 ms in get_gene_fold_change(<Gse GSE20141>, False, 0, None)
Loading data for GSE20146, id = 2619
  1.30 s in get_probes(4)
  3.55 s in loa

 DOID:14330 Parkinson's disease
25

Started DOID_1595 analysis
700.82 ms in get_analysis_df("Depression == 'Depres..., "MDD_control == 'MDD_c..., '')
701.66 ms in Loading dataframe for DOID_1595
Matching sources: 19
Excluded 5 sources as single-class
Stats: 14 sources, 14 series, 6 platforms, 533 samples
Loading data and calculating fold change for DOID_1595
Loading data for GSE12654, id = 704
632.04 ms in get_probes(7)
  1.93 s in load_gse( series_id platform_id..., 704, False)
180.02 ms in get_gene_fold_change(<Gse GSE12654>, False, 0, None)
Loading data for GSE19738, id = 2518
  1.05 s in get_probes(282)
  7.36 s in load_gse( series_id platform_id..., 2518, False)
  1.22 s in get_gene_fold_change(<Gse GSE19738>, False, 0, None)
Loading data for GSE39653, id = 7363
  1.18 s in get_probes(674)
  5.41 s in load_gse( series_id platform_id..., 7363, False)
596.70 ms in get_gene_fold_change(<Gse GSE39653>, False, 0, None)
Loading data for GSE54562, id = 10202
  1.17 s in get_probes(389)
  3.29 s in load_gse( series_id platf

 DOID:1595 endogenous depression
26

Started DOID_1612 analysis
958.10 ms in get_analysis_df("breast_cancer == 'bre..., "breast_cancer_control..., '')
958.95 ms in Loading dataframe for DOID_1612
Matching sources: 62
Excluded 33 sources as single-class
Excluded 1 source by min samples
Stats: 28 sources, 24 series, 16 platforms, 1789 samples
Loading data and calculating fold change for DOID_1612
Loading data for GSE10780, id = 206
  1.34 s in get_probes(4)
 20.29 s in load_gse( series_id platform_id..., 206, False)
  2.28 s in get_gene_fold_change(<Gse GSE10780>, False, 0, None)
Loading data for GSE10797, id = 212
941.94 ms in get_probes(9)
  4.23 s in load_gse( series_id platform_id..., 212, False)
504.75 ms in get_gene_fold_change(<Gse GSE10797>, False, 0, None)
Loading data for GSE10810, id = 218
  1.31 s in get_probes(4)
  3.77 s in load_gse( series_id platform_id..., 218, False)
821.22 ms in get_gene_fold_change(<Gse GSE10810>, False, 0, None)
Loading data for GSE10885, id = 238
880.79 ms in get_probes(83)
823.69 ms i

 DOID:1612 breast cancer
27

Started DOID_1686 analysis
513.58 ms in get_analysis_df("glaucoma == 'glaucoma'", "glaucoma_control == '..., '')
514.43 ms in Loading dataframe for DOID_1686
Matching sources: 11
Excluded 1 source as single-class
Excluded 3 sources by min samples
Stats: 7 sources, 6 series, 5 platforms, 138 samples
Loading data and calculating fold change for DOID_1686
Loading data for GSE2378, id = 3554
664.70 ms in get_probes(7)
  1.23 s in load_gse( series_id platform_id..., 3554, False)
150.47 ms in get_gene_fold_change(<Gse GSE2378>, False, 0, None)
Loading data for GSE27276, id = 4448
  1.01 s in get_probes(39)
  4.48 s in load_gse( series_id platform_id..., 4448, False)
319.19 ms in get_gene_fold_change(<Gse GSE27276>, False, 0, None)
Loading data for GSE45570, id = 8697
  1.20 s in get_probes(280)
  2.10 s in load_gse( series_id platform_id..., 8697, False)
323.01 ms in get_gene_fold_change(<Gse GSE45570>, False, 0, None)
Loading data for GSE9944, id = 11879
942.96 ms in get_probes(9)
688.96 ms

 DOID:1686 glaucoma
zero-size array to reduction operation maximum which has no identity
28

Started DOID_1793 analysis
288.30 ms in get_analysis_df("pancreatic_cancer == ..., "pancreatic_cancer_con..., '')
289.12 ms in Loading dataframe for DOID_1793
Matching sources: 28
Excluded 16 sources as single-class
Excluded 3 sources by min samples
Stats: 9 sources, 9 series, 5 platforms, 250 samples
Loading data and calculating fold change for DOID_1793
Loading data for GSE14245, id = 1080
  1.34 s in get_probes(4)
  3.49 s in load_gse( series_id platform_id..., 1080, False)
538.67 ms in get_gene_fold_change(<Gse GSE14245>, False, 0, None)
Loading data for GSE15932, id = 1550
  1.43 s in get_probes(4)
  4.54 s in load_gse( series_id platform_id..., 1550, False)
555.69 ms in get_gene_fold_change(<Gse GSE15932>, False, 0, None)
Loading data for GSE19279, id = 2400
917.46 ms in get_probes(2)
  1.76 s in load_gse( series_id platform_id..., 2400, False)
211.81 ms in get_gene_fold_change(<Gse GSE19279>, False, 0, None)
Loading data for GSE19650, id = 2485
  1.21 s in get_probes(4)
  3.41 s

 DOID:1793 pancreatic cancer
29

Started DOID_1909 analysis
565.47 ms in get_analysis_df("melanoma == 'melanoma'", "melanoma_control == '..., '')
566.33 ms in Loading dataframe for DOID_1909
Matching sources: 47
Excluded 36 sources as single-class
Excluded 1 source by min samples
Stats: 10 sources, 8 series, 5 platforms, 390 samples
Loading data and calculating fold change for DOID_1909
Loading data for GSE15605, id = 1479
  1.26 s in get_probes(4)
  7.72 s in load_gse( series_id platform_id..., 1479, False)
635.99 ms in get_gene_fold_change(<Gse GSE15605>, False, 0, None)
Loading data for GSE3189, id = 5563
900.72 ms in get_probes(2)
  3.20 s in load_gse( series_id platform_id..., 5563, False)
443.55 ms in get_gene_fold_change(<Gse GSE3189>, False, 0, None)
Loading data for GSE44660, id = 8515
  1.16 s in get_probes(280)
  2.19 s in load_gse( series_id platform_id..., 8515, False)
400.79 ms in get_gene_fold_change(<Gse GSE44660>, False, 0, None)
Loading data for GSE46517, id = 8884
898.16 ms in get_probes(2)
  5.71 s

 DOID:1909 melanoma
30

Started DOID_1936 analysis
325.60 ms in get_analysis_df("atherosclerosis == 'a..., "atherosclerosis_contr..., '')
326.42 ms in Loading dataframe for DOID_1936
Matching sources: 6
Excluded 1 source as single-class
Excluded 1 source by min samples
Stats: 4 sources, 4 series, 4 platforms, 346 samples
Loading data and calculating fold change for DOID_1936
Loading data for GSE20129, id = 2612
867.49 ms in get_probes(3)
  5.49 s in load_gse( series_id platform_id..., 2612, False)
780.51 ms in get_gene_fold_change(<Gse GSE20129>, False, 0, None)
Loading data for GSE23746, id = 3545
962.11 ms in get_probes(74)
  3.53 s in load_gse( series_id platform_id..., 3545, False)
546.82 ms in get_gene_fold_change(<Gse GSE23746>, False, 0, None)
Loading data for GSE37356, id = 6873
  1.30 s in get_probes(674)
  6.28 s in load_gse( series_id platform_id..., 6873, False)
801.87 ms in get_gene_fold_change(<Gse GSE37356>, False, 0, None)
Loading data for GSE9874, id = 11861
840.46 ms in get_probes(2)
  2.94 

 DOID:1936 atherosclerosis
31

Started DOID_219 analysis
277.76 ms in get_analysis_df("colon_cancer == 'colo..., "colon_cancer_control ..., '')
278.57 ms in Loading dataframe for DOID_219
Matching sources: 5
Excluded 1 source as single-class
Stats: 4 sources, 3 series, 4 platforms, 275 samples
Loading data and calculating fold change for DOID_219
Loading data for GSE36400, id = 6646
  1.16 s in get_probes(280)
  1.97 s in load_gse( series_id platform_id..., 6646, False)
365.17 ms in get_gene_fold_change(<Gse GSE36400>, False, 0, None)
Loading data for GSE7466, id = 11249
 62.63 ms in get_probes(821)
 63.98 ms in get_probes(823)
  2.30 s in load_gse( series_id platform_id..., 11249, False)
 91.55 ms in get_gene_fold_change(<Gse GSE7466>, False, 0, None)
Loading data for GSE68468, id = 13736
929.66 ms in get_probes(2)
 13.91 s in load_gse( series_id platform_id..., 13736, False)
  1.42 s in get_gene_fold_change(<Gse GSE68468>, False, 0, None)
 20.20 s in Load/fold for DOID_219
Meta-Analyzing DOID_219
 29.83 s in meta 

 DOID:219 colon cancer
32

Started DOID_2377 analysis
540.03 ms in get_analysis_df("MS == 'MS'", "MS_control == 'MS_con..., '')
540.87 ms in Loading dataframe for DOID_2377
Matching sources: 27
Excluded 3 sources as single-class
Excluded 2 sources by min samples
Stats: 22 sources, 21 series, 9 platforms, 1781 samples
Loading data and calculating fold change for DOID_2377
Loading data for GSE10064, id = 17
967.07 ms in get_probes(12)
  2.62 s in load_gse( series_id platform_id..., 17, False)
254.36 ms in get_gene_fold_change(<Gse GSE10064>, False, 0, None)
Loading data for GSE13732, id = 973
  1.29 s in get_probes(4)
 12.46 s in load_gse( series_id platform_id..., 973, False)
  1.52 s in get_gene_fold_change(<Gse GSE13732>, False, 0, None)
Loading data for GSE14895, id = 1280
807.12 ms in get_probes(9)
932.49 ms in get_probes(2)
  4.26 s in load_gse( series_id platform_id..., 1280, False)
548.35 ms in get_gene_fold_change(<Gse GSE14895>, False, 0, None)
Loading data for GSE16461, id = 1688
  1.41 s in get_probes(

 DOID:2377 multiple sclerosis
zero-size array to reduction operation maximum which has no identity

Started DOID_2394 analysis
200.19 ms in get_analysis_df("ovarian_cancer == 'ov..., "ovarian_cancer_contro..., '')
201.03 ms in Loading dataframe for DOID_2394
Matching sources: 4
Excluded 1 source as single-class
Stats: 3 sources, 3 series, 2 platforms, 123 samples
Loading data and calculating fold change for DOID_2394
Loading data for GSE29220, id = 4918
  1.32 s in get_probes(4)
  3.89 s in load_gse( series_id platform_id..., 4918, False)
531.84 ms in get_gene_fold_change(<Gse GSE29220>, False, 0, None)
Loading data for GSE31682, id = 5516
833.20 ms in get_probes(60)
  3.91 s in load_gse( series_id platform_id..., 5516, False)
449.88 ms in get_gene_fold_change(<Gse GSE31682>, False, 0, None)
Loading data for GSE38666, id = 7137
  1.33 s in get_probes(4)
  5.39 s in load_gse( series_id platform_id..., 7137, False)
747.58 ms in get_gene_fold_change(<Gse GSE38666>, False, 0, None)
 15.11 s in Load/fold for DOID_2394
Meta-Analyzing DOID_2394
 52.96 s in meta analysis of real data for DOI


33 DOID:2394 ovarian cancer
34

Started DOID_2531 analysis
  1.15 s in get_analysis_df("AML_Tissue == 'AML_Ti..., "AML_Control == 'AML_C..., '')
  1.15 s in Loading dataframe for DOID_2531
Matching sources: 95
Excluded 83 sources as single-class
Stats: 12 sources, 12 series, 4 platforms, 746 samples
Loading data and calculating fold change for DOID_2531
Loading data for GSE10746, id = 198
  1.30 s in get_probes(4)
  2.82 s in load_gse( series_id platform_id..., 198, False)
418.33 ms in get_gene_fold_change(<Gse GSE10746>, False, 0, None)
Loading data for GSE13591, id = 938
883.66 ms in get_probes(2)
  6.91 s in load_gse( series_id platform_id..., 938, False)
834.54 ms in get_gene_fold_change(<Gse GSE13591>, False, 0, None)
Loading data for GSE17054, id = 1838
  1.38 s in get_probes(4)
  3.38 s in load_gse( series_id platform_id..., 1838, False)
454.56 ms in get_gene_fold_change(<Gse GSE17054>, False, 0, None)
Loading data for GSE24870, id = 3848
978.61 ms in get_probes(9)
  2.61 s in load_gse( series_id platform_id..

 DOID:2531 hematologic cancer
35

Started DOID_263 analysis
597.19 ms in get_analysis_df("RCC == 'RCC'", "RCC_Control == 'RCC_C..., '')
598.06 ms in Loading dataframe for DOID_263
Matching sources: 60
Excluded 37 sources as single-class
Excluded 2 sources by min samples
Stats: 21 sources, 17 series, 13 platforms, 1536 samples
Loading data and calculating fold change for DOID_263
Loading data for GSE12606, id = 685
  1.33 s in get_probes(4)
  2.81 s in load_gse( series_id platform_id..., 685, False)
414.69 ms in get_gene_fold_change(<Gse GSE12606>, False, 0, None)
Loading data for GSE14762, id = 1230
 31.46 ms in get_probes(471)
804.43 ms in load_gse( series_id platform_id..., 1230, False)
 56.52 ms in get_gene_fold_change(<Gse GSE14762>, False, 0, None)
Loading data for GSE15641, id = 1490
855.65 ms in get_probes(2)
  3.78 s in load_gse( series_id platform_id..., 1490, False)
536.50 ms in get_gene_fold_change(<Gse GSE15641>, False, 0, None)
Loading data for GSE17816, id = 2030
498.95 ms in get_probes(588)
  2.15 s in l

 DOID:263 kidney cancer
36

Started DOID_2841 analysis
623.87 ms in get_analysis_df("asthma == 'asthma'", "asthma_control == 'as..., '')
624.73 ms in Loading dataframe for DOID_2841
Matching sources: 13
Stats: 13 sources, 12 series, 9 platforms, 1384 samples
Loading data and calculating fold change for DOID_2841
Loading data for GSE19187, id = 2377
  1.01 s in get_probes(283)
  3.35 s in load_gse( series_id platform_id..., 2377, False)
308.37 ms in get_gene_fold_change(<Gse GSE19187>, False, 0, None)
Loading data for GSE27011, id = 4384
  1.01 s in get_probes(283)
  4.53 s in load_gse( series_id platform_id..., 4384, False)
492.18 ms in get_gene_fold_change(<Gse GSE27011>, False, 0, None)
Loading data for GSE27876, id = 4561
  1.27 s in get_probes(53)
  2.88 s in load_gse( series_id platform_id..., 4561, False)
342.04 ms in get_gene_fold_change(<Gse GSE27876>, False, 0, None)
Loading data for GSE44037, id = 8399
  1.50 s in get_probes(901)
  4.72 s in load_gse( series_id platform_id..., 8399, False)
560.15 ms in 

 DOID:2841 asthma
zero-size array to reduction operation maximum which has no identity
37

Started DOID_2986 analysis
215.51 ms in get_analysis_df("IgA_nephropathy == 'I..., "IgA_nephropathy_contr..., '')
216.45 ms in Loading dataframe for DOID_2986
Matching sources: 6
Stats: 6 sources, 5 series, 3 platforms, 161 samples
Loading data and calculating fold change for DOID_2986
Loading data for GSE14795, id = 1241
886.79 ms in get_probes(2)
  1.86 s in load_gse( series_id platform_id..., 1241, False)
267.20 ms in get_gene_fold_change(<Gse GSE14795>, False, 0, None)
Loading data for GSE35487, id = 6431
886.33 ms in get_probes(2)
  2.36 s in load_gse( series_id platform_id..., 6431, False)
317.08 ms in get_gene_fold_change(<Gse GSE35487>, False, 0, None)
Loading data for GSE35488, id = 6432
664.08 ms in get_probes(1019)
  1.52 s in load_gse( series_id platform_id..., 6432, False)
186.09 ms in get_gene_fold_change(<Gse GSE35488>, False, 0, None)
Loading data for GSE35489, id = 6433
565.46 ms in get_probes(1019)
  1.04 s in get_probes(2)
  3.90 s in load_gse( series_id platform_id.

 DOID:2986 IgA glomerulonephritis
38

Started DOID_2998 analysis
226.34 ms in get_analysis_df("testicular_cancer == ..., "testicular_cancer_con..., '')
227.18 ms in Loading dataframe for DOID_2998
Matching sources: 0
235.99 ms in perform_analysis({'analysis_name': 'DOI...)


 DOID:2998 testicular cancer
'Int64Index' object has no attribute 'labels'

Started DOID_3083 analysis
653.35 ms in get_analysis_df("COPD == 'COPD'", "COPD_control == 'COPD..., '')
654.19 ms in Loading dataframe for DOID_3083
Matching sources: 22
Excluded 4 sources as single-class
Stats: 18 sources, 17 series, 7 platforms, 1720 samples
Loading data and calculating fold change for DOID_3083
Loading data for GSE10006, id = 4
  1.31 s in get_probes(4)
  8.83 s in load_gse( series_id platform_id..., 4, False)
  1.29 s in get_gene_fold_change(<Gse GSE10006>, False, 0, None)
Loading data for GSE11784, id = 450
  1.27 s in get_probes(4)
 14.70 s in load_gse( series_id platform_id..., 450, False)
  1.96 s in get_gene_fold_change(<Gse GSE11784>, False, 0, None)
Loading data for GSE12472, id = 652
  1.08 s in get_probes(26)
  2.59 s in load_gse( series_id platform_id..., 652, False)
640.51 ms in get_gene_fold_change(<Gse GSE12472>, False, 0, None)
Loading data for GSE16972, id = 1810
979.20 ms in get_probes(2)
  1.70 s in load_gse( series_id platform_id..., 1810, False)


39 DOID:3083 chronic obstructive pulmonary disease
Loading URL ftp://ftp.ncbi.nih.gov/pub/geo/DATA/SeriesMatrix/GSE37768/GSE37768_series_matrix.txt.gz ...
Loading URL

  1.51 s in load_gse( series_id platform_id..., 6962, False)
 85.68 s in Load/fold for DOID_3083
 86.37 s in perform_analysis({'analysis_name': 'DOI...)


 ftp://ftp.ncbi.nih.gov/pub/geo/DATA/SeriesMatrix/GSE37768/GSE37768-GPL570_series_matrix.txt.gz ...
Can't find matrix file for series 6962, platform 4

Started DOID_3310 analysis
193.82 ms in get_analysis_df("eczema == 'eczema'", "eczema_control == 'ec..., '')
194.66 ms in Loading dataframe for DOID_3310
Matching sources: 8
Excluded 2 sources as single-class
Stats: 6 sources, 5 series, 5 platforms, 129 samples
Loading data and calculating fold change for DOID_3310
Loading data for GSE12511, id = 664
  1.01 s in get_probes(367)
  3.20 s in load_gse( series_id platform_id..., 664, False)
391.44 ms in get_gene_fold_change(<Gse GSE12511>, False, 0, None)
Loading data for GSE13709, id = 966
460.53 ms in get_probes(414)
367.77 ms in get_probes(415)
  1.56 s in load_gse( series_id platform_id..., 966, False)
191.78 ms in get_gene_fold_change(<Gse GSE13709>, False, 0, None)
Loading data for GSE16161, id = 1619
  1.36 s in get_probes(4)
  3.61 s in load_gse( series_id platform_id..., 1619, False)
516.51 ms in get_gene_fold_change(<Gse GSE16161>, False, 0, None)
Loading data for GSE26952, id = 4370
888.34 ms in get_probes(74)
  1.76 s in load_g


40 DOID:3310 atopic dermatitis
41

Started DOID_3312 analysis
355.53 ms in get_analysis_df("bipolar_disorder == '..., "bipolar_disorder_cont..., '')
356.34 ms in Loading dataframe for DOID_3312
Matching sources: 15
Excluded 6 sources as single-class
Stats: 9 sources, 9 series, 6 platforms, 438 samples
Loading data and calculating fold change for DOID_3312
Loading data for GSE11767, id = 446
950.60 ms in get_probes(9)
  1.56 s in load_gse( series_id platform_id..., 446, False)
213.66 ms in get_gene_fold_change(<Gse GSE11767>, False, 0, None)
Loading data for GSE23848, id = 3566
  1.07 s in get_probes(343)
  4.17 s in load_gse( series_id platform_id..., 3566, False)
445.74 ms in get_gene_fold_change(<Gse GSE23848>, False, 0, None)
Loading data for GSE39653, id = 7363
  1.17 s in get_probes(674)
  5.45 s in load_gse( series_id platform_id..., 7363, False)
484.57 ms in get_gene_fold_change(<Gse GSE39653>, False, 0, None)
Loading data for GSE46449, id = 8868
  1.36 s in get_probes(4)
 10.17 s in load_gse( series_id platform_

 DOID:3312 bipolar disorder
42

Started DOID_332 analysis
239.49 ms in get_analysis_df("ALS == 'ALS'", "ALS_control == 'ALS_c..., '')
240.33 ms in Loading dataframe for DOID_332
Matching sources: 11
Excluded 1 source as single-class
Stats: 10 sources, 10 series, 7 platforms, 171 samples
Loading data and calculating fold change for DOID_332
Loading data for GSE18920, id = 2315
  3.28 s in get_probes(54)
 20.33 s in load_gse( series_id platform_id..., 2315, False)
  6.51 s in get_gene_fold_change(<Gse GSE18920>, False, 0, None)
Loading data for GSE19332, id = 2413
  1.21 s in get_probes(4)
  2.60 s in load_gse( series_id platform_id..., 2413, False)
397.74 ms in get_gene_fold_change(<Gse GSE19332>, False, 0, None)
Loading data for GSE26276, id = 4160
917.01 ms in get_probes(283)
  1.85 s in load_gse( series_id platform_id..., 4160, False)
230.39 ms in get_gene_fold_change(<Gse GSE26276>, False, 0, None)
Loading data for GSE28253, id = 4659
  1.20 s in get_probes(10)
  3.58 s in load_gse( series_id platform_id..., 4659,

 DOID:332 amyotrophic lateral sclerosis
43

Started DOID_3393 analysis
386.49 ms in get_analysis_df("CAD == 'CAD'", "CAD_control == 'CAD_c..., '')
387.34 ms in Loading dataframe for DOID_3393
Matching sources: 8
Excluded 1 source as single-class
Stats: 7 sources, 7 series, 6 platforms, 524 samples
Loading data and calculating fold change for DOID_3393
Loading data for GSE10195, id = 57
  1.08 s in get_probes(26)
  4.83 s in load_gse( series_id platform_id..., 57, False)
546.55 ms in get_gene_fold_change(<Gse GSE10195>, False, 0, None)
Loading data for GSE12288, id = 598
919.80 ms in get_probes(2)
  7.65 s in load_gse( series_id platform_id..., 598, False)
  1.37 s in get_gene_fold_change(<Gse GSE12288>, False, 0, None)
Loading data for GSE18608, id = 2234
  1.32 s in get_probes(4)
  2.61 s in load_gse( series_id platform_id..., 2234, False)
529.09 ms in get_gene_fold_change(<Gse GSE18608>, False, 0, None)
Loading data for GSE23561, id = 3492
  1.21 s in get_probes(780)
  3.93 s in load_gse( series_id platform_id..., 3492, False)

 DOID:3393 coronary artery disease
44

Started DOID_418 analysis
244.28 ms in get_analysis_df("systemic_scleroderma ..., "systemic_scleroderma_..., '')
245.13 ms in Loading dataframe for DOID_418
Matching sources: 10
Excluded 4 sources as single-class
Excluded 2 sources by min samples
Stats: 4 sources, 4 series, 3 platforms, 157 samples
Loading data and calculating fold change for DOID_418
Loading data for GSE3886, id = 7181
101.21 ms in get_probes(1119)
  1.19 s in load_gse( series_id platform_id..., 7181, False)
363.93 ms in get_gene_fold_change(<Gse GSE3886>, False, 0, None)
Loading data for GSE4385, id = 8361
127.95 ms in get_probes(1119)
  1.20 s in load_gse( series_id platform_id..., 8361, False)
 70.71 ms in get_gene_fold_change(<Gse GSE4385>, False, 0, None)
Loading data for GSE45536, id = 8688
  1.25 s in get_probes(4)
 10.78 s in load_gse( series_id platform_id..., 8688, False)
  1.61 s in get_gene_fold_change(<Gse GSE45536>, False, 0, None)
Loading data for GSE63903, id = 13309
  1.21 s in get_probes(674)
  2.85 

 DOID:418 systemic scleroderma
45

Started DOID_4481 analysis
300.21 ms in get_analysis_df("allergic_rhinitis == ..., "allergic_rhinitis_con..., '')
301.08 ms in Loading dataframe for DOID_4481
Matching sources: 12
Stats: 12 sources, 11 series, 10 platforms, 352 samples
Loading data and calculating fold change for DOID_4481
Loading data for GSE19187, id = 2377
765.00 ms in get_probes(283)
  3.20 s in load_gse( series_id platform_id..., 2377, False)
377.25 ms in get_gene_fold_change(<Gse GSE19187>, False, 0, None)
Loading data for GSE1964, id = 2481
161.58 ms in get_probes(654)
635.82 ms in load_gse( series_id platform_id..., 2481, False)
 67.70 ms in get_gene_fold_change(<Gse GSE1964>, False, 0, None)
Loading data for GSE37146, id = 6813
952.29 ms in get_probes(17)
  3.32 s in load_gse( series_id platform_id..., 6813, False)
344.17 ms in get_gene_fold_change(<Gse GSE37146>, False, 0, None)
Loading data for GSE37155, id = 6815
  1.02 s in get_probes(674)
  2.51 s in load_gse( series_id platform_id..., 6815, False)
309.24

 DOID:4481 allergic rhinitis
46

Started DOID_4606 analysis
349.83 ms in get_analysis_df("bile_duct_cancer == '..., "bile_duct_cancer_cont..., '')
350.68 ms in Loading dataframe for DOID_4606
Matching sources: 0
368.21 ms in perform_analysis({'analysis_name': 'DOI...)


 DOID:4606 bile duct cancer
'Int64Index' object has no attribute 'labels'
47

Started DOID_5419 analysis
635.49 ms in get_analysis_df("schizophrenia == 'sch..., "Psychiatric_Control =..., '')
636.33 ms in Loading dataframe for DOID_5419
Matching sources: 27
Excluded 4 sources as single-class
Stats: 23 sources, 22 series, 12 platforms, 1420 samples
Loading data and calculating fold change for DOID_5419
Loading data for GSE12649, id = 702
706.18 ms in get_probes(2)
  4.64 s in load_gse( series_id platform_id..., 702, False)
611.05 ms in get_gene_fold_change(<Gse GSE12649>, False, 0, None)
Loading data for GSE12654, id = 704
662.98 ms in get_probes(7)
  1.94 s in load_gse( series_id platform_id..., 704, False)
185.21 ms in get_gene_fold_change(<Gse GSE12654>, False, 0, None)
Loading data for GSE17612, id = 1980
  1.27 s in get_probes(4)
  5.54 s in load_gse( series_id platform_id..., 1980, False)
871.32 ms in get_gene_fold_change(<Gse GSE17612>, False, 0, None)
Loading data for GSE18312, id = 2160
  1.14 s in get_probes(280)
  2.50 s in load_gse( series_id platform

 DOID:5419 schizophrenia
zero-size array to reduction operation maximum which has no identity

Started DOID_635 analysis
478.75 ms in get_analysis_df("HIV_Stage1 == 'HIV_St..., "HIV_healthycontrol ==..., '')
479.60 ms in Loading dataframe for DOID_635
Matching sources: 27
Excluded 22 sources as single-class
Excluded 1 source by min samples
Stats: 4 sources, 4 series, 4 platforms, 188 samples
Loading data and calculating fold change for DOID_635
Loading data for GSE16363, id = 1658
  1.21 s in get_probes(4)
  5.47 s in load_gse( series_id platform_id..., 1658, False)
883.49 ms in get_gene_fold_change(<Gse GSE16363>, False, 0, None)
Loading data for GSE30310, id = 5201
333.79 ms in get_probes(616)
  1.68 s in load_gse( series_id platform_id..., 5201, False)
150.36 ms in get_gene_fold_change(<Gse GSE30310>, False, 0, None)
Loading data for GSE52900, id = 9963
  1.17 s in get_probes(389)
  2.94 s in load_gse( series_id platform_id..., 9963, False)
324.08 ms in get_gene_fold_change(<Gse GSE52900>, False, 0, None)
Loading data for GSE6740, id = 11026
893.68 ms in get_probes(2)
  3.17 


48 DOID:635 acquired immunodeficiency syndrome
49

Started DOID_7147 analysis
179.97 ms in get_analysis_df("ankylosing_spondyliti..., "ankylosing_spondyliti..., '')
180.77 ms in Loading dataframe for DOID_7147
Matching sources: 3
Excluded 1 source by min samples
Stats: 2 sources, 2 series, 2 platforms, 65 samples
Loading data and calculating fold change for DOID_7147
Loading data for GSE11886, id = 479
  1.15 s in get_probes(4)
  4.75 s in load_gse( series_id platform_id..., 479, False)
659.79 ms in get_gene_fold_change(<Gse GSE11886>, False, 0, None)
Loading data for GSE25101, id = 3902
863.48 ms in get_probes(389)
  2.16 s in load_gse( series_id platform_id..., 3902, False)
438.96 ms in get_gene_fold_change(<Gse GSE25101>, False, 0, None)
  8.14 s in Load/fold for DOID_7147
Meta-Analyzing DOID_7147
 28.80 s in meta analysis of real data for DOID_7147
 50.41 ms in meta analysis of permutations for DOID_7147
 15.97 mks in get_balanced_permutations({'analysis_name': 'DOI...,  C H H_lower H_upper I..., Empty DataFrame Column...)
 28.94 s

 DOID:7147 ankylosing spondylitis
50

Started DOID_7148 analysis
336.34 ms in get_analysis_df("RA == 'RA'", "RA_control == 'RA_con..., '')
337.18 ms in Loading dataframe for DOID_7148
Matching sources: 21
Excluded 21 sources as single-class
348.63 ms in perform_analysis({'analysis_name': 'DOI...)


 DOID:7148 rheumatoid arthritis
'Int64Index' object has no attribute 'labels'

Started DOID_7693 analysis
168.72 ms in get_analysis_df("AAA == 'AAA'", "AAA_control == 'AAA_c..., '')
169.54 ms in Loading dataframe for DOID_7693
Matching sources: 5
Excluded 1 source by min samples
Stats: 4 sources, 4 series, 3 platforms, 104 samples
Loading data and calculating fold change for DOID_7693
Loading data for GSE21803, id = 3020
 64.65 ms in get_probes(724)
243.24 ms in load_gse( series_id platform_id..., 3020, False)
 55.55 ms in get_gene_fold_change(<Gse GSE21803>, False, 0, None)
Loading data for GSE47472, id = 9057
  1.23 s in get_probes(674)
  3.10 s in load_gse( series_id platform_id..., 9057, False)
438.02 ms in get_gene_fold_change(<Gse GSE47472>, False, 0, None)
Loading data for GSE7084, id = 11146
950.61 ms in get_probes(39)
  2.53 s in load_gse( series_id platform_id..., 11146, False)
240.00 ms in get_gene_fold_change(<Gse GSE7084>, False, 0, None)
Loading data for GSE57691, id = 12645
  1.05 s in get_probes(674)
  5.84 s in load_gse( series_id platform_id...,


51 DOID:7693 abdominal aortic aneurysm
52

Started DOID_784 analysis
161.59 ms in get_analysis_df("CKD == 'CKD'", "CKD_Control == 'CKD_C..., '')
162.42 ms in Loading dataframe for DOID_784
Matching sources: 2
Excluded 1 source as single-class
FAIL Can't perform meta-analysis on single source
181.55 ms in perform_analysis({'analysis_name': 'DOI...)


 DOID:784 chronic kidney failure
53

Started DOID_824 analysis
133.08 ms in get_analysis_df("periodontitis == 'per..., "periodontitis_control..., '')
133.92 ms in Loading dataframe for DOID_824
Matching sources: 3
Excluded 1 source by min samples
Stats: 2 sources, 2 series, 2 platforms, 33 samples
Loading data and calculating fold change for DOID_824
Loading data for GSE27993, id = 4587
857.93 ms in get_probes(283)
  1.83 s in load_gse( series_id platform_id..., 4587, False)
239.18 ms in get_gene_fold_change(<Gse GSE27993>, False, 0, None)
Loading data for GSE43525, id = 8303
  1.17 s in get_probes(674)
  3.56 s in load_gse( series_id platform_id..., 8303, False)
413.84 ms in get_gene_fold_change(<Gse GSE43525>, False, 0, None)
  6.12 s in Load/fold for DOID_824
Meta-Analyzing DOID_824
 45.06 s in meta analysis of real data for DOID_824
 39.23 ms in meta analysis of permutations for DOID_824
 20.98 mks in get_balanced_permutations({'analysis_name': 'DOI...,  C H H_lower H_upper I..., Empty DataFrame Column...)
 45.19 s in

 DOID:824 periodontitis
54

Started DOID_8577 analysis
362.75 ms in get_analysis_df("UC == 'UC'", "UC_control == 'UC_con..., '')
363.59 ms in Loading dataframe for DOID_8577
Matching sources: 18
Excluded 1 source as single-class
Stats: 17 sources, 17 series, 12 platforms, 838 samples
Loading data and calculating fold change for DOID_8577
Loading data for GSE10191, id = 56
739.80 ms in get_probes(25)
  2.02 s in load_gse( series_id platform_id..., 56, False)
272.20 ms in get_gene_fold_change(<Gse GSE10191>, False, 0, None)
Loading data for GSE10616, id = 169
890.92 ms in get_probes(25)
  3.58 s in load_gse( series_id platform_id..., 169, False)
293.58 ms in get_gene_fold_change(<Gse GSE10616>, False, 0, None)
Loading data for GSE11223, id = 317
  1.02 s in get_probes(26)
 13.67 s in load_gse( series_id platform_id..., 317, False)
  1.86 s in get_gene_fold_change(<Gse GSE11223>, False, 0, None)
Loading data for GSE1710, id = 1852
794.39 ms in get_probes(18)
  3.17 s in load_gse( series_id platform_id..., 1852, Fals

 DOID:8577 ulcerative colitis
55

Started DOID_8778 analysis
436.67 ms in get_analysis_df("CD == 'CD'", "CD_control == 'CD_con..., '')
437.49 ms in Loading dataframe for DOID_8778
Matching sources: 20
Excluded 1 source as single-class
Excluded 7 sources by min samples
Stats: 12 sources, 12 series, 11 platforms, 743 samples
Loading data and calculating fold change for DOID_8778
Loading data for GSE10616, id = 169
951.35 ms in get_probes(25)
  3.60 s in load_gse( series_id platform_id..., 169, False)
412.19 ms in get_gene_fold_change(<Gse GSE10616>, False, 0, None)
Loading data for GSE1710, id = 1852
827.64 ms in get_probes(18)
  3.14 s in load_gse( series_id platform_id..., 1852, False)
227.16 ms in get_gene_fold_change(<Gse GSE1710>, False, 0, None)
Loading data for GSE17594, id = 1976
  1.11 s in get_probes(10)
  2.58 s in load_gse( series_id platform_id..., 1976, False)
344.50 ms in get_gene_fold_change(<Gse GSE17594>, False, 0, None)
Loading data for GSE20881, id = 2803
991.35 ms in get_probes(26)
 11.40 s in load_g

 DOID:8778 Crohn's disease
56

Started DOID_8893 analysis
335.54 ms in get_analysis_df("psoriasis == 'psorias..., "psoriasis_control == ..., '')
336.40 ms in Loading dataframe for DOID_8893
Matching sources: 10
Excluded 2 sources as single-class
Excluded 1 source by min samples
Stats: 7 sources, 7 series, 6 platforms, 313 samples
Loading data and calculating fold change for DOID_8893
Loading data for GSE32407, id = 5703
975.53 ms in get_probes(9)
  3.40 s in load_gse( series_id platform_id..., 5703, False)
419.71 ms in get_gene_fold_change(<Gse GSE32407>, False, 0, None)
Loading data for GSE47598, id = 9087
  1.05 s in get_probes(674)
  3.44 s in load_gse( series_id platform_id..., 9087, False)
429.91 ms in get_gene_fold_change(<Gse GSE47598>, False, 0, None)
Loading data for GSE57225, id = 10556
  1.02 s in get_probes(1010)
  6.17 s in load_gse( series_id platform_id..., 10556, False)
580.42 ms in get_gene_fold_change(<Gse GSE57225>, False, 0, None)
Loading data for GSE57383, id = 12627
  1.36 s in get_probes(901)


 DOID:8893 psoriasis
zero-size array to reduction operation maximum which has no identity
57

Started DOID_8986 analysis
149.87 ms in get_analysis_df("narcolepsy == 'narcol..., "narcolepsy_control ==..., '')
150.70 ms in Loading dataframe for DOID_8986
Matching sources: 1
FAIL Can't perform meta-analysis on single source
169.90 ms in perform_analysis({'analysis_name': 'DOI...)


 DOID:8986 narcolepsy
58

Started DOID_9008 analysis
198.31 ms in get_analysis_df("PsA == 'PsA'", "PsA_control == 'PsA_c..., '')
199.15 ms in Loading dataframe for DOID_9008
Matching sources: 3
Stats: 3 sources, 3 series, 2 platforms, 141 samples
Loading data and calculating fold change for DOID_9008
Loading data for GSE57383, id = 12627
  1.41 s in get_probes(901)
 10.38 s in load_gse( series_id platform_id..., 12627, False)
834.15 ms in get_gene_fold_change(<Gse GSE57383>, False, 0, None)
Loading data for GSE57405, id = 12629
  1.43 s in get_probes(901)
 10.22 s in load_gse( series_id platform_id..., 12629, False)
820.03 ms in get_gene_fold_change(<Gse GSE57405>, False, 0, None)
Loading data for GSE61281, id = 12998
  1.07 s in get_probes(53)
  5.20 s in load_gse( series_id platform_id..., 12998, False)
456.96 ms in get_gene_fold_change(<Gse GSE61281>, False, 0, None)
 28.10 s in Load/fold for DOID_9008
Meta-Analyzing DOID_9008
 55.08 s in meta analysis of real data for DOID_9008
 72.14 ms in meta analysis of

 DOID:9008 psoriatic arthritis
59

Started DOID_9074 analysis
580.93 ms in get_analysis_df("SLE == 'SLE'", "SLE_control == 'SLE_c..., '')
581.75 ms in Loading dataframe for DOID_9074
Matching sources: 21
Excluded 21 sources as single-class
593.50 ms in perform_analysis({'analysis_name': 'DOI...)


 DOID:9074 systemic lupus erythematosus
'Int64Index' object has no attribute 'labels'

Started DOID_9206 analysis
236.54 ms in get_analysis_df("BE_Tissue == 'BE_Tiss..., "EAC_Non_Tumor == 'EAC..., '')
237.37 ms in Loading dataframe for DOID_9206
Matching sources: 16
Excluded 10 sources as single-class
Excluded 2 sources by min samples
Stats: 4 sources, 4 series, 3 platforms, 155 samples
Loading data and calculating fold change for DOID_9206
Loading data for GSE34619, id = 6215
944.92 ms in get_probes(283)
  2.48 s in load_gse( series_id platform_id..., 6215, False)
284.45 ms in get_gene_fold_change(<Gse GSE34619>, False, 0, None)
Loading data for GSE36223, id = 6608
939.19 ms in get_probes(9)
  1.96 s in load_gse( series_id platform_id..., 6608, False)
362.50 ms in get_gene_fold_change(<Gse GSE36223>, False, 0, None)
Loading data for GSE39491, id = 7332
876.84 ms in get_probes(9)
  5.14 s in load_gse( series_id platform_id..., 7332, False)
577.12 ms in get_gene_fold_change(<Gse GSE39491>, False, 0, None)
Loading data for GSE6059, id = 10854
 63.37 ms in get_probes(1512)



60 DOID:9206 Barrett's esophagus
61

Started DOID_9296 analysis
144.14 ms in get_analysis_df("cleft_lip == 'cleft_l..., "cleft_lip_control == ..., '')
145.01 ms in Loading dataframe for DOID_9296
Matching sources: 1
FAIL Can't perform meta-analysis on single source
164.53 ms in perform_analysis({'analysis_name': 'DOI...)


 DOID:9296 cleft lip
62

Started DOID_9352 analysis
417.92 ms in get_analysis_df("T2D == 'T2D'", "T2D_control == 'T2D_c..., '')
418.72 ms in Loading dataframe for DOID_9352
Matching sources: 36
Excluded 4 sources as single-class
Excluded 1 source by min samples
Stats: 31 sources, 25 series, 19 platforms, 854 samples
Loading data and calculating fold change for DOID_9352
Loading data for GSE121, id = 539
425.79 ms in get_probes(338)
458.62 ms in get_probes(339)
367.50 ms in get_probes(22)
463.59 ms in get_probes(340)
426.80 ms in get_probes(341)
  4.06 s in load_gse( series_id platform_id..., 539, False)
373.92 ms in get_gene_fold_change(<Gse GSE121>, False, 0, None)
Loading data for GSE12643, id = 697
675.59 ms in get_probes(7)
  1.35 s in load_gse( series_id platform_id..., 697, False)
171.54 ms in get_gene_fold_change(<Gse GSE12643>, False, 0, None)
Loading data for GSE13015, id = 796
983.64 ms in get_probes(343)
  1.10 s in get_probes(389)
 11.16 s in load_gse( series_id platform_id..., 796, False)
553.52 m

 DOID:9352 type 2 diabetes mellitus
zero-size array to reduction operation maximum which has no identity

Started DOID_9744 analysis
391.96 ms in get_analysis_df("T1D == 'T1D'", "T1D_control == 'T1D_c..., '')
392.79 ms in Loading dataframe for DOID_9744
Matching sources: 13
Excluded 4 sources as single-class
Stats: 9 sources, 8 series, 6 platforms, 384 samples
Loading data and calculating fold change for DOID_9744
Loading data for GSE10586, id = 157
  1.25 s in get_probes(4)
  4.10 s in load_gse( series_id platform_id..., 157, False)
569.46 ms in get_gene_fold_change(<Gse GSE10586>, False, 0, None)
Loading data for GSE11907, id = 488
731.48 ms in get_probes(50)
  7.94 s in load_gse( series_id platform_id..., 488, False)
232.31 ms in get_gene_fold_change(<Gse GSE11907>, False, 0, None)
Loading data for GSE14368, id = 1122
922.85 ms in get_probes(331)
  1.79 s in load_gse( series_id platform_id..., 1122, False)
249.47 ms in get_gene_fold_change(<Gse GSE14368>, False, 0, None)
Loading data for GSE24147, id = 3647
  1.33 s in get_probes(4)
  5.87 s in load_gse( series_id platform_id..., 3647, 


63 DOID:9744 type 1 diabetes mellitus
64

Started DOID_986 analysis
172.14 ms in get_analysis_df("alopecia_areata == 'a..., "alopecia_areata_contr..., '')
172.96 ms in Loading dataframe for DOID_986
Matching sources: 2
Stats: 2 sources, 2 series, 1 platforms, 17 samples
Loading data and calculating fold change for DOID_986
Loading data for GSE45512, id = 8684
  1.34 s in get_probes(4)
  2.94 s in load_gse( series_id platform_id..., 8684, False)
403.15 ms in get_gene_fold_change(<Gse GSE45512>, False, 0, None)
Loading data for GSE58573, id = 10697
  1.13 s in get_probes(4)
  2.42 s in load_gse( series_id platform_id..., 10697, False)
374.16 ms in get_gene_fold_change(<Gse GSE58573>, False, 0, None)
  6.23 s in Load/fold for DOID_986
Meta-Analyzing DOID_986
 48.12 s in meta analysis of real data for DOID_986
 56.62 ms in meta analysis of permutations for DOID_986
 19.79 mks in get_balanced_permutations({'analysis_name': 'DOI...,  C H H_lower H_upper I..., Empty DataFrame Column...)
 48.26 s in Meta analysis for DOID_986
DONE DOI

 DOID:986 alopecia areata
65

Started DOID_9970 analysis
455.93 ms in get_analysis_df("obesity == 'obesity'", "obesity_control == 'o..., '')
456.76 ms in Loading dataframe for DOID_9970
Matching sources: 43
Excluded 23 sources as single-class
Stats: 20 sources, 14 series, 16 platforms, 561 samples
Loading data and calculating fold change for DOID_9970
Loading data for GSE12050, id = 521
  1.24 s in get_probes(332)
  3.25 s in load_gse( series_id platform_id..., 521, False)
426.46 ms in get_gene_fold_change(<Gse GSE12050>, False, 0, None)
Loading data for GSE15524, id = 1461
773.42 ms in get_probes(500)
  2.03 s in load_gse( series_id platform_id..., 1461, False)
253.77 ms in get_gene_fold_change(<Gse GSE15524>, False, 0, None)
Loading data for GSE2508, id = 3892
653.44 ms in get_probes(7)
679.19 ms in get_probes(8)
557.69 ms in get_probes(13)
497.70 ms in get_probes(14)
370.64 ms in get_probes(15)
522.93 ms in get_probes(16)
  7.71 s in load_gse( series_id platform_id..., 3892, False)
786.04 ms in get_gene_fold_cha

 DOID:9970 obesity
Loading URL ftp://ftp.ncbi.nih.gov/pub/geo/DATA/SeriesMatrix/GSE60403/GSE60403_series_matrix.txt.gz ...
Cache to

  1.38 s in get_probes(4)
  7.96 s in load_gse( series_id platform_id..., 10836, False)
484.48 ms in get_gene_fold_change(<Gse GSE60403>, False, 0, None)
Loading data for GSE9624, id = 11780


 data/geo_mirror/DATA/SeriesMatrix/GSE60403/GSE60403_series_matrix.txt.gz
Loading URL ftp://ftp.ncbi.nih.gov/pub/geo/DATA/SeriesMatrix/GSE9624/GSE9624_series_matrix.txt.gz ...
Cache to

  1.22 s in get_probes(4)
  5.88 s in load_gse( series_id platform_id..., 11780, False)
408.65 ms in get_gene_fold_change(<Gse GSE9624>, False, 0, None)
 63.30 s in Load/fold for DOID_9970
Meta-Analyzing DOID_9970
 63.63 s in meta analysis of real data for DOID_9970
269.45 ms in meta analysis of permutations for DOID_9970
 20.03 mks in get_balanced_permutations({'analysis_name': 'DOI...,  C H H_lower H_upper \..., Empty DataFrame Column...)
 63.99 s in Meta analysis for DOID_9970
DONE DOID_9970 analysis
127.78 s in perform_analysis({'analysis_name': 'DOI...)


 data/geo_mirror/DATA/SeriesMatrix/GSE9624/GSE9624_series_matrix.txt.gz


In [6]:
# Tabulate the failures collected by the analysis loop. Each entry of `errors`
# is a dict with the keys 'slim_id', 'name' and 'error'. The columns are named
# explicitly so the frame keeps a `slim_id` column even when `errors` is empty;
# a column-less frame would make the merge below raise for lack of a common key.
error_df = pandas.DataFrame(errors, columns=['error', 'name', 'slim_id'])
# Attach the query definition of every failed analysis. The join key is spelled
# out rather than inferred from whichever column names happen to overlap.
error_df = query_df.merge(error_df, on='slim_id')
error_df

Unnamed: 0,slim_id,slim_name,case_query,control_query,error,name
0,DOID:0050741,alcohol dependence,alcoholism == 'alcoholism',alcoholism_control == 'alcoholism_control',zero-size array to reduction operation maximum...,DOID_0050741
1,DOID:10283,prostate cancer,PC == 'PC' or PC_tissue_case == 'PC_tissue_case',PC_control == 'PC_control' or PC_tissue_contro...,zero-size array to reduction operation maximum...,DOID_10283
2,DOID:11054,urinary bladder cancer,bladder_cancer == 'bladder_cancer',bladder_cancer_control == 'bladder_cancer_cont...,'Int64Index' object has no attribute 'labels',DOID_11054
3,DOID:11714,gestational diabetes,GDM == 'GDM',GDM_control == 'GDM_control',zero-size array to reduction operation maximum...,DOID_11714
4,DOID:12849,autistic disorder,autism == 'autism',autism_control == 'autism_control',zero-size array to reduction operation maximum...,DOID_12849
5,DOID:14221,metabolic syndrome X,MetS == 'MetS',MetS_Control == 'MetS_Control','Int64Index' object has no attribute 'labels',DOID_14221
6,DOID:1686,glaucoma,glaucoma == 'glaucoma',glaucoma_control == 'glaucoma_control',zero-size array to reduction operation maximum...,DOID_1686
7,DOID:2377,multiple sclerosis,MS == 'MS',MS_control == 'MS_control',zero-size array to reduction operation maximum...,DOID_2377
8,DOID:2841,asthma,asthma == 'asthma',asthma_control == 'asthma_control',zero-size array to reduction operation maximum...,DOID_2841
9,DOID:2998,testicular cancer,testicular_cancer == 'testicular_cancer',testicular_cancer_control == 'testicular_cance...,'Int64Index' object has no attribute 'labels',DOID_2998


In [7]:
# Save the table of failed analyses next to the other outputs; the row index
# is omitted from the file.
error_df.to_csv(
    path_or_buf='data/errors.csv',
    index=False,
)