# Run stargeo queries for hetio-ind diseases

In [1]:
import logging
import os
import gzip

import pandas
import easydict

import starapi.analysis
import starapi.conf

In [2]:
# Stop records emitted on starapi's analysis logger from propagating to
# ancestor loggers; only handlers attached directly to it will receive them.
starapi.analysis.logger.propagate = False
# Configure starapi with the string 'data'.
# NOTE(review): presumably this is the data/cache directory — confirm against starapi.conf.
starapi.conf.configure('data')

In [3]:
# Load the per-disease query table (tab-separated). The loop below reads the
# slim_id, slim_name, case_query and control_query columns from each row.
query_df = pandas.read_table('data/queries.tsv')
# NOTE(review): the preview shows an 'Unnamed: 0' column, which suggests the
# file was written with its index; index_col=0 may be appropriate — confirm.
query_df.head()

Unnamed: 0,slim_id,slim_name,case_query,control_query
0,DOID:0050156,idiopathic pulmonary fibrosis,IPF == 'IPF',IPF_control == 'IPF_control'
1,DOID:0050741,alcohol dependence,alcoholism == 'alcoholism',alcoholism_control == 'alcoholism_control'
2,DOID:0050742,nicotine dependence,Smoker == 'Smoker',Smoker_control == 'Smoker_control'
3,DOID:1024,leprosy,borderline_leprosy == 'borderline_leprosy' or ...,leprosy_control == 'leprosy_control'
4,DOID:10283,prostate cancer,PC == 'PC',PC_control == 'PC_control' or PC_tissue_contro...


In [4]:
# Run one starapi analysis per row of query_df and write the resulting tables
# to data/doslim/<slim_id with ':' replaced by '_'>/.
#
# Failures are deliberately non-fatal: the exception is recorded in `errors`,
# printed, and the loop moves on to the next disease.
errors = list()

for i, row in query_df.iterrows():
    # Single-argument print() emits the same text under Python 2 and Python 3.
    print('{} {} {}'.format(i, row.slim_id, row.slim_name))

    # ':' is replaced so the identifier can be used as a directory name.
    name = row.slim_id.replace(':', '_')
    params = easydict.EasyDict(
        analysis_name = name,
        case_query = row.case_query,
        control_query = row.control_query,
        modifier_query = '',
        min_samples = 3,
    )

    directory = os.path.join('data', 'doslim', params.analysis_name)
    if not os.path.isdir(directory):
        # makedirs (not mkdir) so a missing 'data/doslim' parent is created too.
        os.makedirs(directory)

    # Capture starapi's log records for this disease in its own log.txt.
    logfile = logging.FileHandler(os.path.join(directory, 'log.txt'))
    logfile.setLevel(logging.DEBUG)
    starapi.analysis.logger.addHandler(logfile)
    try:
        sample_df, fold_df, balanced_perm_df, perm_df = starapi.analysis.perform_analysis(params)
        # Turn the index into ordinary columns so it is kept when the tables
        # are written with index=False below.
        for df in fold_df, balanced_perm_df:
            if df is not None:
                df.reset_index(inplace=True)

    except Exception as e:
        # Best effort: discard any partial results so nothing is written for
        # this disease, remember the failure, and continue with the next row.
        sample_df, fold_df, balanced_perm_df, perm_df = None, None, None, None
        errors.append({'slim_id': row.slim_id, 'name': name, 'error': e})
        print(e)
    finally:
        # Always detach AND close the handler; otherwise every iteration
        # leaves an open file descriptor on its log.txt.
        starapi.analysis.logger.removeHandler(logfile)
        logfile.close()

    # write files
    files = [
        (sample_df, 'samples.tsv', open),
        (fold_df, 'fold_change.tsv.gz', gzip.open),
        (balanced_perm_df, 'balanced_permutation.tsv.gz', gzip.open),
    ]
    for df, filename, opener in files:
        # Tables that are None (failed or not produced) are skipped.
        if df is None:
            continue
        path = os.path.join(directory, filename)
        with opener(path, 'wt') as write_file:
            df.to_csv(write_file, index=False, sep='\t', float_format='%.5g')


Started DOID_0050156 analysis
A value is trying to be set on a copy of a slice from a DataFrame

See the the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  self._setitem_with_indexer(indexer, value)
382.78 ms in get_analysis_df("IPF == 'IPF'", "IPF_control == 'IPF_c..., '')
383.63 ms in Loading dataframe for DOID_0050156
Matching sources: 12
Stats: 12 sources, 12 series, 8 platforms, 679 samples
Loading data and calculating fold change for DOID_0050156
Loading data for GSE10667, id = 179
324.39 ms in get_probes(10)
  3.86 s in load_gse( series_id platform_id..., 179, False)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  samples['subset'] = "NA"
644.97 ms in get_gene_fold_change(<Gse GSE10667>, False, 0, None)
Loading

0 DOID:0050156 idiopathic pulmonary fibrosis
1

Started DOID_0050741 analysis
296.07 ms in get_analysis_df("alcoholism == 'alcoho..., "alcoholism_control ==..., '')
296.93 ms in Loading dataframe for DOID_0050741
Matching sources: 8
Stats: 8 sources, 8 series, 8 platforms, 435 samples
Loading data and calculating fold change for DOID_0050741
Loading data for GSE10356, id = 95
 32.20 ms in get_probes(36)
  3.39 s in load_gse( series_id platform_id..., 95, False)
 40.47 ms in get_gene_fold_change(<Gse GSE10356>, False, 0, None)
Loading data for GSE20568, id = 2733
 32.07 ms in get_probes(689)
474.04 ms in load_gse( series_id platform_id..., 2733, False)
 53.62 ms in get_gene_fold_change(<Gse GSE20568>, False, 0, None)
Loading data for GSE29555, id = 5002
284.94 ms in get_probes(389)
  3.55 s in load_gse( series_id platform_id..., 5002, False)
  1.02 s in get_gene_fold_change(<Gse GSE29555>, False, 0, None)
Loading data for GSE44456, id = 8480
246.75 ms in get_probes(283)
  2.14 s in load_gse( series_id platform_id..., 8480, False)
449

 DOID:0050741 alcohol dependence
2

Started DOID_0050742 analysis
317.50 ms in get_analysis_df("Smoker == 'Smoker'", "Smoker_control == 'Sm..., '')
318.34 ms in Loading dataframe for DOID_0050742
Matching sources: 22
Excluded 3 sources as single-class
Stats: 19 sources, 17 series, 6 platforms, 1423 samples
Loading data and calculating fold change for DOID_0050742
Loading data for GSE10006, id = 4
300.47 ms in get_probes(4)
  7.76 s in load_gse( series_id platform_id..., 4, False)
  1.28 s in get_gene_fold_change(<Gse GSE10006>, False, 0, None)
Loading data for GSE11784, id = 450
307.23 ms in get_probes(4)
 13.39 s in load_gse( series_id platform_id..., 450, False)
  2.20 s in get_gene_fold_change(<Gse GSE11784>, False, 0, None)
Loading data for GSE11906, id = 487
302.13 ms in get_probes(4)
 18.47 s in load_gse( series_id platform_id..., 487, False)
  3.18 s in get_gene_fold_change(<Gse GSE11906>, False, 0, None)
Loading data for GSE19407, id = 2430
301.10 ms in get_probes(4)
 11.31 s in load_gse( series_id platform_id...

 DOID:0050742 nicotine dependence
3

Started DOID_1024 analysis
231.72 ms in get_analysis_df("borderline_leprosy ==..., "leprosy_control == 'l..., '')
232.57 ms in Loading dataframe for DOID_1024
Matching sources: 1
Excluded 1 source by min samples
FAIL Can't perform meta-analysis on no data
253.45 ms in perform_analysis({'analysis_name': 'DOI...)


 DOID:1024 leprosy
4

Started DOID_10283 analysis
321.45 ms in get_analysis_df("PC == 'PC'", "PC_control == 'PC_con..., '')
322.32 ms in Loading dataframe for DOID_10283
Matching sources: 16
Excluded 13 sources as single-class
Stats: 3 sources, 3 series, 2 platforms, 464 samples
Loading data and calculating fold change for DOID_10283
Loading data for GSE11682, id = 437
294.04 ms in get_probes(10)
  3.42 s in load_gse( series_id platform_id..., 437, False)
529.67 ms in get_gene_fold_change(<Gse GSE11682>, False, 0, None)
Loading data for GSE30994, id = 5362
291.50 ms in get_probes(10)
  1.36 s in load_gse( series_id platform_id..., 5362, False)
307.83 ms in get_gene_fold_change(<Gse GSE30994>, False, 0, None)
Loading data for GSE62872, id = 13217
 32.43 ms in get_probes(1787)
 12.17 s in load_gse( series_id platform_id..., 13217, False)
 75.38 ms in get_gene_fold_change(<Gse GSE62872>, False, 0, None)
 18.04 s in Load/fold for DOID_10283
Meta-Analyzing DOID_10283
 45.56 s in meta analysis of real data for DO

 DOID:10283 prostate cancer
5

Started DOID_10534 analysis
185.82 ms in get_analysis_df("stomach_cancer_tissue..., "stomach_cancer_tissue..., '')
186.67 ms in Loading dataframe for DOID_10534
Matching sources: 2
Excluded 1 source as single-class
FAIL Can't perform meta-analysis on single source
206.25 ms in perform_analysis({'analysis_name': 'DOI...)


 DOID:10534 stomach cancer
6

Started DOID_10608 analysis
167.00 ms in get_analysis_df("celiac == 'celiac'", "celiac_control == 'ce..., '')
167.81 ms in Loading dataframe for DOID_10608
Matching sources: 1
FAIL Can't perform meta-analysis on single source
186.97 ms in perform_analysis({'analysis_name': 'DOI...)


 DOID:10608 celiac disease
7

Started DOID_10652 analysis
207.76 ms in get_analysis_df("AD == 'AD'", "AD_Control == 'AD_Con..., '')
208.56 ms in Loading dataframe for DOID_10652
Matching sources: 4
Excluded 2 sources as single-class
Stats: 2 sources, 2 series, 2 platforms, 512 samples
Loading data and calculating fold change for DOID_10652
Loading data for GSE33000, id = 5836
280.21 ms in get_probes(297)
 45.25 s in load_gse( series_id platform_id..., 5836, False)
  3.21 s in get_gene_fold_change(<Gse GSE33000>, False, 0, None)
Loading data for GSE6613, id = 11004
246.82 ms in get_probes(2)
  3.31 s in load_gse( series_id platform_id..., 11004, False)
391.18 ms in get_gene_fold_change(<Gse GSE6613>, False, 0, None)
 52.27 s in Load/fold for DOID_10652
Meta-Analyzing DOID_10652
 27.64 s in meta analysis of real data for DOID_10652
 30.14 ms in meta analysis of permutations for DOID_10652
 20.98 mks in get_balanced_permutations({'analysis_name': 'DOI...,  C H H_lower H_upper I..., Empty DataFrame Column...)
 27.76 s 

 DOID:10652 Alzheimer's disease
8

Started DOID_10763 analysis
222.94 ms in get_analysis_df("PHT == 'PHT' or hyper..., "PHT_Control == 'PHT_C..., '')
223.79 ms in Loading dataframe for DOID_10763
Matching sources: 8
Excluded 1 source as single-class
Stats: 7 sources, 7 series, 6 platforms, 309 samples
Loading data and calculating fold change for DOID_10763
Loading data for GSE10767, id = 202
303.18 ms in get_probes(4)
  1.50 s in load_gse( series_id platform_id..., 202, False)
378.09 ms in get_gene_fold_change(<Gse GSE10767>, False, 0, None)
Loading data for GSE15197, id = 1359
261.54 ms in get_probes(53)
  2.61 s in load_gse( series_id platform_id..., 1359, False)
573.45 ms in get_gene_fold_change(<Gse GSE15197>, False, 0, None)
Loading data for GSE19617, id = 2473
321.62 ms in get_probes(53)
  2.25 s in load_gse( series_id platform_id..., 2473, False)
451.65 ms in get_gene_fold_change(<Gse GSE19617>, False, 0, None)
Loading data for GSE24988, id = 3871
251.95 ms in get_probes(283)
  5.23 s in load_gse( series_id platf

 DOID:10763 hypertension
9

Started DOID_10871 analysis
245.93 ms in get_analysis_df("AMD == 'AMD'", "AMD_control == 'AMD_c..., '')
246.76 ms in Loading dataframe for DOID_10871
Matching sources: 1
FAIL Can't perform meta-analysis on single source
267.23 ms in perform_analysis({'analysis_name': 'DOI...)


 DOID:10871 age related macular degeneration
10

Started DOID_1094 analysis
137.70 ms in get_analysis_df("ADHD == 'ADHD'", "ADHD_control == 'ADHD..., '')
138.47 ms in Loading dataframe for DOID_1094
Matching sources: 1
Excluded 1 source by min samples
FAIL Can't perform meta-analysis on no data
158.63 ms in perform_analysis({'analysis_name': 'DOI...)


 DOID:1094 attention deficit hyperactivity disorder
11

Started DOID_11054 analysis
170.28 ms in get_analysis_df("bladder_cancer == 'bl..., "bladder_cancer_contro..., '')
171.07 ms in Loading dataframe for DOID_11054
Matching sources: 0
179.72 ms in perform_analysis({'analysis_name': 'DOI...)


 DOID:11054 urinary bladder cancer
'Int64Index' object has no attribute 'labels'

Started DOID_11612 analysis
134.70 ms in get_analysis_df("PCOS == 'PCOS'", "PCOS_control == 'PCOS..., '')
135.51 ms in Loading dataframe for DOID_11612
Matching sources: 11
Stats: 11 sources, 10 series, 5 platforms, 215 samples
Loading data and calculating fold change for DOID_11612
Loading data for GSE10946, id = 254
305.89 ms in get_probes(4)
  3.00 s in load_gse( series_id platform_id..., 254, False)
566.42 ms in get_gene_fold_change(<Gse GSE10946>, False, 0, None)
Loading data for GSE1615, id = 1613
181.13 ms in get_probes(2)
123.35 ms in get_probes(50)
  1.70 s in load_gse( series_id platform_id..., 1613, False)
400.01 ms in get_gene_fold_change(<Gse GSE1615>, False, 0, None)
Loading data for GSE34526, id = 6193
305.32 ms in get_probes(4)
  1.91 s in load_gse( series_id platform_id..., 6193, False)
419.47 ms in get_gene_fold_change(<Gse GSE34526>, False, 0, None)
Loading data for GSE43264, id = 8243
157.00 ms in get_probes(1160)
844.47 ms in load_gse( series_id platform_id..., 824


12 DOID:11612 polycystic ovary syndrome
13

Started DOID_11714 analysis
192.23 ms in get_analysis_df("GDM == 'GDM'", "GDM_control == 'GDM_c..., '')
193.06 ms in Loading dataframe for DOID_11714
Matching sources: 4
Excluded 2 sources by min samples
Stats: 2 sources, 2 series, 2 platforms, 12 samples
Loading data and calculating fold change for DOID_11714
Loading data for GSE49524, id = 9415
240.55 ms in get_probes(482)
842.13 ms in load_gse( series_id platform_id..., 9415, False)
199.25 ms in get_gene_fold_change(<Gse GSE49524>, False, 0, None)
Loading data for GSE65737, id = 13510
 33.56 ms in get_probes(1802)
  1.26 s in load_gse( series_id platform_id..., 13510, False)
 47.68 ms in get_gene_fold_change(<Gse GSE65737>, False, 0, None)
  2.39 s in Load/fold for DOID_11714
Meta-Analyzing DOID_11714
  3.28 s in meta analysis of real data for DOID_11714
  3.28 s in Meta analysis for DOID_11714
  5.89 s in perform_analysis({'analysis_name': 'DOI...)


 DOID:11714 gestational diabetes
'mygene_sym'
14

Started DOID_11949 analysis
151.05 ms in get_analysis_df("CJD == 'CJD'", "CJD_control == 'CJD_c..., '')
151.92 ms in Loading dataframe for DOID_11949
Matching sources: 1
Excluded 1 source by min samples
FAIL Can't perform meta-analysis on no data
173.31 ms in perform_analysis({'analysis_name': 'DOI...)


 DOID:11949 Creutzfeldt-Jakob disease
15

Started DOID_12306 analysis
129.42 ms in get_analysis_df("vitiligo == 'vitiligo'", "vitiligo_control == '..., '')
130.25 ms in Loading dataframe for DOID_12306
Matching sources: 1
FAIL Can't perform meta-analysis on single source
149.17 ms in perform_analysis({'analysis_name': 'DOI...)


 DOID:12306 vitiligo
16

Started DOID_12365 analysis
236.10 ms in get_analysis_df("malaria == 'malaria'", "malaria_control == 'm..., '')
236.88 ms in Loading dataframe for DOID_12365
Matching sources: 8
Excluded 1 source as single-class
Excluded 1 source by min samples
Stats: 6 sources, 6 series, 5 platforms, 295 samples
Loading data and calculating fold change for DOID_12365
Loading data for GSE15221, id = 1372
254.79 ms in get_probes(17)
  2.99 s in load_gse( series_id platform_id..., 1372, False)
359.37 ms in get_gene_fold_change(<Gse GSE15221>, False, 0, None)
Loading data for GSE18323, id = 2164
243.61 ms in get_probes(9)
  7.55 s in load_gse( series_id platform_id..., 2164, False)
416.03 ms in get_gene_fold_change(<Gse GSE18323>, False, 0, None)
Loading data for GSE24849, id = 3840
303.64 ms in get_probes(4)
  3.38 s in load_gse( series_id platform_id..., 3840, False)
491.39 ms in get_gene_fold_change(<Gse GSE24849>, False, 0, None)
Loading data for GSE34404, id = 6155
282.66 ms in get_probes(674)
 11.88

 DOID:12365 malaria
17

Started DOID_12849 analysis
316.45 ms in get_analysis_df("autism == 'autism'", "autism_control == 'au..., '')
317.29 ms in Loading dataframe for DOID_12849
Matching sources: 14
Excluded 1 source as single-class
Excluded 1 source by min samples
Stats: 12 sources, 11 series, 7 platforms, 1302 samples
Loading data and calculating fold change for DOID_12849
Loading data for GSE15402, id = 1425
275.11 ms in get_probes(497)
  7.11 s in load_gse( series_id platform_id..., 1425, False)
816.99 ms in get_gene_fold_change(<Gse GSE15402>, False, 0, None)
Loading data for GSE15451, id = 1434
288.41 ms in get_probes(497)
  2.73 s in load_gse( series_id platform_id..., 1434, False)
362.30 ms in get_gene_fold_change(<Gse GSE15451>, False, 0, None)
Loading data for GSE18123, id = 2111
305.02 ms in get_probes(4)
243.94 ms in get_probes(283)
 19.05 s in load_gse( series_id platform_id..., 2111, False)
  1.91 s in get_gene_fold_change(<Gse GSE18123>, False, 0, None)
Loading data for GSE25507, id = 4004
30

 DOID:12849 autistic disorder
zero-size array to reduction operation maximum which has no identity

Started DOID_13223 analysis
165.77 ms in get_analysis_df("uterine_fibroid == 'u..., "uterine_fibroid_contr..., '')
166.60 ms in Loading dataframe for DOID_13223
Matching sources: 7
Excluded 1 source as single-class
Stats: 6 sources, 6 series, 5 platforms, 124 samples
Loading data and calculating fold change for DOID_13223
Loading data for GSE2724, id = 4435
221.18 ms in get_probes(2)
  1.06 s in load_gse( series_id platform_id..., 4435, False)
258.46 ms in get_gene_fold_change(<Gse GSE2724>, False, 0, None)
Loading data for GSE28945, id = 4848
 34.70 ms in get_probes(929)
227.28 ms in load_gse( series_id platform_id..., 4848, False)
 67.93 ms in get_gene_fold_change(<Gse GSE28945>, False, 0, None)
Loading data for GSE41386, id = 7773
307.46 ms in get_probes(4)
  1.68 s in load_gse( series_id platform_id..., 7773, False)
430.98 ms in get_gene_fold_change(<Gse GSE41386>, False, 0, None)
Loading data for GSE593, id = 10753
190.14 ms in get_probes(2)
813.97 ms in load_gse( series_id platfo


18 DOID:13223 uterine fibroid
19

Started DOID_1324 analysis
415.97 ms in get_analysis_df("NSCLC == 'NSCLC' or P..., "lung_cancer_control =..., '')
416.81 ms in Loading dataframe for DOID_1324
Matching sources: 51
Excluded 49 sources as single-class
Stats: 2 sources, 2 series, 2 platforms, 800 samples
Loading data and calculating fold change for DOID_1324
Loading data for GSE19804, id = 2533
301.00 ms in get_probes(4)
 10.36 s in load_gse( series_id platform_id..., 2533, False)
  1.59 s in get_gene_fold_change(<Gse GSE19804>, False, 0, None)
Loading data for GSE66499, id = 13592
298.38 ms in get_probes(283)
 40.08 s in load_gse( series_id platform_id..., 13592, False)
  3.90 s in get_gene_fold_change(<Gse GSE66499>, False, 0, None)
 56.08 s in Load/fold for DOID_1324
Meta-Analyzing DOID_1324
 43.70 s in meta analysis of real data for DOID_1324
 46.69 ms in meta analysis of permutations for DOID_1324
 19.79 mks in get_balanced_permutations({'analysis_name': 'DOI...,  C H H_lower H_upper I..., Empty DataFrame Column...)


 DOID:1324 lung cancer
20

Started DOID_13241 analysis
162.68 ms in get_analysis_df("Behcet == 'Behcet'", "Behcet_control == 'Be..., '')
163.50 ms in Loading dataframe for DOID_13241
Matching sources: 1
FAIL Can't perform meta-analysis on single source
182.71 ms in perform_analysis({'analysis_name': 'DOI...)


 DOID:13241 Behcet's disease
21

Started DOID_13378 analysis
134.44 ms in get_analysis_df("Kawasaki == 'Kawasaki'", "Kawasaki_control == '..., '')
135.27 ms in Loading dataframe for DOID_13378
Matching sources: 1
FAIL Can't perform meta-analysis on single source
154.03 ms in perform_analysis({'analysis_name': 'DOI...)


 DOID:13378 Kawasaki disease
22

Started DOID_14221 analysis
201.88 ms in get_analysis_df("MetS == 'MetS'", "MetS_Control == 'MetS..., '')
202.71 ms in Loading dataframe for DOID_14221
Matching sources: 4
Excluded 4 sources as single-class
214.33 ms in perform_analysis({'analysis_name': 'DOI...)


 DOID:14221 metabolic syndrome X
'Int64Index' object has no attribute 'labels'

Started DOID_14227 analysis
131.47 ms in get_analysis_df("azoospermia == 'azoos..., "azoospermia_control =..., '')
132.32 ms in Loading dataframe for DOID_14227
Matching sources: 2
Stats: 2 sources, 2 series, 1 platforms, 51 samples
Loading data and calculating fold change for DOID_14227
Loading data for GSE45885, id = 8763
249.80 ms in get_probes(283)
  2.19 s in load_gse( series_id platform_id..., 8763, False)
346.06 ms in get_gene_fold_change(<Gse GSE45885>, False, 0, None)
Loading data for GSE45887, id = 8764
247.67 ms in get_probes(283)
  1.69 s in load_gse( series_id platform_id..., 8764, False)
298.05 ms in get_gene_fold_change(<Gse GSE45887>, False, 0, None)
  4.62 s in Load/fold for DOID_14227
Meta-Analyzing DOID_14227
 47.59 s in meta analysis of real data for DOID_14227
 32.89 ms in meta analysis of permutations for DOID_14227
 20.03 mks in get_balanced_permutations({'analysis_name': 'DOI...,  C H H_lower H_upper I..., Empty DataFrame Column...)
 47.72 s in Meta analysis for


23 DOID:14227 azoospermia
24

Started DOID_14330 analysis
285.20 ms in get_analysis_df("PD == 'PD'", "PD_control == 'PD_con..., '')
286.05 ms in Loading dataframe for DOID_14330
Matching sources: 30
Excluded 2 sources as single-class
Excluded 1 source by min samples
Stats: 27 sources, 26 series, 12 platforms, 946 samples
Loading data and calculating fold change for DOID_14330
Loading data for GSE18838, id = 2292
287.90 ms in get_probes(280)
  1.40 s in load_gse( series_id platform_id..., 2292, False)
442.68 ms in get_gene_fold_change(<Gse GSE18838>, False, 0, None)
Loading data for GSE19587, id = 2467
213.80 ms in get_probes(9)
  1.22 s in load_gse( series_id platform_id..., 2467, False)
328.15 ms in get_gene_fold_change(<Gse GSE19587>, False, 0, None)
Loading data for GSE20141, id = 2616
303.16 ms in get_probes(4)
  2.31 s in load_gse( series_id platform_id..., 2616, False)
497.04 ms in get_gene_fold_change(<Gse GSE20141>, False, 0, None)
Loading data for GSE20146, id = 2619
302.19 ms in get_probes(4)
  2.45 s in 

 DOID:14330 Parkinson's disease
25

Started DOID_1595 analysis
540.61 ms in get_analysis_df("Depression == 'Depres..., "MDD_control == 'MDD_c..., '')
541.46 ms in Loading dataframe for DOID_1595
Matching sources: 19
Excluded 5 sources as single-class
Stats: 14 sources, 14 series, 6 platforms, 533 samples
Loading data and calculating fold change for DOID_1595
Loading data for GSE12654, id = 704
199.87 ms in get_probes(7)
  1.47 s in load_gse( series_id platform_id..., 704, False)
187.10 ms in get_gene_fold_change(<Gse GSE12654>, False, 0, None)
Loading data for GSE19738, id = 2518
327.76 ms in get_probes(282)
  6.93 s in load_gse( series_id platform_id..., 2518, False)
  1.25 s in get_gene_fold_change(<Gse GSE19738>, False, 0, None)
Loading data for GSE39653, id = 7363
338.86 ms in get_probes(674)
  4.70 s in load_gse( series_id platform_id..., 7363, False)
607.97 ms in get_gene_fold_change(<Gse GSE39653>, False, 0, None)
Loading data for GSE54562, id = 10202
266.40 ms in get_probes(389)
  2.43 s in load_gse( series_id pl

 DOID:1595 endogenous depression
26

Started DOID_1612 analysis
470.81 ms in get_analysis_df("breast_cancer == 'bre..., "breast_cancer_control..., '')
471.66 ms in Loading dataframe for DOID_1612
Matching sources: 63
Excluded 33 sources as single-class
Excluded 1 source by min samples
Stats: 29 sources, 25 series, 16 platforms, 1819 samples
Loading data and calculating fold change for DOID_1612
Loading data for GSE10780, id = 206
305.31 ms in get_probes(4)
 18.78 s in load_gse( series_id platform_id..., 206, False)
  2.32 s in get_gene_fold_change(<Gse GSE10780>, False, 0, None)
Loading data for GSE10797, id = 212
240.80 ms in get_probes(9)
  3.53 s in load_gse( series_id platform_id..., 212, False)
488.40 ms in get_gene_fold_change(<Gse GSE10797>, False, 0, None)
Loading data for GSE10810, id = 218
299.46 ms in get_probes(4)
  2.75 s in load_gse( series_id platform_id..., 218, False)
810.45 ms in get_gene_fold_change(<Gse GSE10810>, False, 0, None)
Loading data for GSE10885, id = 238
241.04 ms in get_probes(83)
239.45 ms

 DOID:1612 breast cancer
27

Started DOID_1686 analysis
318.63 ms in get_analysis_df("glaucoma == 'glaucoma'", "glaucoma_control == '..., '')
319.50 ms in Loading dataframe for DOID_1686
Matching sources: 11
Excluded 1 source as single-class
Excluded 3 sources by min samples
Stats: 7 sources, 6 series, 5 platforms, 138 samples
Loading data and calculating fold change for DOID_1686
Loading data for GSE2378, id = 3554
207.32 ms in get_probes(7)
723.93 ms in load_gse( series_id platform_id..., 3554, False)
146.14 ms in get_gene_fold_change(<Gse GSE2378>, False, 0, None)
Loading data for GSE27276, id = 4448
238.81 ms in get_probes(39)
  3.58 s in load_gse( series_id platform_id..., 4448, False)
316.12 ms in get_gene_fold_change(<Gse GSE27276>, False, 0, None)
Loading data for GSE45570, id = 8697
258.74 ms in get_probes(280)
  1.15 s in load_gse( series_id platform_id..., 8697, False)
341.87 ms in get_gene_fold_change(<Gse GSE45570>, False, 0, None)
Loading data for GSE9944, id = 11879
246.48 ms in get_probes(9)
117.57

 DOID:1686 glaucoma
28

Started DOID_1793 analysis
235.76 ms in get_analysis_df("pancreatic_cancer == ..., "pancreatic_cancer_con..., '')
236.53 ms in Loading dataframe for DOID_1793
Matching sources: 28
Excluded 16 sources as single-class
Excluded 3 sources by min samples
Stats: 9 sources, 9 series, 5 platforms, 250 samples
Loading data and calculating fold change for DOID_1793
Loading data for GSE14245, id = 1080
333.94 ms in get_probes(4)
  2.47 s in load_gse( series_id platform_id..., 1080, False)
547.91 ms in get_gene_fold_change(<Gse GSE14245>, False, 0, None)
Loading data for GSE15932, id = 1550
335.72 ms in get_probes(4)
  3.48 s in load_gse( series_id platform_id..., 1550, False)
572.18 ms in get_gene_fold_change(<Gse GSE15932>, False, 0, None)
Loading data for GSE19279, id = 2400
240.65 ms in get_probes(2)
  1.09 s in load_gse( series_id platform_id..., 2400, False)
207.80 ms in get_gene_fold_change(<Gse GSE19279>, False, 0, None)
Loading data for GSE19650, id = 2485
335.02 ms in get_probes(4)
  2.5

 DOID:1793 pancreatic cancer
29

Started DOID_1909 analysis
390.62 ms in get_analysis_df("melanoma == 'melanoma'", "melanoma_control == '..., '')
391.46 ms in Loading dataframe for DOID_1909
Matching sources: 48
Excluded 35 sources as single-class
Excluded 1 source by min samples
Stats: 12 sources, 9 series, 5 platforms, 487 samples
Loading data and calculating fold change for DOID_1909
Loading data for GSE11907, id = 488
245.75 ms in get_probes(2)
204.46 ms in get_probes(50)
 16.05 s in load_gse( series_id platform_id..., 488, False)
680.93 ms in get_gene_fold_change(<Gse GSE11907>, False, 0, None)
Loading data for GSE15605, id = 1479
334.12 ms in get_probes(4)
  6.33 s in load_gse( series_id platform_id..., 1479, False)
607.41 ms in get_gene_fold_change(<Gse GSE15605>, False, 0, None)
Loading data for GSE3189, id = 5563
239.15 ms in get_probes(2)
  2.40 s in load_gse( series_id platform_id..., 5563, False)
431.49 ms in get_gene_fold_change(<Gse GSE3189>, False, 0, None)
Loading data for GSE44660, id = 8515
264.41 ms

 DOID:1909 melanoma
30

Started DOID_1936 analysis
308.23 ms in get_analysis_df("atherosclerosis == 'a..., "atherosclerosis_contr..., '')
309.02 ms in Loading dataframe for DOID_1936
Matching sources: 6
Excluded 1 source as single-class
Excluded 1 source by min samples
Stats: 4 sources, 4 series, 4 platforms, 346 samples
Loading data and calculating fold change for DOID_1936
Loading data for GSE20129, id = 2612
241.35 ms in get_probes(3)
  4.84 s in load_gse( series_id platform_id..., 2612, False)
755.89 ms in get_gene_fold_change(<Gse GSE20129>, False, 0, None)
Loading data for GSE23746, id = 3545
247.63 ms in get_probes(74)
  2.70 s in load_gse( series_id platform_id..., 3545, False)
530.32 ms in get_gene_fold_change(<Gse GSE23746>, False, 0, None)
Loading data for GSE37356, id = 6873
288.43 ms in get_probes(674)
  5.23 s in load_gse( series_id platform_id..., 6873, False)
763.47 ms in get_gene_fold_change(<Gse GSE37356>, False, 0, None)
Loading data for GSE9874, id = 11861
239.65 ms in get_probes(2)
  2.30

 DOID:1936 atherosclerosis
31

Started DOID_219 analysis
229.05 ms in get_analysis_df("colon_cancer == 'colo..., "colon_cancer_control ..., '')
229.87 ms in Loading dataframe for DOID_219
Matching sources: 5
Excluded 1 source as single-class
Stats: 4 sources, 3 series, 4 platforms, 275 samples
Loading data and calculating fold change for DOID_219
Loading data for GSE36400, id = 6646
291.83 ms in get_probes(280)
  1.10 s in load_gse( series_id platform_id..., 6646, False)
331.35 ms in get_gene_fold_change(<Gse GSE36400>, False, 0, None)
Loading data for GSE7466, id = 11249
 63.98 ms in get_probes(821)
 63.54 ms in get_probes(823)
  2.33 s in load_gse( series_id platform_id..., 11249, False)
 93.76 ms in get_gene_fold_change(<Gse GSE7466>, False, 0, None)
Loading data for GSE68468, id = 13736
242.83 ms in get_probes(2)
 12.89 s in load_gse( series_id platform_id..., 13736, False)
  1.38 s in get_gene_fold_change(<Gse GSE68468>, False, 0, None)
 18.28 s in Load/fold for DOID_219
Meta-Analyzing DOID_219
 29.61 s in meta

 DOID:219 colon cancer
32

Started DOID_2377 analysis
307.90 ms in get_analysis_df("MS == 'MS'", "MS_control == 'MS_con..., '')
308.74 ms in Loading dataframe for DOID_2377
Matching sources: 27
Excluded 3 sources as single-class
Excluded 2 sources by min samples
Stats: 22 sources, 21 series, 9 platforms, 1781 samples
Loading data and calculating fold change for DOID_2377
Loading data for GSE10064, id = 17
243.83 ms in get_probes(12)
  1.90 s in load_gse( series_id platform_id..., 17, False)
258.42 ms in get_gene_fold_change(<Gse GSE10064>, False, 0, None)
Loading data for GSE13732, id = 973
303.69 ms in get_probes(4)
 11.45 s in load_gse( series_id platform_id..., 973, False)
  1.42 s in get_gene_fold_change(<Gse GSE13732>, False, 0, None)
Loading data for GSE14895, id = 1280
247.17 ms in get_probes(9)
208.41 ms in get_probes(2)
  2.97 s in load_gse( series_id platform_id..., 1280, False)
511.52 ms in get_gene_fold_change(<Gse GSE14895>, False, 0, None)
Loading data for GSE16461, id = 1688
300.83 ms in get_probe

 DOID:2377 multiple sclerosis
33

Started DOID_2394 analysis
259.64 ms in get_analysis_df("ovarian_cancer == 'ov..., "ovarian_cancer_contro..., '')
260.45 ms in Loading dataframe for DOID_2394
Matching sources: 4
Excluded 1 source as single-class
Stats: 3 sources, 3 series, 2 platforms, 123 samples
Loading data and calculating fold change for DOID_2394
Loading data for GSE29220, id = 4918
300.07 ms in get_probes(4)
  2.85 s in load_gse( series_id platform_id..., 4918, False)
552.95 ms in get_gene_fold_change(<Gse GSE29220>, False, 0, None)
Loading data for GSE31682, id = 5516
233.57 ms in get_probes(60)
  3.30 s in load_gse( series_id platform_id..., 5516, False)
436.62 ms in get_gene_fold_change(<Gse GSE31682>, False, 0, None)
Loading data for GSE38666, id = 7137
302.34 ms in get_probes(4)
  4.33 s in load_gse( series_id platform_id..., 7137, False)
666.57 ms in get_gene_fold_change(<Gse GSE38666>, False, 0, None)
 12.33 s in Load/fold for DOID_2394
Meta-Analyzing DOID_2394
 54.58 s in meta analysis of real data for D

 DOID:2394 ovarian cancer
34

Started DOID_2531 analysis
504.19 ms in get_analysis_df("AML_Tissue == 'AML_Ti..., "AML_Control == 'AML_C..., '')
505.09 ms in Loading dataframe for DOID_2531
Matching sources: 95
Excluded 82 sources as single-class
Stats: 13 sources, 13 series, 5 platforms, 755 samples
Loading data and calculating fold change for DOID_2531
Loading data for GSE10746, id = 198
302.99 ms in get_probes(4)
  1.78 s in load_gse( series_id platform_id..., 198, False)
415.29 ms in get_gene_fold_change(<Gse GSE10746>, False, 0, None)
Loading data for GSE13591, id = 938
244.62 ms in get_probes(2)
  6.13 s in load_gse( series_id platform_id..., 938, False)
845.62 ms in get_gene_fold_change(<Gse GSE13591>, False, 0, None)
Loading data for GSE17054, id = 1838
303.20 ms in get_probes(4)
  2.29 s in load_gse( series_id platform_id..., 1838, False)
447.41 ms in get_gene_fold_change(<Gse GSE17054>, False, 0, None)
Loading data for GSE24870, id = 3848
239.11 ms in get_probes(9)
  1.84 s in load_gse( series_id platform_

 DOID:2531 hematologic cancer
35

Started DOID_263 analysis
374.27 ms in get_analysis_df("RCC == 'RCC'", "RCC_Control == 'RCC_C..., '')
375.09 ms in Loading dataframe for DOID_263
Matching sources: 60
Excluded 37 sources as single-class
Excluded 2 sources by min samples
Stats: 21 sources, 17 series, 13 platforms, 1536 samples
Loading data and calculating fold change for DOID_263
Loading data for GSE12606, id = 685
311.73 ms in get_probes(4)
  1.79 s in load_gse( series_id platform_id..., 685, False)
400.25 ms in get_gene_fold_change(<Gse GSE12606>, False, 0, None)
Loading data for GSE14762, id = 1230
 32.28 ms in get_probes(471)
815.55 ms in load_gse( series_id platform_id..., 1230, False)
 56.44 ms in get_gene_fold_change(<Gse GSE14762>, False, 0, None)
Loading data for GSE15641, id = 1490
244.73 ms in get_probes(2)
  3.19 s in load_gse( series_id platform_id..., 1490, False)
546.55 ms in get_gene_fold_change(<Gse GSE15641>, False, 0, None)
Loading data for GSE17816, id = 2030
210.38 ms in get_probes(588)
  1.83 s in 

 DOID:263 kidney cancer
36

Started DOID_2841 analysis
402.27 ms in get_analysis_df("asthma == 'asthma'", "asthma_control == 'as..., '')
403.09 ms in Loading dataframe for DOID_2841
Matching sources: 14
Stats: 14 sources, 13 series, 9 platforms, 1486 samples
Loading data and calculating fold change for DOID_2841
Loading data for GSE19187, id = 2377
245.46 ms in get_probes(283)
  2.56 s in load_gse( series_id platform_id..., 2377, False)
305.19 ms in get_gene_fold_change(<Gse GSE19187>, False, 0, None)
Loading data for GSE27011, id = 4384
244.85 ms in get_probes(283)
  3.69 s in load_gse( series_id platform_id..., 4384, False)
481.41 ms in get_gene_fold_change(<Gse GSE27011>, False, 0, None)
Loading data for GSE27876, id = 4561
312.52 ms in get_probes(53)
  1.88 s in load_gse( series_id platform_id..., 4561, False)
343.16 ms in get_gene_fold_change(<Gse GSE27876>, False, 0, None)
Loading data for GSE4302, id = 8180
302.34 ms in get_probes(4)
 11.75 s in load_gse( series_id platform_id..., 8180, False)
  1.32 s in 

 DOID:2841 asthma
37

Started DOID_2986 analysis
320.86 ms in get_analysis_df("IgA_nephropathy == 'I..., "IgA_nephropathy_contr..., '')
321.68 ms in Loading dataframe for DOID_2986
Matching sources: 6
Stats: 6 sources, 5 series, 3 platforms, 161 samples
Loading data and calculating fold change for DOID_2986
Loading data for GSE14795, id = 1241
  1.49 s in get_probes(2)
  2.44 s in load_gse( series_id platform_id..., 1241, False)
261.03 ms in get_gene_fold_change(<Gse GSE14795>, False, 0, None)
Loading data for GSE35487, id = 6431
  1.22 s in get_probes(2)
  2.65 s in load_gse( series_id platform_id..., 6431, False)
306.68 ms in get_gene_fold_change(<Gse GSE35487>, False, 0, None)
Loading data for GSE35488, id = 6432
493.57 ms in get_probes(1019)
  1.35 s in load_gse( series_id platform_id..., 6432, False)
191.12 ms in get_gene_fold_change(<Gse GSE35488>, False, 0, None)
Loading data for GSE35489, id = 6433
373.02 ms in get_probes(1019)
560.54 ms in get_probes(2)
  3.17 s in load_gse( series_id platform_id..

 DOID:2986 IgA glomerulonephritis
38

Started DOID_2998 analysis
220.81 ms in get_analysis_df("testicular_cancer == ..., "testicular_cancer_con..., '')
221.63 ms in Loading dataframe for DOID_2998
Matching sources: 0
230.53 ms in perform_analysis({'analysis_name': 'DOI...)


 DOID:2998 testicular cancer
'Int64Index' object has no attribute 'labels'

Started DOID_3083 analysis
283.75 ms in get_analysis_df("COPD == 'COPD'", "COPD_control == 'COPD..., '')
284.53 ms in Loading dataframe for DOID_3083
Matching sources: 22
Excluded 4 sources as single-class
Stats: 18 sources, 17 series, 7 platforms, 1720 samples
Loading data and calculating fold change for DOID_3083
Loading data for GSE10006, id = 4
629.80 ms in get_probes(4)
  8.03 s in load_gse( series_id platform_id..., 4, False)
  1.22 s in get_gene_fold_change(<Gse GSE10006>, False, 0, None)
Loading data for GSE11784, id = 450
566.16 ms in get_probes(4)
 13.48 s in load_gse( series_id platform_id..., 450, False)
  1.94 s in get_gene_fold_change(<Gse GSE11784>, False, 0, None)
Loading data for GSE12472, id = 652
402.23 ms in get_probes(26)
  1.87 s in load_gse( series_id platform_id..., 652, False)
640.99 ms in get_gene_fold_change(<Gse GSE12472>, False, 0, None)
Loading data for GSE16972, id = 1810
329.30 ms in get_probes(2)
  1.03 s in load_gse( series_id platform_id..., 1810, Fal


39 DOID:3083 chronic obstructive pulmonary disease
Loading URL ftp://ftp.ncbi.nih.gov/pub/geo/DATA/SeriesMatrix/GSE37768/GSE37768_series_matrix.txt.gz ...
Loading URL

  1.33 s in load_gse( series_id platform_id..., 6962, False)
 76.78 s in Load/fold for DOID_3083
 77.10 s in perform_analysis({'analysis_name': 'DOI...)


 ftp://ftp.ncbi.nih.gov/pub/geo/DATA/SeriesMatrix/GSE37768/GSE37768-GPL570_series_matrix.txt.gz ...
Can't find matrix file for series 6962, platform 4

Started DOID_3310 analysis
266.09 ms in get_analysis_df("eczema == 'eczema'", "eczema_control == 'ec..., '')
266.91 ms in Loading dataframe for DOID_3310
Matching sources: 8
Excluded 2 sources as single-class
Stats: 6 sources, 5 series, 5 platforms, 129 samples
Loading data and calculating fold change for DOID_3310
Loading data for GSE12511, id = 664
362.40 ms in get_probes(367)
  2.51 s in load_gse( series_id platform_id..., 664, False)
417.01 ms in get_gene_fold_change(<Gse GSE12511>, False, 0, None)
Loading data for GSE13709, id = 966
133.77 ms in get_probes(414)
 77.53 ms in get_probes(415)
912.53 ms in load_gse( series_id platform_id..., 966, False)
198.10 ms in get_gene_fold_change(<Gse GSE13709>, False, 0, None)
Loading data for GSE16161, id = 1619
476.90 ms in get_probes(4)
  2.70 s in load_gse( series_id platform_id..., 1619, False)
469.30 ms in get_gene_fold_change(<Gse GSE16161>, False, 0, None)
Loading data for GSE26952, id = 4370
294.75 ms in get_probes(74)
  1.13 s in loa


40 DOID:3310 atopic dermatitis
41

Started DOID_3312 analysis
264.78 ms in get_analysis_df("bipolar_disorder == '..., "bipolar_disorder_cont..., '')
265.63 ms in Loading dataframe for DOID_3312
Matching sources: 15
Excluded 6 sources as single-class
Stats: 9 sources, 9 series, 6 platforms, 438 samples
Loading data and calculating fold change for DOID_3312
Loading data for GSE11767, id = 446
265.89 ms in get_probes(9)
869.18 ms in load_gse( series_id platform_id..., 446, False)
202.26 ms in get_gene_fold_change(<Gse GSE11767>, False, 0, None)
Loading data for GSE23848, id = 3566
332.15 ms in get_probes(343)
  3.38 s in load_gse( series_id platform_id..., 3566, False)
422.77 ms in get_gene_fold_change(<Gse GSE23848>, False, 0, None)
Loading data for GSE39653, id = 7363
431.26 ms in get_probes(674)
  4.64 s in load_gse( series_id platform_id..., 7363, False)
470.07 ms in get_gene_fold_change(<Gse GSE39653>, False, 0, None)
Loading data for GSE46449, id = 8868
470.57 ms in get_probes(4)
  9.16 s in load_gse( series_id platf

 DOID:3312 bipolar disorder
42

Started DOID_332 analysis
209.07 ms in get_analysis_df("ALS == 'ALS'", "ALS_control == 'ALS_c..., '')
209.89 ms in Loading dataframe for DOID_332
Matching sources: 11
Excluded 1 source as single-class
Stats: 10 sources, 10 series, 7 platforms, 171 samples
Loading data and calculating fold change for DOID_332
Loading data for GSE18920, id = 2315
  1.93 s in get_probes(54)
 18.83 s in load_gse( series_id platform_id..., 2315, False)
  6.36 s in get_gene_fold_change(<Gse GSE18920>, False, 0, None)
Loading data for GSE19332, id = 2413
304.90 ms in get_probes(4)
  1.67 s in load_gse( series_id platform_id..., 2413, False)
404.49 ms in get_gene_fold_change(<Gse GSE19332>, False, 0, None)
Loading data for GSE26276, id = 4160
218.60 ms in get_probes(283)
  1.15 s in load_gse( series_id platform_id..., 4160, False)
225.95 ms in get_gene_fold_change(<Gse GSE26276>, False, 0, None)
Loading data for GSE28253, id = 4659
318.55 ms in get_probes(10)
  2.70 s in load_gse( series_id platform_id..., 465

 DOID:332 amyotrophic lateral sclerosis
43

Started DOID_3393 analysis
442.53 ms in get_analysis_df("CAD == 'CAD'", "CAD_control == 'CAD_c..., '')
443.34 ms in Loading dataframe for DOID_3393
Matching sources: 8
Excluded 1 source as single-class
Stats: 7 sources, 7 series, 6 platforms, 524 samples
Loading data and calculating fold change for DOID_3393
Loading data for GSE10195, id = 57
283.25 ms in get_probes(26)
  3.93 s in load_gse( series_id platform_id..., 57, False)
537.87 ms in get_gene_fold_change(<Gse GSE10195>, False, 0, None)
Loading data for GSE12288, id = 598
240.89 ms in get_probes(2)
  6.68 s in load_gse( series_id platform_id..., 598, False)
  1.26 s in get_gene_fold_change(<Gse GSE12288>, False, 0, None)
Loading data for GSE18608, id = 2234
339.42 ms in get_probes(4)
  1.60 s in load_gse( series_id platform_id..., 2234, False)
503.82 ms in get_gene_fold_change(<Gse GSE18608>, False, 0, None)
Loading data for GSE23561, id = 3492
296.20 ms in get_probes(780)
  2.95 s in load_gse( series_id platform_id..., 3492, Fal

 DOID:3393 coronary artery disease
44

Started DOID_418 analysis
230.71 ms in get_analysis_df("systemic_scleroderma ..., "systemic_scleroderma_..., '')
231.52 ms in Loading dataframe for DOID_418
Matching sources: 10
Excluded 4 sources as single-class
Excluded 2 sources by min samples
Stats: 4 sources, 4 series, 3 platforms, 157 samples
Loading data and calculating fold change for DOID_418
Loading data for GSE3886, id = 7181
 96.08 ms in get_probes(1119)
  1.17 s in load_gse( series_id platform_id..., 7181, False)
 70.79 ms in get_gene_fold_change(<Gse GSE3886>, False, 0, None)
Loading data for GSE4385, id = 8361
 99.39 ms in get_probes(1119)
  1.18 s in load_gse( series_id platform_id..., 8361, False)
 73.39 ms in get_gene_fold_change(<Gse GSE4385>, False, 0, None)
Loading data for GSE45536, id = 8688
307.75 ms in get_probes(4)
  9.33 s in load_gse( series_id platform_id..., 8688, False)
  1.55 s in get_gene_fold_change(<Gse GSE45536>, False, 0, None)
Loading data for GSE63903, id = 13309
289.11 ms in get_probes(674)
  1.9

 DOID:418 systemic scleroderma
45

Started DOID_4481 analysis
246.76 ms in get_analysis_df("allergic_rhinitis == ..., "allergic_rhinitis_con..., '')
247.57 ms in Loading dataframe for DOID_4481
Matching sources: 12
Stats: 12 sources, 11 series, 10 platforms, 352 samples
Loading data and calculating fold change for DOID_4481
Loading data for GSE19187, id = 2377
243.73 ms in get_probes(283)
  2.56 s in load_gse( series_id platform_id..., 2377, False)
351.32 ms in get_gene_fold_change(<Gse GSE19187>, False, 0, None)
Loading data for GSE1964, id = 2481
 35.40 ms in get_probes(654)
472.87 ms in load_gse( series_id platform_id..., 2481, False)
 73.37 ms in get_gene_fold_change(<Gse GSE1964>, False, 0, None)
Loading data for GSE37146, id = 6813
246.12 ms in get_probes(17)
  2.41 s in load_gse( series_id platform_id..., 6813, False)
317.54 ms in get_gene_fold_change(<Gse GSE37146>, False, 0, None)
Loading data for GSE37155, id = 6815
258.19 ms in get_probes(674)
  1.41 s in load_gse( series_id platform_id..., 6815, False)
299.0

 DOID:4481 allergic rhinitis
46

Started DOID_4606 analysis
233.09 ms in get_analysis_df("bile_duct_cancer == '..., "bile_duct_cancer_cont..., '')
233.87 ms in Loading dataframe for DOID_4606
Matching sources: 0
242.82 ms in perform_analysis({'analysis_name': 'DOI...)


 DOID:4606 bile duct cancer
'Int64Index' object has no attribute 'labels'
47

Started DOID_5419 analysis
295.35 ms in get_analysis_df("schizophrenia == 'sch..., "Psychiatric_Control =..., '')
296.19 ms in Loading dataframe for DOID_5419
Matching sources: 27
Excluded 4 sources as single-class
Stats: 23 sources, 22 series, 12 platforms, 1420 samples
Loading data and calculating fold change for DOID_5419
Loading data for GSE12649, id = 702
248.23 ms in get_probes(2)
  4.01 s in load_gse( series_id platform_id..., 702, False)
484.11 ms in get_gene_fold_change(<Gse GSE12649>, False, 0, None)
Loading data for GSE12654, id = 704
206.61 ms in get_probes(7)
  1.49 s in load_gse( series_id platform_id..., 704, False)
195.06 ms in get_gene_fold_change(<Gse GSE12654>, False, 0, None)
Loading data for GSE17612, id = 1980
309.25 ms in get_probes(4)
  4.48 s in load_gse( series_id platform_id..., 1980, False)
825.64 ms in get_gene_fold_change(<Gse GSE17612>, False, 0, None)
Loading data for GSE18312, id = 2160
286.38 ms in get_probes(280)
  1.51 s in load_gse( series_id platfo

 DOID:5419 schizophrenia
48

Started DOID_635 analysis
435.88 ms in get_analysis_df("HIV == 'HIV' or HIV_S..., "HIV_control == 'HIV_c..., '')
436.75 ms in Loading dataframe for DOID_635
Matching sources: 27
Excluded 3 sources as single-class
Excluded 2 sources by min samples
Stats: 22 sources, 22 series, 13 platforms, 1973 samples
Loading data and calculating fold change for DOID_635
Loading data for GSE12597, id = 683


 DOID:635 acquired immunodeficiency syndrome
Loading URL ftp://ftp.ncbi.nih.gov/pub/geo/DATA/SeriesMatrix/GSE12597/GSE12597_series_matrix.txt.gz ...
Cache to

 36.48 ms in get_probes(370)
  1.24 s in load_gse( series_id platform_id..., 683, False)
 54.81 ms in get_gene_fold_change(<Gse GSE12597>, False, 0, None)
Loading data for GSE14542, id = 1175


 data/geo_mirror/DATA/SeriesMatrix/GSE12597/GSE12597_series_matrix.txt.gz
Loading URL ftp://ftp.ncbi.nih.gov/pub/geo/DATA/SeriesMatrix/GSE14542/GSE14542_series_matrix.txt.gz ...
Cache to

 36.12 ms in get_probes(454)
  4.60 s in load_gse( series_id platform_id..., 1175, False)
 64.60 ms in get_gene_fold_change(<Gse GSE14542>, False, 0, None)
Loading data for GSE16363, id = 1658
300.46 ms in get_probes(4)
  4.49 s in load_gse( series_id platform_id..., 1658, False)
836.25 ms in get_gene_fold_change(<Gse GSE16363>, False, 0, None)
Loading data for GSE18233, id = 2140


 data/geo_mirror/DATA/SeriesMatrix/GSE14542/GSE14542_series_matrix.txt.gz
Loading URL ftp://ftp.ncbi.nih.gov/pub/geo/DATA/SeriesMatrix/GSE18233/GSE18233_series_matrix.txt.gz ...
Cache to

280.87 ms in get_probes(416)
 24.98 s in load_gse( series_id platform_id..., 2140, False)
  1.57 s in get_gene_fold_change(<Gse GSE18233>, False, 0, None)
Loading data for GSE19811, id = 2535


 data/geo_mirror/DATA/SeriesMatrix/GSE18233/GSE18233_series_matrix.txt.gz
Loading URL ftp://ftp.ncbi.nih.gov/pub/geo/DATA/SeriesMatrix/GSE19811/GSE19811_series_matrix.txt.gz ...
Cache to

246.76 ms in get_probes(2)
  4.54 s in load_gse( series_id platform_id..., 2535, False)
229.72 ms in get_gene_fold_change(<Gse GSE19811>, False, 0, None)
Loading data for GSE28177, id = 4638


 data/geo_mirror/DATA/SeriesMatrix/GSE19811/GSE19811_series_matrix.txt.gz
Loading URL ftp://ftp.ncbi.nih.gov/pub/geo/DATA/SeriesMatrix/GSE28177/GSE28177_series_matrix.txt.gz ...
Cache to

301.58 ms in get_probes(4)
  4.47 s in load_gse( series_id platform_id..., 4638, False)
367.60 ms in get_gene_fold_change(<Gse GSE28177>, False, 0, None)
Loading data for GSE28944, id = 4847


 data/geo_mirror/DATA/SeriesMatrix/GSE28177/GSE28177_series_matrix.txt.gz
Loading URL ftp://ftp.ncbi.nih.gov/pub/geo/DATA/SeriesMatrix/GSE28944/GSE28944_series_matrix.txt.gz ...
Cache to

 98.32 ms in get_probes(929)
  1.32 s in load_gse( series_id platform_id..., 4847, False)
 62.21 ms in get_gene_fold_change(<Gse GSE28944>, False, 0, None)
Loading data for GSE29536, id = 4997
254.43 ms in get_probes(17)
 29.27 s in load_gse( series_id platform_id..., 4997, False)
  1.12 s in get_gene_fold_change(<Gse GSE29536>, False, 0, None)
Loading data for GSE29939, id = 5106


 data/geo_mirror/DATA/SeriesMatrix/GSE28944/GSE28944_series_matrix.txt.gz
Loading URL ftp://ftp.ncbi.nih.gov/pub/geo/DATA/SeriesMatrix/GSE29939/GSE29939_series_matrix.txt.gz ...
Cache to

 98.96 ms in get_probes(929)
  2.02 s in load_gse( series_id platform_id..., 5106, False)
 74.39 ms in get_gene_fold_change(<Gse GSE29939>, False, 0, None)
Loading data for GSE30310, id = 5201
166.80 ms in get_probes(616)
  1.52 s in load_gse( series_id platform_id..., 5201, False)
188.16 ms in get_gene_fold_change(<Gse GSE30310>, False, 0, None)
Loading data for GSE33877, id = 6044


 data/geo_mirror/DATA/SeriesMatrix/GSE29939/GSE29939_series_matrix.txt.gz
Loading URL ftp://ftp.ncbi.nih.gov/pub/geo/DATA/SeriesMatrix/GSE33877/GSE33877_series_matrix.txt.gz ...
Cache to

256.82 ms in get_probes(389)
  4.35 s in load_gse( series_id platform_id..., 6044, False)
316.57 ms in get_gene_fold_change(<Gse GSE33877>, False, 0, None)
Loading data for GSE3489, id = 6268


 data/geo_mirror/DATA/SeriesMatrix/GSE33877/GSE33877_series_matrix.txt.gz
Loading URL ftp://ftp.ncbi.nih.gov/pub/geo/DATA/SeriesMatrix/GSE3489/GSE3489_series_matrix.txt.gz ...
Cache to

207.07 ms in get_probes(7)
  3.95 s in load_gse( series_id platform_id..., 6268, False)
192.77 ms in get_gene_fold_change(<Gse GSE3489>, False, 0, None)
Loading data for GSE35864, id = 6523


 data/geo_mirror/DATA/SeriesMatrix/GSE3489/GSE3489_series_matrix.txt.gz
Loading URL ftp://ftp.ncbi.nih.gov/pub/geo/DATA/SeriesMatrix/GSE35864/GSE35864_series_matrix.txt.gz ...
Cache to

307.98 ms in get_probes(4)
 18.49 s in load_gse( series_id platform_id..., 6523, False)
  1.03 s in get_gene_fold_change(<Gse GSE35864>, False, 0, None)
Loading data for GSE37250, id = 6847


 data/geo_mirror/DATA/SeriesMatrix/GSE35864/GSE35864_series_matrix.txt.gz
Loading URL ftp://ftp.ncbi.nih.gov/pub/geo/DATA/SeriesMatrix/GSE37250/GSE37250_series_matrix.txt.gz ...
Cache to

282.92 ms in get_probes(674)
 52.57 s in load_gse( series_id platform_id..., 6847, False)
  4.90 s in get_gene_fold_change(<Gse GSE37250>, False, 0, None)
Loading data for GSE39939, id = 7430


 data/geo_mirror/DATA/SeriesMatrix/GSE37250/GSE37250_series_matrix.txt.gz
Loading URL ftp://ftp.ncbi.nih.gov/pub/geo/DATA/SeriesMatrix/GSE39939/GSE39939_series_matrix.txt.gz ...
Cache to

282.28 ms in get_probes(674)
 22.46 s in load_gse( series_id platform_id..., 7430, False)
  1.47 s in get_gene_fold_change(<Gse GSE39939>, False, 0, None)
Loading data for GSE39940, id = 7431


 data/geo_mirror/DATA/SeriesMatrix/GSE39939/GSE39939_series_matrix.txt.gz
Loading URL ftp://ftp.ncbi.nih.gov/pub/geo/DATA/SeriesMatrix/GSE39940/GSE39940_series_matrix.txt.gz ...
Cache to

286.44 ms in get_probes(674)
 33.12 s in load_gse( series_id platform_id..., 7431, False)
  2.88 s in get_gene_fold_change(<Gse GSE39940>, False, 0, None)
Loading data for GSE4124, id = 7745


 data/geo_mirror/DATA/SeriesMatrix/GSE39940/GSE39940_series_matrix.txt.gz
Loading URL ftp://ftp.ncbi.nih.gov/pub/geo/DATA/SeriesMatrix/GSE4124/GSE4124_series_matrix.txt.gz ...
Cache to

241.10 ms in get_probes(9)
  4.37 s in load_gse( series_id platform_id..., 7745, False)
376.48 ms in get_gene_fold_change(<Gse GSE4124>, False, 0, None)
Loading data for GSE42058, id = 7937


 data/geo_mirror/DATA/SeriesMatrix/GSE4124/GSE4124_series_matrix.txt.gz
Loading URL ftp://ftp.ncbi.nih.gov/pub/geo/DATA/SeriesMatrix/GSE42058/GSE42058_series_matrix.txt.gz ...
Cache to

302.16 ms in get_probes(4)
  4.02 s in load_gse( series_id platform_id..., 7937, False)
366.17 ms in get_gene_fold_change(<Gse GSE42058>, False, 0, None)
Loading data for GSE511, id = 9682


 data/geo_mirror/DATA/SeriesMatrix/GSE42058/GSE42058_series_matrix.txt.gz
Loading URL ftp://ftp.ncbi.nih.gov/pub/geo/DATA/SeriesMatrix/GSE511/GSE511_series_matrix.txt.gz ...
Cache to

169.97 ms in get_probes(22)
  2.91 s in load_gse( series_id platform_id..., 9682, False)
124.27 ms in get_gene_fold_change(<Gse GSE511>, False, 0, None)
Loading data for GSE52900, id = 9963
288.19 ms in get_probes(389)
  2.07 s in load_gse( series_id platform_id..., 9963, False)
308.09 ms in get_gene_fold_change(<Gse GSE52900>, False, 0, None)
Loading data for GSE6740, id = 11026
246.26 ms in get_probes(2)
  2.51 s in load_gse( series_id platform_id..., 11026, False)
367.76 ms in get_gene_fold_change(<Gse GSE6740>, False, 0, None)
Loading data for GSE9927, id = 11872


 data/geo_mirror/DATA/SeriesMatrix/GSE511/GSE511_series_matrix.txt.gz
Loading URL ftp://ftp.ncbi.nih.gov/pub/geo/DATA/SeriesMatrix/GSE9927/GSE9927_series_matrix.txt.gz ...
Cache to

308.08 ms in get_probes(4)
  5.58 s in load_gse( series_id platform_id..., 11872, False)
511.98 ms in get_gene_fold_change(<Gse GSE9927>, False, 0, None)
253.64 s in Load/fold for DOID_635
Meta-Analyzing DOID_635
 64.33 s in meta analysis of real data for DOID_635
313.04 ms in meta analysis of permutations for DOID_635
 20.03 mks in get_balanced_permutations({'analysis_name': 'DOI...,  C H H_lower H_upper \..., Empty DataFrame Column...)
 64.73 s in Meta analysis for DOID_635
DONE DOID_635 analysis
318.85 s in perform_analysis({'analysis_name': 'DOI...)


 data/geo_mirror/DATA/SeriesMatrix/GSE9927/GSE9927_series_matrix.txt.gz
49

Started DOID_7147 analysis
254.08 ms in get_analysis_df("ankylosing_spondyliti..., "ankylosing_spondyliti..., '')
254.90 ms in Loading dataframe for DOID_7147
Matching sources: 3
Excluded 1 source by min samples
Stats: 2 sources, 2 series, 2 platforms, 65 samples
Loading data and calculating fold change for DOID_7147
Loading data for GSE11886, id = 479
300.34 ms in get_probes(4)
  3.81 s in load_gse( series_id platform_id..., 479, False)
620.16 ms in get_gene_fold_change(<Gse GSE11886>, False, 0, None)
Loading data for GSE25101, id = 3902
283.35 ms in get_probes(389)
  1.46 s in load_gse( series_id platform_id..., 3902, False)
403.63 ms in get_gene_fold_change(<Gse GSE25101>, False, 0, None)
  6.43 s in Load/fold for DOID_7147
Meta-Analyzing DOID_7147
 28.29 s in meta analysis of real data for DOID_7147
 46.49 ms in meta analysis of permutations for DOID_7147
 13.11 mks in get_balanced_permutations({'analysis_name': 'DOI...,  C H H_lower H_upper I..., Empty DataFrame Column...)
 28.42 

 DOID:7147 ankylosing spondylitis
50

Started DOID_7148 analysis
255.91 ms in get_analysis_df("RA == 'RA'", "RA_control == 'RA_con..., '')
256.76 ms in Loading dataframe for DOID_7148
Matching sources: 21
Excluded 1 source as single-class
Excluded 2 sources by min samples
Stats: 18 sources, 17 series, 13 platforms, 607 samples
Loading data and calculating fold change for DOID_7148
Loading data for GSE10500, id = 137
142.37 ms in get_probes(7)
524.58 ms in load_gse( series_id platform_id..., 137, False)
136.22 ms in get_gene_fold_change(<Gse GSE10500>, False, 0, None)
Loading data for GSE11827, id = 457
 32.18 ms in get_probes(36)
  1.47 s in load_gse( series_id platform_id..., 457, False)
 42.44 ms in get_gene_fold_change(<Gse GSE11827>, False, 0, None)
Loading data for GSE12021, id = 517
244.55 ms in get_probes(2)
183.13 ms in get_probes(50)
  2.80 s in load_gse( series_id platform_id..., 517, False)
424.25 ms in get_gene_fold_change(<Gse GSE12021>, False, 0, None)
Loading data for GSE13026, id = 799
 32.08 ms in get_prob

 DOID:7148 rheumatoid arthritis
51

Started DOID_7693 analysis
232.94 ms in get_analysis_df("AAA == 'AAA'", "AAA_control == 'AAA_c..., '')
233.78 ms in Loading dataframe for DOID_7693
Matching sources: 5
Excluded 1 source by min samples
Stats: 4 sources, 4 series, 3 platforms, 104 samples
Loading data and calculating fold change for DOID_7693
Loading data for GSE21803, id = 3020
 66.61 ms in get_probes(724)
226.97 ms in load_gse( series_id platform_id..., 3020, False)
 55.70 ms in get_gene_fold_change(<Gse GSE21803>, False, 0, None)
Loading data for GSE47472, id = 9057
316.97 ms in get_probes(674)
  2.13 s in load_gse( series_id platform_id..., 9057, False)
401.88 ms in get_gene_fold_change(<Gse GSE47472>, False, 0, None)
Loading data for GSE7084, id = 11146
246.02 ms in get_probes(39)
  1.78 s in load_gse( series_id platform_id..., 11146, False)
231.87 ms in get_gene_fold_change(<Gse GSE7084>, False, 0, None)
Loading data for GSE57691, id = 12645
312.23 ms in get_probes(674)
  5.00 s in load_gse( series_id platform_id..

 DOID:7693 abdominal aortic aneurysm
52

Started DOID_784 analysis
157.52 ms in get_analysis_df("CKD == 'CKD'", "CKD_Control == 'CKD_C..., '')
158.34 ms in Loading dataframe for DOID_784
Matching sources: 2
Excluded 1 source as single-class
FAIL Can't perform meta-analysis on single source
177.73 ms in perform_analysis({'analysis_name': 'DOI...)


 DOID:784 chronic kidney failure
53

Started DOID_824 analysis
134.66 ms in get_analysis_df("periodontitis == 'per..., "periodontitis_control..., '')
135.48 ms in Loading dataframe for DOID_824
Matching sources: 3
Excluded 1 source by min samples
Stats: 2 sources, 2 series, 2 platforms, 33 samples
Loading data and calculating fold change for DOID_824
Loading data for GSE27993, id = 4587
252.77 ms in get_probes(283)
  1.21 s in load_gse( series_id platform_id..., 4587, False)
236.04 ms in get_gene_fold_change(<Gse GSE27993>, False, 0, None)
Loading data for GSE43525, id = 8303
285.55 ms in get_probes(674)
  2.63 s in load_gse( series_id platform_id..., 8303, False)
405.62 ms in get_gene_fold_change(<Gse GSE43525>, False, 0, None)
  4.56 s in Load/fold for DOID_824
Meta-Analyzing DOID_824
 44.28 s in meta analysis of real data for DOID_824
 36.67 ms in meta analysis of permutations for DOID_824
 21.93 mks in get_balanced_permutations({'analysis_name': 'DOI...,  C H H_lower H_upper I..., Empty DataFrame Column...)
 44.41 s i

 DOID:824 periodontitis
54

Started DOID_8577 analysis
248.86 ms in get_analysis_df("UC == 'UC'", "UC_control == 'UC_con..., '')
249.68 ms in Loading dataframe for DOID_8577
Matching sources: 18
Excluded 1 source as single-class
Stats: 17 sources, 17 series, 12 platforms, 838 samples
Loading data and calculating fold change for DOID_8577
Loading data for GSE10191, id = 56
242.72 ms in get_probes(25)
  1.46 s in load_gse( series_id platform_id..., 56, False)
259.38 ms in get_gene_fold_change(<Gse GSE10191>, False, 0, None)
Loading data for GSE10616, id = 169
252.95 ms in get_probes(25)
  2.87 s in load_gse( series_id platform_id..., 169, False)
278.55 ms in get_gene_fold_change(<Gse GSE10616>, False, 0, None)
Loading data for GSE11223, id = 317
288.43 ms in get_probes(26)
 12.29 s in load_gse( series_id platform_id..., 317, False)
  1.81 s in get_gene_fold_change(<Gse GSE11223>, False, 0, None)
Loading data for GSE1710, id = 1852
234.94 ms in get_probes(18)
  2.55 s in load_gse( series_id platform_id..., 1852, Fal

 DOID:8577 ulcerative colitis
55

Started DOID_8778 analysis
320.12 ms in get_analysis_df("CD == 'CD'", "CD_control == 'CD_con..., '')
321.02 ms in Loading dataframe for DOID_8778
Matching sources: 20
Excluded 1 source as single-class
Excluded 7 sources by min samples
Stats: 12 sources, 12 series, 11 platforms, 743 samples
Loading data and calculating fold change for DOID_8778
Loading data for GSE10616, id = 169
248.54 ms in get_probes(25)
  2.95 s in load_gse( series_id platform_id..., 169, False)
414.49 ms in get_gene_fold_change(<Gse GSE10616>, False, 0, None)
Loading data for GSE1710, id = 1852
243.31 ms in get_probes(18)
  2.66 s in load_gse( series_id platform_id..., 1852, False)
236.02 ms in get_gene_fold_change(<Gse GSE1710>, False, 0, None)
Loading data for GSE17594, id = 1976
292.89 ms in get_probes(10)
  1.74 s in load_gse( series_id platform_id..., 1976, False)
352.87 ms in get_gene_fold_change(<Gse GSE17594>, False, 0, None)
Loading data for GSE20881, id = 2803
293.60 ms in get_probes(26)
 10.09 s in load_

 DOID:8778 Crohn's disease
56

Started DOID_8893 analysis
294.17 ms in get_analysis_df("psoriasis == 'psorias..., "psoriasis_control == ..., '')
295.00 ms in Loading dataframe for DOID_8893
Matching sources: 11
Excluded 2 sources as single-class
Excluded 1 source by min samples
Stats: 8 sources, 8 series, 7 platforms, 429 samples
Loading data and calculating fold change for DOID_8893
Loading data for GSE13355, id = 878
309.96 ms in get_probes(4)
 15.46 s in load_gse( series_id platform_id..., 878, False)
  1.48 s in get_gene_fold_change(<Gse GSE13355>, False, 0, None)
Loading data for GSE32407, id = 5703
247.08 ms in get_probes(9)
  2.59 s in load_gse( series_id platform_id..., 5703, False)
449.40 ms in get_gene_fold_change(<Gse GSE32407>, False, 0, None)
Loading data for GSE47598, id = 9087
287.18 ms in get_probes(674)
  2.67 s in load_gse( series_id platform_id..., 9087, False)
423.82 ms in get_gene_fold_change(<Gse GSE47598>, False, 0, None)
Loading data for GSE57225, id = 10556
313.52 ms in get_probes(1010)
  5.

 DOID:8893 psoriasis
57

Started DOID_8986 analysis
174.28 ms in get_analysis_df("narcolepsy == 'narcol..., "narcolepsy_control ==..., '')
175.07 ms in Loading dataframe for DOID_8986
Matching sources: 1
FAIL Can't perform meta-analysis on single source
194.40 ms in perform_analysis({'analysis_name': 'DOI...)


 DOID:8986 narcolepsy
58

Started DOID_9008 analysis
195.27 ms in get_analysis_df("PsA == 'PsA'", "PsA_control == 'PsA_c..., '')
196.05 ms in Loading dataframe for DOID_9008
Matching sources: 3
Stats: 3 sources, 3 series, 2 platforms, 141 samples
Loading data and calculating fold change for DOID_9008
Loading data for GSE57383, id = 12627
305.71 ms in get_probes(901)
  9.03 s in load_gse( series_id platform_id..., 12627, False)
851.44 ms in get_gene_fold_change(<Gse GSE57383>, False, 0, None)
Loading data for GSE57405, id = 12629
298.31 ms in get_probes(901)
  9.02 s in load_gse( series_id platform_id..., 12629, False)
836.09 ms in get_gene_fold_change(<Gse GSE57405>, False, 0, None)
Loading data for GSE61281, id = 12998
323.58 ms in get_probes(53)
  4.55 s in load_gse( series_id platform_id..., 12998, False)
467.27 ms in get_gene_fold_change(<Gse GSE61281>, False, 0, None)
 24.96 s in Load/fold for DOID_9008
Meta-Analyzing DOID_9008
 54.43 s in meta analysis of real data for DOID_9008
 73.69 ms in meta analysis

 DOID:9008 psoriatic arthritis
59

Started DOID_9074 analysis
345.02 ms in get_analysis_df("SLE == 'SLE'", "SLE_control == 'SLE_c..., '')
345.84 ms in Loading dataframe for DOID_9074
Matching sources: 23
Excluded 3 sources as single-class
Excluded 1 source by min samples
Stats: 19 sources, 18 series, 12 platforms, 1375 samples
Loading data and calculating fold change for DOID_9074
Loading data for GSE10325, id = 92
238.34 ms in get_probes(2)
  2.32 s in load_gse( series_id platform_id..., 92, False)
494.25 ms in get_gene_fold_change(<Gse GSE10325>, False, 0, None)
Loading data for GSE11907, id = 488
234.34 ms in get_probes(2)
199.81 ms in get_probes(50)
 16.03 s in load_gse( series_id platform_id..., 488, False)
  1.11 s in get_gene_fold_change(<Gse GSE11907>, False, 0, None)
Loading data for GSE12374, id = 618
202.14 ms in get_probes(51)
  1.23 s in load_gse( series_id platform_id..., 618, False)
213.38 ms in get_gene_fold_change(<Gse GSE12374>, False, 0, None)
Loading data for GSE17755, id = 2014
208.38 ms in get_prob

 DOID:9074 systemic lupus erythematosus
60

Started DOID_9206 analysis
288.95 ms in get_analysis_df("BE_Tissue == 'BE_Tiss..., "EAC_Non_Tumor == 'EAC..., '')
289.78 ms in Loading dataframe for DOID_9206
Matching sources: 16
Excluded 10 sources as single-class
Excluded 2 sources by min samples
Stats: 4 sources, 4 series, 3 platforms, 155 samples
Loading data and calculating fold change for DOID_9206
Loading data for GSE34619, id = 6215
244.94 ms in get_probes(283)
  1.79 s in load_gse( series_id platform_id..., 6215, False)
302.71 ms in get_gene_fold_change(<Gse GSE34619>, False, 0, None)
Loading data for GSE36223, id = 6608
183.92 ms in get_probes(9)
  1.19 s in load_gse( series_id platform_id..., 6608, False)
376.56 ms in get_gene_fold_change(<Gse GSE36223>, False, 0, None)
Loading data for GSE39491, id = 7332
242.15 ms in get_probes(9)
  4.38 s in load_gse( series_id platform_id..., 7332, False)
558.40 ms in get_gene_fold_change(<Gse GSE39491>, False, 0, None)
Loading data for GSE6059, id = 10854
 64.46 ms in get_probes(1512)


 DOID:9206 Barrett's esophagus
61

Started DOID_9296 analysis
156.99 ms in get_analysis_df("cleft_lip == 'cleft_l..., "cleft_lip_control == ..., '')
157.85 ms in Loading dataframe for DOID_9296
Matching sources: 1
FAIL Can't perform meta-analysis on single source
176.49 ms in perform_analysis({'analysis_name': 'DOI...)


 DOID:9296 cleft lip
62

Started DOID_9352 analysis
279.50 ms in get_analysis_df("T2D == 'T2D'", "T2D_control == 'T2D_c..., '')
280.35 ms in Loading dataframe for DOID_9352
Matching sources: 36
Excluded 4 sources as single-class
Excluded 1 source by min samples
Stats: 31 sources, 25 series, 19 platforms, 854 samples
Loading data and calculating fold change for DOID_9352
Loading data for GSE121, id = 539
101.46 ms in get_probes(338)
 99.93 ms in get_probes(339)
 80.11 ms in get_probes(22)
106.16 ms in get_probes(340)
 70.81 ms in get_probes(341)
  2.32 s in load_gse( series_id platform_id..., 539, False)
399.14 ms in get_gene_fold_change(<Gse GSE121>, False, 0, None)
Loading data for GSE12643, id = 697
170.65 ms in get_probes(7)
828.66 ms in load_gse( series_id platform_id..., 697, False)
168.45 ms in get_gene_fold_change(<Gse GSE12643>, False, 0, None)
Loading data for GSE13015, id = 796
254.27 ms in get_probes(343)
284.99 ms in get_probes(389)
  9.62 s in load_gse( series_id platform_id..., 796, False)
546.76

 DOID:9352 type 2 diabetes mellitus
63

Started DOID_9744 analysis
358.45 ms in get_analysis_df("T1D == 'T1D'", "T1D_control == 'T1D_c..., '')
359.30 ms in Loading dataframe for DOID_9744
Matching sources: 13
Excluded 3 sources as single-class
Stats: 10 sources, 8 series, 6 platforms, 416 samples
Loading data and calculating fold change for DOID_9744
Loading data for GSE10586, id = 157
304.67 ms in get_probes(4)
  3.13 s in load_gse( series_id platform_id..., 157, False)
587.12 ms in get_gene_fold_change(<Gse GSE10586>, False, 0, None)
Loading data for GSE11907, id = 488
243.75 ms in get_probes(2)
213.58 ms in get_probes(50)
 16.24 s in load_gse( series_id platform_id..., 488, False)
545.96 ms in get_gene_fold_change(<Gse GSE11907>, False, 0, None)
Loading data for GSE14368, id = 1122
221.67 ms in get_probes(331)
  1.04 s in load_gse( series_id platform_id..., 1122, False)
239.08 ms in get_gene_fold_change(<Gse GSE14368>, False, 0, None)
Loading data for GSE24147, id = 3647
301.39 ms in get_probes(4)
  4.86 s in load_gse( se

 DOID:9744 type 1 diabetes mellitus
64

Started DOID_986 analysis
175.31 ms in get_analysis_df("alopecia_areata == 'a..., "alopecia_areata_contr..., '')
176.15 ms in Loading dataframe for DOID_986
Matching sources: 2
Stats: 2 sources, 2 series, 1 platforms, 17 samples
Loading data and calculating fold change for DOID_986
Loading data for GSE45512, id = 8684
299.20 ms in get_probes(4)
  1.87 s in load_gse( series_id platform_id..., 8684, False)
422.44 ms in get_gene_fold_change(<Gse GSE45512>, False, 0, None)
Loading data for GSE58573, id = 10697
301.78 ms in get_probes(4)
  1.59 s in load_gse( series_id platform_id..., 10697, False)
366.88 ms in get_gene_fold_change(<Gse GSE58573>, False, 0, None)
  4.35 s in Load/fold for DOID_986
Meta-Analyzing DOID_986
 48.44 s in meta analysis of real data for DOID_986
 55.11 ms in meta analysis of permutations for DOID_986
 19.07 mks in get_balanced_permutations({'analysis_name': 'DOI...,  C H H_lower H_upper I..., Empty DataFrame Column...)
 48.58 s in Meta analysis for DOID_986
DONE D

 DOID:986 alopecia areata
65

Started DOID_9970 analysis
369.50 ms in get_analysis_df("obesity == 'obesity'", "obesity_control == 'o..., '')
370.46 ms in Loading dataframe for DOID_9970
Matching sources: 43
Excluded 23 sources as single-class
Stats: 20 sources, 14 series, 16 platforms, 561 samples
Loading data and calculating fold change for DOID_9970
Loading data for GSE12050, id = 521
308.65 ms in get_probes(332)
  2.33 s in load_gse( series_id platform_id..., 521, False)
437.75 ms in get_gene_fold_change(<Gse GSE12050>, False, 0, None)
Loading data for GSE15524, id = 1461
215.90 ms in get_probes(500)
  1.47 s in load_gse( series_id platform_id..., 1461, False)
255.22 ms in get_gene_fold_change(<Gse GSE15524>, False, 0, None)
Loading data for GSE2508, id = 3892
108.61 ms in get_probes(7)
109.37 ms in get_probes(8)
136.71 ms in get_probes(13)
107.08 ms in get_probes(14)
134.10 ms in get_probes(15)
170.68 ms in get_probes(16)
  5.15 s in load_gse( series_id platform_id..., 3892, False)
770.97 ms in get_gene_fold_ch

 DOID:9970 obesity


In [5]:
# Summarize the queries whose analysis raised an exception.
# `errors` holds one dict per failure with keys 'slim_id', 'name' and 'error'.
# Naming the columns explicitly keeps the frame well-formed even when `errors`
# is empty; otherwise the merge would fail for lack of a common column.
# Joining on 'slim_id' makes the key explicit instead of relying on whichever
# column names happen to overlap.
error_df = query_df.merge(
    pandas.DataFrame(errors, columns=['slim_id', 'error', 'name']),
    on='slim_id',
)
error_df

Unnamed: 0,slim_id,slim_name,case_query,control_query,error,name
0,DOID:11054,urinary bladder cancer,bladder_cancer == 'bladder_cancer',bladder_cancer_control == 'bladder_cancer_cont...,'Int64Index' object has no attribute 'labels',DOID_11054
1,DOID:11714,gestational diabetes,GDM == 'GDM',GDM_control == 'GDM_control','mygene_sym',DOID_11714
2,DOID:12849,autistic disorder,autism == 'autism',autism_control == 'autism_control',zero-size array to reduction operation maximum...,DOID_12849
3,DOID:14221,metabolic syndrome X,MetS == 'MetS',MetS_Control == 'MetS_Control','Int64Index' object has no attribute 'labels',DOID_14221
4,DOID:2998,testicular cancer,testicular_cancer == 'testicular_cancer',testicular_cancer_control == 'testicular_cance...,'Int64Index' object has no attribute 'labels',DOID_2998
5,DOID:3083,chronic obstructive pulmonary disease,COPD == 'COPD',COPD_control == 'COPD_control',"Can't find matrix file for series 6962, platfo...",DOID_3083
6,DOID:4606,bile duct cancer,bile_duct_cancer == 'bile_duct_cancer',bile_duct_cancer_control == 'bile_duct_cancer_...,'Int64Index' object has no attribute 'labels',DOID_4606


In [6]:
# Save the failed-query summary as CSV, leaving out the row index.
error_df.to_csv(
    path_or_buf='data/errors.csv',
    index=False,
)