In [1]:
#Importing a series of packages used throughout the pipeline
import GEOparse
import pandas as pd
import numpy as np
import os
import json
from sklearn.preprocessing import StandardScaler
from sklearn.preprocessing import quantile_transform
from sklearn.decomposition import PCA
import requests
os.chdir('../Data')
os.chdir('../Scripts')
from microarray_analysis import *

### Below are your inputs, the GEO accession ID for your study and the control and treated samples you wish to analyze

In [2]:
#The series accession id for the study you are analyzing
geo_accession_id = "GSE54917"
#Separating control and treated samples
control_samples = ['GSM1326549', 'GSM1326550', 'GSM1326551']
treated_samples = ['GSM1326552', 'GSM1326553', 'GSM1326554', 'GSM1326555']

In [3]:
#Creating a dictionary of assigned control and treated samples
control_samples = { i : 'control' for i in control_samples }
treated_samples = { i : 'treated' for i in treated_samples }
all_samples = merge(control_samples, treated_samples) 
print(all_samples) 

{'GSM1326549': 'control', 'GSM1326550': 'control', 'GSM1326551': 'control', 'GSM1326552': 'treated', 'GSM1326553': 'treated', 'GSM1326554': 'treated', 'GSM1326555': 'treated'}


In [4]:
#Parse the GEO data using the Accession ID
gse = GEOparse.get_GEO(geo=geo_accession_id, destdir="./")

01-Aug-2019 16:18:43 INFO GEOparse - File already exist: using local version.
01-Aug-2019 16:18:43 INFO GEOparse - Parsing ./GSE54917_family.soft.gz: 
01-Aug-2019 16:18:43 DEBUG GEOparse - DATABASE: GeoMiame
01-Aug-2019 16:18:43 DEBUG GEOparse - SERIES: GSE54917
01-Aug-2019 16:18:43 DEBUG GEOparse - PLATFORM: GPL11180
  return DataFrame.from_csv(StringIO(data), index_col=None, sep="\t")
  gpls[entry_name] = parse_GPL(data_group, entry_name)
01-Aug-2019 16:18:44 DEBUG GEOparse - SAMPLE: GSM1326543
01-Aug-2019 16:18:44 DEBUG GEOparse - SAMPLE: GSM1326544
01-Aug-2019 16:18:44 DEBUG GEOparse - SAMPLE: GSM1326545
01-Aug-2019 16:18:44 DEBUG GEOparse - SAMPLE: GSM1326546
01-Aug-2019 16:18:44 DEBUG GEOparse - SAMPLE: GSM1326547
01-Aug-2019 16:18:44 DEBUG GEOparse - SAMPLE: GSM1326548
01-Aug-2019 16:18:44 DEBUG GEOparse - SAMPLE: GSM1326549
01-Aug-2019 16:18:44 DEBUG GEOparse - SAMPLE: GSM1326550
01-Aug-2019 16:18:44 DEBUG GEOparse - SAMPLE: GSM1326551
01-Aug-2019 16:18:44 DEBUG GEOparse - SAMP

In [5]:
#Create a list of samples to use in the development of the expression matrix
list_samples = list(all_samples.keys())
list_samples

['GSM1326549',
 'GSM1326550',
 'GSM1326551',
 'GSM1326552',
 'GSM1326553',
 'GSM1326554',
 'GSM1326555']

In [6]:
#Visualization of expression matrix
pivoted_samples = gse.pivot_samples('VALUE')[list_samples]
pivoted_samples.head()

name,GSM1326549,GSM1326550,GSM1326551,GSM1326552,GSM1326553,GSM1326554,GSM1326555
ID_REF,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
1415670_PM_at,9.682183,9.880824,9.432104,9.331813,9.467232,9.699046,9.596045
1415671_PM_at,10.084613,10.526075,9.817309,9.406962,9.771655,10.012961,10.02005
1415672_PM_at,6.978157,6.598286,7.323166,7.056302,7.114557,6.97889,7.064719
1415673_PM_at,7.087708,7.486799,6.820598,7.089049,7.082327,7.136154,7.08828
1415674_PM_a_at,7.11153,7.386174,6.989748,6.809202,6.778237,7.019381,6.880839


In [7]:
#Determine the total amount of probes used in the study
pivoted_samples_average = pivoted_samples.median(axis=1)
print("Number of probes before filtering: ", len(pivoted_samples_average))

Number of probes before filtering:  45141


In [8]:
#Filtering out unexpressed probes
expression_threshold = pivoted_samples_average.quantile(0.3)
expressed_probes = pivoted_samples_average[pivoted_samples_average >= expression_threshold].index.tolist()
print("number of probes above threshold: ", len(expressed_probes))

number of probes above threshold:  31599


In [9]:
#Redefine expression data using only the expressed probes
exprsdata = gse.pivot_samples("VALUE").loc[expressed_probes]
exprsdata = exprsdata.T
#Deletes additional samples that aren't being analyzed
exprsdata = exprsdata[exprsdata.index.isin(list_samples)]
#Drop any probe columns where expression data is missing or negative
exprsdata.dropna(axis = 1)

ID_REF,1415670_PM_at,1415671_PM_at,1415672_PM_at,1415673_PM_at,1415674_PM_a_at,1415675_PM_at,1415676_PM_a_at,1415677_PM_at,1415678_PM_at,1415679_PM_at,...,AFFX-r2-Ec-bioB-M_at,AFFX-r2-Ec-bioC-3_at,AFFX-r2-Ec-bioC-5_at,AFFX-r2-Ec-bioD-3_at,AFFX-r2-Ec-bioD-5_at,AFFX-r2-P1-cre-3_at,AFFX-r2-P1-cre-5_at,AFFX-r2-TagB_at,AFFX-r2-TagIN-3_at,AFFX-r2-TagO-5_at
name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
GSM1326549,9.682183,10.084613,6.978157,7.087708,7.11153,7.428983,10.856926,9.414557,10.443569,10.480751,...,7.511447,8.52224,8.905374,10.72636,10.942361,12.339005,12.268646,4.07753,4.551598,4.359745
GSM1326550,9.880824,10.526075,6.598286,7.486799,7.386174,7.868106,11.002599,9.757142,10.22074,10.90589,...,8.13388,9.033456,9.441275,10.880152,11.125984,12.235052,12.275333,4.298606,4.497361,4.454506
GSM1326551,9.432104,9.817309,7.323166,6.820598,6.989748,7.412727,10.726939,9.199426,10.334905,10.303043,...,7.394142,8.420644,8.760909,10.715907,10.940048,12.32888,12.276473,4.093229,4.274816,4.393254
GSM1326552,9.331813,9.406962,7.056302,7.089049,6.809202,7.423941,10.681481,9.019493,10.471583,10.280853,...,8.008407,8.972226,9.40439,11.066611,11.312504,12.645738,12.63046,4.001587,4.692505,4.655139
GSM1326553,9.467232,9.771655,7.114557,7.082327,6.778237,7.428983,10.782074,9.161784,10.519036,10.386653,...,7.728326,8.708584,9.075278,10.860182,11.035452,12.424336,12.383585,4.045578,4.522659,4.502272
GSM1326554,9.699046,10.012961,6.97889,7.136154,7.019381,7.536684,10.936911,9.479117,10.430231,10.640736,...,7.765818,8.648662,9.046145,10.687108,10.911692,12.339227,12.276109,4.094186,4.217038,4.3763
GSM1326555,9.596045,10.02005,7.064719,7.08828,6.880839,7.362699,10.799768,9.378351,10.343568,10.290654,...,8.583238,9.469922,9.910788,11.66271,11.859479,13.131364,13.095672,3.800621,4.158638,4.388169


In [10]:
#Quantile normalization of data
rank_mean = exprsdata.stack().groupby(exprsdata.rank(method='first').stack().astype(int)).mean()
exprsdata.rank(method='min').stack().astype(int).map(rank_mean).unstack()

ID_REF,1415670_PM_at,1415671_PM_at,1415672_PM_at,1415673_PM_at,1415674_PM_a_at,1415675_PM_at,1415676_PM_a_at,1415677_PM_at,1415678_PM_at,1415679_PM_at,...,AFFX-r2-Ec-bioB-M_at,AFFX-r2-Ec-bioC-3_at,AFFX-r2-Ec-bioC-5_at,AFFX-r2-Ec-bioD-3_at,AFFX-r2-Ec-bioD-5_at,AFFX-r2-P1-cre-3_at,AFFX-r2-P1-cre-5_at,AFFX-r2-TagB_at,AFFX-r2-TagIN-3_at,AFFX-r2-TagO-5_at
name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
GSM1326549,6.330278,6.432779,6.07489,6.168083,6.432779,6.245189,6.330278,6.330278,6.330278,6.330278,...,6.07489,6.07489,6.07489,6.168083,6.168083,6.168083,5.907094,6.245189,6.432779,5.907094
GSM1326550,6.600439,6.600439,5.907094,6.600439,6.600439,6.600439,6.600439,6.600439,5.907094,6.600439,...,6.432779,6.432779,6.432779,6.330278,6.330278,5.907094,6.07489,6.600439,6.245189,6.330278
GSM1326551,6.07489,6.168083,6.600439,5.907094,6.245189,6.07489,6.07489,6.168083,6.07489,6.168083,...,5.907094,5.907094,5.907094,6.07489,6.07489,6.07489,6.245189,6.330278,6.168083,6.245189
GSM1326552,5.907094,5.907094,6.245189,6.330278,6.07489,6.168083,5.907094,5.907094,6.432779,5.907094,...,6.330278,6.330278,6.330278,6.432779,6.432779,6.432779,6.432779,6.07489,6.600439,6.600439
GSM1326553,6.168083,6.07489,6.432779,6.07489,5.907094,6.245189,6.168083,6.07489,6.600439,6.245189,...,6.168083,6.245189,6.245189,6.245189,6.245189,6.330278,6.330278,6.168083,6.330278,6.432779
GSM1326554,6.432779,6.245189,6.168083,6.432779,6.330278,6.432779,6.432779,6.432779,6.245189,6.432779,...,6.245189,6.168083,6.168083,5.907094,5.907094,6.245189,6.168083,6.432779,6.07489,6.07489
GSM1326555,6.245189,6.330278,6.330278,6.245189,6.168083,5.907094,6.245189,6.245189,6.168083,6.07489,...,6.600439,6.600439,6.600439,6.600439,6.600439,6.600439,6.600439,5.907094,5.907094,6.168083


In [11]:
#Compute PCA
pca = PCA(n_components=3)
principalComponents = pca.fit_transform(exprsdata)
principalDf = pd.DataFrame(data = principalComponents, columns = ['principal component 1', 'principal component 2', 'principal component 3'])

In [12]:
#Making Dataframe of samples to concatenate with principal components
samplesDf = pd.DataFrame.from_dict(all_samples, orient = 'index', columns = ['type'])
samplesDf.reset_index(inplace=True)

In [13]:
#Concatenate sample data with PCA data
principalDf = pd.concat([samplesDf, principalDf], axis=1)
principalDf

Unnamed: 0,index,type,principal component 1,principal component 2,principal component 3
0,GSM1326549,control,8.594644,0.36974,9.84085
1,GSM1326550,control,74.705664,8.868083,8.417663
2,GSM1326551,control,-40.998394,-14.577642,23.329922
3,GSM1326552,treated,-37.498969,17.615843,-2.380319
4,GSM1326553,treated,-20.004035,22.974763,-11.090719
5,GSM1326554,treated,19.612695,-12.432511,-7.995981
6,GSM1326555,treated,-4.411605,-22.818275,-20.121416


In [14]:
#PCA scatter plot
import matplotlib.pyplot as plt
from mpl_toolkits.mplot3d import Axes3D

fig = plt.figure(figsize = (12,12))
ax = fig.gca(projection='3d')
ax.set_xlabel('Principal Component 1', fontsize = 15)
ax.set_ylabel('Principal Component 2', fontsize = 15)
ax.set_zlabel('Principal Component 3', fontsize = 15)
ax.set_title('3 Component PCA', fontsize = 20)

types = ('control', 'treated')
colors = ['green', 'violet']
for type, color in zip(types, colors):
    indicesToKeep = principalDf['type'] == type
    ax.scatter(principalDf.loc[indicesToKeep, 'principal component 1'], 
               principalDf.loc[indicesToKeep, 'principal component 2'], principalDf.loc[indicesToKeep, 'principal component 3'], c = color, s = 50)
ax.legend(types)

<matplotlib.legend.Legend at 0x1a23169c88>

In [15]:
#Calculate variance ratio
pca.explained_variance_ratio_

array([0.6436185 , 0.12167545, 0.08828647])

In [16]:
#Transpose data matrix for sorting, index correlated to probe IDs
exprsdata = exprsdata.T
exprsdata

name,GSM1326549,GSM1326550,GSM1326551,GSM1326552,GSM1326553,GSM1326554,GSM1326555
ID_REF,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
1415670_PM_at,9.682183,9.880824,9.432104,9.331813,9.467232,9.699046,9.596045
1415671_PM_at,10.084613,10.526075,9.817309,9.406962,9.771655,10.012961,10.020050
1415672_PM_at,6.978157,6.598286,7.323166,7.056302,7.114557,6.978890,7.064719
1415673_PM_at,7.087708,7.486799,6.820598,7.089049,7.082327,7.136154,7.088280
1415674_PM_a_at,7.111530,7.386174,6.989748,6.809202,6.778237,7.019381,6.880839
1415675_PM_at,7.428983,7.868106,7.412727,7.423941,7.428983,7.536684,7.362699
1415676_PM_a_at,10.856926,11.002599,10.726939,10.681481,10.782074,10.936911,10.799768
1415677_PM_at,9.414557,9.757142,9.199426,9.019493,9.161784,9.479117,9.378351
1415678_PM_at,10.443569,10.220740,10.334905,10.471583,10.519036,10.430231,10.343568
1415679_PM_at,10.480751,10.905890,10.303043,10.280853,10.386653,10.640736,10.290654


In [17]:
#Sort expression matrix using 800 genes with greatest variance
variances = np.var(exprsdata, axis=1)
srt_idx = variances.argsort()[::-1]
data_sub = exprsdata.iloc[srt_idx].iloc[:800]
data_sub.index = data_sub.index.map(str)
data_sub

name,GSM1326549,GSM1326550,GSM1326551,GSM1326552,GSM1326553,GSM1326554,GSM1326555
ID_REF,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
1415994_PM_at,7.220758,9.889383,5.246760,8.567014,10.330730,9.177822,9.148437
1422651_PM_at,6.307225,8.746259,5.122033,7.662396,9.836162,8.116202,8.516480
1447733_PM_x_at,5.632917,3.976951,6.361501,7.185739,7.232304,6.687746,8.722426
1436453_PM_at,3.679207,6.011494,3.091251,3.886726,6.645105,5.713522,6.292786
1455696_PM_a_at,6.908131,4.675551,8.862734,8.657529,8.088245,7.171424,7.397144
1417600_PM_at,5.244625,4.649418,7.720450,8.509593,7.424009,5.933650,6.706952
1433924_PM_at,7.915459,5.986463,9.818358,9.922026,9.467739,8.783365,8.895620
1417867_PM_at,8.313750,10.893579,7.297770,9.481550,11.178194,9.841375,9.969093
1448665_PM_at,5.374812,3.599657,7.429408,7.222130,6.538073,5.181166,5.788622
1442715_PM_at,5.216212,4.227061,7.167384,7.559747,6.911558,5.162193,5.665372


In [18]:
#Extract probe ids from data
probeids = list(data_sub.index)
probeids

['1415994_PM_at',
 '1422651_PM_at',
 '1447733_PM_x_at',
 '1436453_PM_at',
 '1455696_PM_a_at',
 '1417600_PM_at',
 '1433924_PM_at',
 '1417867_PM_at',
 '1448665_PM_at',
 '1442715_PM_at',
 '1457304_PM_at',
 '1415965_PM_at',
 '1428083_PM_at',
 '1423439_PM_at',
 '1415964_PM_at',
 '1439617_PM_s_at',
 '1417356_PM_at',
 '1424252_PM_at',
 '1436871_PM_at',
 '1440182_PM_at',
 '1417307_PM_at',
 '1428536_PM_at',
 '1437598_PM_at',
 '1439399_PM_a_at',
 '1460139_PM_at',
 '1425260_PM_at',
 '1436240_PM_at',
 '1455300_PM_at',
 '1457327_PM_at',
 '1449095_PM_at',
 '1431610_PM_at',
 '1446947_PM_at',
 '1443534_PM_at',
 '1425522_PM_at',
 '1447767_PM_at',
 '1452774_PM_at',
 '1420992_PM_at',
 '1457017_PM_at',
 '1447859_PM_at',
 '1446127_PM_at',
 '1426124_PM_a_at',
 '1428539_PM_at',
 '1422108_PM_at',
 '1456089_PM_at',
 '1433537_PM_at',
 '1447345_PM_at',
 '1437862_PM_at',
 '1446603_PM_at',
 '1447977_PM_x_at',
 '1454774_PM_at',
 '1428467_PM_at',
 '1453688_PM_at',
 '1415872_PM_at',
 '1442710_PM_at',
 '1435391_PM_at'

In [19]:
#Upload annotation file as dictionary
dict1 = {}
with open('../data/probe2gene.txt') as f:
    for line in f:
        line = line.strip()
        (platform, probe, symbol) = line.split()
        dict1[probe] = symbol

In [20]:
#Examine how many ids are duplicates for gene symbols/unmatched
len(set(probeids) - dict1.keys())

257

In [21]:
#Reset index and replace with gene symbols, view as dataframe
exprsdata = pd.DataFrame(exprsdata)
exprsdata['symbol'] = exprsdata.index.to_series().map(dict1)
exprsdata.reset_index(inplace=True)
data = exprsdata.set_index('symbol')
#Drop probe id column
data = data.drop('ID_REF', axis=1)
data

name,GSM1326549,GSM1326550,GSM1326551,GSM1326552,GSM1326553,GSM1326554,GSM1326555
symbol,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
COPG1,9.682183,9.880824,9.432104,9.331813,9.467232,9.699046,9.596045
ATP6V0D1,10.084613,10.526075,9.817309,9.406962,9.771655,10.012961,10.020050
GOLGA7,6.978157,6.598286,7.323166,7.056302,7.114557,6.978890,7.064719
PSPH,7.087708,7.486799,6.820598,7.089049,7.082327,7.136154,7.088280
TRAPPC4,7.111530,7.386174,6.989748,6.809202,6.778237,7.019381,6.880839
DPM2,7.428983,7.868106,7.412727,7.423941,7.428983,7.536684,7.362699
PSMB5,10.856926,11.002599,10.726939,10.681481,10.782074,10.936911,10.799768
DHRS1,9.414557,9.757142,9.199426,9.019493,9.161784,9.479117,9.378351
PPM1A,10.443569,10.220740,10.334905,10.471583,10.519036,10.430231,10.343568
PSENEN,10.480751,10.905890,10.303043,10.280853,10.386653,10.640736,10.290654


In [22]:
#Drop rows that aren't associated with a particular gene symbol
data = data.reset_index().dropna().set_index('symbol')
data

name,GSM1326549,GSM1326550,GSM1326551,GSM1326552,GSM1326553,GSM1326554,GSM1326555
symbol,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
COPG1,9.682183,9.880824,9.432104,9.331813,9.467232,9.699046,9.596045
ATP6V0D1,10.084613,10.526075,9.817309,9.406962,9.771655,10.012961,10.020050
GOLGA7,6.978157,6.598286,7.323166,7.056302,7.114557,6.978890,7.064719
PSPH,7.087708,7.486799,6.820598,7.089049,7.082327,7.136154,7.088280
TRAPPC4,7.111530,7.386174,6.989748,6.809202,6.778237,7.019381,6.880839
DPM2,7.428983,7.868106,7.412727,7.423941,7.428983,7.536684,7.362699
PSMB5,10.856926,11.002599,10.726939,10.681481,10.782074,10.936911,10.799768
DHRS1,9.414557,9.757142,9.199426,9.019493,9.161784,9.479117,9.378351
PPM1A,10.443569,10.220740,10.334905,10.471583,10.519036,10.430231,10.343568
PSENEN,10.480751,10.905890,10.303043,10.280853,10.386653,10.640736,10.290654


In [23]:
#Standardized data to a text file
data_file = ('../expression_matrix_top800_genes.txt')
data.to_csv(data_file, sep='\t')
data_file

'../expression_matrix_top800_genes.txt'

In [24]:
#Import required packages for characteristic direction and utilize warning statements
import warnings
from scipy.stats import chi2
from scipy.stats.mstats import zscore
warnings.filterwarnings("ignore", category=DeprecationWarning) 
warnings.filterwarnings("ignore", category=RuntimeWarning) 

In [25]:
#Make sample classes, ensure that there is a distinction between control/treated samples
data_cd = {}

sample_classes = {}
sample_class = np.zeros(data.shape[1], dtype=np.int32)
sample_class[samplesDf['type'].values == 'control'] = 1
sample_class[samplesDf['type'].values == 'treated'] = 2
sample_classes = sample_class

print(sample_classes)

[1 1 1 2 2 2 2]


In [26]:
#CD results
cd_res = chdir(data.values, sample_classes, data.index, gamma=.5, sort=False, calculate_sig=False)
cd_coefs = np.array(list(map(lambda x: x[0], cd_res)))

srt_idx = np.abs(cd_coefs).argsort()[::-1]
cd_coefs = cd_coefs[srt_idx][:600]
sorted_DEGs = data.index[srt_idx][:600]
up_genes = dict(zip(sorted_DEGs[cd_coefs > 0], cd_coefs[cd_coefs > 0]))
dn_genes = dict(zip(sorted_DEGs[cd_coefs < 0], cd_coefs[cd_coefs < 0]))
data_cd['up'] = up_genes
data_cd['dn'] = dn_genes

In [27]:
#Retrieve up and down gene sets
up_list = list(up_genes.keys())
dn_list = list(dn_genes.keys())

In [28]:
import json
import requests

ENRICHR_URL = 'https://amp.pharm.mssm.edu/Enrichr'

def _enrichr_add_list(genes, meta=''):
    genes_str = '\n'.join(genes)
    payload = {
        'list': (None, genes_str),
        'description': (None, meta)
    }
    # POST genes to the /addList endpoint
    response = requests.post("%s/addList" % ENRICHR_URL, files=payload)
    list_ids = json.loads(response.text)
    return list_ids

def enrichr_link(genes, meta=''):
    list_ids = _enrichr_add_list(genes, meta)
    shortId = list_ids['shortId']
    link = '%s/enrich?dataset=%s' % (ENRICHR_URL, shortId)
    return link

In [29]:
#Print Enrichr links for further analysis
for key, d in data_cd.items():
    time.sleep(1)
    genes = list(data_cd[key].keys())
    genes = [str(g) for g in genes]
    link = enrichr_link(genes, key)
    print(key)
    print(link)

up
https://amp.pharm.mssm.edu/Enrichr/enrich?dataset=5661d17d7606ea995d76875ab180fe68
dn
https://amp.pharm.mssm.edu/Enrichr/enrich?dataset=8ffebe6b83be738677653ee78dda99cb
