Shared Gene Dysregulation in Multiple SCA Models
========================
This notebook takes .csv notebooks containing lists of dysregulated genes and their gene expression changes (relative to WT), and seeks to determine what genes are dysregulated across multiple models of ataxia


Written by Ravi Chopra (chopra.r@wustl.edu) and John Cooper (jpcoope@utexas.edu)<br>
Last commit: 5/18/2020<br>
Python version: Python 3.6.3<br>
Modules: pandas 1.0.3, numpy 1.13.3, scipy 0.19.1, matplotlib 2.1.0, upsetplot 0.4.0 

In [1]:
#import statements
from itertools import combinations
import pandas as pd
import numpy as np
import scipy.stats as stats
import random
from IPython.core.interactiveshell import InteractiveShell
InteractiveShell.ast_node_interactivity = "all"
from upsetplot import plot, from_memberships, from_contents
import matplotlib
import matplotlib.pyplot as plt
plt.style.use('default')
matplotlib.rcParams['font.sans-serif'] = 'Arial'

In [2]:
def clean_dataframe(df):
    
    """ Return a cleaned dataframe with NaN rows removed and duplicate
        fold change measurements averaged """
    
    # Select all rows from the df that don't have NA
    clean_df = df.loc[df['Gene name'].notnull(), :]
    # Select only rows with Gene names that are duplicated
    dup_df = clean_df[clean_df.duplicated(subset='Gene name',keep=False)]
    dup_df = dup_df.sort_values(by=['Gene name'])
    try: # won't work if no duplicates to average
        # Average duplicate fold change measurements
        dup_df = dup_df.groupby('Gene name',as_index=False).mean()
        dup_df = dup_df.round(3)
    except:
        print(f'No duplicated gene names in dataset for df with column 1: {df.columns[1]}')
        pass
    # Drop rows from the original dataframe that are in the duplicate df
    cond = clean_df['Gene name'].isin(dup_df['Gene name'])
    clean_df.drop(clean_df[cond].index, inplace = True)
    clean_df = clean_df.append(dup_df)
    clean_df = clean_df.reset_index(drop=True)
    
    del dup_df
    return clean_df

In [3]:
def get_combinations(names_list, k):
    
    """ Return a list of unique combinations (each element is a tuple)
        from the names_list """
    
    return list(combinations(names_list, k))

def find_pairwise_overlaps(dfs_dict):
    
    """ Return a dataframe with a column holding the overlapping
        set of 'Gene name's for each unique pair of models in the
        dfs_dict """

    model_pairs = get_combinations(names_list=list(dfs_dict.keys()), k=2)
    overlaps_dict = {}
    
    for combi_tuple in model_pairs:
        # create a name to be used for this combination's 
        # dataframe column
        combi_name = '-'.join(combi_tuple)
        # find overlap between the two model gene name columns
        df_1 = dfs_dict[combi_tuple[0]]
        df_2 = dfs_dict[combi_tuple[1]]
        overlap_df = pd.merge(df_1, df_2, on='Gene name')

        overlaps_dict[combi_name] = overlap_df['Gene name']        
        
    overlaps_df = pd.DataFrame(overlaps_dict)
    return overlaps_df
    
def find_triplet_overlaps(dfs_dict):
    
    """ Return a dataframe with a column holding the overlapping
        set of 'Gene name's for each unique unique group of three 
        models in the dfs_dict """

    model_trips = get_combinations(names_list=list(dfs_dict.keys()), k=3)
    overlaps_dict = {}
    
    for combi_tuple in model_trips:
        # create a name to be used for this combination's 
        # dataframe column
        combi_name = '-'.join(combi_tuple)
        # find overlap between the two model gene name columns
        df_1 = dfs_dict[combi_tuple[0]]
        df_2 = dfs_dict[combi_tuple[1]]
        df_3 = dfs_dict[combi_tuple[2]]
        
        overlap_df = pd.merge(df_1, df_2, on='Gene name')
        overlap_df = pd.merge(overlap_df, df_3, on='Gene name')

        overlaps_dict[combi_name] = overlap_df['Gene name']        
        
    overlaps_df = pd.DataFrame(overlaps_dict)
    
    return overlaps_df

def find_quad_overlaps(dfs_dict):
    
    """ Return a dataframe with a column holding the overlapping
        set of 'Gene name's across all models in dfs_dict """

    model_trips = get_combinations(names_list=list(dfs_dict.keys()), k=4)
    overlaps_dict = {}
    
    for combi_tuple in model_trips:
        # create a name to be used for this combination's 
        # dataframe column
        combi_name = '-'.join(combi_tuple)
        # find overlap between the two model gene name columns
        df_1 = dfs_dict[combi_tuple[0]]
        df_2 = dfs_dict[combi_tuple[1]]
        df_3 = dfs_dict[combi_tuple[2]]
        df_4 = dfs_dict[combi_tuple[3]]
        
        overlap_df = pd.merge(df_1, df_2, on='Gene name')
        overlap_df = pd.merge(overlap_df, df_3, on='Gene name')
        overlap_df = pd.merge(overlap_df, df_4, on='Gene name')

        overlaps_dict[combi_name] = overlap_df['Gene name']        
        
    overlaps_df = pd.DataFrame(overlaps_dict)
    
    return overlaps_df

In [4]:
def hypergeometric_test(dfs_dict):
    
    """ Return nothing. Run a hypergeometric test to determine 
        significance of overlaps between dysregulated channels in
        each pair of models in dfs_dict. Print p-values for likelihood
        of channel overlap between each unique pair of models. """
    
    pairwise_overlaps_df = find_pairwise_overlaps(dfs_dict)

    for pair_name in pairwise_overlaps_df.columns:
        # Define total number of genes and channels overlapping between current two models
        overlapping_genes = pairwise_overlaps_df.loc[:, pair_name].dropna()
        overlapping_channels = pd.Series(list(set(overlapping_genes).intersection(set(IUPHAR_Channels_names))))
        total_channel_overlaps = len(overlapping_channels)

        # Define the total number of channels in the IUPHAR channel database
        IUPHAR_chan_num = len(IUPHAR_Channels_names)

        # Find the names of the two models under consideration
        overlap_model_names = pair_name.split('-')

        # Find the total number of channels dysregulated in each model's dataset
        model_1_genes = dfs_dict[overlap_model_names[0]].loc[:, 'Gene name']
        model_1_channels = pd.Series(list(set(model_1_genes).intersection(set(IUPHAR_Channels_names))))
        total_model_1_channels = len(model_1_channels)

        model_2_genes = dfs_dict[overlap_model_names[1]].loc[:, 'Gene name']
        model_2_channels = pd.Series(list(set(model_2_genes).intersection(set(IUPHAR_Channels_names))))
        total_model_2_channels = len(model_2_channels)

        pairwise_overlap_p_value = 1-stats.hypergeom.cdf(total_channel_overlaps,
                                                         IUPHAR_chan_num,
                                                         total_model_1_channels,
                                                         total_model_2_channels)

        print(f'{overlap_model_names[0]} and {overlap_model_names[1]} p-value={pairwise_overlap_p_value}')

In [5]:
def get_n_channels_dict(dfs_dict):
    
    """ Return a dict holding the number of 
        channels dysregulated in each model """
    
    n_channels_dict = {}
    
    for model_name, model_df in dfs_dict.items():
        
        gene_names = model_df.loc[:, 'Gene name']
        channel_names = pd.Series(list(set(gene_names).intersection(set(IUPHAR_Channels_names))))
        n_channels = len(channel_names)
        
        n_channels_dict[model_name] = n_channels
        
    return n_channels_dict

def set_channels_df(dfs_dict, filename):
    # Save and get a csv with each model as column and its respective
    # list of dysregulated channels along rows
    
    model_channels_df_dict = {}
    for model_name, model_df in dfs_dict.items():
        
        gene_names = model_df.loc[:, 'Gene name']
        channel_names = pd.Series(list(set(gene_names).intersection(set(IUPHAR_Channels_names))))
        model_channels_df_dict[model_name] = channel_names
    
    model_channels_df = pd.DataFrame(model_channels_df_dict)
    if filename:
        model_channels_df.to_csv(filename)
    else:
        pass
    
    return model_channels_df, model_channels_df_dict

In [6]:
def simulate_channel_dfs(dfs_dict, n_channels_dict):
    
    """ Return a dict of dataframes simulated according to the data
        in dfs_dict. For each model, choose a random set of n channels
        from the IUHPAR database where n = number of channels dysregulated
        in that model. n is defined in n_channels_dict """
    
    # for each model, choose a random set of of channels from IUPHAR database
    # of size however meany channels are in that models original data    
    sim_dfs_dict = {}

    for model_name, model_df in dfs_dict.items():
        # get the number of channels dysregulated in this model
        n_channels = n_channels_dict[model_name]
        # Sample the IUPHAR channels and make a simulated dataframe
        # for this model's dysregulated Genes
        sim_channel_names = IUPHAR_Channels_names.sample(n_channels)
        sim_model_df = pd.DataFrame({'Gene name': sim_channel_names})

        # Add this simulated model df to the sim_dfs_dict
        sim_dfs_dict[model_name] = sim_model_df
        
    return sim_dfs_dict

In [7]:
def simulate_triplet_overlaps(dfs_dict, n_runs, observed_overlap_n, n_channels_dict):
    
    """ Return a tuple: (triplet_overlaps, n_succeses) where triplet_overlaps is a
        list of total overlaps found for each simulation of length n_runs and
        n_successes is the number of runs with >= observed_overlap_n simulated
        overlaps. """
    
    triplet_overlaps = []
    n_successes = 0
    
    for i in range(0, n_runs):
        
        # Simulate a dictionary of dataframes, one dataframe
        # for each model
        sim_channel_dfs_dict = simulate_channel_dfs(dfs_dict, n_channels_dict)
        sim_triplet_overlaps_df = find_triplet_overlaps(sim_channel_dfs_dict)
        triplet_overlaps_list = [sim_triplet_overlaps_df.loc[:, column].dropna() for column in sim_triplet_overlaps_df.columns]
        triplet_overlaps_names = pd.concat(triplet_overlaps_list).unique() # returns a series object
        
        triplet_overlaps_num = len(triplet_overlaps_names)
        print(f'Run number {i+1} of {n_runs}. Found {triplet_overlaps_num} triple overlapping channels',
              end='\r')
        
        triplet_overlaps.append(triplet_overlaps_num)
        
        if triplet_overlaps_num >= observed_overlap_n:
            n_successes += 1
        else:
            pass
        
    return triplet_overlaps, n_successes

def simulate_quad_overlaps(dfs_dict, n_runs, observed_overlap_n, n_channels_dict):
    
    """ Return a tuple: (quad_overlaps, n_succeses) where quad_overlaps is a
        list of total overlaps found for each simulation of length n_runs and
        n_successes is the number of runs with >= observed_overlap_n simulated
        overlaps. """
    
    quad_overlaps = []
    n_successes = 0
    
    for i in range(0, n_runs):
        
        # Simulate a dictionary of dataframes, one dataframe
        # for each model
        sim_channel_dfs_dict = simulate_channel_dfs(dfs_dict, n_channels_dict)
        sim_quad_overlaps_df = find_quad_overlaps(sim_channel_dfs_dict)
        quad_overlaps_list = [sim_quad_overlaps_df.loc[:, column].dropna() for column in sim_quad_overlaps_df.columns]
        quad_overlaps_names = pd.concat(quad_overlaps_list).unique() # returns a series object
        
        quad_overlaps_num = len(quad_overlaps_names)
        print(f'Run number {i+1} of {n_runs}. Found {quad_overlaps_num} quad overlapping channels',
              end='\r')
        
        quad_overlaps.append(quad_overlaps_num)
        
        if quad_overlaps_num >= observed_overlap_n:
            n_successes += 1
        else:
            pass
        
    return quad_overlaps, n_successes

In [8]:
def drop_non_channels(overlaps_df, filename):    
    
    """ Return the overlap dataframe with all channels dropped
        and index reset. Save the df as a csv with the filename
        passed this function. """
    
    df = overlaps_df
    
    channels_df_dict = {}
    for column in df.columns:
        # For each set of overlaps, drop all the gene names that are not
        # channels. They are replaced by NaNs.
        channels_bool = df.loc[:, column].isin(IUPHAR_Channels_names)
        channels_df_dict[column] = df.loc[channels_bool, column]

    channels_df = pd.DataFrame(channels_df_dict)
    
    clean_channels_df = channels_df.reset_index(drop=True).copy()    
    for column in channels_df.columns:
        # Set all of the rows in this column to NaN so they can be replaced
        # by lists of channel names in each overlap.
        clean_channels_df.loc[:, column] = np.NaN
        channel_names = list(channels_df.loc[:, column].dropna())
        # Put the list of channels in the overlap's row. Save the df
        clean_channels_df.loc[0:len(channel_names)-1, column] = channel_names
        clean_channels_df.to_csv(filename)
        
    return clean_channels_df

### 1. Read in in data, set number of runs for overlap simulation, and set dfs and model_names lists

In [9]:
##Import gene expression data from ataxia mouse models to pandas dataframes
#Gene expression data sources are outlined the methods section. In all cases, raw data tables were modified so that .csv files
#below have 3 columns: 
#column 1 - dysregulated gene names
#column 2 - log2 transformation of the fold expression for gene in mouse model of interest relative to appropriate WT
#column 3 - fold expression for gene in mouse model of interest relative to appropriate WT

ATXN1_82Q_5_Wk = pd.read_csv('ATXN1_82Q_5_Week.csv')
ATXN1_154Q_5_12_Wk = pd.read_csv('ATXN1_154Q_5_12_Week.csv')
ATXN2_127Q_6_Wk = pd.read_csv('ATXN2_127Q_6_Week.csv')
ATXN2_72Q_8_Wk = pd.read_csv('ATXN2_72Q_8_Week.csv')

#Import a list of all ion channel genes found in mice
IUPHAR_Channels = pd.read_csv('IUPHAR_Channels.csv', encoding = "ANSI", engine='python')
IUPHAR_Channels_names = IUPHAR_Channels['Gene name']

_n_runs = 1000

# Set up lists for making dictionary below
dfs = [ATXN1_82Q_5_Wk,
       ATXN1_154Q_5_12_Wk,
       ATXN2_127Q_6_Wk,
       ATXN2_72Q_8_Wk]

model_names = ['ATXN1_82Q_5_Wk',
               'ATXN1_154Q_5_12_Wk',
               'ATXN2_127Q_6_Wk',
               'ATXN2_72Q_8_Wk']

### 2. Clean the dataframes read in above and save them as .csvs. Put them into a dictionary called dfs_dict

In [10]:
# Clean the dfs
clean_dfs = []
for df in dfs:
    clean_dfs.append(clean_dataframe(df))
    
# Save the cleaned dfs
i = 0
for df in clean_dfs:
    name = model_names[i]
    df.to_csv(f'{name}_cleaned.csv')
    i += 1
    
dfs_dict = dict(zip(model_names, clean_dfs))

No duplicated gene names in dataset for df with column 1: Log2(Fold expression) (ATXN1[82Q] vs. WT)


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  errors=errors,


### 3. Create a dataframe and dictionary to store dysregulated channels in each model and save the dataframe as 

In [11]:
dysreg_channels_df, channels_dict = set_channels_df(dfs_dict, 'channels_dysreg_in_each_model.csv')
# this saves a .csv with each column a different model each row its dysregulated channels

### 4. Create a 'contents' data structure from the dictionary above and plot overlap distributions

Plot singlet overlaps and up. Note with the data in this form (constructed with from_contents()), the function is not plotting overlaps with 0 genes in overlap. 

In [12]:
# Rename keys for aesthetic purposes
keys = ['ATXN1[82Q]',
        'Atxn1(154Q)',
        'ATXN2[Q127]',
        'ATXN2-BAC-Q72']

channels_dict = dict(zip(keys, channels_dict.values()))

contents = from_contents(channels_dict)

plot_dict = plot(contents)

plot_dict['intersections'].set_ylim(0, 50)
plot_dict['intersections'].set_ylabel('Genes in Overlap', fontsize=12)

plt.savefig('python_upset_with_singlet_overlaps.svg', dpi=300)

(0, 50)

Text(0,0.5,'Genes in Overlap')

Plot pairwise overlaps and up

In [13]:
plot_dict = plot(contents)

#plot_dict['totals'].set_xlim(50, 0)

plot_dict['intersections'].set_ylim(0, 10)
plot_dict['intersections'].set_ylabel('Genes in Overlap', fontsize=12)
plot_dict['intersections'].set_xlim(3.5, 12.875)

plt.savefig('python_upset_pairwise_and_up.svg', dpi=300)

(0, 10)

Text(0,0.5,'Genes in Overlap')

(3.5, 12.875)

### 5. Find pairwise overlaps and analyze overlap significance with hypergeometric test

In [14]:
pairwise_overlaps_df = find_pairwise_overlaps(dfs_dict)
pairwise_overlaps_df.to_csv('pairwise_all_genes_overlaps.csv')
pairwise_channels_overlaps = drop_non_channels(pairwise_overlaps_df,
                                              'pairwise_channels_overlaps.csv')
pairwise_overlaps_list = [pairwise_overlaps_df.loc[:, column].dropna() for column in pairwise_overlaps_df.columns]
pairwise_overlaps_names = pd.concat(pairwise_overlaps_list).unique()

pairwise_overlaps_channels = pd.Series(list(set(pairwise_overlaps_names).intersection(set(IUPHAR_Channels_names))))

In [15]:
hypergeometric_test(dfs_dict)

ATXN1_82Q_5_Wk and ATXN1_154Q_5_12_Wk p-value=0.028727814921322814
ATXN1_82Q_5_Wk and ATXN2_127Q_6_Wk p-value=0.0005124345063888258
ATXN1_82Q_5_Wk and ATXN2_72Q_8_Wk p-value=0.014406874471300068
ATXN1_154Q_5_12_Wk and ATXN2_127Q_6_Wk p-value=0.0002980442903435243
ATXN1_154Q_5_12_Wk and ATXN2_72Q_8_Wk p-value=0.0005893463610623373
ATXN2_127Q_6_Wk and ATXN2_72Q_8_Wk p-value=0.0027472618229910095


### 6. Find all channels that are dysregulated in 3 or more models and simulate random channel selections to see if similar overlaps to those observed occur by chance

In [16]:
# Find the set of all genes that are shared in 3 or more models
triplet_overlaps_df = find_triplet_overlaps(dfs_dict)
triplet_overlaps_df.to_csv('triplet_all_genes_overlaps_df.csv')
triplet_channels_overlaps = drop_non_channels(triplet_overlaps_df,
                                              'triplet_channels_overlaps.csv')
triplet_overlaps_list = [triplet_overlaps_df.loc[:, column].dropna() for column in triplet_overlaps_df.columns]
triplet_overlaps_names = pd.concat(triplet_overlaps_list).unique() # returns a series object

# Find the set of all channel genes that are shared in 3 or more models
triplet_overlaps_channels = pd.Series(list(set(triplet_overlaps_names).intersection(set(IUPHAR_Channels_names))))
triplet_overlaps_channels_num = len(triplet_overlaps_channels)
print('Total number of channels dysregulated in any 3 models =', triplet_overlaps_channels_num)

Total number of channels dysregulated in any 3 models = 12


In [17]:
n_channels_dict = get_n_channels_dict(dfs_dict)
overlaps, count = simulate_triplet_overlaps(dfs_dict,
                                            n_runs=_n_runs,
                                            observed_overlap_n=triplet_overlaps_channels_num,
                                            n_channels_dict=n_channels_dict)

print('Count of number of runs out of', _n_runs, 'greater than/equal to', triplet_overlaps_channels_num, 'channel genes in overlap of any 3 models:', count)
Three_sim_pvalue = count/_n_runs
print('p-value for greater than/equal to 20 channel genes in all intersections of 3 models:', Three_sim_pvalue)

Count of number of runs out of 1000 greater than/equal to 12 channel genes in overlap of any 3 models: 0
p-value for greater than/equal to 20 channel genes in all intersections of 3 models: 0.0


### 7. Find all channels that are dysregulated in all 4 models and simulate random channels selections to see if similar overlap to that observed occurs by chance

In [18]:
# Find the set of all genes that are shared in all 4 models
quad_overlaps_df = find_quad_overlaps(dfs_dict)
quad_overlaps_df.to_csv('quad_all_genes_overlaps_df.csv')
quad_channels_overlaps = drop_non_channels(quad_overlaps_df,
                                           'quad_channels_overlaps.csv')
quad_overlaps_list = [quad_overlaps_df.loc[:, column].dropna() for column in quad_overlaps_df.columns]
quad_overlaps_names = pd.concat(quad_overlaps_list).unique() # returns a series object

# Find the set of all channel genes that are shared in 3 or more models
quad_overlaps_channels = pd.Series(list(set(quad_overlaps_names).intersection(set(IUPHAR_Channels_names))))
quad_overlaps_channels_num = len(quad_overlaps_channels)
print('Total number of channels dysregulated in all 4 models =', quad_overlaps_channels_num)

Total number of channels dysregulated in all 4 models = 1


In [19]:
n_channels_dict = get_n_channels_dict(dfs_dict)
overlaps, count = simulate_quad_overlaps(dfs_dict,
                                         n_runs=_n_runs,
                                         observed_overlap_n=quad_overlaps_channels_num,
                                         n_channels_dict=n_channels_dict)

print('Count of number of runs out of', _n_runs, 'greater than/equal to', quad_overlaps_channels_num, 'channel genes in overlap of all 4 models:', count)
Three_sim_pvalue = count/_n_runs
print('p-value for greater than/equal to 20 channel genes in all intersections of 3 models:', Three_sim_pvalue)

Count of number of runs out of 1000 greater than/equal to 1 channel genes in overlap of all 4 models: 45
p-value for greater than/equal to 20 channel genes in all intersections of 3 models: 0.045


### 8. Plot all unique intersections between models for final figure. 
This requires some restructuring of the data produced above

In [20]:
# Populate groups with every combination of models from size none to 4
# Each group is a list of model names in that combination.
groups = [[]]
for k_size in range(1, 5):
    
    for combination in get_combinations(model_names, k_size):
        groups.append(list(combination))

# Define dysreg_channels_df, channels_dict without saving
dysreg_channels_df, channels_dict = set_channels_df(dfs_dict, False)

In [21]:
# Create a dataframe where each column is one of every combination
# of model names size none to 4 referring to the list of channels
# dysregulated in the intersection of that combination
master_overlaps_df = pd.concat([dysreg_channels_df,
                                pairwise_channels_overlaps,
                                triplet_channels_overlaps,
                                quad_channels_overlaps], ignore_index=False, sort=False)
# Other sets that are used in contrasts below. We need a list of all channels in the
# IUPHAR channels database and a list of all the channels dysregulataed in any of the
# models to create the none set
all_channels_list = [master_overlaps_df.loc[:, column].dropna() for column in master_overlaps_df.columns]
all_channels_names = pd.concat(all_channels_list).unique()
IUPHAR_Channels_set = set(IUPHAR_Channels_names)
none_set = IUPHAR_Channels_set.difference(all_channels_names)

In [22]:
# Create a list of the number of channels dysregulated uniquely in the
# intersection of each combination of models from size none to 4.
n_unique_channels_list = [len(none_set)]
for i in range(1, len(groups)):    
    
    group = groups[i]    
    if len(group) == 1:
        key = group[0]
        channels = master_overlaps_df[key].dropna()     
        unique_names = set(channels).difference(set(pairwise_overlaps_names))
        n_unique_channels_list.append(len(unique_names))

    elif len(group) == 2:
        key = '-'.join(group)
        channels = master_overlaps_df[key].dropna()
        unique_names = set(channels).difference(set(triplet_overlaps_names))
        n_unique_channels_list.append(len(unique_names))
        
    elif len(group) == 3:
        key = '-'.join(group)
        channels = master_overlaps_df[key].dropna()
        unique_names = set(channels).difference(set(quad_overlaps_names))
        n_unique_channels_list.append(len(unique_names))
        
    elif len(group) == 4:
        key = '-'.join(group)
        channels = master_overlaps_df[key].dropna()
        unique_names = channels
        n_unique_channels_list.append(len(unique_names))        
    else:
        print("Too many groups in list")

In [23]:
n_unique_channels_list

[164, 15, 4, 44, 13, 0, 7, 1, 3, 2, 5, 3, 0, 5, 3, 1]

In [24]:
# Rename keys for aesthetic purposes
keys = ['ATXN1[82Q]',
        'Atxn1(154Q)',
        'ATXN2[Q127]',
        'ATXN2-BAC-Q72']
# Get every combination of models from size none to 4
# from keys defined above. Used to label intersections
# on x axis in plot below
plot_groups = [[]]
for k_size in range(1, 5):    
    for combination in get_combinations(keys, k_size):
        plot_groups.append(list(combination))

In [25]:
plot_list = from_memberships(plot_groups[:], data=n_unique_channels_list[:])
plot_dict = plot(plot_list)

x_lim = ()
y_lim = (0, 50)

plot_dict['intersections'].set_ylim(y_lim[0], y_lim[1])
plot_dict['intersections'].set_ylabel('IUPHAR Ion Channel Genes (Count)', fontsize=10)

plt.savefig(f'Upset_all_groups_ylim({y_lim[0]},{y_lim[1]}).svg', dpi=300)

(0, 50)

Text(0,0.5,'IUPHAR Ion Channel Genes (Count)')

In [26]:
plot_list = from_memberships(plot_groups[:], data=n_unique_channels_list[:])
plot_dict = plot(plot_list)

x_lim = ()
y_lim = (150, 200)

plot_dict['intersections'].set_ylim(y_lim[0], y_lim[1])
plot_dict['intersections'].set_ylabel('IUPHAR Ion Channel Genes (Count)', fontsize=10)

plt.savefig(f'Upset_all_groups_ylim({y_lim[0]},{y_lim[1]}).svg', dpi=300)

(150, 200)

Text(0,0.5,'IUPHAR Ion Channel Genes (Count)')