In [None]:
import os
import sys
# Put the parent directory on the import path (once) so that project
# packages such as `src` can be imported from this notebook.
module_path = os.path.abspath('..')
if sys.path.count(module_path) == 0:
    sys.path.append(module_path)
    
%reload_ext autoreload
%autoreload 2
%matplotlib inline

Import dependencies

In [None]:
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
import cobra
import operator

from scipy.cluster import hierarchy
from scipy.spatial.distance import pdist, squareform
from scipy.stats import zscore

from sklearn.metrics.pairwise import pairwise_distances
from sklearn.decomposition import PCA

from src.gem.yeast8model import Yeast8Model

Construct model object

In [None]:
# Glucose exchange rate used for the r_1714 / r_1714_REV bounds in the
# nutrient cells (units presumably mmol/gDW/h -- TODO confirm).
glc_exch_rate = 16.89

# Build the model wrapper from the ecYeastGEM XML file. The path is relative,
# so it depends on the notebook's working directory.
wt = Yeast8Model("../data/gemfiles/ecYeastGEM_batch_8-6-0.xml")

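Nutrient options (the cells below are alternatives: each one overwrites exchange bounds set by the others, so run only the one for the medium you want)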

In [None]:
# Default medium: glucose exchange limited by glc_exch_rate, applied to both
# halves of the split exchange reaction.
for exch_id, exch_bounds in (
    ("r_1714", (-glc_exch_rate, 0)),
    ("r_1714_REV", (0, glc_exch_rate)),
):
    wt.model.reactions.get_by_id(exch_id).bounds = exch_bounds

In [None]:
# Custom medium: user-chosen glucose and ammonium exchange rates.
# Edit the two rates below before running this cell.
glc_exch_rate = 0
amm_exch_rate = 0

for exch_id, exch_bounds in (
    ("r_1714", (-glc_exch_rate, 0)),
    ("r_1714_REV", (0, glc_exch_rate)),
    ("r_1654", (-amm_exch_rate, 0)),
    ("r_1654_REV", (0, amm_exch_rate)),
):
    wt.model.reactions.get_by_id(exch_id).bounds = exch_bounds

In [None]:
# Custom pyruvate & ammonium exchange
# Edit the two rates below before running this cell.
pyr_exch_rate = 0
amm_exch_rate = 0

# Shut glucose off in BOTH halves of the split exchange reaction.
# Closing only r_1714 would leave r_1714_REV at whatever an earlier nutrient
# cell set (up to glc_exch_rate), so glucose could still be exchanged.
wt.model.reactions.get_by_id("r_1714").bounds = (0, 0)
wt.model.reactions.get_by_id("r_1714_REV").bounds = (0, 0)
# Pyruvate exchange, limited by pyr_exch_rate
wt.model.reactions.get_by_id("r_2033").bounds = (-pyr_exch_rate, 0)
wt.model.reactions.get_by_id("r_2033_REV").bounds = (0, pyr_exch_rate)
# Ammonium exchange, limited by amm_exch_rate
wt.model.reactions.get_by_id("r_1654").bounds = (-amm_exch_rate, 0)
wt.model.reactions.get_by_id("r_1654_REV").bounds = (0, amm_exch_rate)

Optimise and ablate

In [None]:
# Solve the un-ablated model and keep the solution on the model object
wt.solution = wt.optimize()

# Ablate and store fluxes in each round
wt.ablation_result = wt.ablate()
# Presumably populated by wt.ablate() above -- confirm in Yeast8Model.
# Later cells treat it as a dict keyed by component, including an
# 'original' entry.
ablation_fluxes = wt.ablation_fluxes

# Pairwise Euclidean distances

In [None]:
# scipy wants a plain numpy array, so stack the per-component pandas objects
# along a new leading axis (one entry per component).
enz_use_array = np.stack(
    [flux.to_numpy() for flux in ablation_fluxes.values()],
    axis=0,
)
print(enz_use_array.shape)

In [None]:
# Keep only the enzymes (columns) that are non-zero in at least one component.
# All-zero columns carry no information and cause problems in the downstream
# functions.
enz_use_array = enz_use_array[:, enz_use_array.any(axis=0)]
print(enz_use_array.shape)

In [None]:
# OPTIONAL
# Standardise vector -- compute z-scores
# Accounts for different dynamic ranges of fluxes for each enzyme.
# Rows are components and columns are enzymes, so standardising each enzyme
# means computing the z-score ALONG axis 0, i.e. down each column. scipy's
# axis=1 would instead standardise each component across enzymes. Note that
# seaborn's clustermap option `z_score=1` means "standardise the columns",
# which is the same operation as scipy's axis=0.
enz_use_array = zscore(enz_use_array, axis=0)
# An enzyme whose flux is identical in every component has zero variance, so
# its z-score is 0/0 = NaN. It shows no deviation from its own mean, so use 0;
# this also keeps NaNs out of pdist and PCA downstream.
enz_use_array[np.isnan(enz_use_array)] = 0.0

In [None]:
# Component names, in the same order as the rows of the stacked flux array
# (iterating the dict yields its keys).
list_components = list(ablation_fluxes)

In [None]:
# Euclidean distance between every pair of components (rows), expanded from
# scipy's condensed form into a square matrix.
distances = pdist(enz_use_array, metric="euclidean")
distance_matrix = squareform(distances)
# Keep the strict lower triangle only: blank out the diagonal and everything
# above it with NaN so the heatmap shows each pair once.
distance_triangle = distance_matrix.copy()
distance_triangle[np.triu_indices(len(distance_triangle))] = np.nan

In [None]:
# Heatmap of the lower-triangle distance matrix, labelled by component
fig, ax = plt.subplots()
sns.heatmap(
    data=distance_triangle,
    ax=ax,
    cmap="cividis_r",
    xticklabels=list_components,
    yticklabels=list_components,
    cbar_kws=dict(label="Pairwise Euclidean distances of flux vectors"),
)

# Hierarchical clustering

In [None]:
# seaborn.clustermap labels rows from the DataFrame index, so wrap the array
# with the component names as row labels.
enz_use_df = pd.DataFrame(data=enz_use_array, index=list_components)

In [None]:
# Cluster the components (rows) only; enzyme columns keep their order.
# seaborn's own z_score option is left disabled here.
sns.clustermap(
    data=enz_use_df,
    col_cluster=False,
    dendrogram_ratio=0.5,
    cbar_kws=dict(label="Fluxes"),
)

# PCA

In [None]:
# Fit a PCA with scikit-learn's default settings on the component-by-enzyme
# array and project it onto the principal axes. Xt has one row per component,
# in the same order as list_components.
pca = PCA()
Xt = pca.fit_transform(enz_use_array)

In [None]:
# Scatter the components in the plane of the first two principal components
# and label each point with its component name.
fig, ax = plt.subplots(figsize=(4.5, 4.5))
pca1, pca2 = Xt[:, 0], Xt[:, 1]
ax.scatter(pca1, pca2)
ax.set_xlabel("PC1")
ax.set_ylabel("PC2")
for label, pc1_value, pc2_value in zip(list_components, pca1, pca2):
    ax.annotate(label, (pc1_value, pc2_value))

# Non-zero fluxes

In other words: I make a list of the enzymes that have non-zero fluxes for each component. Then, going through each pair of components, how many of these enzymes does the pair have in common?

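This is related to the Jaccard distance, but not quite the same: it is the raw size of the intersection (a similarity count), without dividing by the size of the union.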

In [None]:
# Boolean mask: True where an enzyme carries NON-zero flux in a component.
# (The previous `== 0` test marked the zero-flux enzymes instead, so the
# counts below were of enzymes that are unused in both components.)
# Built from the raw ablation fluxes rather than from enz_use_array, because
# enz_use_array may have been z-scored in the optional cell above, after which
# an exact zero no longer means "no flux". Enzymes that are zero everywhere
# are included here but never add to a pairwise count.
enz_use_nonzero = np.stack(
    [flux.to_numpy() for flux in ablation_fluxes.values()]
) != 0

In [None]:
def and_wrapper(a, b):
    """Count the positions at which both `a` and `b` are truthy.

    Used as a custom metric for scipy's pdist: for two boolean masks it
    returns the size of their intersection.
    """
    both = np.logical_and(a, b)
    return both.sum()

In [None]:
# Apply and_wrapper to every pair of component masks, then expand scipy's
# condensed result into a square matrix.
commons = pdist(enz_use_nonzero, metric=and_wrapper)
commons_matrix = squareform(commons)
# Keep the strict lower triangle only: blank out the diagonal and everything
# above it with NaN so the heatmap shows each pair once.
commons_triangle = commons_matrix.copy()
commons_triangle[np.triu_indices(len(commons_triangle))] = np.nan

In [None]:
# Annotated heatmap of the pairwise counts, labelled by component
fig, ax = plt.subplots()
sns.heatmap(
    data=commons_triangle,
    ax=ax,
    cmap="cividis",
    annot=True,
    fmt=".0f",
    xticklabels=list_components,
    yticklabels=list_components,
    cbar_kws=dict(label="Number of enzymes in common with nonzero flux"),
)

# How top flux-carrying reactions change

In [None]:
# Convenience functions
def get_topn_list(series, ntop):
    """Return the index labels of the `ntop` largest values in `series`.

    Parameters
    ----------
    series : pandas.Series
        Values indexed by label (here: fluxes indexed by reaction).
    ntop : int
        Number of labels to keep, taken from the top of the descending sort.

    Returns
    -------
    list
        Index labels ordered from largest value to smallest.
    """
    ranked = series.sort_values(ascending=False)
    return ranked.index[:ntop].tolist()

def rxns_to_hues(rxn_list, hue_lookup):
    """Map each reaction in `rxn_list` to its hue from `hue_lookup`.

    Reactions without an entry in `hue_lookup` are mapped to NaN, so the
    returned list always has the same length and order as `rxn_list`.
    """
    return [hue_lookup.get(rxn_id, np.nan) for rxn_id in rxn_list]

In [None]:
# Number of top-ranked reactions kept per component (the `ntop` argument of
# get_topn_list); also the number of distinct hues assigned below.
ntop = 200

In [None]:
# Reference ranking: top N reactions of the original (un-ablated) fluxes
original_topn_list = get_topn_list(ablation_fluxes['original'], ntop)

# Each reference reaction gets its rank in the original list as its 'hue'
hue_lookup = dict(zip(original_topn_list, range(ntop)))

# For every component, rank its own top N reactions and look up their hues
# (NaN where a reaction is not in the reference list). After transposing,
# rows are ranks and columns are components.
hues_array = np.array(
    [
        rxns_to_hues(get_topn_list(series, ntop), hue_lookup)
        for series in ablation_fluxes.values()
    ]
).T

In [None]:
# Visualise: rows are ranks within each component's top-N list, columns are
# components, colour is the reaction's rank in the original top-N list.
fig, ax = plt.subplots(figsize=(5, 8))
sns.heatmap(
    hues_array,
    xticklabels=list_components,
    cmap="magma_r",
    cbar=False,
    # Draw on the axes created above instead of relying on pyplot's implicit
    # "current axes", as the other heatmaps in this notebook do.
    ax=ax,
)
ax.set_xlabel("Biomass component")
ax.set_ylabel("Rank")