In [1]:
%matplotlib ipympl

from scipy.spatial.distance import pdist, squareform
from scipy.sparse.linalg import eigs, eigsh
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from mpl_toolkits.mplot3d import Axes3D
from matplotlib.lines import Line2D

In [2]:
# Run analysis on all samples

In [3]:
# Directories: inputs are read from data_path, figures are written to figure_path
figure_path = "./plots"
data_path = "./data"

# Input files located inside data_path
metadata_path = f"{data_path}/SuperTransect_mapping_file.csv"
abundance_table_path = f"{data_path}/abundance_table_97.shared"

In [4]:
# Abundance table: parse the tab-separated file into a samples x OTUs frame.
# The first row is the header. In every row the first three fields are not
# OTU values; field 1 of each data row is the integer sample id.
with open(abundance_table_path, "r") as file_literal:
    raw_abundance_data = [line.strip().split("\t") for line in file_literal]

header_fields, sample_rows = raw_abundance_data[0], raw_abundance_data[1:]
otu_names = header_fields[3:]
sample_names = [int(fields[1]) for fields in sample_rows]
otu_counts = [fields[3:] for fields in sample_rows]

abundance_table = pd.DataFrame(
    data=np.array(otu_counts, dtype=np.int64),
    index=sample_names,
    columns=otu_names,
)

# Per-sample summaries: total count, and number of OTUs with a non-zero count.
# "Abundance" must be added first and excluded again when counting presence.
abundance_table["Abundance"] = abundance_table.sum(axis=1)
abundance_table["Presence"] = (
    abundance_table.drop(columns="Abundance").ne(0).sum(axis=1)
)

# Metadata, indexed by the first column of the CSV
metadata = pd.read_csv(metadata_path, index_col=0)

In [5]:
# Analysis function
def abundance_to_eigenvector(filtered_abundance_table, debug=False, pandas_mode=False, bandwidth=3000):
    """Compute the generalized graph-Laplacian eigenvectors of a set of samples.

    Pairwise Minkowski (p=1) distances ``d`` between the rows of
    ``filtered_abundance_table`` are converted into Gaussian affinities
    ``K = exp(-d**2 / bandwidth**2)``. With ``D`` the diagonal matrix of the
    row sums of ``K`` and ``L = D - K``, the generalized eigenproblem
    ``L v = lambda D v`` is solved and the eigenvectors are ordered by the
    ascending real part of their eigenvalue. Every row of ``L`` sums to zero,
    so the constant vector is an eigenvector with eigenvalue 0.

    Parameters
    ----------
    filtered_abundance_table : pandas.DataFrame
        One row per sample; the rows are the vectors that get compared.
    debug : bool, default False
        Print every intermediate matrix and the result.
    pandas_mode : bool, default False
        Return the eigenvectors as a DataFrame (one eigenvector per row,
        columns labelled with the sample index) instead of a tuple of arrays.
    bandwidth : float, default 3000
        Scale of the Gaussian kernel, in the same units as the distances.
        Must be positive.

    Returns
    -------
    eigenvectors : tuple of numpy.ndarray, or pandas.DataFrame
        Eigenvectors sorted by ascending eigenvalue (real part).
    index : pandas.Index
        ``filtered_abundance_table.index``, i.e. the sample order of the
        entries inside each eigenvector.

    Raises
    ------
    ValueError
        If ``bandwidth`` is not positive.
    """
    if not bandwidth > 0:
        raise ValueError(f"bandwidth must be positive, got {bandwidth!r}")

    sample_index = filtered_abundance_table.index

    adjacency_matrix = squareform(pdist(filtered_abundance_table, metric="minkowski", p=1))
    kernel = np.exp(-(adjacency_matrix ** 2) / (bandwidth ** 2))
    diagonal = np.diag(np.sum(kernel, axis=1))
    laplacian = diagonal - kernel

    # NOTE(review): eigs documents that k must be smaller than n - 1. With
    # k = n - 1 and dense inputs SciPy emits a RuntimeWarning and falls back to
    # the dense scipy.linalg.eig solver, which returns all n eigenpairs.
    eigenvalues, eigenvectors = eigs(laplacian, k=len(laplacian) - 1, M=diagonal)

    # Sort eigenpairs by eigenvalue. The sample index is deliberately NOT part
    # of this sort: eigenpair i has no relation to sample i.
    eigen_pairs = sorted(zip(eigenvalues.real, eigenvectors.T), key=lambda pair: pair[0])
    eigenvalues, eigenvectors = zip(*eigen_pairs)

    if debug:
        print("Adjacency Matrix:\n", adjacency_matrix, "\n")
        print("Kernel:\n", kernel, "\n")
        print("Diagonal:\n", diagonal, "\n")
        print("Laplacian:\n", laplacian, "\n")
        print("Eigenvalues:\n", eigenvalues, "\n")
        print("Eigenvectors:\n", eigenvectors, "\n")
        print("Sample ID's:\n", list(sample_index), "\n")

    if pandas_mode:
        return pd.DataFrame(eigenvectors, columns=sample_index), sample_index

    return eigenvectors, sample_index

In [10]:
# Plotting function
def eigenvector_to_plot(eigenvectors, metadata, title, text = None, color_descriptor = "gps"):
    """Draw a 3D scatter of eigenvectors 1, 2 and 3 and save it as a PNG.

    Parameters
    ----------
    eigenvectors : indexable
        ``eigenvectors[1]``, ``eigenvectors[2]`` and ``eigenvectors[3]`` are
        used as the x, y and z coordinates of the points.
    metadata : mapping / DataFrame
        Only read when ``color_descriptor == "gps"``; must then provide
        ``"lat"`` and ``"long"`` entries, one value per point.
    title : str
        Figure title; also used as the file name of the saved PNG.
    text : iterable or None
        Optional labels, one per point, drawn at the point position.
    color_descriptor : "gps", iterable of colors, or None
        ``"gps"`` shades each point green by its min-max normalised
        ``lat + long``; an iterable supplies one color per point; ``None``
        together with ``text=None`` draws a plain scatter.

    The figure is written to ``{figure_path}/{title}.png`` where
    ``figure_path`` is a module-level variable, and is then shown.
    """
    xs, ys, zs = eigenvectors[1], eigenvectors[2], eigenvectors[3]
    # Identity / isinstance checks: `== None` or `== "gps"` on a numpy array or
    # pandas Series is element-wise and cannot be used as an `if` condition.
    use_gps = isinstance(color_descriptor, str) and color_descriptor == "gps"

    fig = plt.figure()
    ax = fig.add_subplot(111, projection='3d')
    ax.set_xlabel('X axis')
    ax.set_ylabel('Y axis')
    ax.set_zlabel('Z axis')

    if text is None and color_descriptor is None:
        ax.scatter3D(xs, ys, zs)
    if text is not None:
        for label, x, y, z in zip(text, xs, ys, zs):
            ax.text(x, y, z, label, None)

    if use_gps:
        location_number = metadata["lat"] + metadata["long"]
        lowest, highest = min(location_number), max(location_number)
        span = highest - lowest
        if span == 0:
            # All samples share one lat+long value: avoid 0/0 -> NaN colors
            # and draw every point with the low end of the scale.
            color_number = [0.0] * len(location_number)
        else:
            color_number = (location_number - lowest) / span
        colors = [(0, color, 0) for color in color_number]
        for color, x, y, z in zip(colors, xs, ys, zs):
            ax.scatter(x, y, z, color=color)
        # NOTE(review): green marks the largest lat+long and is labelled
        # 'Upper Left' -- confirm this matches the sampling geography.
        custom_lines = [Line2D([0], [0], color=(0, 1, 0), lw=4),
                        Line2D([0], [0], color=(0, 0, 0), lw=4)]
        ax.legend(custom_lines, ['Upper Left', 'Lower Right'], loc ="center left", bbox_to_anchor=(-.1, 0))
    elif color_descriptor is not None:
        for color, x, y, z in zip(color_descriptor, xs, ys, zs):
            ax.scatter(x, y, z, c=color)

    ax.set_title(title)
    # Save through the figure handle BEFORE showing: on some backends
    # plt.show() releases the figure and a later plt.savefig writes a blank image.
    fig.savefig(f"{figure_path}/{title}.png")
    plt.show()

In [7]:
# Filtering function
def filtered_data(key, filterer, dropper = None):
    """Select the samples whose metadata column ``key`` equals ``filterer``.

    Reads the module-level ``metadata`` and ``abundance_table`` frames.

    Parameters
    ----------
    key : str
        Name of the metadata column to filter on.
    filterer : object
        Value that ``metadata[key]`` must equal for a sample to be kept.
    dropper : label, list-like of labels, or None
        Sample ids to remove from the selection after filtering. pandas
        raises ``KeyError`` if a label is not among the selected samples.

    Returns
    -------
    filtered_abundance : pandas.DataFrame
        Rows of ``abundance_table`` for the selected samples, without the
        ``"Abundance"`` and ``"Presence"`` summary columns.
    filtered_metadata : pandas.DataFrame
        Metadata rows for exactly the samples in ``filtered_abundance``,
        in the same order.
    """
    filtered_metadata = metadata.loc[metadata[key] == filterer]
    # filter(items=...) silently skips metadata ids with no abundance row.
    filtered_abundance = abundance_table.filter(
        items=list(filtered_metadata.index), axis=0).drop(["Abundance", "Presence"], axis=1)
    # `is not None` instead of `!= None`: the latter is element-wise for
    # array-like droppers and cannot be used as an `if` condition.
    if dropper is not None:
        filtered_abundance = filtered_abundance.drop(dropper, axis=0)
    filtered_metadata = filtered_metadata.loc[filtered_abundance.index]
    return filtered_abundance, filtered_metadata

In [8]:
# Drosophila samples: filter, compute the eigenvectors, then plot and save
filtered_abundance, filtered_metadata = filtered_data(key="sample_type", filterer="Drosophila")
eigenvectors, filtered_abundance_index = abundance_to_eigenvector(
    filtered_abundance,
    pandas_mode=True,
)
eigenvector_to_plot(
    eigenvectors=eigenvectors.to_numpy(),
    metadata=filtered_metadata,
    title="Drosophila Generalized Eigenvectors",
)



Canvas(toolbar=Toolbar(toolitems=[('Home', 'Reset original view', 'home', 'home'), ('Back', 'Back to previous …

In [9]:
# Coral samples: filter, compute the eigenvectors, then plot and save
filtered_abundance, filtered_metadata = filtered_data(key="sample_type", filterer="Coral")
eigenvectors, filtered_abundance_index = abundance_to_eigenvector(
    filtered_abundance,
    pandas_mode=True,
)
eigenvector_to_plot(
    eigenvectors=eigenvectors.to_numpy(),
    metadata=filtered_metadata,
    title="Coral Generalized Eigenvectors",
)



TypeError: eigenvector_to_plot() missing 1 required positional argument: 'title'

In [None]:
# Mosquito samples: filter, compute the eigenvectors, then plot and save
filtered_abundance, filtered_metadata = filtered_data(key="sample_type", filterer="Mosquito")
eigenvectors, filtered_abundance_index = abundance_to_eigenvector(
    filtered_abundance,
    pandas_mode=True,
)
eigenvector_to_plot(
    eigenvectors=eigenvectors.to_numpy(),
    metadata=filtered_metadata,
    title="Mosquito Generalized Eigenvectors",
)

In [None]:
# Mosquito samples again, this time with samples 105279, 105525 and 105502 dropped
filtered_abundance, filtered_metadata = filtered_data(
    key="sample_type",
    filterer="Mosquito",
    dropper=[105279, 105525, 105502],
)
eigenvectors, filtered_abundance_index = abundance_to_eigenvector(
    filtered_abundance,
    pandas_mode=True,
)
eigenvector_to_plot(
    eigenvectors=eigenvectors.to_numpy(),
    metadata=filtered_metadata,
    title="Mosquito Generalized Eigenvectors Dropped 3",
)