In [1]:
%matplotlib ipympl

from scipy.spatial.distance import pdist, squareform
from scipy.sparse.linalg import eigs, eigsh
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from mpl_toolkits.mplot3d import Axes3D

In [2]:
# File Paths
data_path = "./data"
figure_path = "./plots"
abundance_table_path = f"{data_path}/abundance_table_97.shared"
metadata_path = f"{data_path}/SuperTransect_mapping_file.csv"

In [39]:
# Abundance Table
with open(abundance_table_path, "r") as file_literal:
    raw_abundance_data = [line.strip().split("\t") for line in file_literal]
    otu_names = raw_abundance_data[0][3:]
    sample_names = list(map(int, [line[1] for line in raw_abundance_data[1:]]))
    otu_counts = [line[3:] for line in raw_abundance_data[1:]]
abundance_table = pd.DataFrame(
    np.array(otu_counts, dtype=np.int64),
    index=sample_names,
    columns=otu_names)
abundance_table["Abundance"] = abundance_table.sum(axis=1)
abundance_table["Presence"] = abundance_table.drop("Abundance", axis=1).where(
    abundance_table == 0, 1).sum(axis=1)

# Metadata
metadata = pd.read_csv(metadata_path, index_col=0)

In [83]:
# Analysis function
def abundance_to_eigenvector(filtered_abundance_table, debug=False, pandas_mode=False):
    adjacency_matrix = squareform(pdist(filtered_abundance_table, metric="minkowski", p=1))
    kernel = np.exp(- (adjacency_matrix ** 2) / (3000**2))
    diagonal = np.diag(np.sum(kernel,axis=1))
    laplacian = diagonal - kernel
    eigenvalues, eigenvectors = eigs(laplacian, k=len(laplacian) - 1, M=diagonal)
    sample_eigens = zip(eigenvalues.real, eigenvectors.T, filtered_abundance_table.index)
    eigenvalues, eigenvectors, sample_ids = zip(*sorted(sample_eigens, key = lambda tup:tup[0]))

    if debug:
        print("Adjacency Matrix:\n", adjacency_matrix, "\n")
        print("Kernel:\n", kernel, "\n")
        print("Diagonal:\n", diagonal, "\n")
        print("Laplacian:\n", laplacian, "\n")
        print("Eigenvalues:\n", eigenvalues, "\n")
        print("Eigenvectors:\n", eigenvectors, "\n")
        print("Sample ID's:\n", sample_ids, "\n")
    
    if pandas_mode:
        return pd.DataFrame(eigenvectors, columns = filtered_abundance_table.index), filtered_abundance_table.index
    
    return eigenvectors, filtered_abundance_table.index

In [163]:
# Plotting function
def eigenvector_to_plot(eigenvectors, title, text = None, color_descriptor = None):
    # color_descriptor = (column, color)
    fig = plt.figure()
    ax = fig.add_subplot(111, projection='3d')
    ax.set_xlabel('X axis')
    ax.set_ylabel('Y axis')
    ax.set_zlabel('Z axis')
    #ax.scatter3D(eigenvectors[1], eigenvectors[2], eigenvectors[3])
    if text != None:
        for label, x, y, z in zip(text, eigenvectors[1], eigenvectors[2], eigenvectors[3]):
            ax.text(x, y, z, label, None)
    if color_descriptor != None:
        for color, x, y, z in zip(color_descriptor, eigenvectors[1], eigenvectors[2], eigenvectors[3]):
            ax.scatter(x, y, z, c=color)
    plt.title(title)
    plt.show()
    plt.savefig(f"{figure_path}/{title}.png")

In [85]:
# Filtering 
def filtered_data(key, filterer, dropper = None):
    filtered_metadata = metadata.loc[metadata[key] == filterer]
    if dropper == None:
        filtered_abundance = abundance_table.filter(
            items=list(filtered_metadata.index), axis=0).drop(["Abundance", "Presence"],axis=1)
    if dropper != None:
        filtered_abundance = abundance_table.filter(
            items=list(filtered_metadata.index), axis=0).drop(["Abundance", "Presence"],axis=1).drop(dropper, axis=0)
    filtered_metadata = filtered_metadata.loc[filtered_abundance.index]
    return filtered_abundance, filtered_metadata

In [168]:
# Animal Filter
filtered_abundance, filtered_metadata  = filtered_data("host", "Animal")
eigenvectors, filtered_abundance_index = abundance_to_eigenvector(filtered_abundance, pandas_mode = True)
eigenvector_to_plot(eigenvectors.to_numpy(), "Animal Generalized Eigenvectors", color_descriptor = colors)

['g', 'g', 'g', 'g', 'g', 'g', 'g', 'g', 'g', 'g', 'g', 'g', 'g', 'g', 'g', 'g', 'g', 'g', 'g', 'g', 'g', 'g', 'g', 'g', 'g', 'g', 'g', 'g', 'g', 'g', 'g', 'g', 'g', 'g', 'g', 'g', 'r', 'r', 'r', 'r', 'r', 'r', 'r', 'r', 'r', 'r', 'r', 'b', 'b', 'b', 'b', 'b', 'b', 'b', 'b', 'b', 'r', 'r', 'r', 'r', 'r', 'r', 'r', 'r', 'b', 'b', 'b', 'b', 'b', 'b', 'b', 'b', 'b', 'b', 'b', 'b', 'b', 'b', 'b', 'b', 'b', 'b', 'b', 'b', 'r', 'b', 'b', 'b', 'r', 'r', 'r', 'b', 'b', 'b', 'r', 'b', 'b', 'b', 'b', 'r', 'r', 'r', 'r', 'r', 'r', 'r', 'r', 'r', 'r']


  fig = plt.figure()


Canvas(toolbar=Toolbar(toolitems=[('Home', 'Reset original view', 'home', 'home'), ('Back', 'Back to previous …

In [138]:
metadata.loc[eigenvectors.T[(eigenvectors.loc[1] >= 0.05) & (eigenvectors.loc[2] <= 0)].index].to_csv(f"{data_path}/animal_positive_negative_cluster.csv")

In [167]:
filtered_abundance, filtered_metadata  = filtered_data("host", "Animal")
colors = []
for i in filtered_metadata["sample_type"]:
    if i == "Coral":
        colors.append("g")
    if i == "Drosophila":
        colors.append("b")
    if i == "Mosquito":
        colors.append("r")
len(colors)

109

In [86]:
filtered_abundance, filtered_metadata  = filtered_data("sample_type", "Mosquito")
eigenvectors, filtered_abundance_index = abundance_to_eigenvector(filtered_abundance, pandas_mode = True)
eigenvector_to_plot(eigenvectors.to_numpy(), filtered_abundance_index, "Mosquito Generalized Eigenvectors Dropped None")



Canvas(toolbar=Toolbar(toolitems=[('Home', 'Reset original view', 'home', 'home'), ('Back', 'Back to previous …

In [99]:
print((eigenvectors.loc[1][105279], eigenvectors.loc[2][105279], eigenvectors.loc[3][105279]))

(-0.0006835388889842191, 0.0014441560837985916, 0.9673373596860851)


In [10]:
# Filtering Method
filtered_metadata = metadata.loc[metadata["sample_type"] == "Mosquito"]
mosquito_abundance = abundance_table.filter(
    items=list(mosquito_metadata.index), axis=0).drop(["Abundance", "Presence"],axis=1).drop([105279, 105525, 105502, 105312, 105546], axis=0)
mosquito_metadata = mosquito_metadata.loc[mosquito_abundance.index]
#.drop([105279, 105525, 105502, 105312, 105546], axis=0)

In [17]:
# Filtering Method
mosquito_metadata = metadata.loc[metadata["sample_type"] == "Mosquito"]
mosquito_abundance = abundance_table.filter(
    items=list(mosquito_metadata.index), axis=0).drop(["Abundance", "Presence"],axis=1).drop([105279, 105525, 105502], axis=0)
mosquito_metadata = mosquito_metadata.loc[mosquito_abundance.index]

In [12]:
# Filtering Method
mosquito_metadata = metadata.loc[metadata["sample_type"] == "Mosquito"]
mosquito_abundance = abundance_table.filter(
    items=list(mosquito_metadata.index), axis=0).drop(["Abundance", "Presence"],axis=1)
mosquito_metadata = mosquito_metadata.loc[mosquito_abundance.index]

In [7]:
# Filtering Method
mosquito_metadata = metadata.loc[metadata["host"] == "Animal"]
mosquito_abundance = abundance_table.filter(
    items=list(mosquito_metadata.index), axis=0).drop(["Abundance", "Presence"],axis=1)
mosquito_metadata = mosquito_metadata.loc[mosquito_abundance.index]

In [16]:
# 
eigenvector_to_plot(*abundance_to_eigenvector(mosquito_abundance), "Mosquito Generalized Eigenvalue Dropped None")



Canvas(toolbar=Toolbar(toolitems=[('Home', 'Reset original view', 'home', 'home'), ('Back', 'Back to previous …

In [21]:
mosquito_abundance.loc[105279]

KeyError: 105279

In [18]:
# 105279, 105525, 105502
eigenvector_to_plot(*abundance_to_eigenvector(mosquito_abundance), "Mosquito Generalized Eigenvalue Dropped 3")



Canvas(toolbar=Toolbar(toolitems=[('Home', 'Reset original view', 'home', 'home'), ('Back', 'Back to previous …

In [11]:
eigenvector_to_plot(*abundance_to_eigenvector(mosquito_abundance), "Mosquito Generalized Eigenvalue Dropped 5")



Canvas(toolbar=Toolbar(toolitems=[('Home', 'Reset original view', 'home', 'home'), ('Back', 'Back to previous …

In [8]:
eigenvector_to_plot(*abundance_to_eigenvector(mosquito_abundance), "Animal Generalized Eigenvalue")



Canvas(toolbar=Toolbar(toolitems=[('Home', 'Reset original view', 'home', 'home'), ('Back', 'Back to previous …

In [41]:
filtered_metadata = metadata.loc[metadata["sample_type"] == "Mosquito"]
filtered_abundance = abundance_table.filter(
    items=list(filtered_metadata.index), axis=0)

In [45]:
filtered_abundance.sort_values(by=["Presence"],ascending=False)

Unnamed: 0,Otu00003,Otu00004,Otu00005,Otu00006,Otu00007,Otu00008,Otu00009,Otu00010,Otu00011,Otu00012,...,Otu25365,Otu25380,Otu25387,Otu25399,Otu25438,Otu25456,Otu25470,Otu25476,Abundance,Presence
105502,540,0,112,0,126,51,0,0,1,13,...,0,0,0,0,0,0,0,0,3692,328
105525,0,0,63,90,0,177,0,0,0,0,...,0,0,3,0,0,0,0,0,3693,292
105153,1993,0,0,43,1499,0,20,0,0,0,...,0,0,0,0,0,0,0,0,3700,51
105123,2464,11,0,0,914,0,21,0,30,6,...,0,0,0,0,0,0,0,0,3695,49
105128,2085,0,4,0,751,2,721,0,0,0,...,0,0,0,0,0,0,0,0,3700,47
105503,2473,0,11,3,1125,0,0,0,0,0,...,0,0,0,0,0,0,0,0,3699,43
105312,1112,0,0,480,460,0,112,0,0,0,...,0,0,0,0,0,0,0,0,3699,39
105234,2892,0,0,0,6,0,0,0,0,0,...,0,0,0,0,0,0,0,0,3698,26
105279,0,0,0,0,2555,0,0,397,0,0,...,0,0,0,0,0,0,0,0,3696,22
105387,3461,0,0,107,0,0,53,0,0,0,...,0,0,0,0,0,0,0,0,3699,20


In [44]:
mosquito_abundance.loc[105525]

KeyError: 105525