In [25]:
%matplotlib ipympl

# Import Dependencies
from scipy.spatial.distance import pdist, squareform
from scipy.sparse.linalg import eigs
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from mpl_toolkits.mplot3d import Axes3D
import skbio

In [15]:


# Data loading: file locations, OTU abundance table, sample metadata.

# Input / output locations (relative to the notebook's working directory).
data_path = "./data"
figure_path = "./plots"
abundance_table_path = f"{data_path}/abundance_table_97.shared"
metadata_path = f"{data_path}/SuperTransect_mapping_file.csv"

# The abundance file is tab-separated. The first row is a header whose
# fields from position 3 onwards are the OTU names; every later row holds
# an integer sample id in field 1 and that sample's OTU counts from
# field 3 onwards.
with open(abundance_table_path, "r") as file_literal:
    raw_abundance_data = [row.strip().split("\t") for row in file_literal]

header_row, sample_rows = raw_abundance_data[0], raw_abundance_data[1:]
otu_names = header_row[3:]
sample_names = [int(row[1]) for row in sample_rows]
otu_counts = [row[3:] for row in sample_rows]

# Samples as rows (indexed by integer sample id), OTUs as columns.
abundance_table = pd.DataFrame(
    np.array(otu_counts, dtype=np.int64),
    index=sample_names,
    columns=otu_names,
)

# Per-sample summaries: total count, and number of OTUs with a nonzero count.
abundance_table["Abundance"] = abundance_table.sum(axis=1)
abundance_table["Presence"] = (
    abundance_table.drop(columns="Abundance").ne(0).sum(axis=1)
)

# Sample metadata, indexed by the first CSV column.
metadata = pd.read_csv(metadata_path, index_col=0)

# Spectral (graph-Laplacian) embedding of the mosquito samples, coloured by
# a combined latitude + longitude gradient.

# Metadata rows flagged as mosquito samples, and their abundance rows.
mosquito_metadata = metadata.loc[metadata["sample_type"] == "Mosquito"]
mosquito_abundance = abundance_table.filter(
    items=list(mosquito_metadata.index), axis=0)

# OTU counts only (summary columns removed), with two hard-coded samples
# excluded. NOTE(review): the reason for excluding these ids is not recorded
# in the notebook -- TODO document it.
excluded_samples = [105525, 105502]
mosquito_otus = mosquito_abundance.drop(
    ["Abundance", "Presence"], axis=1).drop(excluded_samples, axis=0)

# Pairwise L1 (Minkowski p=1) distances between samples, as a square matrix.
mosquito_adj = squareform(pdist(mosquito_otus, metric="minkowski", p=1))

# Keep the metadata aligned row-for-row with the distance matrix.
mosquito_metadata = mosquito_metadata.loc[mosquito_otus.index]
assert len(mosquito_metadata) == mosquito_adj.shape[0]

# Gaussian affinity between samples; 3000 is the kernel bandwidth.
kernel_bandwidth = 3000
kernel = np.exp(-(mosquito_adj ** 2) / (kernel_bandwidth ** 2))

# Unnormalised graph Laplacian L = D - W, where W is the affinity kernel and
# D the diagonal matrix of its row sums. (The previous version subtracted the
# raw distance matrix from D, which is not a Laplacian of the kernel graph.)
degree_matrix = np.diag(np.sum(kernel, axis=1))
laplacian = degree_matrix - kernel

# L is real and symmetric, so use the dense symmetric solver: it is
# deterministic and returns eigenvalues in ascending order. The embedding
# uses the eigenvectors of the *smallest* eigenvalues; index 0 is the trivial
# (constant) one and is skipped when plotting. `eigs` with default settings
# returned the largest-magnitude pairs instead.
# NOTE(review): the plot title says "Generalized"; if the generalized problem
# L v = lambda D v is intended, scipy.linalg.eigh(laplacian, degree_matrix)
# solves it -- confirm the intent.
all_eigenvalues, all_eigenvectors = np.linalg.eigh(laplacian)
eigenvalues = all_eigenvalues[:4]
eigenvectors = all_eigenvectors[:, :4].T  # one eigenvector per row

# Colour: position along the lat + long "diagonal", rescaled to [0, 1] and
# used as the green channel. A zero range (all samples at the same position)
# would divide by zero, so fall back to a divisor of 1.
diagonal = mosquito_metadata["lat"] + mosquito_metadata["long"]
low_right = diagonal.min()
high_left = diagonal.max()
gps_delta = high_left - low_right
diagonal = (diagonal - low_right) / (gps_delta if gps_delta else 1.0)
color_gradient = [(0, green, 0) for green in diagonal]

print(mosquito_metadata.shape)

# Plot
fig = plt.figure()
ax = fig.add_subplot(111, projection='3d')
ax.scatter3D(eigenvectors[1], eigenvectors[2], eigenvectors[3], c=color_gradient)
ax.set_xlabel("Eigenvector 1")
ax.set_ylabel("Eigenvector 2")
ax.set_zlabel("Eigenvector 3")
ax.set_title("Generalized Eigenvector Mosquito 3D")

(32, 30)


Canvas(toolbar=Toolbar(toolitems=[('Home', 'Reset original view', 'home', 'home'), ('Back', 'Back to previous …

Text(0.5, 0.92, 'Generalized Eigenvector Mosquito 3D')

In [20]:
# Data loading: file locations, OTU abundance table, sample metadata.

# Input / output locations (relative to the notebook's working directory).
data_path = "./data"
figure_path = "./plots"
abundance_table_path = f"{data_path}/abundance_table_97.shared"
metadata_path = f"{data_path}/SuperTransect_mapping_file.csv"

# The abundance file is tab-separated. The first row is a header whose
# fields from position 3 onwards are the OTU names; every later row holds
# an integer sample id in field 1 and that sample's OTU counts from
# field 3 onwards.
with open(abundance_table_path, "r") as file_literal:
    raw_abundance_data = [row.strip().split("\t") for row in file_literal]

header_row, sample_rows = raw_abundance_data[0], raw_abundance_data[1:]
otu_names = header_row[3:]
sample_names = [int(row[1]) for row in sample_rows]
otu_counts = [row[3:] for row in sample_rows]

# Samples as rows (indexed by integer sample id), OTUs as columns.
abundance_table = pd.DataFrame(
    np.array(otu_counts, dtype=np.int64),
    index=sample_names,
    columns=otu_names,
)

# Per-sample summaries: total count, and number of OTUs with a nonzero count.
abundance_table["Abundance"] = abundance_table.sum(axis=1)
abundance_table["Presence"] = (
    abundance_table.drop(columns="Abundance").ne(0).sum(axis=1)
)

# Sample metadata, indexed by the first CSV column.
metadata = pd.read_csv(metadata_path, index_col=0)

# Taxonomy
# Build a tree from the per-OTU taxonomy annotations, with every branch
# length set to 1.
# `data_path` is the module-level path defined in this cell; the previous
# `self.data_path` only worked when a stray `self` happened to exist in the
# kernel and raises NameError on a fresh run.
taxonomy_path = f"{data_path}/annotations_97.taxonomy"

# Tab-separated file: the first row is skipped as a header, field 0 is the
# OTU id and field 2 a ';'-separated lineage. Blank lines are ignored so a
# trailing newline does not cause an IndexError below.
with open(taxonomy_path, 'r') as f:
    raw_metadata = [line.strip().split("\t") for line in f if line.strip()][1:]

# NOTE: `otu_names` is rebound here to the ids listed in the taxonomy file
# (in file order), replacing the abundance-table header names.
taxonomies = {}
otu_names = []
for line in raw_metadata:
    # The last ';'-separated piece is dropped (presumably an empty string
    # after a trailing ';' -- confirm against the file).
    taxonomies[line[0]] = line[2].split(";")[:-1]
    otu_names.append(line[0])

taxonomic_tree = skbio.TreeNode.from_taxonomy(
    [(otu_id, lineage) for otu_id, lineage in taxonomies.items()])
taxonomic_tree = taxonomic_tree.root_at(taxonomic_tree)

# Give every node a unit branch length.
for node in taxonomic_tree.traverse():
    node.length = 1
    

def get_unifrac_distances(self):
    """Return normalized weighted-UniFrac beta diversity for ``self``'s samples.

    Reads ``self.otu_data`` (the counts), ``self.otu_names`` (the OTU ids)
    and ``self.taxonomic_tree`` (the tree), and forwards them to
    ``skbio.diversity.beta_diversity`` with input validation disabled.

    Returns:
        Whatever ``skbio.diversity.beta_diversity`` returns for these inputs.
    """
    options = {
        "otu_ids": self.otu_names,
        "validate": False,
        "tree": self.taxonomic_tree,
        "normalized": True,
    }
    return skbio.diversity.beta_diversity(
        'weighted_unifrac', self.otu_data, **options)

def get_unweighted_unifrac_distances(self):
    """Return unweighted-UniFrac beta diversity for ``self``'s samples.

    Reads ``self.otu_data`` (the counts), ``self.otu_names`` (the OTU ids)
    and ``self.taxonomic_tree`` (the tree), and forwards them to
    ``skbio.diversity.beta_diversity`` with input validation disabled.

    Returns:
        Whatever ``skbio.diversity.beta_diversity`` returns for these inputs.
        The previous version computed this value but never returned it, so
        callers always received ``None``.
    """
    unifrac_dists = skbio.diversity.beta_diversity(
        'unweighted_unifrac', self.otu_data, otu_ids=self.otu_names, validate=False, tree=self.taxonomic_tree)
    return unifrac_dists


# Spectral (graph-Laplacian) embedding of the samples whose host is "Animal",
# coloured by a combined latitude + longitude gradient.
# The `mosquito_*` names are kept from the cell this was copied from; here
# they hold the Animal subset.

# Metadata rows for Animal-hosted samples, and their abundance rows.
mosquito_metadata = metadata.loc[metadata["host"] == "Animal"]
mosquito_abundance = abundance_table.filter(
    items=list(mosquito_metadata.index), axis=0)

# OTU counts only (summary columns removed).
mosquito_otus = mosquito_abundance.drop(["Abundance", "Presence"], axis=1)

# Pairwise L1 (Minkowski p=1) distances between samples, as a square matrix.
mosquito_adj = squareform(pdist(mosquito_otus, metric="minkowski", p=1))

# Keep the metadata aligned row-for-row with the distance matrix.
mosquito_metadata = mosquito_metadata.loc[mosquito_otus.index]
assert len(mosquito_metadata) == mosquito_adj.shape[0]

# Gaussian affinity between samples; 3000 is the kernel bandwidth.
kernel_bandwidth = 3000
kernel = np.exp(-(mosquito_adj ** 2) / (kernel_bandwidth ** 2))

# Unnormalised graph Laplacian L = D - W, where W is the affinity kernel and
# D the diagonal matrix of its row sums. (The previous version subtracted the
# raw distance matrix from D, which is not a Laplacian of the kernel graph.)
degree_matrix = np.diag(np.sum(kernel, axis=1))
laplacian = degree_matrix - kernel

# L is real and symmetric, so use the dense symmetric solver: it is
# deterministic and returns eigenvalues in ascending order. The embedding
# uses the eigenvectors of the *smallest* eigenvalues; index 0 is the trivial
# (constant) one and is skipped when plotting. `eigs` with default settings
# returned the largest-magnitude pairs instead.
# NOTE(review): the plot title says "Generalized"; if the generalized problem
# L v = lambda D v is intended, scipy.linalg.eigh(laplacian, degree_matrix)
# solves it -- confirm the intent.
all_eigenvalues, all_eigenvectors = np.linalg.eigh(laplacian)
eigenvalues = all_eigenvalues[:4]
eigenvectors = all_eigenvectors[:, :4].T  # one eigenvector per row

# Colour: position along the lat + long "diagonal", rescaled to [0, 1] and
# used as the green channel. A zero range (all samples at the same position)
# would divide by zero, so fall back to a divisor of 1.
diagonal = mosquito_metadata["lat"] + mosquito_metadata["long"]
low_right = diagonal.min()
high_left = diagonal.max()
gps_delta = high_left - low_right
diagonal = (diagonal - low_right) / (gps_delta if gps_delta else 1.0)
color_gradient = [(0, green, 0) for green in diagonal]

print(mosquito_metadata.shape)

# Plot
fig = plt.figure()
ax = fig.add_subplot(111, projection='3d')
ax.scatter3D(eigenvectors[1], eigenvectors[2], eigenvectors[3], c=color_gradient)
ax.set_xlabel("Eigenvector 1")
ax.set_ylabel("Eigenvector 2")
ax.set_zlabel("Eigenvector 3")
ax.set_title("Generalized Eigenvector Animal 3D")

(109, 30)


Canvas(toolbar=Toolbar(toolitems=[('Home', 'Reset original view', 'home', 'home'), ('Back', 'Back to previous …

Text(0.5, 0.92, 'Generalized Eigenvector Animal 3D')

In [21]:


# Data loading: file locations, OTU abundance table, sample metadata.

# Input / output locations (relative to the notebook's working directory).
data_path = "./data"
figure_path = "./plots"
abundance_table_path = f"{data_path}/abundance_table_97.shared"
metadata_path = f"{data_path}/SuperTransect_mapping_file.csv"

# The abundance file is tab-separated. The first row is a header whose
# fields from position 3 onwards are the OTU names; every later row holds
# an integer sample id in field 1 and that sample's OTU counts from
# field 3 onwards.
with open(abundance_table_path, "r") as file_literal:
    raw_abundance_data = [row.strip().split("\t") for row in file_literal]

header_row, sample_rows = raw_abundance_data[0], raw_abundance_data[1:]
otu_names = header_row[3:]
sample_names = [int(row[1]) for row in sample_rows]
otu_counts = [row[3:] for row in sample_rows]

# Samples as rows (indexed by integer sample id), OTUs as columns.
abundance_table = pd.DataFrame(
    np.array(otu_counts, dtype=np.int64),
    index=sample_names,
    columns=otu_names,
)

# Per-sample summaries: total count, and number of OTUs with a nonzero count.
abundance_table["Abundance"] = abundance_table.sum(axis=1)
abundance_table["Presence"] = (
    abundance_table.drop(columns="Abundance").ne(0).sum(axis=1)
)

# Sample metadata, indexed by the first CSV column.
metadata = pd.read_csv(metadata_path, index_col=0)

# Spectral (graph-Laplacian) embedding of the samples whose host is "Plant",
# coloured by a combined latitude + longitude gradient.
# The `mosquito_*` names are kept from the cell this was copied from; here
# they hold the Plant subset.

# Metadata rows for Plant-hosted samples, and their abundance rows.
mosquito_metadata = metadata.loc[metadata["host"] == "Plant"]
mosquito_abundance = abundance_table.filter(
    items=list(mosquito_metadata.index), axis=0)

# OTU counts only (summary columns removed).
mosquito_otus = mosquito_abundance.drop(["Abundance", "Presence"], axis=1)

# Pairwise L1 (Minkowski p=1) distances between samples, as a square matrix.
mosquito_adj = squareform(pdist(mosquito_otus, metric="minkowski", p=1))

# Keep the metadata aligned row-for-row with the distance matrix.
mosquito_metadata = mosquito_metadata.loc[mosquito_otus.index]
assert len(mosquito_metadata) == mosquito_adj.shape[0]

# Gaussian affinity between samples; 3000 is the kernel bandwidth.
kernel_bandwidth = 3000
kernel = np.exp(-(mosquito_adj ** 2) / (kernel_bandwidth ** 2))

# Unnormalised graph Laplacian L = D - W, where W is the affinity kernel and
# D the diagonal matrix of its row sums. (The previous version subtracted the
# raw distance matrix from D, which is not a Laplacian of the kernel graph.)
degree_matrix = np.diag(np.sum(kernel, axis=1))
laplacian = degree_matrix - kernel

# L is real and symmetric, so use the dense symmetric solver: it is
# deterministic and returns eigenvalues in ascending order. The embedding
# uses the eigenvectors of the *smallest* eigenvalues; index 0 is the trivial
# (constant) one and is skipped when plotting. `eigs` with default settings
# returned the largest-magnitude pairs instead.
# NOTE(review): the plot title says "Generalized"; if the generalized problem
# L v = lambda D v is intended, scipy.linalg.eigh(laplacian, degree_matrix)
# solves it -- confirm the intent.
all_eigenvalues, all_eigenvectors = np.linalg.eigh(laplacian)
eigenvalues = all_eigenvalues[:4]
eigenvectors = all_eigenvectors[:, :4].T  # one eigenvector per row

# Colour: position along the lat + long "diagonal", rescaled to [0, 1] and
# used as the green channel. A zero range (all samples at the same position)
# would divide by zero, so fall back to a divisor of 1.
diagonal = mosquito_metadata["lat"] + mosquito_metadata["long"]
low_right = diagonal.min()
high_left = diagonal.max()
gps_delta = high_left - low_right
diagonal = (diagonal - low_right) / (gps_delta if gps_delta else 1.0)
color_gradient = [(0, green, 0) for green in diagonal]

print(mosquito_metadata.shape)

# Plot
fig = plt.figure()
ax = fig.add_subplot(111, projection='3d')
ax.scatter3D(eigenvectors[1], eigenvectors[2], eigenvectors[3], c=color_gradient)
ax.set_xlabel("Eigenvector 1")
ax.set_ylabel("Eigenvector 2")
ax.set_zlabel("Eigenvector 3")
ax.set_title("Generalized Eigenvector Plant 3D")

(110, 30)


Canvas(toolbar=Toolbar(toolitems=[('Home', 'Reset original view', 'home', 'home'), ('Back', 'Back to previous …

Text(0.5, 0.92, 'Generalized Eigenvector Plant 3D')

In [23]:


# Data loading: file locations, OTU abundance table, sample metadata.

# Input / output locations (relative to the notebook's working directory).
data_path = "./data"
figure_path = "./plots"
abundance_table_path = f"{data_path}/abundance_table_97.shared"
metadata_path = f"{data_path}/SuperTransect_mapping_file.csv"

# The abundance file is tab-separated. The first row is a header whose
# fields from position 3 onwards are the OTU names; every later row holds
# an integer sample id in field 1 and that sample's OTU counts from
# field 3 onwards.
with open(abundance_table_path, "r") as file_literal:
    raw_abundance_data = [row.strip().split("\t") for row in file_literal]

header_row, sample_rows = raw_abundance_data[0], raw_abundance_data[1:]
otu_names = header_row[3:]
sample_names = [int(row[1]) for row in sample_rows]
otu_counts = [row[3:] for row in sample_rows]

# Samples as rows (indexed by integer sample id), OTUs as columns.
abundance_table = pd.DataFrame(
    np.array(otu_counts, dtype=np.int64),
    index=sample_names,
    columns=otu_names,
)

# Per-sample summaries: total count, and number of OTUs with a nonzero count.
abundance_table["Abundance"] = abundance_table.sum(axis=1)
abundance_table["Presence"] = (
    abundance_table.drop(columns="Abundance").ne(0).sum(axis=1)
)

# Sample metadata, indexed by the first CSV column.
metadata = pd.read_csv(metadata_path, index_col=0)

# Spectral (graph-Laplacian) embedding of the samples whose host is
# "Nonhost", coloured by a combined latitude + longitude gradient.
# The `mosquito_*` names are kept from the cell this was copied from; here
# they hold the Nonhost subset.

# Metadata rows for Nonhost samples, and their abundance rows.
mosquito_metadata = metadata.loc[metadata["host"] == "Nonhost"]
mosquito_abundance = abundance_table.filter(
    items=list(mosquito_metadata.index), axis=0)

# OTU counts only (summary columns removed).
mosquito_otus = mosquito_abundance.drop(["Abundance", "Presence"], axis=1)

# Pairwise L1 (Minkowski p=1) distances between samples, as a square matrix.
mosquito_adj = squareform(pdist(mosquito_otus, metric="minkowski", p=1))

# Keep the metadata aligned row-for-row with the distance matrix.
mosquito_metadata = mosquito_metadata.loc[mosquito_otus.index]
assert len(mosquito_metadata) == mosquito_adj.shape[0]

# Gaussian affinity between samples; 3000 is the kernel bandwidth.
kernel_bandwidth = 3000
kernel = np.exp(-(mosquito_adj ** 2) / (kernel_bandwidth ** 2))

# Unnormalised graph Laplacian L = D - W, where W is the affinity kernel and
# D the diagonal matrix of its row sums. (The previous version subtracted the
# raw distance matrix from D, which is not a Laplacian of the kernel graph.)
degree_matrix = np.diag(np.sum(kernel, axis=1))
laplacian = degree_matrix - kernel

# L is real and symmetric, so use the dense symmetric solver: it is
# deterministic and returns eigenvalues in ascending order. The embedding
# uses the eigenvectors of the *smallest* eigenvalues; index 0 is the trivial
# (constant) one and is skipped when plotting. `eigs` with default settings
# returned the largest-magnitude pairs instead.
# NOTE(review): the plot title says "Generalized"; if the generalized problem
# L v = lambda D v is intended, scipy.linalg.eigh(laplacian, degree_matrix)
# solves it -- confirm the intent.
all_eigenvalues, all_eigenvectors = np.linalg.eigh(laplacian)
eigenvalues = all_eigenvalues[:4]
eigenvectors = all_eigenvectors[:, :4].T  # one eigenvector per row

# Colour: position along the lat + long "diagonal", rescaled to [0, 1] and
# used as the green channel. A zero range (all samples at the same position)
# would divide by zero, so fall back to a divisor of 1.
diagonal = mosquito_metadata["lat"] + mosquito_metadata["long"]
low_right = diagonal.min()
high_left = diagonal.max()
gps_delta = high_left - low_right
diagonal = (diagonal - low_right) / (gps_delta if gps_delta else 1.0)
color_gradient = [(0, green, 0) for green in diagonal]

print(mosquito_metadata.shape)

# Plot
fig = plt.figure()
ax = fig.add_subplot(111, projection='3d')
ax.scatter3D(eigenvectors[1], eigenvectors[2], eigenvectors[3], c=color_gradient)
ax.set_xlabel("Eigenvector 1")
ax.set_ylabel("Eigenvector 2")
ax.set_zlabel("Eigenvector 3")
ax.set_title("Generalized Eigenvector Nonhost 3D")

(240, 30)


Canvas(toolbar=Toolbar(toolitems=[('Home', 'Reset original view', 'home', 'home'), ('Back', 'Back to previous …

Text(0.5, 0.92, 'Generalized Eigenvector Nonhost 3D')