In [1]:
%matplotlib ipympl

# Import Dependencies
from scipy.spatial.distance import pdist, squareform
from scipy.sparse.linalg import eigs
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from mpl_toolkits.mplot3d import Axes3D

In [2]:
# File Paths
data_path = "./data"
figure_path = "./plots"
abundance_table_path = f"{data_path}/abundance_table_97.shared"
metadata_path = f"{data_path}/SuperTransect_mapping_file.csv"

In [3]:
# Abundance Table
with open(abundance_table_path, "r") as file_literal:
    raw_abundance_data = [line.strip().split("\t") for line in file_literal]
    otu_names = raw_abundance_data[0][3:]
    sample_names = list(map(int, [line[1] for line in raw_abundance_data[1:]]))
    otu_counts = [line[3:] for line in raw_abundance_data[1:]]
abundance_table = pd.DataFrame(
    np.array(otu_counts, dtype=np.int64),
    index=sample_names,
    columns=otu_names)
abundance_table["Abundance"] = abundance_table.sum(axis=1)
abundance_table["Presence"] = abundance_table.drop("Abundance", axis=1).where(
    abundance_table == 0, 1).sum(axis=1)

# Metadata
metadata = pd.read_csv(metadata_path, index_col=0)

In [4]:
# Filtering Method


In [5]:
# Abundance -> Eigenvector pipeline
def abundance_to_eigenvector(filtered_abundance_table, filtered_meta_data):
    adjacency_matrix = squareform(pdist(filtered_abundance_table), metric="minkowski", p=1)
    kernel = np.exp(- (adjacency_matrix))

In [6]:
mosquito_metadata = metadata.loc[metadata["sample_type"] == "Mosquito"]
mosquito_abundance = abundance_table.filter(
    items=list(mosquito_metadata.index), axis=0)


mosquito_adj = squareform(pdist(mosquito_abundance.drop(
    ["Abundance", "Presence"], axis=1).drop([105525, 105502], axis=0), metric="minkowski", p=1))

mosquito_metadata = mosquito_metadata.loc[mosquito_abundance.drop(
    ["Abundance", "Presence"], axis=1).drop([105525, 105502], axis=0).index]

kernel = np.exp(- (mosquito_adj ** 2) / (3000**2))

diagonal = np.diag(np.sum(kernel, axis=1))

laplacian = diagonal - mosquito_adj

#normalized_laplacian = np.linalg.inv(diagonal) * laplacian

eigenvalues, eigenvectors = eigs(laplacian, k=4)
eigenvectors = eigenvectors.T.real

diagonal= mosquito_metadata["lat"] + mosquito_metadata["long"]
low_right=min(diagonal)
high_left=max(diagonal)
gps_delta=high_left - low_right
diagonal=(diagonal - low_right) / gps_delta
color_gradient=[(0, i, 0) for i in list(diagonal)]

# Plot
fig = plt.figure()
ax = fig.add_subplot(111, projection='3d')
ax.set_xlabel('X axis')
ax.set_ylabel('Y axis')
ax.set_zlabel('Z axis')
ax.scatter3D(eigenvectors[1], eigenvectors[2], eigenvectors[3], c=color_gradient)
plt.title("Generalized Eigenvector Mosquito 3D")

Canvas(toolbar=Toolbar(toolitems=[('Home', 'Reset original view', 'home', 'home'), ('Back', 'Back to previous …

Text(0.5, 0.92, 'Generalized Eigenvector Mosquito 3D')

In [7]:


# Instantiation / Data Cleaning Zone
# File Paths
data_path = "./data"
figure_path = "./plots"
abundance_table_path = f"{data_path}/abundance_table_97.shared"
metadata_path = f"{data_path}/SuperTransect_mapping_file.csv"

# Abundance Table
with open(abundance_table_path, "r") as file_literal:
    raw_abundance_data = [line.strip().split("\t") for line in file_literal]
    otu_names = raw_abundance_data[0][3:]
    sample_names = list(map(int, [line[1] for line in raw_abundance_data[1:]]))
    otu_counts = [line[3:] for line in raw_abundance_data[1:]]
abundance_table = pd.DataFrame(
    np.array(otu_counts, dtype=np.int64),
    index=sample_names,
    columns=otu_names)
abundance_table["Abundance"] = abundance_table.sum(axis=1)
abundance_table["Presence"] = abundance_table.drop("Abundance", axis=1).where(
    abundance_table == 0, 1).sum(axis=1)

# Metadata
metadata = pd.read_csv(metadata_path, index_col=0)

mosquito_metadata = metadata.loc[metadata["sample_type"] == "Mosquito"]
mosquito_abundance = abundance_table.filter(
    items=list(mosquito_metadata.index), axis=0)


mosquito_adj = squareform(pdist(mosquito_abundance.drop(
    ["Abundance", "Presence"], axis=1).drop([105525, 105502], axis=0), metric="minkowski", p=1))

mosquito_metadata = mosquito_metadata.loc[mosquito_abundance.drop(
    ["Abundance", "Presence"], axis=1).drop([105525, 105502], axis=0).index]

kernel = np.exp(- (mosquito_adj ** 2) / (3000**2))

diagonal = np.diag(np.sum(kernel, axis=1))

laplacian = diagonal - mosquito_adj

#normalized_laplacian = np.linalg.inv(diagonal) * laplacian

eigenvalues, eigenvectors = eigs(laplacian, k=4)
eigenvectors = eigenvectors.T.real


site_name = list(mosquito_metadata["transect_name"])
site_colors = ["Gardens","Falls","DrumRoad","AboveFalls"]

for i, site in enumerate(site_name):
    if "Gardens" in site:
        site_name[i] = 1
    if "STFalls" in site:
        site_name[i] = 2
    if "DrumRoad" in site:
        site_name[i] = 3
    if "AboveFalls" in site:
        site_name[i] = 4

        
# Plot
fig = plt.figure()
ax = fig.add_subplot(111, projection='3d')
ax.set_xlabel('X axis')
ax.set_ylabel('Y axis')
ax.set_zlabel('Z axis')
scatter = ax.scatter3D(eigenvectors[1], eigenvectors[2], eigenvectors[3], c=site_name)
plt.legend(handles = scatter.legend_elements()[0], labels = site_colors)
plt.title("Generalized Eigenvector Mosquito 3D")

Canvas(toolbar=Toolbar(toolitems=[('Home', 'Reset original view', 'home', 'home'), ('Back', 'Back to previous …

Text(0.5, 0.92, 'Generalized Eigenvector Mosquito 3D')

In [8]:
# Instantiation / Data Cleaning Zone
# File Paths
data_path = "./data"
figure_path = "./plots"
abundance_table_path = f"{data_path}/abundance_table_97.shared"
metadata_path = f"{data_path}/SuperTransect_mapping_file.csv"

# Abundance Table
with open(abundance_table_path, "r") as file_literal:
    raw_abundance_data = [line.strip().split("\t") for line in file_literal]
    otu_names = raw_abundance_data[0][3:]
    sample_names = list(map(int, [line[1] for line in raw_abundance_data[1:]]))
    otu_counts = [line[3:] for line in raw_abundance_data[1:]]
abundance_table = pd.DataFrame(
    np.array(otu_counts, dtype=np.int64),
    index=sample_names,
    columns=otu_names)
abundance_table["Abundance"] = abundance_table.sum(axis=1)
abundance_table["Presence"] = abundance_table.drop("Abundance", axis=1).where(
    abundance_table == 0, 1).sum(axis=1)

# Metadata
metadata = pd.read_csv(metadata_path, index_col=0)

mosquito_metadata = metadata.loc[metadata["sample_type"] == "Mosquito"]
mosquito_abundance = abundance_table.filter(
    items=list(mosquito_metadata.index), axis=0)


mosquito_adj = squareform(pdist(mosquito_abundance.drop(
    ["Abundance", "Presence"], axis=1).drop([105525, 105502], axis=0), metric="minkowski", p=1))

mosquito_metadata = mosquito_metadata.loc[mosquito_abundance.drop(
    ["Abundance", "Presence"], axis=1).drop([105525, 105502], axis=0).index]

kernel = np.exp(- (mosquito_adj ** 2) / (3000**2))

diagonal = np.diag(np.sum(kernel, axis=1))

laplacian = diagonal - mosquito_adj

#normalized_laplacian = np.linalg.inv(diagonal) * laplacian

eigenvalues, eigenvectors = eigs(laplacian, k=4)
eigenvectors = eigenvectors.T.real


site_name = list(mosquito_metadata["transect_name"])
site_colors = ["Gardens","Falls","DrumRoad","AboveFalls"]

for i, site in enumerate(site_name):
    if "Gardens" in site:
        site_name[i] = 1
    if "STFalls" in site:
        site_name[i] = 2
    if "DrumRoad" in site:
        site_name[i] = 3
    if "AboveFalls" in site:
        site_name[i] = 4

print(mosquito_metadata.columns)
        
def on_pick(event):
    print("bruh")

# Plot
fig = plt.figure()
ax = fig.add_subplot(111, projection='3d')
ax.set_xlabel('X axis')
ax.set_ylabel('Y axis')
ax.set_zlabel('Z axis')
#for label, x, y, z in zip(mosquito_metadata["sample_id"], eigenvectors[1], eigenvectors[2], eigenvectors[3]):
#    ax.text(x, y, z, label, None)
points = np.array([(x,y,z) for (x,y,z) in list(zip(*eigenvectors[1:4])) if y < 0])
scatter = ax.scatter3D(np.split(points,3,axis=1), c=site_name)
plt.legend(handles = scatter.legend_elements()[0], labels = site_colors)
fig.canvas.callbacks.connect('pick_event', on_pick)
plt.title("Generalized Eigenvector Mosquito 3D")
plt.show()

Index(['run', 'index', 'sample_barcode', 'locus', 'primer_plate', 'primer_row',
       'primer_col', 'extraction_well', 'forward_primer', 'reverse_primer',
       'primer_name', 'sample_id', 'collection_label', 'project', 'site_order',
       'site_code', 'site_name', 'transect_name', 'collection_date',
       'sample_type', 'habitat', 'host', 'trophic', 'site_type', 'metadata',
       'lat', 'long', 'processing', 'notes', 'ExtraClassifier'],
      dtype='object')


Canvas(toolbar=Toolbar(toolitems=[('Home', 'Reset original view', 'home', 'home'), ('Back', 'Back to previous …

TypeError: scatter() missing 1 required positional argument: 'ys'

In [None]:
# Instantiation / Data Cleaning Zone
# File Paths
data_path = "./data"
figure_path = "./plots"
abundance_table_path = f"{data_path}/abundance_table_97.shared"
metadata_path = f"{data_path}/SuperTransect_mapping_file.csv"

# Abundance Table
with open(abundance_table_path, "r") as file_literal:
    raw_abundance_data = [line.strip().split("\t") for line in file_literal]
    otu_names = raw_abundance_data[0][3:]
    sample_names = list(map(int, [line[1] for line in raw_abundance_data[1:]]))
    otu_counts = [line[3:] for line in raw_abundance_data[1:]]
abundance_table = pd.DataFrame(
    np.array(otu_counts, dtype=np.int64),
    index=sample_names,
    columns=otu_names)
abundance_table["Abundance"] = abundance_table.sum(axis=1)
abundance_table["Presence"] = abundance_table.drop("Abundance", axis=1).where(
    abundance_table == 0, 1).sum(axis=1)

# Metadata
metadata = pd.read_csv(metadata_path, index_col=0)

# Taxonomy
taxonomy_path = f"{data_path}/annotations_97.taxonomy"

with open(taxonomy_path, 'r') as f:
    raw_metadata = [line.strip().split("\t") for line in f][1:]
taxonomies = {}
otu_names = []
for line in raw_metadata:
    taxonomies[line[0]] = line[2].split(";")[:-1]
    otu_names.append(line[0])

taxonomic_tree = skbio.TreeNode.from_taxonomy(
    [(x, taxonomies[x]) for x in taxonomies])
taxonomic_tree = taxonomic_tree.root_at(taxonomic_tree)
for node in taxonomic_tree.traverse():
    node.length = 1
    



mosquito_metadata = metadata.loc[metadata["host"] == "Animal"]
mosquito_abundance = abundance_table.filter(
    items=list(mosquito_metadata.index), axis=0).drop(["Abundance", "Presence"], axis=1)

unifrac_dists = skbio.diversity.beta_diversity('weighted_unifrac', mosquito_abundance, otu_ids=otu_names, validate=False, tree=taxonomic_tree, normalized=True)
uw_unifrac_dists = skbio.diversity.beta_diversity('unweighted_unifrac', mosquito_abundance, otu_ids=otu_names, validate=False, tree=taxonomic_tree)

mosquito_adj = unifrac_dists.to_data_frame().to_numpy()

mosquito_metadata = mosquito_metadata.loc[mosquito_abundance.index]

kernel = np.exp(- (mosquito_adj ** 2) / (3000**2))

diagonal = np.diag(np.sum(kernel, axis=1))

laplacian = diagonal - mosquito_adj

#normalized_laplacian = np.linalg.inv(diagonal) * laplacian

eigenvalues, eigenvectors = eigs(laplacian, k=4)
eigenvectors = eigenvectors.T.real

diagonal= mosquito_metadata["lat"] + mosquito_metadata["long"]
low_right=min(diagonal)
high_left=max(diagonal)
gps_delta=high_left - low_right
diagonal=(diagonal - low_right) / gps_delta
color_gradient=[(0, i, 0) for i in list(diagonal)]

print(mosquito_metadata.shape)

# Plot
fig = plt.figure()
ax = fig.add_subplot(111, projection='3d')
ax.set_xlabel('X axis')
ax.set_ylabel('Y axis')
ax.set_zlabel('Z axis')
ax.scatter3D(eigenvectors[1], eigenvectors[2], eigenvectors[3], c=color_gradient)
plt.title("Generalized Eigenvector Unifrac Animal 3D")

In [None]:
# Instantiation / Data Cleaning Zone
# File Paths
data_path = "./data"
figure_path = "./plots"
abundance_table_path = f"{data_path}/abundance_table_97.shared"
metadata_path = f"{data_path}/SuperTransect_mapping_file.csv"

# Abundance Table
with open(abundance_table_path, "r") as file_literal:
    raw_abundance_data = [line.strip().split("\t") for line in file_literal]
    otu_names = raw_abundance_data[0][3:]
    sample_names = list(map(int, [line[1] for line in raw_abundance_data[1:]]))
    otu_counts = [line[3:] for line in raw_abundance_data[1:]]
abundance_table = pd.DataFrame(
    np.array(otu_counts, dtype=np.int64),
    index=sample_names,
    columns=otu_names)
abundance_table["Abundance"] = abundance_table.sum(axis=1)
abundance_table["Presence"] = abundance_table.drop("Abundance", axis=1).where(
    abundance_table == 0, 1).sum(axis=1)

# Metadata
metadata = pd.read_csv(metadata_path, index_col=0)

# Taxonomy
taxonomy_path = f"{data_path}/annotations_97.taxonomy"

with open(taxonomy_path, 'r') as f:
    raw_metadata = [line.strip().split("\t") for line in f][1:]
taxonomies = {}
otu_names = []
for line in raw_metadata:
    taxonomies[line[0]] = line[2].split(";")[:-1]
    otu_names.append(line[0])

taxonomic_tree = skbio.TreeNode.from_taxonomy(
    [(x, taxonomies[x]) for x in taxonomies])
taxonomic_tree = taxonomic_tree.root_at(taxonomic_tree)
for node in taxonomic_tree.traverse():
    node.length = 1
    



mosquito_metadata = metadata.loc[metadata["host"] == "Animal"]
mosquito_abundance = abundance_table.filter(
    items=list(mosquito_metadata.index), axis=0).drop(["Abundance", "Presence"], axis=1)

unifrac_dists = skbio.diversity.beta_diversity('weighted_unifrac', mosquito_abundance, otu_ids=otu_names, validate=False, tree=taxonomic_tree, normalized=True)
uw_unifrac_dists = skbio.diversity.beta_diversity('unweighted_unifrac', mosquito_abundance, otu_ids=otu_names, validate=False, tree=taxonomic_tree)

mosquito_adj = uw_unifrac_dists.to_data_frame().to_numpy()

mosquito_metadata = mosquito_metadata.loc[mosquito_abundance.index]

kernel = np.exp(- (mosquito_adj ** 2) / (3000**2))

diagonal = np.diag(np.sum(kernel, axis=1))

laplacian = diagonal - mosquito_adj

#normalized_laplacian = np.linalg.inv(diagonal) * laplacian

eigenvalues, eigenvectors = eigs(laplacian, k=4)
eigenvectors = eigenvectors.T.real

diagonal= mosquito_metadata["lat"] + mosquito_metadata["long"]
low_right=min(diagonal)
high_left=max(diagonal)
gps_delta=high_left - low_right
diagonal=(diagonal - low_right) / gps_delta
color_gradient=[(0, i, 0) for i in list(diagonal)]

print(mosquito_metadata.shape)

# Plot
fig = plt.figure()
ax = fig.add_subplot(111, projection='3d')
ax.set_xlabel('X axis')
ax.set_ylabel('Y axis')
ax.set_zlabel('Z axis')
ax.scatter3D(eigenvectors[1], eigenvectors[2], eigenvectors[3], c=color_gradient)
plt.title("Generalized Eigenvector Unweighted Unifrac Animal 3D")

In [None]:


# Instantiation / Data Cleaning Zone
# File Paths
data_path = "./data"
figure_path = "./plots"
abundance_table_path = f"{data_path}/abundance_table_97.shared"
metadata_path = f"{data_path}/SuperTransect_mapping_file.csv"

# Abundance Table
with open(abundance_table_path, "r") as file_literal:
    raw_abundance_data = [line.strip().split("\t") for line in file_literal]
    otu_names = raw_abundance_data[0][3:]
    sample_names = list(map(int, [line[1] for line in raw_abundance_data[1:]]))
    otu_counts = [line[3:] for line in raw_abundance_data[1:]]
abundance_table = pd.DataFrame(
    np.array(otu_counts, dtype=np.int64),
    index=sample_names,
    columns=otu_names)
abundance_table["Abundance"] = abundance_table.sum(axis=1)
abundance_table["Presence"] = abundance_table.drop("Abundance", axis=1).where(
    abundance_table == 0, 1).sum(axis=1)

# Metadata
metadata = pd.read_csv(metadata_path, index_col=0)

mosquito_metadata = metadata.loc[metadata["host"] == "Animal"]
mosquito_abundance = abundance_table.filter(
    items=list(mosquito_metadata.index), axis=0)


mosquito_adj = squareform(pdist(mosquito_abundance.drop(
    ["Abundance", "Presence"], axis=1), metric="minkowski", p=1))

mosquito_metadata = mosquito_metadata.loc[mosquito_abundance.drop(
    ["Abundance", "Presence"], axis=1).index]

kernel = np.exp(- (mosquito_adj ** 2) / (3000**2))

diagonal = np.diag(np.sum(kernel, axis=1))

laplacian = diagonal - mosquito_adj

#normalized_laplacian = np.linalg.inv(diagonal) * laplacian

eigenvalues, eigenvectors = eigs(laplacian, k=4)
eigenvectors = eigenvectors.T.real

diagonal= mosquito_metadata["lat"] + mosquito_metadata["long"]
low_right=min(diagonal)
high_left=max(diagonal)
gps_delta=high_left - low_right
diagonal=(diagonal - low_right) / gps_delta
color_gradient=[(0, i, 0) for i in list(diagonal)]

print(mosquito_metadata.shape)

# Plot
fig = plt.figure()
ax = fig.add_subplot(111, projection='3d')
ax.set_xlabel('X axis')
ax.set_ylabel('Y axis')
ax.set_zlabel('Z axis')
ax.scatter3D(eigenvectors[1], eigenvectors[2], eigenvectors[3], c=color_gradient)
plt.title("Generalized Eigenvector Animal 3D")

In [None]:


# Instantiation / Data Cleaning Zone
# File Paths
data_path = "./data"
figure_path = "./plots"
abundance_table_path = f"{data_path}/abundance_table_97.shared"
metadata_path = f"{data_path}/SuperTransect_mapping_file.csv"

# Abundance Table
with open(abundance_table_path, "r") as file_literal:
    raw_abundance_data = [line.strip().split("\t") for line in file_literal]
    otu_names = raw_abundance_data[0][3:]
    sample_names = list(map(int, [line[1] for line in raw_abundance_data[1:]]))
    otu_counts = [line[3:] for line in raw_abundance_data[1:]]
abundance_table = pd.DataFrame(
    np.array(otu_counts, dtype=np.int64),
    index=sample_names,
    columns=otu_names)
abundance_table["Abundance"] = abundance_table.sum(axis=1)
abundance_table["Presence"] = abundance_table.drop("Abundance", axis=1).where(
    abundance_table == 0, 1).sum(axis=1)

# Metadata
metadata = pd.read_csv(metadata_path, index_col=0)

mosquito_metadata = metadata.loc[metadata["host"] == "Animal"]
mosquito_abundance = abundance_table.filter(
    items=list(mosquito_metadata.index), axis=0)


mosquito_adj = squareform(pdist(mosquito_abundance.drop(
    ["Abundance", "Presence"], axis=1), metric="minkowski", p=2))

mosquito_metadata = mosquito_metadata.loc[mosquito_abundance.drop(
    ["Abundance", "Presence"], axis=1).index]

kernel = np.exp(- (mosquito_adj ** 2) / (3000**2))

diagonal = np.diag(np.sum(kernel, axis=1))

laplacian = diagonal - mosquito_adj

#normalized_laplacian = np.linalg.inv(diagonal) * laplacian

eigenvalues, eigenvectors = eigs(laplacian, k=4)
eigenvectors = eigenvectors.T.real

diagonal= mosquito_metadata["lat"] + mosquito_metadata["long"]
low_right=min(diagonal)
high_left=max(diagonal)
gps_delta=high_left - low_right
diagonal=(diagonal - low_right) / gps_delta
color_gradient=[(0, i, 0) for i in list(diagonal)]

print(mosquito_metadata.shape)

# Plot
fig = plt.figure()
ax = fig.add_subplot(111, projection='3d')
ax.set_xlabel('X axis')
ax.set_ylabel('Y axis')
ax.set_zlabel('Z axis')
ax.scatter3D(eigenvectors[1], eigenvectors[2], eigenvectors[3], c=color_gradient)
plt.title("Generalized Eigenvector L2 Animal 3D")

In [None]:


# Instantiation / Data Cleaning Zone
# File Paths
data_path = "./data"
figure_path = "./plots"
abundance_table_path = f"{data_path}/abundance_table_97.shared"
metadata_path = f"{data_path}/SuperTransect_mapping_file.csv"

# Abundance Table
with open(abundance_table_path, "r") as file_literal:
    raw_abundance_data = [line.strip().split("\t") for line in file_literal]
    otu_names = raw_abundance_data[0][3:]
    sample_names = list(map(int, [line[1] for line in raw_abundance_data[1:]]))
    otu_counts = [line[3:] for line in raw_abundance_data[1:]]
abundance_table = pd.DataFrame(
    np.array(otu_counts, dtype=np.int64),
    index=sample_names,
    columns=otu_names)
abundance_table["Abundance"] = abundance_table.sum(axis=1)
abundance_table["Presence"] = abundance_table.drop("Abundance", axis=1).where(
    abundance_table == 0, 1).sum(axis=1)

# Metadata
metadata = pd.read_csv(metadata_path, index_col=0)

mosquito_metadata = metadata.loc[metadata["host"] == "Plant"]
mosquito_abundance = abundance_table.filter(
    items=list(mosquito_metadata.index), axis=0)


mosquito_adj = squareform(pdist(mosquito_abundance.drop(
    ["Abundance", "Presence"], axis=1), metric="minkowski", p=1))

mosquito_metadata = mosquito_metadata.loc[mosquito_abundance.drop(
    ["Abundance", "Presence"], axis=1).index]

kernel = np.exp(- (mosquito_adj ** 2) / (3000**2))

diagonal = np.diag(np.sum(kernel, axis=1))

laplacian = diagonal - mosquito_adj

#normalized_laplacian = np.linalg.inv(diagonal) * laplacian

eigenvalues, eigenvectors = eigs(laplacian, k=4)
eigenvectors = eigenvectors.T.real

diagonal= mosquito_metadata["lat"] + mosquito_metadata["long"]
low_right=min(diagonal)
high_left=max(diagonal)
gps_delta=high_left - low_right
diagonal=(diagonal - low_right) / gps_delta
color_gradient=[(0, i, 0) for i in list(diagonal)]

print(mosquito_metadata.shape)

# Plot
fig = plt.figure()
ax = fig.add_subplot(111, projection='3d')
ax.set_xlabel('X axis')
ax.set_ylabel('Y axis')
ax.set_zlabel('Z axis')
ax.scatter3D(eigenvectors[1], eigenvectors[2], eigenvectors[3], c=color_gradient)
plt.title("Generalized Eigenvector Plant 3D")

In [None]:


# Instantiation / Data Cleaning Zone
# File Paths
data_path = "./data"
figure_path = "./plots"
abundance_table_path = f"{data_path}/abundance_table_97.shared"
metadata_path = f"{data_path}/SuperTransect_mapping_file.csv"

# Abundance Table
with open(abundance_table_path, "r") as file_literal:
    raw_abundance_data = [line.strip().split("\t") for line in file_literal]
    otu_names = raw_abundance_data[0][3:]
    sample_names = list(map(int, [line[1] for line in raw_abundance_data[1:]]))
    otu_counts = [line[3:] for line in raw_abundance_data[1:]]
abundance_table = pd.DataFrame(
    np.array(otu_counts, dtype=np.int64),
    index=sample_names,
    columns=otu_names)
abundance_table["Abundance"] = abundance_table.sum(axis=1)
abundance_table["Presence"] = abundance_table.drop("Abundance", axis=1).where(
    abundance_table == 0, 1).sum(axis=1)

# Metadata
metadata = pd.read_csv(metadata_path, index_col=0)

mosquito_metadata = metadata.loc[metadata["host"] == "Nonhost"]
mosquito_abundance = abundance_table.filter(
    items=list(mosquito_metadata.index), axis=0)


mosquito_adj = squareform(pdist(mosquito_abundance.drop(
    ["Abundance", "Presence"], axis=1), metric="minkowski", p=1))

mosquito_metadata = mosquito_metadata.loc[mosquito_abundance.drop(
    ["Abundance", "Presence"], axis=1).index]

kernel = np.exp(- (mosquito_adj ** 2) / (3000**2))

diagonal = np.diag(np.sum(kernel, axis=1))

laplacian = diagonal - mosquito_adj

#normalized_laplacian = np.linalg.inv(diagonal) * laplacian

eigenvalues, eigenvectors = eigs(laplacian, k=4)
eigenvectors = eigenvectors.T.real

diagonal= mosquito_metadata["lat"] + mosquito_metadata["long"]
low_right=min(diagonal)
high_left=max(diagonal)
gps_delta=high_left - low_right
diagonal=(diagonal - low_right) / gps_delta
color_gradient=[(0, i, 0) for i in list(diagonal)]

print(mosquito_metadata.shape)

# Plot
fig = plt.figure()
ax = fig.add_subplot(111, projection='3d')
ax.set_xlabel('X axis')
ax.set_ylabel('Y axis')
ax.set_zlabel('Z axis')
ax.scatter3D(eigenvectors[1], eigenvectors[2], eigenvectors[3], c=color_gradient)
plt.title("Generalized Eigenvector Nonhost 3D")

In [None]:
x = [1,2,3]
y = [10, 20, 30]
points = zip(x,y)
print(list(points))