# Amils et al. 2023

In [None]:
# Add the parent directory to the module search path so that the project's
# `modules` package (imported in a later cell) can be found.
# NOTE(review): this relies on the working directory being one level below
# the project root — confirm.

import sys

sys.path.append("..")

In [None]:
import os

import plotly.express as px

import pandas as pd

from modules.data.amils2023.data_loader import Amils2023DataLoader

In [None]:
# Single loader instance shared by every cell below: it serves the elements,
# anions, cations, gases and combined tables, and exposes `data_dir`.
data_loader = Amils2023DataLoader()

### Dataset S2 - ICP-MS elemental analysis of core samples (ppm)

In [None]:
# Load the elemental analysis table. The plots in the next cell rely on its
# "Species", "Depth" and "Concentration (ppm)" columns.
elements_df_long = data_loader.get_elements()
# Bare expression: shown as the cell output when run in a notebook.
elements_df_long

In [None]:
# Rank the species from the highest to the lowest peak concentration; this
# ranking fixes the legend/category order of every figure in this cell.
elements_sorted = (
    elements_df_long
    .groupby("Species")
    .max()
    .sort_values("Concentration (ppm)", ascending=False)
    .index
    .to_list()
)

# Keyword arguments common to the three figures below.
species_style = dict(
    color="Species",
    color_discrete_sequence=px.colors.qualitative.Pastel,
    category_orders={"Species": elements_sorted},
)

# 1) Depth profile, linear concentration axis.
fig = px.scatter(
    elements_df_long,
    x="Concentration (ppm)",
    y="Depth",
    title="Concentration of elements across the vertical column",
    **species_style,
)
# Flip the y axis so depth grows downwards.
fig.update_yaxes(autorange="reversed")
fig.show(renderer="png")

# 2) Same depth profile, logarithmic concentration axis.
fig = px.scatter(
    elements_df_long,
    x="Concentration (ppm)",
    y="Depth",
    log_x=True,
    title="Concentration of elements across the vertical column",
    **species_style,
)
fig.update_xaxes(title_text="Concentration log(ppm)")
fig.update_yaxes(autorange="reversed")
fig.show(renderer="png")

# 3) Per-species distribution of concentrations (log scale).
fig = px.violin(
    elements_df_long,
    y="Concentration (ppm)",
    log_y=True,
    title="Distribution of concentrations per element",
    **species_style,
)
fig.update_xaxes(title_text="Species")
fig.update_yaxes(title_text="Concentration log(ppm)")
fig.show(renderer="png")

### Dataset S3 - Ionic chromatography of BH10 soluble organic and inorganic anions (ppm)

In [None]:
# Load the anion table through the shared loader.
# NOTE(review): the `_long` suffix suggests one row per measurement, as in the
# elements table — confirm against the loader.
anions_df_long = data_loader.get_anions()
# Bare expression: shown as the cell output when run in a notebook.
anions_df_long

### Table S1 - Soluble cations (ppm)

In [None]:
# Load the cation table through the shared loader.
# NOTE(review): the `_long` suffix suggests one row per measurement, as in the
# elements table — confirm against the loader.
cations_df_long = data_loader.get_cations()
# Bare expression: shown as the cell output when run in a notebook.
cations_df_long

### Table S7 - Occluded gases and natural activities at different depths (10/8/22)

In [None]:
# Load the gases table through the shared loader.
# NOTE(review): the `_long` suffix suggests one row per measurement, as in the
# elements table — confirm against the loader.
gases_df_long = data_loader.get_gases()
# Bare expression: shown as the cell output when run in a notebook.
gases_df_long

### Final medium

In [None]:
# Load the table used as the "final medium". Later cells filter it by
# "Depth" and pivot it on "Species" / "Concentration (ppm)".
# NOTE(review): presumably the union of the tables loaded above — confirm
# against the loader.
medium_df = data_loader.get_data()
# Bare expression: shown as the cell output when run in a notebook.
medium_df

### Microbial data

In [None]:
# ---------------------------------------------------------------------------- #
# Table S8-2 - Number of microbial species detected at different depths which
# have the potential to carry out key metabolic pathways of the C, H, N, S and
# Fe cycles.

microbes_df = pd.read_excel(
    os.path.join(
        data_loader.data_dir,
        "emi16291-sup-0001-supinfo-tables8-2.ods"
    ),
    sheet_name="Sheet1"
)

# Rename pathway column
microbes_df = microbes_df.rename(columns={"Pathway/depth": "Pathway"})

# Drop last row containing the explanation
microbes_df = microbes_df.iloc[:-1, :]

# Drop the rows that only name a cycle.
# - `na=False` treats a missing pathway label as "not a cycle row"; without it
#   a NaN in the mask makes the `~` negation fail.
# - The explicit copy makes the result an independent frame, so the column
#   assignment below does not act on a slice of the frame read from disk
#   (which would raise pandas' SettingWithCopyWarning).
microbes_df = microbes_df[
    ~microbes_df["Pathway"].str.endswith(" cycle", na=False)
].copy()

# Convert to numeric: every column except "Pathway" is treated as a depth
# column; values that cannot be parsed become NaN.
numeric_cols = [
    col for col in microbes_df.columns
    if col not in ["Pathway"]
]
microbes_df[numeric_cols] = microbes_df[numeric_cols].apply(
    pd.to_numeric,
    errors="coerce"
)

# Heatmap of all depths (rows) against all pathways (columns).
fig = px.imshow(
    img=microbes_df[numeric_cols].T.to_numpy(),
    x=microbes_df["Pathway"],
    y=numeric_cols,
    labels=dict(
        x="Pathway",
        y="Depth",
        color="Count"
    ),
    aspect="equal",
    width=600,
    height=600,
    title="Distribution of microbial functions across the vertical column"
)
fig.show("png")

# Keep the depths whose "nº compl cyc" entry equals 5.
# NOTE(review): presumably 5 = all of the C, H, N, S and Fe cycles complete —
# confirm against the table legend.
# Only the numeric depth columns are compared, so the string "Pathway" column
# never takes part in the `== 5` test.
summary_row = microbes_df.loc[
    microbes_df["Pathway"] == "nº compl cyc",
    numeric_cols
]
complete_depths = summary_row.columns[(summary_row == 5).all(axis=0)]

print(
    "[INFO] Depths with all microbial functions analyzed:",
    complete_depths
)

# Same heatmap restricted to the complete depths.
fig = px.imshow(
    img=microbes_df[complete_depths].T.to_numpy(),
    x=microbes_df["Pathway"],
    y=complete_depths,
    labels=dict(
        x="Pathway",
        y="Depth",
        color="Count"
    ),
    aspect="equal",
    width=600,
    height=600,
    title="Distribution of microbial functions (only complete functions)"
)
# Red frames spanning the full plot width (x in paper coordinates) between
# the given y values.
# NOTE(review): the y values are hard-coded and assume the y axis is numeric
# depth — confirm they still match the data.
fig.add_shape(
    type="rect",
    x0=0.0,
    x1=1.0,
    y0=481,
    y1=493,
    xref="paper",
    yref="y",
    line_color="red"
)
fig.add_shape(
    type="rect",
    x0=0.0,
    x1=1.0,
    y0=587,
    y1=625,
    xref="paper",
    yref="y",
    line_color="red"
)
fig.show("png")

### Get complete depths

In [None]:
# ---------------------------------------------------------------------------- #
# Taxonomy abundances

taxonomy_abundances = pd.read_csv(
    os.path.join(
        "../data/micom/rio_tinto/amils_2023/",
        "taxonomy.csv"
    ),
    sep=","
)

# Drop rows with a missing `id` (original note: species without abundance
# data). `subset` is passed as a list because a bare label is only accepted by
# recent pandas versions; the copy makes the result an independent frame
# before a column is added to it.
taxonomy_abundances = taxonomy_abundances.dropna(subset=["id"], axis=0).copy()

# Create depth column: the integer found after the first "-" of `sample_id`
taxonomy_abundances["Depth"] = taxonomy_abundances["sample_id"]\
    .str.split("-").str[1]\
    .astype(int)


# ---------------------------------------------------------------------------- #
# Get medium for the depths specified in the abundance data
# NOTE: `medium_df` comes from the "Final medium" cell and is narrowed in
# place here; re-run that cell to get the unfiltered table back.

medium_df = medium_df[
    medium_df["Depth"].isin(taxonomy_abundances["Depth"].unique())
]

# Number of species rows available per depth, most complete depth first.
# The column is selected before counting so only "Species" is aggregated.
depth_counts = medium_df\
    .groupby("Depth")["Species"]\
    .count()\
    .sort_values(ascending=False)

# A depth is "complete" when it has as many species rows as there are
# distinct species in the (filtered) medium.
complete_depths = depth_counts[
    depth_counts == medium_df["Species"].nunique()
].index

# Wide view (one row per depth, one column per species) used to inspect gaps.
medium_df_wide = pd.pivot(
    data=medium_df,
    index="Depth",
    columns="Species",
    values="Concentration (ppm)"
)

# For the depths with more than 35 species rows: how many of those depths
# lack a value for each species, most-missing species first.
# NOTE(review): exploratory only — not used further in this cell; the 35
# threshold is hard-coded.
missing_species = medium_df_wide\
    .loc[depth_counts[depth_counts > 35].index]\
    .isnull()\
    .sum(axis=0)\
    .sort_values(ascending=False)

# Filter by complete depths
taxonomy_abundances = taxonomy_abundances[
    taxonomy_abundances["Depth"].isin(complete_depths)
].reset_index(drop=True)

medium_df = medium_df[
    medium_df["Depth"].isin(complete_depths)
].reset_index(drop=True)


# Persist the medium restricted to the complete depths (semicolon-separated).
medium_df.to_csv(
    "../data/medium.csv",
    index=False,
    header=True,
    sep=";"
)

# Depth profile of the exported medium. The category order reuses
# `elements_sorted` from the elements cell.
fig = px.scatter(
    data_frame=medium_df,
    x="Concentration (ppm)",
    log_x=True,
    y="Depth",
    color="Species",
    color_discrete_sequence=px.colors.qualitative.Pastel,
    category_orders={"Species": elements_sorted},
    title="Concentration of elements across the vertical column"
)
fig.update_layout(xaxis_title="Concentration log(ppm)")
fig['layout']['yaxis']['autorange'] = "reversed"
# Static PNG renderer, consistent with every other figure in this notebook.
fig.show("png")
