In [None]:
import itertools
import warnings

import matplotlib.pyplot as plt
import matplotlib.dates as mdates
import matplotlib.ticker as mticker
import numpy as np
import pandas as pd
import seaborn as sns

In [None]:
# Global plot appearance: stack the seaborn "white" and "paper" matplotlib
# style sheets, then install a custom six-colour cycle and the seaborn
# "paper" context.
plt.style.use(["seaborn-v0_8-white", "seaborn-v0_8-paper"])
colors = np.array(
    [
        "#9e0059",
        "#6da7de",
        "#ee266d",
        "#dee000",
        "#eb861e",
        "#63c5b5",
    ]
)
sns.set_palette(colors)
sns.set_context("paper")

In [None]:
# Collect the third whitespace-separated token of every FASTA header line
# (presumably the gene name -- verify against the FASTA header layout).
with open(
    "../data/external/crocosphaera_watsonii_wh_8501_200811.fasta"
) as f_in:
    gene_names = {
        line.split()[2] for line in f_in if line.startswith(">")
    }

# UniProt export with one row per entry; "Gene names" holds a
# whitespace-separated list of names for that entry.
uniprot_table = pd.read_csv(
    "../data/external/crocosphaera_watsonii_wh_8501_uniprot_20210912.tab",
    sep="\t",
    usecols=["Entry", "Gene names"],
)

# Expand to one row per individual gene name, then build the
# gene name -> UniProt entry lookup.
gene_to_uniprot = (
    uniprot_table.assign(
        **{"Gene names": uniprot_table["Gene names"].str.split()}
    )
    .explode("Gene names")
    .set_index("Gene names")
    .squeeze()
    .to_dict()
)

# Unique UniProt entries, written one per line without index or header.
population = pd.Series(gene_to_uniprot.values()).drop_duplicates()
population.to_csv(
    "../data/interim/crocosphaera_watsonii_wh_8501_population.txt",
    index=False,
    header=False,
)

# Reverse lookup. When several gene names map to the same entry, the one
# encountered last while iterating overwrites the earlier ones.
uniprot_to_gene = {
    entry: gene for gene, entry in gene_to_uniprot.items()
}

In [None]:
data_columns = [f"151222_WH8501diel_T{t}_2ug" for t in range(1, 17)]

# Elapsed (hours, minutes) of each sample, in sample order T1..T16.
sample_offsets = [
    (0, 0),
    (1, 30),
    (3, 0),
    (4, 30),
    (5, 30),
    (6, 30),
    (8, 30),
    (10, 30),
    (12, 30),
    (14, 30),
    (15, 30),
    (16, 40),
    (18, 30),
    (20, 30),
    (22, 30),
    (24, 0),
]
# Zero-based sample number -> elapsed time since the first sample.
time_map = {
    i: pd.Timedelta(hours=hours, minutes=minutes)
    for i, (hours, minutes) in enumerate(sample_offsets)
}

raw_data = pd.read_csv(
    "../data/processed/160214_Crocodiel_Full_rawdata_noheader_fig_may7annotation.csv",
    usecols=["Identified Proteins (1170)", "Molecular Weight", *data_columns],
    thousands=",",
)

data = (
    raw_data.assign(
        # Map gene names to UniProt ids: the second whitespace-separated
        # token of the protein description is looked up in gene_to_uniprot
        # (unmapped names become NaN).
        uniprot_id=raw_data["Identified Proteins (1170)"]
        .str.split()
        .str[1]
        .map(gene_to_uniprot)
    )
    # Drop proteins without a molecular weight specified.
    .loc[lambda df: df["Molecular Weight"] != "?"]
    .drop(columns=["Identified Proteins (1170)", "Molecular Weight"])
    .sort_values("uniprot_id")
    .set_index("uniprot_id")
    # NOTE(review): drop_duplicates() compares only the abundance columns
    # (the index is ignored), so rows with identical profiles collapse into
    # one even when their ids differ -- confirm this is intended.
    .drop_duplicates()
    # Convert column names to times (sample T<k> -> time_map[k - 1]).
    .rename(
        columns={
            name: time_map[i] for i, name in enumerate(data_columns)
        }
    )
)

# Replace missing observations by half the minimum measured. Zeros are
# treated as missing; note that `min_non_zero` actually holds half of the
# smallest non-zero value.
observed = data.replace(0, np.nan)
min_non_zero = observed.min().min() / 2
data = observed.fillna(min_non_zero)

In [None]:
# Single-panel figure (saved as fig4.png): abundance over time for four nif
# proteins.
width = 3.5
height = width / 1.618  # width / height ~ golden ratio
fig, ax = plt.subplots(figsize=(width, height))

# Sampling times in seconds, one per column of `data`.
x = [c.total_seconds() for c in data.columns]

for cwat, gene in [
    ("CwatDRAFT_3815", "nifB"),
    ("CwatDRAFT_3822", "nifE"),
    ("CwatDRAFT_3818", "nifH"),
    ("CwatDRAFT_3820", "nifK"),
]:
    ax.plot(
        x,
        data.loc[gene_to_uniprot[cwat]],
        "-o",
        label=f"{gene} - {cwat.replace('DRAFT_', '')}",
        clip_on=False,
    )

# Span the full sampling window and anchor the y axis at zero.
ax.set_xlim(x[0], x[-1])
ax.set_ylim(0, ax.get_ylim()[1])

# Highlight night period (5th through 12th sampling time).
ax.axvspan(
    x[4],
    x[11],
    color="lightgray",
)

# Set x ticks every 3 hours, labelled in whole hours. The formatter returns
# a string, as the FuncFormatter contract requires, and uses its own
# parameter name so it does not shadow the `x` list above.
ax.xaxis.set_major_locator(mticker.MultipleLocator(60 * 60 * 3))
ax.xaxis.set_major_formatter(
    mticker.FuncFormatter(lambda seconds, _pos: f"{int(seconds // 3600)}")
)

ax.legend(loc="upper left", bbox_to_anchor=[0.7, 1])

ax.set_xlabel("Hours")
ax.set_ylabel("Abundance")

sns.despine(ax=ax)

# Save and close through the explicit figure handle rather than relying on
# pyplot's implicit "current figure".
fig.savefig("fig4.png", dpi=300, bbox_inches="tight")
plt.show()
plt.close(fig)

In [None]:
# Six-panel figure (saved as fig5.png): one panel per protein group, each
# showing abundance over time for the listed CwatDRAFT loci.
width = 3.5
height = width / 1.618  # width / height ~ golden ratio
fig, axes = plt.subplots(3, 2, sharex=True, figsize=(width * 2, height * 3))

# Sampling times in seconds, one per column of `data`.
x = [c.total_seconds() for c in data.columns]

# (loci, panel title) for each panel, in row-major panel order.
panels = [
    (("CwatDRAFT_2696",), "Squalene synthase"),
    (("CwatDRAFT_2761",), "Radical SAM"),
    (("CwatDRAFT_0516", "CwatDRAFT_4537"), "Ni hydrogenase"),
    (("CwatDRAFT_4291",), "Flavodoxin"),
    (
        ("CwatDRAFT_4160", "CwatDRAFT_0228", "CwatDRAFT_0515"),
        "Quinone metabolism",
    ),
    (
        ("CwatDRAFT_5386", "CwatDRAFT_4107", "CwatDRAFT_5304"),
        "Cytochrome c oxidases",
    ),
]

for ax, (cwats, title) in zip(np.ravel(axes), panels):
    for cwat in cwats:
        # Named `uniprot_id` (not `id`) so the builtin is not shadowed.
        uniprot_id = gene_to_uniprot[cwat]
        ax.plot(
            x,
            data.loc[uniprot_id],
            "-o",
            label=cwat.replace("DRAFT_", ""),
            clip_on=False,
        )

    # Title is raised to leave room for the legend placed above the axes.
    ax.set_title(title, y=1.15)

    # Span the full sampling window and anchor the y axis at zero.
    ax.set_xlim(x[0], x[-1])
    ax.set_ylim(0, ax.get_ylim()[1])

    # Highlight night period (5th through 12th sampling time).
    ax.axvspan(
        x[4],
        x[11],
        color="lightgray",
    )

    # Set x ticks every 3 hours, labelled in whole hours. The formatter
    # returns a string, as the FuncFormatter contract requires, and uses its
    # own parameter name so it does not shadow the `x` list above.
    ax.xaxis.set_major_locator(mticker.MultipleLocator(60 * 60 * 3))
    ax.xaxis.set_major_formatter(
        mticker.FuncFormatter(
            lambda seconds, _pos: f"{int(seconds // 3600)}"
        )
    )

    ax.legend(
        loc="lower center", bbox_to_anchor=[0.5, 1], ncols=3, fontsize="small"
    )

    ax.set_xlabel("Hours")
    ax.set_ylabel("Abundance")

    sns.despine(ax=ax)

# Panel letters, placed above and to the left of each axes.
for ax, panel_letter in zip(np.ravel(axes), "ABCDEF"):
    ax.annotate(
        panel_letter,
        xy=(-0.17, 1.1),
        xycoords="axes fraction",
        fontsize="xx-large",
        weight="bold",
    )

fig.tight_layout()

# Save and close through the explicit figure handle rather than relying on
# pyplot's implicit "current figure".
fig.savefig("fig5.png", dpi=300, bbox_inches="tight")
plt.show()
plt.close(fig)

In [None]:
# Two-panel figure (saved as fig6.png): abundance over time for one protein
# per panel.
width = 3.5
height = width / 1.618  # width / height ~ golden ratio
fig, axes = plt.subplots(1, 2, sharex=True, figsize=(2 * width, height))

# Sampling times in seconds, one per column of `data`.
x = [c.total_seconds() for c in data.columns]

for ax, (cwat, protein_name) in zip(
    axes,
    [
        ("CwatDRAFT_6453", "rpaA"),
        ("CwatDRAFT_2868", "putative oxidoreductase"),
    ],
):
    ax.plot(x, data.loc[gene_to_uniprot[cwat]], "-o", clip_on=False)

    # Show which protein the panel depicts. The name was previously carried
    # in the loop but never rendered, leaving the two panels unlabelled.
    ax.set_title(protein_name)

    # Span the full sampling window and anchor the y axis at zero.
    ax.set_xlim(x[0], x[-1])
    ax.set_ylim(0, ax.get_ylim()[1])

    # Highlight night period (5th through 12th sampling time).
    ax.axvspan(
        x[4],
        x[11],
        color="lightgray",
    )

    # Set x ticks every 3 hours, labelled in whole hours. The formatter
    # returns a string, as the FuncFormatter contract requires, and uses its
    # own parameter name so it does not shadow the `x` list above.
    ax.xaxis.set_major_locator(mticker.MultipleLocator(60 * 60 * 3))
    ax.xaxis.set_major_formatter(
        mticker.FuncFormatter(
            lambda seconds, _pos: f"{int(seconds // 3600)}"
        )
    )

    ax.set_xlabel("Hours")
    ax.set_ylabel("Abundance")

    sns.despine(ax=ax)

# Panel letters, placed at the top-left outside each axes.
for ax, panel_letter in zip(axes, "AB"):
    ax.annotate(
        panel_letter,
        xy=(-0.17, 1.0),
        xycoords="axes fraction",
        fontsize="xx-large",
        weight="bold",
    )

fig.tight_layout()

# Save and close through the explicit figure handle rather than relying on
# pyplot's implicit "current figure".
fig.savefig("fig6.png", dpi=300, bbox_inches="tight")
plt.show()
plt.close(fig)