# Merge FeGenie Output and Taxa/Quality Tables

This notebook merges the transposed FeGenie heatmap tables with the large taxa/quality tables of the Kalø Vig, Løgten, and *Candidatus Electrothrix communis RB* samples. The "marine_gs" abbreviation stands for "Marine Golden Standard", which is an internal name for the *Candidatus Electrothrix communis RB* species.

In [1]:
import pandas as pd

In [2]:
# Column subsets passed to read_csv via `usecols`.
# k_l_cols: columns kept from the Kalø Vig and Løgten taxa/quality tables.
k_l_cols = ["Bin Id", "classification", "Completeness", "Contamination", "AvgDepth"]
# fg_cols: columns kept from every FeGenie transposed heatmap table.
fg_cols = ["X", "iron_oxidation", "iron_reduction"]

# --- Taxa/quality tables ---
# Kalø Vig and Løgten are restricted to k_l_cols.
kaloevig_taxa_quality = pd.read_csv(
    filepath_or_buffer="binning/results/2022-04-08/kaloevig_taxa_quality_abund_table_full.csv",
    usecols=k_l_cols,
)
loegten_taxa_quality = pd.read_csv(
    filepath_or_buffer="binning/results/2022-04-08/loegten_taxa_quality_abund_table_full.csv",
    usecols=k_l_cols,
)
# The marine_gs Illumina table is read with all of its columns (no usecols).
marine_gs_illumina_quality = pd.read_csv(
    filepath_or_buffer="taxonomy/results/2022-05-19/marine_gs_illumina_taxa_quality_abund.csv",
)

# --- FeGenie tables ---
# All three are restricted to fg_cols.
kaloevig_fegenie = pd.read_csv(
    filepath_or_buffer="genes/results/2022-05-04/fegenie/kaloevig/FeGenie-heatmap-data.Kaloevig.T.csv",
    usecols=fg_cols,
)
loegten_fegenie = pd.read_csv(
    filepath_or_buffer="genes/results/2022-05-04/fegenie/loegten/FeGenie-heatmap-data.Loegten.T.csv",
    usecols=fg_cols,
)
marine_gs_illumina_fegenie = pd.read_csv(
    filepath_or_buffer="genes/results/2022-05-19/fegenie/marine_gs_illumina_proteins/FeGenie-heatmap-data.illumina.proteins.T.csv",
    usecols=fg_cols,
)

In [3]:
# Prepare the FeGenie tables for merging with the taxa/quality tables.
def _clean_fegenie_bin_ids(fegenie_table):
    """Return a copy of a FeGenie table keyed by "Bin Id" without ".faa" suffixes.

    The "X" column is renamed to "Bin Id" so it matches the merge key used for
    the taxa/quality tables, and a trailing ".faa" extension is removed from
    each bin name.

    The suffix is stripped only where it is actually present (anchored regex)
    rather than by blindly dropping the last four characters, so names without
    the extension are left intact and re-running this cell cannot truncate
    already-cleaned bin names.
    """
    # rename() returns a new frame; the input table is not modified.
    # If "X" was already renamed, this is a no-op.
    cleaned = fegenie_table.rename(columns={"X": "Bin Id"})
    cleaned["Bin Id"] = cleaned["Bin Id"].str.replace(r"\.faa$", "", regex=True)
    return cleaned


kaloevig_fegenie = _clean_fegenie_bin_ids(kaloevig_fegenie)
loegten_fegenie = _clean_fegenie_bin_ids(loegten_fegenie)
marine_gs_illumina_fegenie = _clean_fegenie_bin_ids(marine_gs_illumina_fegenie)

In [4]:
# Join each taxa/quality table (left) with its FeGenie table (right) on "Bin Id".
# how="inner" is pandas' default join type, spelled out here: only bins
# present in both tables end up in the merged result.
kaloevig_merged = pd.merge(
    kaloevig_taxa_quality, kaloevig_fegenie, how="inner", on="Bin Id"
)
loegten_merged = pd.merge(
    loegten_taxa_quality, loegten_fegenie, how="inner", on="Bin Id"
)
marine_gs_illumina_merged = pd.merge(
    marine_gs_illumina_quality, marine_gs_illumina_fegenie, how="inner", on="Bin Id"
)

In [5]:
# Write each merged table to its CSV file.
# index=False (the documented boolean form, instead of relying on None being
# falsy) keeps the DataFrame row index out of the output files.
merged_outputs = [
    (kaloevig_merged, "genes/results/2022-05-19/kaloevig_quality_taxa_fegenie.csv"),
    (loegten_merged, "genes/results/2022-05-19/loegten_quality_taxa_fegenie.csv"),
    (marine_gs_illumina_merged, "genes/results/2022-05-19/marine_gs_illumina_quality_taxa_fegenie.csv"),
]
for merged_table, csv_path in merged_outputs:
    merged_table.to_csv(csv_path, index=False)