# Merge taxonomic classification, FeGenie output, quality metrics, protein hits, and bin abundance into one table per dataset

In [1]:
import pandas as pd

In [2]:
# Load the per-dataset quality/taxa/FeGenie CSVs into DataFrames.
# The path tuple is ordered kaloevig, loegten, marine_gs_illumina and is
# unpacked into the matching variables in that same order.
kaloevig_fg_q_a, loegten_fg_q_a, marine_gs_illumina_fg_q_a = (
    pd.read_csv(csv_path)
    for csv_path in (
        "genes/results/2022-05-19/kaloevig_quality_taxa_fegenie.csv",
        "genes/results/2022-05-19/loegten_quality_taxa_fegenie.csv",
        "genes/results/2022-05-19/marine_gs_illumina_quality_taxa_fegenie.csv",
    )
)

In [3]:
# Load the per-dataset BLAST contig-hit CSVs into DataFrames.
# Note: the kaloevig and loegten tables are read from the 2022-04-28 results
# directory, while the marine_gs_illumina table is read from 2022-05-20.
kaloevig_blast, loegten_blast, marine_gs_illumina_blast = (
    pd.read_csv(csv_path)
    for csv_path in (
        "genes/results/2022-04-28/blast/kaloevig_contig_hits.csv",
        "genes/results/2022-04-28/blast/loegten_contig_hits.csv",
        "genes/results/2022-05-20/blast/marine_gs_illumina_contig_hits.csv",
    )
)

In [4]:
# Rename the first column of each BLAST table ("Unnamed: 0") to "Bin Id" so it
# matches the key column used when merging with the quality/abundance tables.
# rename() returns a new frame; reassigning (instead of inplace=True in a loop)
# avoids in-place mutation and does not leave a stray loop alias behind.
_bin_id_rename = {"Unnamed: 0": "Bin Id"}
kaloevig_blast = kaloevig_blast.rename(columns=_bin_id_rename)
loegten_blast = loegten_blast.rename(columns=_bin_id_rename)
marine_gs_illumina_blast = marine_gs_illumina_blast.rename(columns=_bin_id_rename)

# rename() silently ignores a missing source column, so verify the key column
# is really present now; otherwise the failure would only show up later as a
# less informative KeyError when merging on "Bin Id".
_missing_bin_id = [
    label
    for label, table in (
        ("kaloevig_blast", kaloevig_blast),
        ("loegten_blast", loegten_blast),
        ("marine_gs_illumina_blast", marine_gs_illumina_blast),
    )
    if "Bin Id" not in table.columns
]
if _missing_bin_id:
    raise KeyError(
        'BLAST table(s) without a "Bin Id" column after renaming "Unnamed: 0": '
        + ", ".join(_missing_bin_id)
    )

In [5]:
# Join each dataset's FeGenie/quality/abundance table (left) with its BLAST
# table (right) on the shared "Bin Id" column. how="outer" keeps rows whose
# "Bin Id" appears in only one of the two tables.
kaloevig_merged = pd.merge(
    kaloevig_fg_q_a, kaloevig_blast, how="outer", on="Bin Id"
)
loegten_merged = pd.merge(
    loegten_fg_q_a, loegten_blast, how="outer", on="Bin Id"
)
marine_gs_illumina_merged = pd.merge(
    marine_gs_illumina_fg_q_a, marine_gs_illumina_blast, how="outer", on="Bin Id"
)

In [6]:
# Write each merged table to its own CSV file.
# index=False leaves the DataFrame's row index out of the written file.
kaloevig_merged.to_csv(
    path_or_buf="genes/results/2022-05-23/kaloevig_genes.csv",
    index=False,
)
loegten_merged.to_csv(
    path_or_buf="genes/results/2022-05-23/loegten_genes.csv",
    index=False,
)
marine_gs_illumina_merged.to_csv(
    path_or_buf="genes/results/2022-05-23/marine_gs_illumina_genes.csv",
    index=False,
)