In [None]:
import itertools

import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

In [None]:
# Plot styling.
# Matplotlib 3.6 renamed its bundled seaborn style sheets to 'seaborn-v0_8-*'
# and 3.8 removed the old names, so pick whichever variant is installed.
plt.style.use([
    style if style in plt.style.available
    else style.replace('seaborn-', 'seaborn-v0_8-')
    for style in ('seaborn-white', 'seaborn-paper')])
plt.rc('font', family='serif')
sns.set_palette('Set1')
sns.set_context('paper', font_scale=1.3)    # Single-column figure.

### GFOP sample type metadata 

In [None]:
def get_sample_types(gfop_metadata, simple_complex=None):
    """Extract the sample type group columns, indexed by filename.

    Parameters
    ----------
    gfop_metadata : pd.DataFrame
        Metadata table containing a 'filename' column and the columns
        'sample_type_group1' through 'sample_type_group6'. A
        'simple_complex' column is only required when `simple_complex` is
        given.
    simple_complex : optional
        If not None, only rows whose 'simple_complex' value equals this
        argument are retained.

    Returns
    -------
    pd.DataFrame
        The six sample type group columns with 'filename' as the index.
    """
    selected = gfop_metadata
    if simple_complex is not None:
        is_requested = selected['simple_complex'] == simple_complex
        selected = selected[is_requested]
    columns = ['filename']
    columns.extend(f'sample_type_group{level}' for level in range(1, 7))
    return selected[columns].set_index('filename')

In [None]:
def get_network_group_sample_type_counts(gnps_network, sample_types,
                                         groups_included):
    """Count sample types of the files behind group-specific network rows.

    Rows of `gnps_network` are kept when every included group column is
    positive and every other group column (out of 'G1'..'G6') is zero. The
    file names listed in the 'UniqueFileSources' column of those rows are
    collected (each file counted once, regardless of how many rows mention
    it) and looked up in `sample_types`.

    Parameters
    ----------
    gnps_network : pd.DataFrame
        Table with numeric columns 'G1'..'G6' and a 'UniqueFileSources'
        column holding '|'-separated file names.
    sample_types : pd.Series
        Sample type per file, indexed by file name.
    groups_included : iterable of str
        Group column names (a subset of 'G1'..'G6') that must all be
        present (> 0) in a row.

    Returns
    -------
    pd.Series
        Number of distinct files per sample type. Files that are missing
        from `sample_types` end up as NaN after reindexing and are ignored
        by `value_counts`.
    """
    groups_included = list(groups_included)
    # Sets are not valid pandas column indexers (TypeError since pandas 2.0),
    # so the excluded groups are kept as a sorted list.
    groups_excluded = sorted(
        {f'G{i}' for i in range(1, 7)} - set(groups_included))
    gnps_network_selected = gnps_network[
        (gnps_network[groups_included] > 0).all(axis=1) &
        (gnps_network[groups_excluded] == 0).all(axis=1)]
    # Rows without any file source are missing values that cannot be split.
    file_sources = gnps_network_selected['UniqueFileSources'].dropna()
    # Deduplicate, then sort so that the reindexing order is deterministic.
    peak_filenames = sorted(set(itertools.chain.from_iterable(
        file_sources.str.split('|'))))
    return sample_types.reindex(peak_filenames).value_counts()

In [None]:
# Load the tab-separated GFOP metadata and clean it in a single chain.
gfop_metadata = (
    pd.read_csv(
        '../data/GFOP/11442_foodomics_multiproject_metadata.txt',
        sep='\t')
    # First row (index label 0) is empty.
    .drop(index=0)
    # Remove leading and trailing whitespace from the object (text) columns;
    # all other columns are passed through unchanged.
    .apply(lambda col: col.str.strip() if col.dtype == 'object' else col)
)

### ONR sample type overlap between fecal and plasma

In [None]:
# Load the tab-separated cluster table of the ONR GNPS network
# (`read_table` uses a tab delimiter by default).
gnps_network_onr = pd.read_table(
    '../data/ONR/METABOLOMICS-SNETS-V2-e0bf255b-'
    'view_all_clusters_withID_beta-main.tsv')

In [None]:
# First-level sample type of the 'simple' GFOP samples, indexed by filename.
sample_types_simple = get_sample_types(
    gfop_metadata, simple_complex='simple')['sample_type_group1']
# Sample type counts for rows present only in the given pair of groups.
# NOTE(review): presumably G1 = fecal, G2 = plasma and G4 = the food
# reference samples -- confirm against the GNPS job's group mapping.
sample_counts_fecal, sample_counts_plasma = (
    get_network_group_sample_type_counts(
        gnps_network_onr, sample_types_simple, groups).rename(label)
    for label, groups in (('fecal', ['G1', 'G4']),
                          ('plasma', ['G2', 'G4'])))

In [None]:
# Place the fecal and plasma counts side by side, one column each. Sample
# types that occur in only one of the two get NaN in the other column.
sample_counts_fecal_plasma = pd.concat(
    (sample_counts_fecal, sample_counts_plasma), axis='columns')

In [None]:
# Show the combined fecal/plasma counts table as the cell output.
sample_counts_fecal_plasma

In [None]:
# Grouped bar chart of the fecal and plasma sample type counts.
width = 7
height = width / 1.618    # Width-to-height ratio of ~1.618 (golden ratio).
fig, ax = plt.subplots(figsize=(width, height))

sample_counts_fecal_plasma.plot.bar(ax=ax)

ax.set_xlabel('Food type')
ax.set_ylabel('Number of occurrences')

# Remove the top and right spines of this figure's axes.
sns.despine(fig=fig)

plt.show()
# Close this specific figure to release its memory.
plt.close(fig)