In [51]:
from aerobot.dataset import dataset_load
from aerobot.plot import *
# from aerobot.utils import print_taxonomy_info
from aerobot.io import FEATURE_SUBTYPES, FEATURE_TYPES, load_results_dict, RESULTS_PATH, DATA_PATH
import numpy as np 
import os
from matplotlib.gridspec import GridSpec
import pandas as pd 
import subprocess
from typing import Dict

%load_ext autoreload
%autoreload 2

plot_configure_mpl(n_colors=6)


The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


# SI Figure 4

In [53]:
def plot_si_figure_4a(coverage_by_habitat_df:pd.DataFrame, fig:plt.Figure=None, grid:GridSpec=None):
    '''Plot percent coverage per habitat at each phylogenetic level, highlighting human habitats.

    One column of points is drawn per entry in LEVELS (one point per row of the input), and the
    per-level mean across rows is overlaid as a black line with markers.

    :param coverage_by_habitat_df: DataFrame with a string `habitat` column and one numeric
        column per entry in LEVELS. Rows whose habitat contains 'human' are highlighted.
    :param fig: Figure to draw on. If None, the current pyplot figure is used.
    :param grid: Subplot specification passed to `fig.add_subplot`. If None, a single
        full-figure subplot is added.
    '''
    # Fall back to sensible defaults so the advertised None defaults do not crash.
    if fig is None:
        fig = plt.gcf()
    ax = fig.add_subplot(grid) if (grid is not None) else fig.add_subplot()

    n = len(coverage_by_habitat_df) # Number of habitats.

    human_habitat_color, color = 'tab:blue', 'tab:gray'
    # Compute the mask once. na=False keeps rows with a missing habitat out of the human group
    # (a NaN entry in the mask would otherwise be truthy and be drawn as a human habitat).
    is_human_habitat = coverage_by_habitat_df.habitat.str.contains('human', na=False).values
    colors = [human_habitat_color if is_human else color for is_human in is_human_habitat]
    sizes = [20 if is_human else 3 for is_human in is_human_habitat]

    for i, level in enumerate(LEVELS):
        ax.scatter(np.array([i] * n), coverage_by_habitat_df[level].values, linewidth=0.5, facecolors=colors, s=sizes)

    # Overlay the per-level means. These are drawn after the habitat points so they sit on top.
    x_vals = list(range(len(LEVELS)))
    y_vals = [coverage_by_habitat_df[level].mean() for level in LEVELS]
    ax.plot(x_vals, y_vals, c='black', lw=1)
    ax.scatter(x_vals, y_vals, c='black', s=20)

    # Custom legend. Colors distinguish human habitats, non-human habitats, and the per-level mean.
    handles = [plt.Rectangle((0, 0), 1, 1, facecolor=human_habitat_color, edgecolor='black', linewidth=0.5)]
    handles.append(plt.Rectangle((0, 0), 1, 1, facecolor=color, edgecolor='black', linewidth=0.5))
    handles.append(plt.Rectangle((0, 0), 1, 1, facecolor='black', edgecolor='black', linewidth=0.5))

    labels = ['human habitats', 'non-human habitats', 'mean']
    ax.legend(handles, labels, fontsize=7, columnspacing=0.3, handletextpad=0.3, loc='lower left')

    ax.set_xticks(np.arange(len(LEVELS)), labels=LEVELS)
    ax.set_ylabel('percent coverage')
    ax.set_xlabel('phylogenetic level')
    ax.set_title('Phylogenetic label coverage in the Earth Microbiome Project')
# Display any figures that are currently open.
# NOTE(review): in the lines shown here plot_si_figure_4a is only defined, not called —
# confirm a figure actually exists at this point.
plt.show()