<a href="https://colab.research.google.com/github/clare-abreu/ocean_temperature_microbes/blob/main/Calculate_WMCNs.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

This code calculates the weighted mean copy number (WMCN) for each of the datasets used in [Abreu$^*$ & Dal Bello$^*$ et al.](https://www.biorxiv.org/content/10.1101/2022.07.13.499956v1).

In [4]:
# Import required modules and specify plotting parameters:

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from numpy import loadtxt
from scipy import stats
from scipy.integrate import odeint

# Three font-size tiers shared by every figure produced in this notebook.
SMALL_SIZE, MEDIUM_SIZE, BIGGER_SIZE = 14, 16, 18

# Apply all font-size defaults in a single rcParams update.
plt.rcParams.update({
    'font.size': SMALL_SIZE,          # default text size
    'axes.titlesize': SMALL_SIZE,     # axes title
    'axes.labelsize': MEDIUM_SIZE,    # x and y axis labels
    'xtick.labelsize': SMALL_SIZE,    # x tick labels
    'ytick.labelsize': SMALL_SIZE,    # y tick labels
    'legend.fontsize': SMALL_SIZE,    # legend text
    'figure.titlesize': BIGGER_SIZE,  # figure-level title
})

In [7]:
def _wmcn_per_sample(df_subset, sample_cols):
    """Return the weighted mean copy number of every sample column at once.

    Rows whose 'Copy Number' is NaN are dropped first. For each column in
    ``sample_cols`` the result is

        sum(abundance) / sum(abundance / copy number)

    The return value is a Series indexed by ``sample_cols``.
    """
    known = df_subset[df_subset['Copy Number'].notna()]
    abundances = known[sample_cols]
    total_abundance = abundances.sum(axis=0)
    # Divide every sample column row-wise by the taxon's copy number.
    copy_corrected = abundances.div(known['Copy Number'], axis=0).sum(axis=0)
    # A subset with no rows (or only zero abundances) yields 0/0 -> NaN.
    return total_abundance / copy_corrected


def calc_wmcn_only(this_data, data_path):
    """Compute weighted mean copy numbers (WMCN) for one dataset.

    Reads ``{data_path}{this_data}_generic_data.csv`` (taxa table: one row per
    taxon, with per-sample abundance columns plus 'Order', 'Copy Number' and
    'Phototroph') and ``{data_path}{this_data}_generic_metadata.csv`` (one row
    per sample), both with the first CSV column as the index.

    Parameters
    ----------
    this_data : str
        Dataset name used as the file-name prefix.
    data_path : str
        Path prefix prepended verbatim to the file names (so a directory
        needs its trailing separator).

    Returns
    -------
    pandas.DataFrame
        The metadata table with four added float columns:
        'WMCN'                 - all taxa,
        'WMCN, no SAR11'       - excluding Order 'SAR11_clade', 'SAR11 clade'
                                 or 'Pelagibacterales',
        'WMCN, no CN1'         - only taxa with 'Copy Number' > 1,
        'WMCN, Heterotrophs'   - only taxa with 'Phototroph' == 0.
        Metadata samples that have no column of the same label in the taxa
        table are left as NaN.
    """
    df_tax = pd.read_csv(f'{data_path}{this_data}_generic_data.csv', index_col=0)
    df_meta = pd.read_csv(f'{data_path}{this_data}_generic_metadata.csv', index_col=0)

    # Samples are matched by exact label equality between the metadata index
    # and the taxa-table column names; computed once instead of per iteration.
    meta_samples = set(df_meta.index)
    sample_cols = [col for col in df_tax.columns if col in meta_samples]

    # The SAR11 clade appears under several spellings in the 'Order' column.
    sar11_orders = ['SAR11_clade', 'SAR11 clade', 'Pelagibacterales']

    # Output column -> subset of taxa it is computed from (insertion order
    # fixes the order in which columns are appended to the metadata).
    subsets = {
        'WMCN': df_tax,
        'WMCN, no SAR11': df_tax[~df_tax['Order'].isin(sar11_orders)],
        'WMCN, no CN1': df_tax[df_tax['Copy Number'] > 1],
        'WMCN, Heterotrophs': df_tax[df_tax['Phototroph'] == 0],
    }

    for column, df_subset in subsets.items():
        wmcn = _wmcn_per_sample(df_subset, sample_cols)
        # reindex fills samples absent from the taxa table with NaN.
        df_meta[column] = wmcn.reindex(df_meta.index).to_numpy(dtype=float)

    return df_meta