<a href="https://colab.research.google.com/github/clare-abreu/ocean_temperature_microbes/blob/main/Calculate_WMCNs.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

This notebook calculates the weighted mean copy number (WMCN) for each of the datasets used in [Abreu$^*$ & Dal Bello$^*$ et al.](https://www.biorxiv.org/content/10.1101/2022.07.13.499956v1)

In [17]:
# Import required modules and specify plotting parameters:
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from numpy import loadtxt
from scipy import stats
from scipy.integrate import odeint

# Font sizes (in points) used for every figure produced by this notebook.
SMALL_SIZE, MEDIUM_SIZE, BIGGER_SIZE = 14, 16, 18

# Apply the sizes through matplotlib's global rcParams in a single update.
plt.rcParams.update({
    'font.size': SMALL_SIZE,            # default text size
    'axes.titlesize': SMALL_SIZE,       # axes title
    'axes.labelsize': MEDIUM_SIZE,      # x and y axis labels
    'xtick.labelsize': SMALL_SIZE,      # x tick labels
    'ytick.labelsize': SMALL_SIZE,      # y tick labels
    'legend.fontsize': SMALL_SIZE,      # legend text
    'figure.titlesize': BIGGER_SIZE,    # figure-level title
})

In [18]:
def _wmcn(abundance, copy_number):
  # Weighted mean copy number of one sample: total abundance divided by the
  # sum of abundance/copy-number. pandas' sum skips NaN entries, so taxa with a
  # missing abundance drop out of both the numerator and the denominator.
  return np.sum(abundance)/np.sum(abundance/copy_number)


def calc_wmcn_only(data_link,metadata_link):
  """Compute weighted mean copy numbers (WMCN) for every sample of a dataset.

  Parameters
  ----------
  data_link : anything accepted by ``pd.read_csv`` (path, URL or file-like)
      Taxa table; its first column is used as the index. It must contain the
      columns 'Order', 'Copy Number' and 'Phototroph', plus one abundance
      column per sample, named after the sample.
  metadata_link : anything accepted by ``pd.read_csv``
      Metadata table; its first column is used as the index and is matched
      against the column names of the taxa table to find each sample.

  Returns
  -------
  pandas.DataFrame
      The metadata table with four added columns:
      'WMCN'               - all taxa with a known copy number,
      'WMCN, no SAR11'     - excluding Order 'SAR11_clade', 'SAR11 clade'
                             or 'Pelagibacterales',
      'WMCN, no CN1'       - only taxa with copy number > 1,
      'WMCN, Heterotrophs' - only taxa with Phototroph == 0.
      Samples without a matching column in the taxa table keep NaN, and a
      sample whose selected taxa sum to zero abundance evaluates to NaN (0/0).
  """
  df_tax = pd.read_csv(data_link, index_col=0)
  df_meta = pd.read_csv(metadata_link, index_col=0)

  copy_number = df_tax['Copy Number']
  # Taxa without a copy number cannot contribute to any of the WMCN variants.
  has_cn = copy_number.notna()
  # SAR11 appears under several spellings/names in the 'Order' column.
  is_sar11 = df_tax['Order'].isin(['SAR11_clade', 'SAR11 clade', 'Pelagibacterales'])

  # Output column -> taxa subset it is computed from (built once, not per sample).
  subsets = {
      'WMCN': df_tax[has_cn],
      'WMCN, no SAR11': df_tax[has_cn & ~is_sar11],
      'WMCN, no CN1': df_tax[has_cn & (copy_number > 1)],
      'WMCN, Heterotrophs': df_tax[has_cn & (df_tax['Phototroph'] == 0)],
  }

  # Pre-fill with NaN so samples absent from the taxa table stay NaN.
  for column in subsets:
      df_meta[column] = np.nan

  # Hoisted membership set: avoids rebuilding the column list for every sample.
  tax_columns = set(df_tax.columns)
  for this_sample in df_meta.index.tolist():
      if this_sample not in tax_columns:
          continue
      for column, taxa in subsets.items():
          df_meta.loc[this_sample, column] = _wmcn(taxa[this_sample], taxa['Copy Number'])

  return df_meta

In [19]:
# Colab-only: mount Google Drive so the shared-drive CSV can be read.
from google.colab import drive

# Single source of truth for the mount location; the CSV path below is built
# from it so the read does not depend on the current working directory.
DRIVE_MOUNT_POINT = '/content/gdrive'
drive.mount(DRIVE_MOUNT_POINT)

# Load the LMO generic-data table. No index column is specified here, so
# pandas assigns a default integer index.
df = pd.read_csv(DRIVE_MOUNT_POINT + '/Shared drives/petrov-lab/clare/rRNA_CN/Generic Data/LMO_free_generic_data.csv')
df

Drive already mounted at /content/gdrive; to attempt to forcibly remount, call drive.mount("/content/gdrive", force_remount=True).


Unnamed: 0,asv,Phylum,Class,Order,Family,Genus,Species,Sequence,Phototroph,Growth Rate,...,P16611_1092,P16611_1093,P17302_1014,P17302_1015,P17302_1016,P17302_1017,P17302_1018,P17302_1077,P17302_1080,P17302_1081
0,cac37e39df2461953f24398212940edf,Bacteroidetes,Bacteroidia,Flavobacteriales,Flavobacteriaceae,NS3a marine group,,TGAGGAATATTGGACAATGGGCGAGAGCCTGATCCAGCCATGCCGC...,0.0,0.208407,...,288.0,1888.0,989.0,2084.0,372.0,25.0,153.0,1599.0,2345.0,1455.0
1,1ead98754d34073a4606f7ff1e94126e,Cyanobacteria,Oxyphotobacteria,Synechococcales,Cyanobiaceae,Cyanobium PCC-6307,,TGGGGAATTTTCCGCAATGGGCGCAAGCCTGACGGAGCAACGCCGC...,1.0,0.117705,...,71.0,1302.0,3748.0,20841.0,702.0,452.0,326.0,14984.0,20126.0,6157.0
2,d638e3ef375faff8775879dcd2919456,Bacteroidetes,Bacteroidia,Flavobacteriales,Flavobacteriaceae,uncultured,uncultured Flavobacteriia bacterium,TGAGGAATATTGGACAATGGGCGCAAGCCTGATCCAGCCATGCCGC...,0.0,0.235127,...,40.0,513.0,1671.0,32.0,354.0,10.0,24.0,183.0,1951.0,516.0
3,aaf363d33b413559f355276afbb79f37,Proteobacteria,Alphaproteobacteria,Rhodobacterales,Rhodobacteraceae,Pseudorhodobacter,Ambiguous_taxa,TGGGGAATCTTAGACAATGGGCGCAAGCCTGATCTAGCCATGCCGC...,0.0,0.372033,...,107.0,746.0,2836.0,2714.0,688.0,25.0,136.0,208.0,4454.0,603.0
4,450ac381cbb4a6822befbcdb2ca20bae,Cyanobacteria,Oxyphotobacteria,Chloroplast,uncultured Cryptomonadaceae,uncultured Cryptomonadaceae,uncultured Cryptomonadaceae,TAGGGAATTTTCCGCAATGGGCGAAAGCCTGACGGAGCAATACCGC...,1.0,,...,204.0,1472.0,1425.0,977.0,436.0,17.0,123.0,266.0,1228.0,888.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
178435,af5489910e0e5ec022af397925053b18,Proteobacteria,Deltaproteobacteria,Desulfarculales,Desulfarculaceae,Desulfatiglans,uncultured delta proteobacterium,TGAGGAATTTTGCGCAATGGGGGAAACCCTGACGCAGCAACGCCGC...,0.0,0.034286,...,,,,,,,,,,
178436,78d486808fadd8164bcd0f666009d715,Proteobacteria,Alphaproteobacteria,Sphingomonadales,Sphingomonadaceae,Sphingobium,,TAGGGAATATTGGACAATGGGCGCAAGCCTGATCCAGCAATGCCGC...,0.0,0.596183,...,,,,,,,,,,
178437,03a027ad28974d766f1658c823c1d3cd,Cyanobacteria,Oxyphotobacteria,Nostocales,Nostocaceae,Aphanizomenon NIES81,,TGAGGAATATTGGTCAATGGGCGCAAGCCTGAACCAGCCATGCCGC...,1.0,0.127571,...,,,,,,,,,,
178438,64b95e810417f6b094f5374ef461e3b4,Actinobacteria,Actinobacteria,Micrococcales,Microbacteriaceae,,,TGGGGAATTTTGGACAATGGGCGAAAGCCTGATCCAGCAATGCCGC...,0.0,0.717274,...,,,,,,,,,,


In [16]:
# Re-display the table held in `df` (the same preview the Drive-loading cell shows).
# NOTE(review): this cell's execution count (16) is lower than the loading
# cell's (19); on a fresh kernel it only works after `df` has been loaded.
df

Unnamed: 0,asv,Phylum,Class,Order,Family,Genus,Species,Sequence,Phototroph,Growth Rate,...,P16611_1092,P16611_1093,P17302_1014,P17302_1015,P17302_1016,P17302_1017,P17302_1018,P17302_1077,P17302_1080,P17302_1081
0,cac37e39df2461953f24398212940edf,Bacteroidetes,Bacteroidia,Flavobacteriales,Flavobacteriaceae,NS3a marine group,,TGAGGAATATTGGACAATGGGCGAGAGCCTGATCCAGCCATGCCGC...,0.0,0.208407,...,288.0,1888.0,989.0,2084.0,372.0,25.0,153.0,1599.0,2345.0,1455.0
1,1ead98754d34073a4606f7ff1e94126e,Cyanobacteria,Oxyphotobacteria,Synechococcales,Cyanobiaceae,Cyanobium PCC-6307,,TGGGGAATTTTCCGCAATGGGCGCAAGCCTGACGGAGCAACGCCGC...,1.0,0.117705,...,71.0,1302.0,3748.0,20841.0,702.0,452.0,326.0,14984.0,20126.0,6157.0
2,d638e3ef375faff8775879dcd2919456,Bacteroidetes,Bacteroidia,Flavobacteriales,Flavobacteriaceae,uncultured,uncultured Flavobacteriia bacterium,TGAGGAATATTGGACAATGGGCGCAAGCCTGATCCAGCCATGCCGC...,0.0,0.235127,...,40.0,513.0,1671.0,32.0,354.0,10.0,24.0,183.0,1951.0,516.0
3,aaf363d33b413559f355276afbb79f37,Proteobacteria,Alphaproteobacteria,Rhodobacterales,Rhodobacteraceae,Pseudorhodobacter,Ambiguous_taxa,TGGGGAATCTTAGACAATGGGCGCAAGCCTGATCTAGCCATGCCGC...,0.0,0.372033,...,107.0,746.0,2836.0,2714.0,688.0,25.0,136.0,208.0,4454.0,603.0
4,450ac381cbb4a6822befbcdb2ca20bae,Cyanobacteria,Oxyphotobacteria,Chloroplast,uncultured Cryptomonadaceae,uncultured Cryptomonadaceae,uncultured Cryptomonadaceae,TAGGGAATTTTCCGCAATGGGCGAAAGCCTGACGGAGCAATACCGC...,1.0,,...,204.0,1472.0,1425.0,977.0,436.0,17.0,123.0,266.0,1228.0,888.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
178435,af5489910e0e5ec022af397925053b18,Proteobacteria,Deltaproteobacteria,Desulfarculales,Desulfarculaceae,Desulfatiglans,uncultured delta proteobacterium,TGAGGAATTTTGCGCAATGGGGGAAACCCTGACGCAGCAACGCCGC...,0.0,0.034286,...,,,,,,,,,,
178436,78d486808fadd8164bcd0f666009d715,Proteobacteria,Alphaproteobacteria,Sphingomonadales,Sphingomonadaceae,Sphingobium,,TAGGGAATATTGGACAATGGGCGCAAGCCTGATCCAGCAATGCCGC...,0.0,0.596183,...,,,,,,,,,,
178437,03a027ad28974d766f1658c823c1d3cd,Cyanobacteria,Oxyphotobacteria,Nostocales,Nostocaceae,Aphanizomenon NIES81,,TGAGGAATATTGGTCAATGGGCGCAAGCCTGAACCAGCCATGCCGC...,1.0,0.127571,...,,,,,,,,,,
178438,64b95e810417f6b094f5374ef461e3b4,Actinobacteria,Actinobacteria,Micrococcales,Microbacteriaceae,,,TGGGGAATTTTGGACAATGGGCGAAAGCCTGATCCAGCAATGCCGC...,0.0,0.717274,...,,,,,,,,,,
