# Metadata Microterroir 

In [122]:
# Directory containing the raw metadata CSVs; the relative file names used by
# the read_csv / to_csv calls below resolve against it once %cd has run.
# NOTE(review): absolute, user-specific path — adjust before running elsewhere.
wd = '/home/lfloerl/microterroir/Microbiome/Metadata'
%cd $wd 

/home/lfloerl/microterroir/Microbiome/Metadata


In [123]:
import pandas as pd
import biom
import re
import numpy as np

## Import raw metadata files

In [124]:
def _library_id(names):
    """Return the last two '-'-separated fields of each string, re-joined with '-'."""
    fields = names.str.split('-')
    return fields.str[-2] + '-' + fields.str[-1]


# Demultiplexing table from FGCZ: one library ID derived from 'Name'.
md_pooling = pd.read_csv('RawMD-Demux.csv')
md_pooling['Library_ID'] = _library_id(md_pooling['Name'])

# Our own library-prep sample sheet: one library ID per amplicon (16S and ITS).
md_samples = pd.read_csv('RawMD-LibraryPrep-LP3.csv')
for amplicon in ('16S', 'ITS'):
    md_samples[f'Library_ID_{amplicon}'] = _library_id(md_samples[f'unique_{amplicon}_ID'])

## Make basic 16S / ITS metadata files

In [125]:
# Value (ng/uL) substituted for concentration readings that are zero or negative.
# NOTE(review): presumably chosen so downstream steps that need strictly
# positive concentrations keep working — confirm the intended floor.
MIN_CONC = 0.001


def build_amplicon_metadata(pooling, samples, library_col, conc_col, out_path):
    """Join the pooling table with the sample sheet for one amplicon and save it.

    Parameters
    ----------
    pooling : pd.DataFrame
        Demultiplexing table; must contain 'Name' and 'Library_ID'.
    samples : pd.DataFrame
        Library-prep sample sheet; must contain `library_col`.
    library_col : str
        Column of `samples` matched against `pooling['Library_ID']`.
    conc_col : str
        Concentration column to keep; values <= 0 are replaced by MIN_CONC,
        missing values are left untouched.
    out_path : str
        Destination of the tab-separated output file.

    Returns
    -------
    pd.DataFrame
        The selected columns, indexed by the former 'Name' column (index
        name 'id').
    """
    merged = pd.merge(pooling, samples, left_on='Library_ID', right_on=library_col)
    keep = ['Name', 'SAMPLE_NAME', 'CTRL', 'Project', 'DNA_extraction_plate', 'Plate', conc_col]
    # Explicit copy: assigning a column on a bare column-subset of `merged`
    # triggers pandas' SettingWithCopyWarning.
    md = merged[keep].copy()
    # Vectorised replacement; the comparison is False for NaN, so NaN stays NaN.
    md[conc_col] = md[conc_col].mask(md[conc_col] <= 0, MIN_CONC)
    md = md.set_index('Name')
    md.index.name = 'id'
    md.to_csv(out_path, sep='\t')
    return md


# 16S
md_16S = build_amplicon_metadata(
    md_pooling, md_samples, 'Library_ID_16S', 'Bacterial conc. (ng/uL)', '16S_md.tsv')

# ITS
md_ITS = build_amplicon_metadata(
    md_pooling, md_samples, 'Library_ID_ITS', 'Fungal conc. (ng/uL)', 'ITS_md.tsv')

In [126]:
# Show the distinct values of the 'Project' column in the 16S metadata.
md_16S['Project'].unique()

array(['SoilColonialization', 'BotrytizedWine', 'NOT-USE', 'WINE',
       'Lavaux', 'PNA-test', 'SamplingBenchmarking', 'Valais'],
      dtype=object)

In [127]:
# Keep only rows labelled as Lavaux project samples (CTRL == 'Sample'), then report the size.
is_lavaux_sample = md_16S['Project'].eq('Lavaux') & md_16S['CTRL'].eq('Sample')
md_16S_subset = md_16S.loc[is_lavaux_sample]
md_16S_subset.shape

(595, 6)

## Make Lavaux metadata files

In [128]:
# Lavaux sample metadata; rows that are empty in every column are discarded.
md_lavaux = pd.read_csv('RawMD-LavauxSamples.csv').dropna(how='all')

In [129]:
def _with_id_column(md):
    """Return `md` with its index exposed as an 'id' column.

    If an 'id' column already exists the frame is returned unchanged, so the
    cell can be re-run without reset_index failing on a duplicate column.
    """
    return md if 'id' in md.columns else md.reset_index(names='id')


def build_lavaux_metadata(md, conc_col, lavaux, out_path):
    """Attach the Lavaux sample metadata to sequencing IDs and save the table.

    Parameters
    ----------
    md : pd.DataFrame
        Amplicon metadata with 'id', 'SAMPLE_NAME' and `conc_col` columns.
    conc_col : str
        Concentration column carried over from `md`.
    lavaux : pd.DataFrame
        Lavaux metadata; must contain 'SAMPLE_NAME'.
    out_path : str
        Destination of the tab-separated output file.

    Returns
    -------
    pd.DataFrame
        One row per matched sequencing ID, indexed by 'id'.
    """
    merged = pd.merge(md[['id', 'SAMPLE_NAME', conc_col]], lavaux,
                      on='SAMPLE_NAME', how='right')
    merged = merged.set_index('id')
    # The right join keeps Lavaux rows without a sequenced library; those have
    # a missing id and are dropped here.
    merged = merged.loc[merged.index.notna()]
    merged.to_csv(out_path, sep='\t')
    return merged


# 16S
md_16S = _with_id_column(md_16S)
merged_16S = build_lavaux_metadata(md_16S, 'Bacterial conc. (ng/uL)', md_lavaux, '16S_lavaux.tsv')

# ITS
md_ITS = _with_id_column(md_ITS)
merged_ITS = build_lavaux_metadata(md_ITS, 'Fungal conc. (ng/uL)', md_lavaux, 'ITS_lavaux.tsv')

In [116]:
# Sanity check: (rows, columns) of the merged 16S Lavaux table.
merged_16S.shape

(586, 23)