In [1]:
import os
import pandas as pd
from qiime2 import Artifact
from qiime2 import Visualization
from qiime2 import Metadata
from qiime2.plugins import demux
from qiime2.plugins.metadata.visualizers import tabulate
import qiime2.plugins.dada2.actions as dada2_actions
import qiime2.plugins.metadata.actions as metadata_actions
from qiime2.plugins.feature_table.visualizers import tabulate_seqs
from qiime2.plugins.feature_table.visualizers import summarize
from qiime2.plugins.feature_table.visualizers import core_features
#from qiime2.plugins.feature_table.methods import filter_samples
from qiime2.plugins.demux.methods import filter_samples
from qiime2.plugins.feature_table.methods import filter_seqs

import matplotlib.pyplot as plt

%matplotlib inline

In [2]:
# Data directories, resolved relative to the notebook's working directory.
raw_data_path, interim_data_path = (
    os.path.join('.', 'data', stage) for stage in ('raw', 'interim')
)

In [3]:
# The same metadata file is loaded twice: as a plain DataFrame for inspection
# and as a QIIME 2 Metadata object for plugin calls.
metadata_path = os.path.join(raw_data_path, 'metadata-all-ana-types.tsv')

# skiprows=[1] drops the line right after the header when reading with pandas
# (presumably the QIIME 2 column-types row, given the file name - confirm).
metadata_df = pd.read_csv(metadata_path, sep='\t', skiprows=[1])
metadata_qa = Metadata.load(metadata_path)

metadata_df.shape

(40, 4)

In [4]:
# Load the demultiplexed paired-end artifact from the interim data directory
# and display the demux plugin's summary visualization for it.
demux_path = os.path.join(interim_data_path, 'demux-paired.qza')
demux_qa = Artifact.load(demux_path)
# summarize() returns a results object; the visualization itself is its
# `.visualization` attribute, shown as the cell output.
demux_view = demux.visualizers.summarize(demux_qa)
demux_view.visualization

<Figure size 432x288 with 0 Axes>

In [5]:
# Filter the demultiplexed reads against the sample metadata.
# The action is reached through the `demux` plugin module imported at the top of
# the notebook, so it cannot be confused with feature_table's action of the same
# name and no import is needed in the middle of the notebook.
# NOTE(review): per q2-demux, samples absent from the metadata are presumably
# dropped when no `where` clause is given - confirm against the plugin docs.
filtered_demux_qa = demux.methods.filter_samples(
    demux=demux_qa,
    metadata=metadata_qa,
).filtered_demux
filtered_demux_qa

<artifact: SampleData[PairedEndSequencesWithQuality] uuid: 4cd68957-18d3-4e10-9b26-3a0a896291c0>

In [15]:
# Summarize the metadata-filtered reads, for comparison with the unfiltered summary.
# NOTE(review): this cell's execution count (15) is out of sequence with the
# surrounding cells, so it was run out of order; re-run the notebook top to
# bottom to confirm the displayed output is current.
filtered_demux_view = demux.visualizers.summarize(filtered_demux_qa)
filtered_demux_view.visualization

<Figure size 432x288 with 0 Axes>

In [6]:
# Relation table: maps each numeric file ID (sample-id) to its sample name.
relation_df = pd.read_table(
    os.path.join(raw_data_path, 'sample_id-sample_name.tsv')
)
relation_df

Unnamed: 0,sample-id,sample-name
0,210421121673,#1 N9.01
1,210421121674,#2 N11.01
2,210421121675,#3 N9.05
3,210421121676,#4 N11.02
4,210421121677,#5 N9.03
...,...,...
69,210707163913,#70 7d.AM2
70,210707163914,#71 7d.C3
71,210707163915,#72 7d.M3
72,210707163916,#73 7d.A3


In [7]:
# Wide manifest: one row per sample-id with its forward and reverse read paths.
manifest_df = pd.read_csv(
    os.path.join(raw_data_path, 'karina-samples-groups.csv'),
    sep=',',
)
manifest_df

Unnamed: 0,sample-id,forward-path,reverse-path
0,210421121673,/home/lauro/nupeb/dados_brutos_rede_genoma/dat...,/home/lauro/nupeb/dados_brutos_rede_genoma/dat...
1,210421121674,/home/lauro/nupeb/dados_brutos_rede_genoma/dat...,/home/lauro/nupeb/dados_brutos_rede_genoma/dat...
2,210421121675,/home/lauro/nupeb/dados_brutos_rede_genoma/dat...,/home/lauro/nupeb/dados_brutos_rede_genoma/dat...
3,210421121676,/home/lauro/nupeb/dados_brutos_rede_genoma/dat...,/home/lauro/nupeb/dados_brutos_rede_genoma/dat...
4,210421121677,/home/lauro/nupeb/dados_brutos_rede_genoma/dat...,/home/lauro/nupeb/dados_brutos_rede_genoma/dat...
...,...,...,...
69,210707163913,/home/lauro/nupeb/dados_brutos_rede_genoma/dat...,/home/lauro/nupeb/dados_brutos_rede_genoma/dat...
70,210707163914,/home/lauro/nupeb/dados_brutos_rede_genoma/dat...,/home/lauro/nupeb/dados_brutos_rede_genoma/dat...
71,210707163915,/home/lauro/nupeb/dados_brutos_rede_genoma/dat...,/home/lauro/nupeb/dados_brutos_rede_genoma/dat...
72,210707163916,/home/lauro/nupeb/dados_brutos_rede_genoma/dat...,/home/lauro/nupeb/dados_brutos_rede_genoma/dat...


In [8]:
# Long manifest: one row per (sample-id, direction) with the absolute file path.
manifest2_df = pd.read_table(
    os.path.join(raw_data_path, 'samples-paths.tsv')
)
manifest2_df

Unnamed: 0,sample-id,absolute-filepath,direction
0,210421121673,/home/lauro/nupeb/dados_brutos_rede_genoma/dat...,forward
1,210421121673,/home/lauro/nupeb/dados_brutos_rede_genoma/dat...,reverse
2,210421121674,/home/lauro/nupeb/dados_brutos_rede_genoma/dat...,forward
3,210421121674,/home/lauro/nupeb/dados_brutos_rede_genoma/dat...,reverse
4,210421121675,/home/lauro/nupeb/dados_brutos_rede_genoma/dat...,forward
...,...,...,...
143,210707163915,/home/lauro/nupeb/dados_brutos_rede_genoma/dat...,reverse
144,210707163916,/home/lauro/nupeb/dados_brutos_rede_genoma/dat...,forward
145,210707163916,/home/lauro/nupeb/dados_brutos_rede_genoma/dat...,reverse
146,210707163917,/home/lauro/nupeb/dados_brutos_rede_genoma/dat...,forward


In [9]:
# Join relation and manifest, then replace the numeric file ID with the sample
# name as the `sample-id` column.
# Only the two mapping columns of relation_df are used, so the result does not
# change if this cell is re-run after other columns were added to relation_df.
joined_df = (
    relation_df[['sample-id', 'sample-name']]
    .merge(manifest_df, on='sample-id')   # inner join: unmatched IDs are dropped
    .drop(columns=['sample-id'])
    .rename(columns={'sample-name': 'sample-id'})
)
joined_df

Unnamed: 0,sample-id,forward-path,reverse-path
0,#1 N9.01,/home/lauro/nupeb/dados_brutos_rede_genoma/dat...,/home/lauro/nupeb/dados_brutos_rede_genoma/dat...
1,#2 N11.01,/home/lauro/nupeb/dados_brutos_rede_genoma/dat...,/home/lauro/nupeb/dados_brutos_rede_genoma/dat...
2,#3 N9.05,/home/lauro/nupeb/dados_brutos_rede_genoma/dat...,/home/lauro/nupeb/dados_brutos_rede_genoma/dat...
3,#4 N11.02,/home/lauro/nupeb/dados_brutos_rede_genoma/dat...,/home/lauro/nupeb/dados_brutos_rede_genoma/dat...
4,#5 N9.03,/home/lauro/nupeb/dados_brutos_rede_genoma/dat...,/home/lauro/nupeb/dados_brutos_rede_genoma/dat...
...,...,...,...
69,#70 7d.AM2,/home/lauro/nupeb/dados_brutos_rede_genoma/dat...,/home/lauro/nupeb/dados_brutos_rede_genoma/dat...
70,#71 7d.C3,/home/lauro/nupeb/dados_brutos_rede_genoma/dat...,/home/lauro/nupeb/dados_brutos_rede_genoma/dat...
71,#72 7d.M3,/home/lauro/nupeb/dados_brutos_rede_genoma/dat...,/home/lauro/nupeb/dados_brutos_rede_genoma/dat...
72,#73 7d.A3,/home/lauro/nupeb/dados_brutos_rede_genoma/dat...,/home/lauro/nupeb/dados_brutos_rede_genoma/dat...


In [10]:
# Join relation and manifest2. The inner join keeps only the manifest rows whose
# sample-id also appears in the relation table; the numeric file ID stays as
# `sample-id` (it is not replaced by the sample name here).
manifest_cols = ['sample-id', 'absolute-filepath', 'direction']
joined2_df = relation_df.merge(manifest2_df, on='sample-id')
out_manifest = joined2_df.loc[:, manifest_cols]

# Write the manifest next to the raw inputs, without the DataFrame index.
out_manifest.to_csv(
    os.path.join(raw_data_path, 'manifest-all-ana.csv'),
    index=False,
)
out_manifest

Unnamed: 0,sample-id,absolute-filepath,direction
0,210421121673,/home/lauro/nupeb/dados_brutos_rede_genoma/dat...,forward
1,210421121673,/home/lauro/nupeb/dados_brutos_rede_genoma/dat...,reverse
2,210421121674,/home/lauro/nupeb/dados_brutos_rede_genoma/dat...,forward
3,210421121674,/home/lauro/nupeb/dados_brutos_rede_genoma/dat...,reverse
4,210421121675,/home/lauro/nupeb/dados_brutos_rede_genoma/dat...,forward
...,...,...,...
143,210707163915,/home/lauro/nupeb/dados_brutos_rede_genoma/dat...,reverse
144,210707163916,/home/lauro/nupeb/dados_brutos_rede_genoma/dat...,forward
145,210707163916,/home/lauro/nupeb/dados_brutos_rede_genoma/dat...,reverse
146,210707163917,/home/lauro/nupeb/dados_brutos_rede_genoma/dat...,forward


In [11]:
# Short sample name: the second space-separated token of `sample-name`
# (e.g. "#1 N9.01" -> "N9.01"). The column is added to relation_df in place.
relation_df['sample-name-redux'] = (
    relation_df['sample-name']
    .str.split(' ')
    .str.get(1)
)
relation_df

Unnamed: 0,sample-id,sample-name,sample-name-redux
0,210421121673,#1 N9.01,N9.01
1,210421121674,#2 N11.01,N11.01
2,210421121675,#3 N9.05,N9.05
3,210421121676,#4 N11.02,N11.02
4,210421121677,#5 N9.03,N9.03
...,...,...,...
69,210707163913,#70 7d.AM2,7d.AM2
70,210707163914,#71 7d.C3,7d.C3
71,210707163915,#72 7d.M3,7d.M3
72,210707163916,#73 7d.A3,7d.A3


In [12]:
# Group assignments per sample. The file's `sample-id` column holds short sample
# names, so it is renamed to `sample-name`.
# NOTE: this rebinds `metadata_df`, which an earlier cell used for a different file.
metadata_df = (
    pd.read_csv(os.path.join(raw_data_path, 'ana-flavia-samples-groups.tsv'), sep='\t')
    .rename(columns={'sample-id': 'sample-name'})
)
metadata_df

Unnamed: 0,sample-name,group-id,group-desc
0,N9.01,NC,Ninhada controle
1,N11.01,NC,Ninhada controle
2,N9.05,NC,Ninhada controle
3,N11.02,NC,Ninhada controle
4,N9.03,NR,Ninhada reduzida
5,N10.03,NR,Ninhada reduzida
6,N10.04,NR,Ninhada reduzida
7,N10.07,NR,Ninhada reduzida
8,N10.08,NR,Ninhada reduzida
9,N7.01,STD-NC,Grupo dieta padrão


In [13]:
# Attach the group columns to each file ID by matching the short sample name.
# Requires the `sample-name-redux` column on relation_df. The inner join keeps
# only samples present in both tables.
joined_metadata_df = (
    relation_df
    .merge(metadata_df, left_on='sample-name-redux', right_on='sample-name')
    .loc[:, ['sample-id', 'sample-name-redux', 'group-id', 'group-desc']]
    .rename(columns={'sample-name-redux': 'sample-name'})
)

# Write the combined metadata as TSV, without the DataFrame index.
joined_metadata_df.to_csv(
    os.path.join(raw_data_path, 'metadata-all-ana.tsv'),
    sep='\t',
    index=False,
)