In [1]:
# Importing packages
import os
import yaml
import logging
import pandas as pd
from qiime2 import Artifact
from qiime2 import Visualization
from qiime2 import Metadata
import qiime2.plugins.dada2.actions as dada2_actions
import qiime2.plugins.metadata.actions as metadata_actions
from qiime2.plugins.feature_table.visualizers import tabulate_seqs
from qiime2.plugins.feature_table.visualizers import summarize
from qiime2.plugins.feature_table.visualizers import core_features

from qiime2.plugins.feature_table.methods import merge
from qiime2.plugins.feature_table.methods import merge_seqs
from qiime2.plugins.feature_table.methods import merge_taxa

from qiime2.plugins.feature_table.methods import filter_samples
from qiime2.plugins.feature_table.methods import filter_seqs

import matplotlib.pyplot as plt

%matplotlib inline

In [23]:
experiment_name = "thayane-PM-joined"
base_dir = "/mnt/nupeb/rede-micro/redemicro-thayane"

meno_metadata_file = f"{base_dir}/data/metadata-meno.tsv"
joined_meno_metadata_file = f"{base_dir}/data/metadata-meno-joined.tsv"
hist_joined_meno_metadata_file = f"{base_dir}/data/metadata-meno-joined-hist.tsv"
raw_single_metadata_file = f"{base_dir}/data/single-end-metadata.tsv"

In [3]:
meno_metadata_df = pd.read_csv(meno_metadata_file, sep='\t')
raw_single_metadata_df = pd.read_csv(raw_single_metadata_file, sep='\t', skiprows=[1])

In [4]:
meno_metadata_df

Unnamed: 0,sample-id,age,class
0,#q2:types,numeric,categorical
1,M05,56,2
2,M33,52,2
3,M34,58,2
4,M35,58,2
...,...,...,...
72,M155,47,2
73,M156,46,2
74,M160,52,2
75,M162,59,1


In [5]:
raw_single_metadata_df

Unnamed: 0,sample-id,class-straw,local-coleta,idade,idade-menarca,tempo-menopausa,above_10,menopausa-age-qcut,menopausa-age-bins
0,M01,PM,LAPAC,56,13,7,0,3,2
1,M03,PM,LAPAC,59,13,8,0,3,2
2,M06,PM,LAPAC,62,14,10,1,4,3
3,M09,PM,LAPAC,53,12,3,0,1,1
4,M12,PM,LAPAC,56,16,2,0,1,0
5,M19,PM,LAPAC,58,12,7,0,3,2


In [6]:
single_metadata_df = raw_single_metadata_df[['sample-id', 'idade']]
single_metadata_df.rename(columns={'idade': 'age'}, inplace=True)
single_metadata_df.loc[:, 'class'] = 2
single_metadata_df

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  single_metadata_df.rename(columns={'idade': 'age'}, inplace=True)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  single_metadata_df.loc[:, 'class'] = 2


Unnamed: 0,sample-id,age,class
0,M01,56,2
1,M03,59,2
2,M06,62,2
3,M09,53,2
4,M12,56,2
5,M19,58,2


In [7]:
concat_df = pd.concat([meno_metadata_df, single_metadata_df], axis=0)
concat_df

Unnamed: 0,sample-id,age,class
0,#q2:types,numeric,categorical
1,M05,56,2
2,M33,52,2
3,M34,58,2
4,M35,58,2
...,...,...,...
1,M03,59,2
2,M06,62,2
3,M09,53,2
4,M12,56,2


In [8]:
concat_df.to_csv(joined_meno_metadata_file, index=False, sep='\t')

In [14]:
class_ids = concat_df.iloc[1:,-1].astype(int) + 1
concat_df.iloc[1:, -1] = class_ids
concat_df

Unnamed: 0,sample-id,age,class
0,#q2:types,numeric,categorical
1,M05,56,3
2,M33,52,3
3,M34,58,3
4,M35,58,3
...,...,...,...
1,M03,59,3
2,M06,62,3
3,M09,53,3
4,M12,56,3


In [20]:
concat_df.groupby(by='class').count()

Unnamed: 0_level_0,sample-id,age
class,Unnamed: 1_level_1,Unnamed: 2_level_1
1,14,14
2,16,16
3,52,52
categorical,1,1


In [21]:
histerectomia_ids = ['M40', 'M56', 'M76', 'M80', 'M97', 'M100', 'M111', 'M113', 'M124', 'M160']
histerectomia_rows = concat_df[concat_df['sample-id'].isin(histerectomia_ids)].index
histerectomia_rows

Int64Index([9, 21, 29, 33, 38, 41, 48, 50, 56, 74], dtype='int64')

In [22]:
concat_df.iloc[histerectomia_rows, -1] = 0
concat_df.groupby(by='class').count()

Unnamed: 0_level_0,sample-id,age
class,Unnamed: 1_level_1,Unnamed: 2_level_1
0,10,10
1,13,13
2,15,15
3,44,44
categorical,1,1


In [24]:
concat_df.to_csv(hist_joined_meno_metadata_file, index=False, sep='\t')