# Uploading raw data and metadata to ENA



### First, install the required plugin:

Before installing, activate the `qiime2-amplicon-2024.10` environment.

In [None]:
# Run in a terminal (not in this notebook): installs the q2-ena-uploader
# plugin straight from its GitHub repository.
pip install git+https://github.com/bokulich-lab/q2-ena-uploader.git

In [None]:
# Also run in a terminal: refresh the QIIME 2 cache, then print the help of
# the ena-uploader plugin to confirm that the command is available.
qiime dev refresh-cache
qiime ena-uploader --help

In [1]:
import os
import qiime2
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
# Report the directory the kernel is currently running in.
startup_dir = os.getcwd()
print("Current working directory:", startup_dir)

Current working directory: /home/meyeanni


In [3]:
import os
import qiime2
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt

# Project directory; the relative paths used in the cells below
# (metadata tables, artifacts/...) are resolved against it.
wd = '/home/meyeanni/cloud/meyeanni/LP4'

# Switch into the project directory and echo the result so that a wrong
# location is noticed immediately.
os.chdir(wd)
print("Current working directory:", os.getcwd())

Current working directory: /home/meyeanni/cloud/meyeanni/LP4


In [4]:
import qiime2 as q2

from qiime2 import (Artifact,
                    Metadata as qmd)

from qiime2.plugins import (cutadapt,
                            demux,
                            feature_table as qft,
                            taxa as q2t,)

from qiime2 import Metadata
from qiime2 import Visualization

from qiime2.plugins.feature_table.methods import (merge_seqs, merge, filter_seqs, filter_samples, filter_features) 
import qiime2.plugins.feature_classifier.actions as feature_classifier_actions
import qiime2.plugins.metadata.actions as metadata_actions
import qiime2.plugins.taxa.actions as taxa_actions
import qiime2.plugins.phylogeny.actions as phylogeny_actions
from qiime2.plugins.fragment_insertion.methods import sepp


%matplotlib inline

## Filter the 16S and ITS reads (which are currently separate), then recombine them into a single read file

In [6]:
# Load the sample metadata (tab-separated); the first column becomes the
# row index. This table is used below to remap the sample IDs.
meta1 = pd.read_table('LP4_metadata.tsv', index_col=0)
meta1

Unnamed: 0_level_0,label_barcode1,label_barcode2,created_date,amplicon,unique_id,sample_id,plate,plate_position,amplicon_conc,DNA_extraction_plate,project,sample_type,day,temperature,hand
SampleID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1
366293_001-LP4-16S-0001,CAAGTCGTTTAC,AGCCTTCGTCGC,16.10.2024 14:59,16S,LP4-16S-0001,1701-d0-r,P1,A01,0.42810,DNA55,highschool_hs,hand_swabs,0.0,,r
366293_002-LP4-16S-0002,AAGTTCGCGCTA,TTCCTTAGTAGT,16.10.2024 14:59,16S,LP4-16S-0002,1707-d0-r,P1,B01,2.50740,DNA55,highschool_hs,hand_swabs,0.0,,r
366293_003-LP4-16S-0003,TATTCTAAGCGC,CGTTTGGAATGA,16.10.2024 14:59,16S,LP4-16S-0003,1713-d0-r,P1,C01,0.88976,DNA55,highschool_hs,hand_swabs,0.0,,r
366293_004-LP4-16S-0004,GCGGGCCTTTGC,TACGGATTATGG,16.10.2024 14:59,16S,LP4-16S-0004,1719-d0-r,P1,D01,0.26646,DNA55,highschool_hs,hand_swabs,0.0,,r
366293_005-LP4-16S-0005,GTGTCAGATGTC,ACATACTGAGCA,16.10.2024 14:59,16S,LP4-16S-0005,1724-d0-l,P1,E01,1.43754,DNA55,highschool_hs,hand_swabs,0.0,,l
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
366292_648-LP4-ITS-1148,GCCGATTCGGAA,TAGGAGAGACAG,16.10.2024 14:59,ITS,LP4-ITS-1148,empty,P12,D12,,,,empty,,,
366292_649-LP4-ITS-1149,GACACATTTCTG,GGTGTGAGAAAG,16.10.2024 14:59,ITS,LP4-ITS-1149,empty,P12,E12,,,,empty,,,
366292_650-LP4-ITS-1150,TACAGTCTCATG,GTTCCATCGGCC,16.10.2024 14:59,ITS,LP4-ITS-1150,empty,P12,F12,,,,empty,,,
366292_651-LP4-ITS-1151,CTTGTCCACCTT,TACTTAAACATC,16.10.2024 14:59,ITS,LP4-ITS-1151,empty,P12,G12,,,,empty,,,


In [13]:
# Load the ID remapping table (tab-separated); its second column is used as
# the row index so it can be matched against the metadata index.
meta2 = pd.read_table('id_remapping.tsv', index_col=1)
meta2

Unnamed: 0_level_0,sample ID,forward sequence count,reverse sequence count
sample ID trunc,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
366293_001-LP4-16S-0001,366293_001-LP4-16S-0001_S1182,405,405
366293_002-LP4-16S-0002,366293_002-LP4-16S-0002_S590,35,35
366293_003-LP4-16S-0003,366293_003-LP4-16S-0003_S889,6,6
366293_004-LP4-16S-0004,366293_004-LP4-16S-0004_S543,40,40
366293_005-LP4-16S-0005,366293_005-LP4-16S-0005_S2302,9,9
...,...,...,...
366292_648-LP4-ITS-1148,366292_648-LP4-ITS-1148_S1984,51,51
366292_649-LP4-ITS-1149,366292_649-LP4-ITS-1149_S633,84,84
366292_650-LP4-ITS-1150,366292_650-LP4-ITS-1150_S1169,94,94
366292_651-LP4-ITS-1151,366292_651-LP4-ITS-1151_S1711,74,74


In [14]:
# Left-join the remapping table onto the metadata, matching on the two index
# arrays. Because the join keys are passed as arrays, pandas adds them to the
# result as a 'key_0' column and gives the result a fresh integer index.
# NOTE(review): the name `merge` rebinds the `merge` method imported from
# qiime2.plugins.feature_table.methods earlier in this notebook.
merge = meta1.merge(
    meta2,
    how='left',
    left_on=meta1.index,
    right_on=meta2.index,
)
merge

Unnamed: 0,key_0,label_barcode1,label_barcode2,created_date,amplicon,unique_id,sample_id,plate,plate_position,amplicon_conc,DNA_extraction_plate,project,sample_type,day,temperature,hand,sample ID,forward sequence count,reverse sequence count
0,366293_001-LP4-16S-0001,CAAGTCGTTTAC,AGCCTTCGTCGC,16.10.2024 14:59,16S,LP4-16S-0001,1701-d0-r,P1,A01,0.42810,DNA55,highschool_hs,hand_swabs,0.0,,r,366293_001-LP4-16S-0001_S1182,405.0,405.0
1,366293_002-LP4-16S-0002,AAGTTCGCGCTA,TTCCTTAGTAGT,16.10.2024 14:59,16S,LP4-16S-0002,1707-d0-r,P1,B01,2.50740,DNA55,highschool_hs,hand_swabs,0.0,,r,366293_002-LP4-16S-0002_S590,35.0,35.0
2,366293_003-LP4-16S-0003,TATTCTAAGCGC,CGTTTGGAATGA,16.10.2024 14:59,16S,LP4-16S-0003,1713-d0-r,P1,C01,0.88976,DNA55,highschool_hs,hand_swabs,0.0,,r,366293_003-LP4-16S-0003_S889,6.0,6.0
3,366293_004-LP4-16S-0004,GCGGGCCTTTGC,TACGGATTATGG,16.10.2024 14:59,16S,LP4-16S-0004,1719-d0-r,P1,D01,0.26646,DNA55,highschool_hs,hand_swabs,0.0,,r,366293_004-LP4-16S-0004_S543,40.0,40.0
4,366293_005-LP4-16S-0005,GTGTCAGATGTC,ACATACTGAGCA,16.10.2024 14:59,16S,LP4-16S-0005,1724-d0-l,P1,E01,1.43754,DNA55,highschool_hs,hand_swabs,0.0,,l,366293_005-LP4-16S-0005_S2302,9.0,9.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2299,366292_648-LP4-ITS-1148,GCCGATTCGGAA,TAGGAGAGACAG,16.10.2024 14:59,ITS,LP4-ITS-1148,empty,P12,D12,,,,empty,,,,366292_648-LP4-ITS-1148_S1984,51.0,51.0
2300,366292_649-LP4-ITS-1149,GACACATTTCTG,GGTGTGAGAAAG,16.10.2024 14:59,ITS,LP4-ITS-1149,empty,P12,E12,,,,empty,,,,366292_649-LP4-ITS-1149_S633,84.0,84.0
2301,366292_650-LP4-ITS-1150,TACAGTCTCATG,GTTCCATCGGCC,16.10.2024 14:59,ITS,LP4-ITS-1150,empty,P12,F12,,,,empty,,,,366292_650-LP4-ITS-1150_S1169,94.0,94.0
2302,366292_651-LP4-ITS-1151,CTTGTCCACCTT,TACTTAAACATC,16.10.2024 14:59,ITS,LP4-ITS-1151,empty,P12,G12,,,,empty,,,,366292_651-LP4-ITS-1151_S1711,74.0,74.0


In [15]:
# Use the full sample ID as the row index while keeping the 'sample ID'
# column itself in the frame.
merge = merge.set_index('sample ID', drop=False)
merge

Unnamed: 0_level_0,key_0,label_barcode1,label_barcode2,created_date,amplicon,unique_id,sample_id,plate,plate_position,amplicon_conc,DNA_extraction_plate,project,sample_type,day,temperature,hand,sample ID,forward sequence count,reverse sequence count
sample ID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1
366293_001-LP4-16S-0001_S1182,366293_001-LP4-16S-0001,CAAGTCGTTTAC,AGCCTTCGTCGC,16.10.2024 14:59,16S,LP4-16S-0001,1701-d0-r,P1,A01,0.42810,DNA55,highschool_hs,hand_swabs,0.0,,r,366293_001-LP4-16S-0001_S1182,405.0,405.0
366293_002-LP4-16S-0002_S590,366293_002-LP4-16S-0002,AAGTTCGCGCTA,TTCCTTAGTAGT,16.10.2024 14:59,16S,LP4-16S-0002,1707-d0-r,P1,B01,2.50740,DNA55,highschool_hs,hand_swabs,0.0,,r,366293_002-LP4-16S-0002_S590,35.0,35.0
366293_003-LP4-16S-0003_S889,366293_003-LP4-16S-0003,TATTCTAAGCGC,CGTTTGGAATGA,16.10.2024 14:59,16S,LP4-16S-0003,1713-d0-r,P1,C01,0.88976,DNA55,highschool_hs,hand_swabs,0.0,,r,366293_003-LP4-16S-0003_S889,6.0,6.0
366293_004-LP4-16S-0004_S543,366293_004-LP4-16S-0004,GCGGGCCTTTGC,TACGGATTATGG,16.10.2024 14:59,16S,LP4-16S-0004,1719-d0-r,P1,D01,0.26646,DNA55,highschool_hs,hand_swabs,0.0,,r,366293_004-LP4-16S-0004_S543,40.0,40.0
366293_005-LP4-16S-0005_S2302,366293_005-LP4-16S-0005,GTGTCAGATGTC,ACATACTGAGCA,16.10.2024 14:59,16S,LP4-16S-0005,1724-d0-l,P1,E01,1.43754,DNA55,highschool_hs,hand_swabs,0.0,,l,366293_005-LP4-16S-0005_S2302,9.0,9.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
366292_648-LP4-ITS-1148_S1984,366292_648-LP4-ITS-1148,GCCGATTCGGAA,TAGGAGAGACAG,16.10.2024 14:59,ITS,LP4-ITS-1148,empty,P12,D12,,,,empty,,,,366292_648-LP4-ITS-1148_S1984,51.0,51.0
366292_649-LP4-ITS-1149_S633,366292_649-LP4-ITS-1149,GACACATTTCTG,GGTGTGAGAAAG,16.10.2024 14:59,ITS,LP4-ITS-1149,empty,P12,E12,,,,empty,,,,366292_649-LP4-ITS-1149_S633,84.0,84.0
366292_650-LP4-ITS-1150_S1169,366292_650-LP4-ITS-1150,TACAGTCTCATG,GTTCCATCGGCC,16.10.2024 14:59,ITS,LP4-ITS-1150,empty,P12,F12,,,,empty,,,,366292_650-LP4-ITS-1150_S1169,94.0,94.0
366292_651-LP4-ITS-1151_S1711,366292_651-LP4-ITS-1151,CTTGTCCACCTT,TACTTAAACATC,16.10.2024 14:59,ITS,LP4-ITS-1151,empty,P12,G12,,,,empty,,,,366292_651-LP4-ITS-1151_S1711,74.0,74.0


In [16]:
# After re-indexing, 'sample ID' is present both as the index and as a
# regular column, so writing the frame unchanged would put two identically
# named fields in the header. QIIME 2 does not accept metadata files with
# duplicate column names, so the redundant column is dropped and the index
# alone supplies the ID column of the TSV.
merge.drop(columns=['sample ID']).to_csv('LP4_metadata_reind.tsv', sep='\t', index=True)


In [None]:
# summarize the import files
qiime demux summarize --i-data ITS/ITS-demux-paired-end.qza --o-visualization ITS/ITS-demux-paired-end.qzv

qiime demux summarize --i-data 16S/16S-demux-paired-end.qza --o-visualization 16S/16S-demux-paired-end.qzv


## Now keep only the shipping samples in both the 16S and ITS data before merging them

In [21]:
# Keep only the 16S samples whose metadata 'project' value is 'shipping' and
# save them as a new demultiplexed artifact.
# NOTE(review): LP4_metadata_reind_no_fails_16S.tsv is not written by any cell
# shown in this notebook - document where it comes from.
! qiime demux filter-samples \
  --i-demux artifacts/16S/16S-demux-paired-end.qza \
  --m-metadata-file LP4_metadata_reind_no_fails_16S.tsv \
  --p-where "[project]='shipping'" \
  --o-filtered-demux artifacts/16S/16S-demux-shipping-only.qza


[32mSaved SampleData[PairedEndSequencesWithQuality] to: artifacts/16S/16S-demux-shipping-only.qza[0m
[?25h[0m

In [22]:
# Summarize the filtered 16S artifact to check that the sample count makes sense.
!qiime demux summarize --i-data artifacts/16S/16S-demux-shipping-only.qza --o-visualization artifacts/16S/16S-demux-shipping-only.qzv

[32mSaved Visualization to: artifacts/16S/16S-demux-shipping-only.qzv[0m
[?25h[0m

and for ITS:

In [23]:
# Keep only the ITS samples whose metadata 'project' value is 'shipping' and
# save them as a new demultiplexed artifact.
# NOTE(review): LP4_metadata_reind_no_fails_ITS.tsv is not written by any cell
# shown in this notebook - document where it comes from.
! qiime demux filter-samples \
  --i-demux artifacts/ITS/ITS-demux-paired-end.qza \
  --m-metadata-file LP4_metadata_reind_no_fails_ITS.tsv \
  --p-where "[project]='shipping'" \
  --o-filtered-demux artifacts/ITS/ITS-demux-shipping-only.qza

[32mSaved SampleData[PairedEndSequencesWithQuality] to: artifacts/ITS/ITS-demux-shipping-only.qza[0m
[?25h[0m

In [24]:
# Summarize the filtered ITS artifact to check that the sample count makes sense.
!qiime demux summarize --i-data artifacts/ITS/ITS-demux-shipping-only.qza --o-visualization artifacts/ITS/ITS-demux-shipping-only.qzv

[32mSaved Visualization to: artifacts/ITS/ITS-demux-shipping-only.qzv[0m
[?25h[0m

-> Looks fine. Now combine the 16S and ITS reads into the same artifact:

In [25]:
# Export the shipping-only 16S and ITS artifacts to per-amplicon directories
# so that the exported files can be combined by hand afterwards.
!qiime tools export --input-path artifacts/16S/16S-demux-shipping-only.qza --output-path artifacts/16S/exported_demux_shipping
!qiime tools export --input-path artifacts/ITS/ITS-demux-shipping-only.qza --output-path artifacts/ITS/exported_demux_shipping
# Not run here (kept for reference): exports of the DADA2 stats and table.
# qiime tools export --input-path dada2-stats.qza --output-path exported_stats
# qiime tools export --input-path dada2-table.qza --output-path exported_table


[32mExported artifacts/16S/16S-demux-shipping-only.qza as SingleLanePerSamplePairedEndFastqDirFmt to directory artifacts/16S/exported_demux_shipping[0m
[?25h[0m[32mExported artifacts/ITS/ITS-demux-shipping-only.qza as SingleLanePerSamplePairedEndFastqDirFmt to directory artifacts/ITS/exported_demux_shipping[0m
[?25h[0m

Then put everything in the same folder and re-import it, so that all reads end up in one single artifact for the ENA upload.

-> copy the 16S data in the ITS export folder (since 16S is smaller to copy and move around)

In [26]:
# Re-import the ITS export directory (with the 16S files copied into it) as a
# single paired-end artifact for the ENA upload.
# NOTE(review): the recorded run of this cell failed - the importer reported
# three 16S samples without matching forward/reverse read pairs - so the
# combined artifact was presumably never created.
# NOTE(review): the input path is given relative to the home directory, unlike
# the project-relative paths used in the other cells - confirm that it points
# at the same export folder.
!qiime tools import \
--type 'SampleData[PairedEndSequencesWithQuality]' \
--input-path ~/public/Cloud/meyeanni/LP4/artifacts/ITS/exported_demux_shipping \
--input-format CasavaOneEightSingleLanePerSampleDirFmt \
--output-path shipping_ena/shipping-demux-paired-end.qza

[31m[1mThere was a problem importing /home/meyeanni/public/Cloud/meyeanni/LP4/artifacts/ITS/exported_demux_shipping:

  /home/meyeanni/public/Cloud/meyeanni/LP4/artifacts/ITS/exported_demux_shipping is not a(n) CasavaOneEightSingleLanePerSampleDirFmt:

  These samples do not have matching pairs of forward and reverse reads: {'366294_242-LP4-16S-0742_S760', '366294_209-LP4-16S-0709_S25', '366294_121-LP4-16S-0621_S1838'}[0m

[?25h[0m

-> This did not work, so the data will have to be uploaded separately, once for 16S and once for ITS. Use these files:

In [None]:
# Shipping-only demultiplexed artifacts to use for the two separate ENA
# uploads (one per amplicon). These are the outputs of the filter-samples
# cells above; they are recorded as data because bare CLI flags are not valid
# Python and would break a full notebook run.
ena_upload_artifacts = {
    'ITS': 'artifacts/ITS/ITS-demux-shipping-only.qza',
    '16S': 'artifacts/16S/16S-demux-shipping-only.qza',
}
ena_upload_artifacts

continue with preparing the metadata files