Skip to content

Commit

Permalink
Updates to Dev branch (#161)
Browse files Browse the repository at this point in the history
* Updated README.md

* Reapply Metagenomics -> Metagenomic

* Updates to notebooks

Updates to notebooks that modify/use YYYY_MM_DD_FinRisk_33-36_samplesheet.csv.
Confirmed current csv is correct in form.
Notebooks updated to use validate_sample_sheet() correctly.
validate_sample_sheet() and its helpers updated to no longer return a
sheet object.

* tests updated

* Removing obsolete notebook + files.

Removing obsolete notebook on Rodolfo's recommendation.
Removing files that are no longer mentioned in any file in this repo.

* Updating files

* Removed obsolete test
  • Loading branch information
charles-cowart committed Dec 15, 2023
1 parent 2dea33f commit f8d0323
Show file tree
Hide file tree
Showing 16 changed files with 4,267 additions and 7,779 deletions.
12 changes: 10 additions & 2 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -4,8 +4,8 @@ A collection of Jupyter notebooks and modules to support the wet lab shotgun pip

## Installation

**Note that this package is designed to be used with Qiita, and the tests
will not run without a Qiita installation.**
**Note that this package is designed to be used with Qiita, and some tests will
fail without a Qiita installation.**

To install this package, first clone the repository from GitHub:

Expand Down Expand Up @@ -48,6 +48,14 @@ Second, install the table of contents (TOC) extension
jupyter nbextension enable toc2/main
```

## Test

To run all tests, including those that depend on Qiita, type:

```bash
cd metagenomics_pooling_notebook
nosetests
```

## Use

Expand Down
77 changes: 33 additions & 44 deletions metapool/sample_sheet.py
Original file line number Diff line number Diff line change
Expand Up @@ -518,61 +518,54 @@ def validate_and_scrub_sample_sheet(self):
quiet_validate_and_scrub_sample_sheet is that this function will
*always* print errors and warnings to standard output.
Parameters
----------
sheet: sample_sheet.KLSampleSheet
The sample sheet object to validate and scrub.
Returns
-------
sample_sheet.SampleSheet
Corrected and validated sample sheet if no errors are found.
Boolean
True if sample-sheet is valid or if only warnings were reported,
False if one or more errors were reported.
"""
msgs, sheet = self.quiet_validate_and_scrub_sample_sheet()
msgs = self.quiet_validate_and_scrub_sample_sheet()

# display Errors and Warnings directly to stdout.
[msg.echo() for msg in msgs]

if sheet is not None:
return sheet
# in addition to displaying any messages, return False if any Errors
# were found, or True if there were just Warnings or no messages at
# all.
if not any([isinstance(m, ErrorMessage) for m in msgs]):
return True
else:
return False

def quiet_validate_and_scrub_sample_sheet(self):
"""Quietly validate the sample sheet and scrub invalid characters
The character scrubbing is only applied to the Sample_Project and the
Sample_ID columns.
Parameters
----------
sheet: sample_sheet.KLSampleSheet
The sample sheet object to validate and scrub.
Returns
-------
list
List of error or warning messages.
sample_sheet.SampleSheet or None
Corrected and validated sample sheet if no errors are found.
Otherwise None is returned.
"""
msgs = []

# we print an error return None and exit when this happens otherwise we
# won't be able to run some of the other checks
# we record an error and return the collected messages early when this
# happens, otherwise we won't be able to run other checks
for column in self.data_columns:
if column not in self.all_sample_keys:
msgs.append(
ErrorMessage(
'The %s column in the Data section is missing' %
column))
msgs.append(ErrorMessage(f'The {column} column in the Data '
'section is missing'))

for section in ['Bioinformatics', 'Contact']:
if getattr(self, section) is None:
msgs.append(ErrorMessage('The %s section cannot be empty' %
section))
msgs.append(ErrorMessage(f'The {section} section cannot be '
'empty'))

# if any errors are found up to this point then we can't continue with
# the validation
# the validation process.
if msgs:
return msgs, None
return msgs

# we track the updated projects as a dictionary so we can propagate
# these changes to the Bioinformatics and Contact sections
Expand All @@ -589,18 +582,17 @@ def quiet_validate_and_scrub_sample_sheet(self):
sample['Sample_Project'] = new_project

if updated_samples:
msgs.append(
WarningMessage('The following sample names were scrubbed for'
' bcl2fastq compatibility:\n%s' %
', '.join(updated_samples)))
msgs.append(WarningMessage('The following sample names were '
'scrubbed for bcl2fastq compatibility'
':\n%s' % ', '.join(updated_samples)))
if updated_projects:
msgs.append(
WarningMessage('The following project names were scrubbed for'
' bcl2fastq compatibility. If the same invalid '
'characters are also found in the '
'Bioinformatics and Contacts sections those '
'will be automatically scrubbed too:\n%s' %
', '.join(sorted(updated_projects))))
msgs.append(WarningMessage('The following project names were '
'scrubbed for bcl2fastq compatibility. '
'If the same invalid characters are '
'also found in the Bioinformatics and '
'Contacts sections those will be '
'automatically scrubbed too:\n%s' %
', '.join(sorted(updated_projects))))

# make the changes to prevent useless errors where the scrubbed
# names fail to match between sections.
Expand Down Expand Up @@ -654,11 +646,8 @@ def quiet_validate_and_scrub_sample_sheet(self):
' to be included in the Contact section.') %
', '.join(sorted(contact - projects))))

# if there are no error messages then return the sheet
if not any([isinstance(m, ErrorMessage) for m in msgs]):
return msgs, self
else:
return msgs, None
# return all collected Messages, even if it's an empty list.
return msgs

def _validate_sample_sheet_metadata(self, metadata):
msgs = []
Expand Down
32 changes: 0 additions & 32 deletions metapool/tests/test_metapool.py
Original file line number Diff line number Diff line change
Expand Up @@ -89,38 +89,6 @@ def setUp(self):
self.fp = path
self.plates = [p1, p2, p3, p4]

# def test_compute_shotgun_normalization_values(self):
# input_vol = 3.5
# input_dna = 10
# plate_layout = []
# for i in range(4):
# row = []
# for j in range(4):
# row.append({'dna_concentration': 10,
# 'sample_id': "S%s.%s" % (i, j)})
# plate_layout.append(row)

# obs_sample, obs_water = compute_shotgun_normalization_values(
# plate_layout, input_vol, input_dna)

# exp_sample = np.zeros((4, 4), dtype=np.float)
# exp_water = np.zeros((4, 4), dtype=np.float)
# exp_sample.fill(1000)
# exp_water.fill(2500)

# npt.assert_almost_equal(obs_sample, exp_sample)
# npt.assert_almost_equal(obs_water, exp_water)

# # Make sure that we don't go above the limit
# plate_layout[1][1]['dna_concentration'] = 0.25
# obs_sample, obs_water = compute_shotgun_normalization_values(
# plate_layout, input_vol, input_dna)

# exp_sample[1][1] = 3500
# exp_water[1][1] = 0

# npt.assert_almost_equal(obs_sample, exp_sample)
# npt.assert_almost_equal(obs_water, exp_water)
def test_read_visionmate_file(self):
# Raises error when tries to validate that all expected
# columns from VisionMate file are present.
Expand Down
47 changes: 15 additions & 32 deletions metapool/tests/test_sample_sheet.py
Original file line number Diff line number Diff line change
Expand Up @@ -1138,65 +1138,55 @@ def assertStdOutEqual(self, expected):

def test_validate_and_scrub_sample_sheet(self):
sheet = MetagenomicSampleSheetv100(self.good_ss)
sheet = sheet.validate_and_scrub_sample_sheet()
# no errors
self.assertStdOutEqual('')
self.assertTrue(isinstance(sheet, KLSampleSheet))
self.assertTrue(isinstance(sheet, MetagenomicSampleSheetv100))
self.assertTrue(sheet.validate_and_scrub_sample_sheet())

def test_quiet_validate_and_scrub_sample_sheet(self):
sheet = MetagenomicSampleSheetv100(self.good_ss)
msgs, sheet = sheet.quiet_validate_and_scrub_sample_sheet()
msgs = sheet.quiet_validate_and_scrub_sample_sheet()
# no errors
self.assertStdOutEqual('')
self.assertEqual(msgs, [])
self.assertTrue(isinstance(sheet, KLSampleSheet))
self.assertTrue(isinstance(sheet, MetagenomicSampleSheetv100))

def test_validate_and_scrub_sample_sheet_no_sample_project(self):
sheet = MetagenomicSampleSheetv100(self.no_project_ss)
sheet = sheet.validate_and_scrub_sample_sheet()
self.assertFalse(sheet.validate_and_scrub_sample_sheet())

self.assertStdOutEqual('ErrorMessage: The Sample_Project column in the'
' Data section is missing')
self.assertIsNone(sheet)

def test_quiet_validate_and_scrub_sample_sheet_no_sample_project(self):
sheet = MetagenomicSampleSheetv100(self.no_project_ss)
msgs, sheet = sheet.quiet_validate_and_scrub_sample_sheet()
msgs = sheet.quiet_validate_and_scrub_sample_sheet()

self.assertStdOutEqual('')
self.assertEqual(msgs, [ErrorMessage('The Sample_Project column in '
'the Data section is missing')])
self.assertIsNone(sheet)

def test_validate_and_scrub_sample_sheet_missing_bioinformatics(self):
sheet = MetagenomicSampleSheetv100(self.good_ss)
sheet.Bioinformatics = None
sheet = sheet.validate_and_scrub_sample_sheet()
self.assertFalse(sheet.validate_and_scrub_sample_sheet())

self.assertStdOutEqual('ErrorMessage: The Bioinformatics section '
'cannot be empty')
self.assertIsNone(sheet)

def test_quiet_validate_scrub_sample_sheet_missing_bioinformatics(self):
sheet = MetagenomicSampleSheetv100(self.good_ss)
sheet.Bioinformatics = None
msgs, sheet = sheet.quiet_validate_and_scrub_sample_sheet()
msgs = sheet.quiet_validate_and_scrub_sample_sheet()

self.assertStdOutEqual('')
self.assertEqual(msgs, [ErrorMessage('The Bioinformatics section '
'cannot be empty')])
self.assertIsNone(sheet)

def test_validate_and_scrub_sample_sheet_missing_contact(self):
sheet = MetagenomicSampleSheetv100(self.good_ss)
sheet.Contact = None
sheet = sheet.validate_and_scrub_sample_sheet()
self.assertFalse(sheet.validate_and_scrub_sample_sheet())

self.assertStdOutEqual('ErrorMessage: The Contact section '
'cannot be empty')
self.assertIsNone(sheet)

def test_validate_and_scrub_sample_sheet_scrubbed_names(self):
sheet = MetagenomicSampleSheetv100(self.scrubbable_ss)
Expand Down Expand Up @@ -1228,10 +1218,9 @@ def test_validate_and_scrub_sample_sheet_scrubbed_names(self):
'361, P21_E.coli ELI362, P21_E.coli ELI363, P21_E.coli '
'ELI364, P21_E.coli ELI365, P21_E.coli ELI366, P21_E.coli '
'ELI367, P21_E.coli ELI368, P21_E.coli ELI369')
sheet = sheet.validate_and_scrub_sample_sheet()

self.assertTrue(sheet.validate_and_scrub_sample_sheet())
self.assertStdOutEqual(message)
self.assertTrue(isinstance(sheet, MetagenomicSampleSheetv100))

def test_quiet_validate_and_scrub_sample_sheet_scrubbed_names(self):
message = ('The following sample names were scrubbed for bcl2fastq '
Expand Down Expand Up @@ -1263,11 +1252,8 @@ def test_quiet_validate_and_scrub_sample_sheet_scrubbed_names(self):
message = WarningMessage(message)

sheet = MetagenomicSampleSheetv100(self.scrubbable_ss)
msgs, sheet = sheet.quiet_validate_and_scrub_sample_sheet()
msgs = sheet.quiet_validate_and_scrub_sample_sheet()
self.assertStdOutEqual('')
self.assertTrue(isinstance(sheet, KLSampleSheet))
self.assertTrue(isinstance(sheet, MetagenomicSampleSheetv100))
self.assertFalse(isinstance(sheet, MetatranscriptomicSampleSheet))
self.assertEqual(msgs, [message])

def test_validate_and_scrub_sample_sheet_scrubbed_project_names(self):
Expand All @@ -1285,7 +1271,7 @@ def test_validate_and_scrub_sample_sheet_scrubbed_project_names(self):
sheet.Contact.Sample_Project.replace(remapper, inplace=True)
sheet.Bioinformatics.Sample_Project.replace(remapper, inplace=True)

obs = sheet.validate_and_scrub_sample_sheet()
sheet.validate_and_scrub_sample_sheet()

message = (
'WarningMessage: The following project names were scrubbed for '
Expand All @@ -1295,22 +1281,21 @@ def test_validate_and_scrub_sample_sheet_scrubbed_project_names(self):
"NYU's Tisch Art Microbiome 13059, The x.x microbiome project 1337"
)
self.assertStdOutEqual(message)
self.assertIsNotNone(obs)

scrubbed = {
'NYU_s_Tisch_Art_Microbiome_13059',
'The_x_x_microbiome_project_1337',
'Gerwick_6123'
}

for sample in obs:
for sample in sheet:
self.assertTrue(sample['Sample_Project'] in scrubbed,
sample['Sample_Project'])

for project in obs.Bioinformatics.Sample_Project:
for project in sheet.Bioinformatics.Sample_Project:
self.assertTrue(project in scrubbed)

for project in obs.Contact.Sample_Project:
for project in sheet.Contact.Sample_Project:
self.assertTrue(project in scrubbed)

def test_validate_and_scrub_sample_sheet_bad_project_names(self):
Expand All @@ -1320,9 +1305,8 @@ def test_validate_and_scrub_sample_sheet_bad_project_names(self):
'Sample_Project column are missing a Qiita study '
'identifier: Feist, Gerwick')

sheet = sheet.validate_and_scrub_sample_sheet()
self.assertFalse(sheet.validate_and_scrub_sample_sheet())
self.assertStdOutEqual(message)
self.assertIsNone(sheet)

def test_validate_and_scrub_sample_sheet_project_missing_lane(self):
sheet = MetagenomicSampleSheetv100(self.good_ss)
Expand All @@ -1332,11 +1316,10 @@ def test_validate_and_scrub_sample_sheet_project_missing_lane(self):
if sample.Sample_Project == 'Feist_11661':
sample.Lane = ' '

sheet = sheet.validate_and_scrub_sample_sheet()
self.assertFalse(sheet.validate_and_scrub_sample_sheet())
message = ('ErrorMessage: The following projects are missing a Lane '
'value: Feist_11661')
self.assertStdOutEqual(message)
self.assertIsNone(sheet)

def test_sample_sheet_to_dataframe(self):
ss = MetagenomicSampleSheetv100(self.ss)
Expand Down

0 comments on commit f8d0323

Please sign in to comment.