Skip to content

Commit

Permalink
Remove smart_open HTTPS encoding workaround (#58)
Browse files Browse the repository at this point in the history
  • Loading branch information
clintval committed Aug 12, 2018
1 parent bb415ef commit 299b956
Show file tree
Hide file tree
Showing 2 changed files with 29 additions and 42 deletions.
61 changes: 19 additions & 42 deletions sample_sheet/_sample_sheet.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,4 @@
import csv
import io
import json
import os
import re
Expand All @@ -14,7 +13,7 @@
from smart_open import smart_open # type: ignore
from tabulate import tabulate # type: ignore
from terminaltables import SingleTable # type: ignore
from typing import Any, List, Mapping, Optional, Set, TextIO, Union
from typing import Any, Generator, List, Mapping, Optional, Set, TextIO, Union

from ._util import maybe_render_markdown

Expand All @@ -29,6 +28,8 @@
'Library_ID',
'Description'
]

REQUIRED_KEYS: List[str] = ['Sample_ID']
RECOMMENDED_KEYS: List[str] = ['Sample_ID', 'Sample_Name', 'index']
REQUIRED_SECTIONS: List[str] = ['Header', 'Settings', 'Reads', 'Data']

Expand Down Expand Up @@ -214,15 +215,15 @@ class Sample(object):
"""

_valid_index_key_pattern = re.compile(r'index2?')
_valid_index_value_pattern = re.compile(r'^[ACGTN]*$')
_whitespace_re = re.compile(r'\s+')

def __init__(self, mappable: Optional[Mapping]=None) -> None:
mappable = dict() if mappable is None else mappable
self.sample_sheet: Optional[SampleSheet] = None
self._other_keys: Set[str] = set()

self._whitespace_re = re.compile(r'\s+')
self._valid_index_key_pattern = re.compile(r'index2?')
self._valid_index_value_pattern = re.compile(r'^[ACGTN]*$')

# Explicitly define the recommended keys as None.
for key in RECOMMENDED_KEYS:
setattr(self, key, None)
Expand Down Expand Up @@ -361,8 +362,7 @@ class SampleSheet(object):
[Data] : table with header
Args:
path : str or pathlib.Path, optional
Any path supported by ``pathlib.Path`` and ``smart_open``.
path: Any path supported by ``pathlib.Path`` and/or ``smart_open``.
"""
_encoding: str = 'utf8'
Expand All @@ -384,33 +384,7 @@ def __init__(self, path: Optional[Path]=None) -> None:
self.Settings: SampleSheetSection = SampleSheetSection()

if self.path:
self._parse(str(self.path))

@staticmethod
def _readlines(path: Union[str, Path]) -> List[List[str]]:
"""Return a ``csv.reader`` for a filepath.
This helper method is required since ``smart_open.smart_open`` cannot
decode to "utf8" on-the-fly specifically for HTTPS. Instead, the path
is opened, read, decoded, and then wrapped in a new handle for
``csv.reader``.
Args:
path: Any path supported by ``pathlib.Path`` and ``smart_open``.
Returns:
lines: All lines of the sample sheet.
Notes:
A workaround will exist as long as this issue remains unsolved:
https://github.com/RaRe-Technologies/smart_open/issues/146
"""
string = smart_open(str(path)).read().decode(SampleSheet._encoding)
handle = io.StringIO(string, newline='')
lines = list(csv.reader(handle, skipinitialspace=True))
return lines
self._parse(self.path)

def add_section(self, section_name: str) -> None:
"""Add a section to the ``SampleSheet``."""
Expand Down Expand Up @@ -461,11 +435,14 @@ def samples(self) -> List:
"""Return the samples present in this ``SampleSheet``."""
return self._samples

def _parse(self, path: Union[str, Path]):
def _parse(self, path: Path):
section_name: str = ''
sample_header: Optional[List[str]] = None

for i, line in enumerate(self._readlines(path)):
with smart_open(path, encoding=self._encoding) as handle:
lines = list(csv.reader(handle, skipinitialspace=True))

for i, line in enumerate(lines):
# Skip to next line if this line is empty to support formats of
# sample sheets with multiple newlines as section separators.
#
Expand Down Expand Up @@ -655,8 +632,8 @@ def to_json(self, **kwargs) -> str:

def to_picard_basecalling_params(
self,
directory: Union[str, Path],
bam_prefix: Union[str, Path],
directory: Path,
bam_prefix: Path,
lanes: Union[int, List[int]]
):
"""Writes sample and library information to a set of files for a given
Expand Down Expand Up @@ -841,15 +818,15 @@ def write_blank_lines(writer, n=blank_lines, width=csv_width):
line = [getattr(sample, key) for key in samples_header]
writer.writerow(pad_iterable(line, csv_width))

def __len__(self):
def __len__(self) -> int:
"""Return the number of samples on this ``SampleSheet``."""
return len(self.samples)

def __iter__(self):
def __iter__(self) -> Generator[Sample, None, None]:
"""Iterating over a ``SampleSheet`` will emit its samples."""
yield from self.samples

def __repr__(self):
def __repr__(self) -> str:
"""Show the constructor command to initialize this object."""
path = f'"{self.path}"' if self.path else 'None'
return f'{self.__class__.__qualname__}({path})'
Expand Down
10 changes: 10 additions & 0 deletions tests/test_sample_sheet.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,11 @@

RESOURCES = (Path(__file__).absolute().resolve().parent / 'resources')

URI = (
'https://raw.githubusercontent.com/clintval/sample-sheet/'
'master/tests/resources/paired-end-single-index.csv'
)

VT_100_MAPPING = {
'0x71': '─',
'0x74': '├',
Expand Down Expand Up @@ -619,6 +624,11 @@ def test_len(self):
sample_sheet.add_sample(fake2)
eq_(len(sample_sheet), 2)

def test_read_from_uri(self):
"""Test ``__init__()`` from URI"""
sample_sheet = SampleSheet(URI)
assert sample_sheet.Reads == [151, 151]

def test_str(self):
"""Test ``__str__()``, when not printing to a TTY"""
infile = RESOURCES / 'paired-end-single-index.csv'
Expand Down

0 comments on commit 299b956

Please sign in to comment.