Skip to content

Commit

Permalink
Merge pull request #29 from johnchase/issue-25
Browse files Browse the repository at this point in the history
Issue 25
  • Loading branch information
ebolyen committed Jun 16, 2016
2 parents e7e96a0 + 5732ddd commit eeff04f
Show file tree
Hide file tree
Showing 6 changed files with 35 additions and 11 deletions.
4 changes: 3 additions & 1 deletion CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -3,5 +3,7 @@
## Version 0.9.0 (changes since 0.9.0 release go here)

### Features
* Bug fix: There was a small bug in the previous version that would occasionally cause issues when installing with conda
* Added the ability to pass a previously existing list of IDs to the create command, so that newly created IDs do not overlap with them

### Bugs
* Bug fix: There was a small bug in the previous version that would occasionally cause issues when installing with conda
3 changes: 3 additions & 0 deletions README.rst
Original file line number Diff line number Diff line change
Expand Up @@ -63,6 +63,9 @@ Creating a list of IDs
cual-id create ids 42 # writes 42 ids to stdout
cual-id create ids 42 > my-ids.txt # writes 42 ids to my-ids.txt
cual-id create ids 42 --existing-ids my-ids.txt # creates ids that do not
# overlap with the existing ids. Note: new ids are only compared against the
# first column of the file.
Expand Down
9 changes: 6 additions & 3 deletions cualid/fix.py
Original file line number Diff line number Diff line change
@@ -1,10 +1,13 @@
from difflib import get_close_matches


def fix_ids(correct_input, input_to_check, thresh=.5):
corr_ids = [e.strip().split('\t')[1] for e in correct_input]
def parse_ids(input_file, col):
    """Return column ``col`` from each line of a tab-separated input.

    Parameters
    ----------
    input_file : iterable of str
        Lines to parse, e.g. an open text file handle.
    col : int
        Index of the tab-separated field to extract from each line.

    Returns
    -------
    list of str
        The selected field of every non-blank line, in input order.

    Raises
    ------
    IndexError
        If a non-blank line has no field at index ``col``.
    """
    ids = []
    for line in input_file:
        stripped = line.strip()
        # Blank lines (commonly a trailing newline at the end of a file)
        # carry no id; skipping them avoids a spurious '' entry for column 0
        # and an IndexError for any other column.
        if not stripped:
            continue
        ids.append(stripped.split('\t')[col])
    return ids


broke_ids = [e.strip().split('\t')[0] for e in input_to_check]
def fix_ids(correct_input, input_to_check, thresh=.5):
corr_ids = parse_ids(correct_input, 1)
broke_ids = parse_ids(input_to_check, 0)

seen = set()
for broke_id in broke_ids:
Expand Down
14 changes: 11 additions & 3 deletions cualid/mint.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,12 +16,20 @@ def at_least_distance(query, existing, d=3):
return True


def create_ids(n, id_length, min_distance=3, failure_threshold=0.99):
def create_ids(n, id_length,
min_distance=3,
failure_threshold=0.99,
existing_ids=None):
if existing_ids is not None:
hrids = existing_ids
else:
hrids = []
uuids = []
hrids = []
initial_hrid_len = len(hrids)
failures = 0
trys = 1
while len(hrids) < n and failures/trys < failure_threshold:
while (len(hrids) - initial_hrid_len < n and
failures/trys < failure_threshold):
trys += 1
uuid_ = uuid.uuid4()
hrid = uuid_.hex[-id_length:]
Expand Down
6 changes: 5 additions & 1 deletion cualid/tests/test_fix.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
import unittest
import io

from cualid.fix import fix_ids, format_output
from cualid.fix import fix_ids, format_output, parse_ids


class TestFormatOutput(unittest.TestCase):
Expand Down Expand Up @@ -60,6 +60,10 @@ def test_fix_ids_fix_all(self):

self.assertTrue(tested)

def test_parse_ids(self):
    """parse_ids returns the requested tab-separated column of each line."""
    expected = ['23bb9', 'c0cab', '87696', '7869b', 'd50cc']
    # Each pair is (fixture handle, column index holding the short ids).
    for handle, column in ((identical, 0), (correct1, 1)):
        self.assertEqual(parse_ids(handle, column), expected)

correct1 = io.StringIO("c0b5d3ae-d2d4-4aa5-bd51-76c93e223bb9\t23bb9\n"
"0b95a10f-0610-434f-9734-4d2ac02c0cab\tc0cab\n"
Expand Down
10 changes: 7 additions & 3 deletions scripts/cual-id
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@ import sys

from cualid.label import get_barcodes
from cualid.mint import create_ids
from cualid.fix import fix_ids, format_output
from cualid.fix import fix_ids, format_output, parse_ids

@click.group()
def cli():
Expand Down Expand Up @@ -33,13 +33,17 @@ def barcode_generator(input, output_pdf, suppress_ids, barcode):
@click.argument('number-of-ids', type=int)
@click.option('-l', '--length', type=int, default=8, required=False)
@click.option('-f', '--fail-threshold', type=float, default=0.99, required=False)
def sample_id_generator(number_of_ids, length, fail_threshold):
@click.option('-e', '--existing-ids', type=click.File('U'), default=None, required=False)
def sample_id_generator(number_of_ids, length, fail_threshold, existing_ids):
"""This script generates a file of sample IDs"""
if fail_threshold >= 1:
raise ValueError("")
collected = 0
if existing_ids:
existing_ids = parse_ids(existing_ids, 0)
for uuid_, hrid in create_ids(number_of_ids, length,
failure_threshold=fail_threshold):
failure_threshold=fail_threshold,
existing_ids=existing_ids):
collected += 1
click.echo("%s\t%s" %(uuid_, hrid))

Expand Down

0 comments on commit eeff04f

Please sign in to comment.