Skip to content

Commit

Permalink
feat: Add entry suggestion to samplesheet check (#89)
Browse files Browse the repository at this point in the history
  • Loading branch information
jvfe committed Apr 21, 2023
1 parent 4775118 commit 47e4f85
Showing 1 changed file with 26 additions and 8 deletions.
34 changes: 26 additions & 8 deletions bin/check_samplesheet.py
Original file line number Diff line number Diff line change
Expand Up @@ -54,14 +54,23 @@ def check_samplesheet(file_in, file_out):

sample_mapping_dict = {}
with open(file_in, "r") as fin:

## Check header
MIN_COLS = 2
# TODO nf-core: Update the column names for the input samplesheet
HEADER = ["sample", "fastq_1", "fastq_2"]
assembly_header = ["sample", "fna_file_path"]
header = [x.strip('"') for x in fin.readline().strip().split(",")]
if header[: len(HEADER)] != HEADER:
print("ERROR: Please check samplesheet header -> {} != {}".format(",".join(header), ",".join(HEADER)))
if header[: len(assembly_header)] == assembly_header:
print(
"ERROR: Assembly samplesheet found. Try using -entry annotation or -entry assembly_qc"
)

elif header[: len(HEADER)] != HEADER:
print(
"ERROR: Please check samplesheet header -> {} != {}".format(
",".join(header), ",".join(HEADER)
)
)
sys.exit(1)

## Check sample entries
Expand All @@ -78,7 +87,9 @@ def check_samplesheet(file_in, file_out):
num_cols = len([x for x in lspl if x])
if num_cols < MIN_COLS:
print_error(
"Invalid number of populated columns (minimum = {})!".format(MIN_COLS),
"Invalid number of populated columns (minimum = {})!".format(
MIN_COLS
),
"Line",
line,
)
Expand Down Expand Up @@ -126,13 +137,20 @@ def check_samplesheet(file_in, file_out):
with open(file_out, "w") as fout:
fout.write(",".join(["sample", "single_end", "fastq_1", "fastq_2"]) + "\n")
for sample in sorted(sample_mapping_dict.keys()):

## Check that multiple runs of the same sample are of the same datatype
if not all(x[0] == sample_mapping_dict[sample][0][0] for x in sample_mapping_dict[sample]):
print_error("Multiple runs of a sample must be of the same datatype!", "Sample: {}".format(sample))
if not all(
x[0] == sample_mapping_dict[sample][0][0]
for x in sample_mapping_dict[sample]
):
print_error(
"Multiple runs of a sample must be of the same datatype!",
"Sample: {}".format(sample),
)

for idx, val in enumerate(sample_mapping_dict[sample]):
fout.write(",".join(["{}_T{}".format(sample, idx + 1)] + val) + "\n")
fout.write(
",".join(["{}_T{}".format(sample, idx + 1)] + val) + "\n"
)
else:
print_error("No entries to process!", "Samplesheet: {}".format(file_in))

Expand Down

0 comments on commit 47e4f85

Please sign in to comment.