Simplify + improve reporting

ga4gh · Mar 7, 2017 · 33b8a61 · 33b8a61
1 parent 414fb06
commit 33b8a61
Show file tree

Hide file tree

Showing 67 changed files with 1,978 additions and 9,619 deletions.
diff --git a/.gitignore b/.gitignore
@@ -0,0 +1 @@
+.idea
diff --git a/reporting/basic/.gitignore b/reporting/basic/.gitignore
@@ -1,3 +1,6 @@
 *.pyc
 *.swp
 .idea
+share/microbench/hap.py-results/*.vcf.gz
+share/microbench/hap.py-results/*.vcf.gz.tbi
+share/microbench/hap.py-results/*.json.gz
diff --git a/reporting/basic/bin/rep.py b/reporting/basic/bin/rep.py
@@ -30,19 +30,16 @@
 def main():
     parser = argparse.ArgumentParser(description="Create a variant calling report.")
 
-    parser.add_argument("input", help="Input file in GA4GH metrics format", nargs="*")
+    parser.add_argument("input", help="Input file in GA4GH metrics CSV format. "
+                                      "To label multiple results, use the following pattern: "
+                                      "rep.py gatk-3_vcfeval-giab:gatk3.roc.all.csv.gz -o test.html ; this will "
+                                      "use the label gatk-3 for 'Method', and vcfeval-giab for the "
+                                      "'Comparison' header.", nargs="*")
 
     parser.add_argument("-o", "--output", help="Output file name for reports, e.g. 'report' to write "
                                                "report.html",
                         required=True)
 
-    parser.add_argument("-m", "--comparison-method", default="default", dest="comparison_method",
-                        help="The comparison method that was used.")
-
-    parser.add_argument("-l", "--result-list", default=[], dest="result_list", action="append",
-                        help="Result list in delimited format. Must have these columns: "
-                             "method, comparisonmethod, and files.")
-
     parser.add_argument("--roc-max-datapoints",
                         help="Maximum number of data points in a ROC (higher numbers might slow down our plotting)",
                         dest="roc_datapoints", type=int, default=1000)
@@ -63,32 +60,35 @@ def main():
     elif not args.output.endswith(".html"):
         args.output += ".html"
 
-    if args.input:
-        metrics = report.metrics.read_qfy_csv(args.input, args.comparison_method)
-    else:
-        metrics = []
-
-    for l in args.result_list:
-        print "reading %s" % l
-        csvfile = open(l, 'rb')
-        dialect = csv.Sniffer().sniff(csvfile.read(8192))
-        csvfile.seek(0)
-        dr = csv.DictReader(csvfile, dialect=dialect)
-        for row in dr:
-            rfiles = [x.strip() for x in row["files"].split(",")]
-            for i, rfile in enumerate(rfiles):
-                if not os.path.exists(rfile):
-                    rfiles[i] = os.path.abspath(os.path.join(os.path.dirname(l), rfile))
-            row_metrics = report.metrics.read_qfy_csv(rfiles,
-                                                      method=row["method"],
-                                                      cmethod=row["comparisonmethod"],
-                                                      roc_metrics=["METRIC.Precision", "METRIC.Recall"],
-                                                      roc_diff=args.roc_diff,
-                                                      max_data_points=args.roc_datapoints,
-                                                      minmax={"METRIC.Precision": {"min": args.min_precision},
-                                                              "METRIC.Recall": {"min": args.min_recall}}
-                                                      )
-            metrics = metrics + row_metrics
+    metrics = []
+    for i in args.input:
+        l = i.split(":")
+
+        method_label = "default"
+        cmethod_label = "default"
+
+        if len(l) <= 1:
+            rfiles = l[0]
+        else:
+            rfiles = l[1:]
+            labels = l[0].split("_")
+            if len(labels) > 0:
+                method_label = labels[0]
+            if len(labels) > 1:
+                cmethod_label = labels[1]
+
+        print "reading %s as %s / %s" % (str(rfiles), method_label, cmethod_label)
+
+        row_metrics = report.metrics.read_qfy_csv(rfiles,
+                                                  method=method_label,
+                                                  cmethod=cmethod_label,
+                                                  roc_metrics=["METRIC.Precision", "METRIC.Recall"],
+                                                  roc_diff=args.roc_diff,
+                                                  max_data_points=args.roc_datapoints,
+                                                  minmax={"METRIC.Precision": {"min": args.min_precision},
+                                                          "METRIC.Recall": {"min": args.min_recall}}
+                                                  )
+        metrics += row_metrics
 
     loader = jinja2.FileSystemLoader(searchpath=TEMPLATEDIR)
     env = jinja2.Environment(loader=loader)

diff --git a/reporting/basic/bin/run.py b/reporting/basic/bin/run.py