# Genomic Disorder Plot Review App

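This notebook provides an interactive interface for reviewing genomic disorder (GD) plots and classifying each one as correct, incorrect, or unsure. It then exports a summary table of the reviewed calls and reports review statistics.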

In [1]:
import os

import pandas as pd

from plot_review_app import create_manifest_from_summary_table, display_statistics, generate_summary_table, PlotReviewApp

# Configuration - every path below hangs off BASE_DIR.
# Edit the default here, or export GD_REVIEW_BASE_DIR before starting the kernel,
# to run the notebook from another checkout or machine. With the variable unset,
# all constants resolve to exactly the same strings as before.
BASE_DIR = os.environ.get(
    "GD_REVIEW_BASE_DIR",
    "/Users/markw/Work/talkowski/sv-pipe-testing/mw_gd/gatk-sv-utils",
).rstrip("/")  # drop trailing slashes so the joins below never produce "//"

# Plot review: plot images, GD region reference, review manifest, exported summary
PLOT_DIRECTORY = f"{BASE_DIR}/ReviewGenomicDisorders/output/asd_cohort"
GD_REGIONS_FILE = f"{BASE_DIR}/ReviewGenomicDisorders/input/GenomicDisorderRegions_hg38_2025-12-05.tsv"
MANIFEST_FILE = f"{BASE_DIR}/ReviewGenomicDisorders/review/plot_review_manifest.json"
OUTPUT_TABLE = f"{BASE_DIR}/ReviewGenomicDisorders/review/plot_review_manifest.tsv"

# Previously reviewed calls (TSV) and the manifest generated from that table (JSON)
EXTERNAL_TABLE = f"{BASE_DIR}/MakeGDRevisionTable/input/ASD_NAHR_GD_reviewed_calls_20260108.tsv"
EXTERNAL_MANIFEST = f"{BASE_DIR}/MakeGDRevisionTable/input/ASD_NAHR_GD_reviewed_calls_20260108.json"

# Read the tab-separated genomic disorder region reference into a DataFrame,
# report how many regions it holds, and preview the first rows.
gd_regions = pd.read_table(GD_REGIONS_FILE)
print(f"Loaded {len(gd_regions)} genomic disorder regions")
gd_regions.head(5)


Loaded 282 genomic disorder regions


Unnamed: 0,chr,start_GRCh38,end_GRCh38,GD_ID,svtype,NAHR,terminal,cluster
0,chr1,898703,6229913,GD_1p36_DEL_chr1_898703_6229913,DEL,no,p,
1,chr1,898703,6229913,GD_1p36_DUP_chr1_898703_6229913,DUP,no,p,
2,chr1,145686997,145808272,GD_1q21.1-BP1-2_DEL_chr1_145686997_145808272,DEL,yes,no,1q21
3,chr1,145686997,145808272,GD_1q21.1-BP1-2_DUP_chr1_145686997_145808272,DUP,yes,no,1q21
4,chr1,145686997,146048497,GD_1q21.1-BP1-3_DEL_chr1_145686997_146048497,DEL,yes,no,1q21


In [None]:
# Optional: bring in previous review results by converting the external
# reviewed-calls table into a manifest file (written to EXTERNAL_MANIFEST).
# The trailing semicolon keeps the call's return value out of the cell output.
create_manifest_from_summary_table(
    EXTERNAL_TABLE,
    PLOT_DIRECTORY,
    EXTERNAL_MANIFEST,
    verbose=True,
);

Manifest created: /Users/markw/Work/talkowski/sv-pipe-testing/mw_gd/gatk-sv-utils/MakeGDRevisionTable/input/ASD_NAHR_GD_reviewed_calls_20260108.json
Total plot files found: 42379
Matched and classified: 4263
Unmatched samples: 17171

(Showing first 20 of 17171 unmatched samples)
  - chr1:145686997-145808272_GD_1q21.1-BP1-2_DEL_chr1_145686997_145808272_DEL___au15901a_1050797128__cf53b9
  - chr1:145686997-145808272_GD_1q21.1-BP1-2_DUP_chr1_145686997_145808272_DUP___4_0084_002_recal__d4e6cd
  - chr1:145686997-145808272_GD_1q21.1-BP1-2_DUP_chr1_145686997_145808272_DUP___asd_1487_1__83a388
  - chr1:145686997-145808272_GD_1q21.1-BP1-2_DUP_chr1_145686997_145808272_DUP___au1860201_recal__65bb64
  - chr1:145686997-145808272_GD_1q21.1-BP1-2_DUP_chr1_145686997_145808272_DUP___au1860301_recal__a19d13
  - chr1:145686997-145808272_GD_1q21.1-BP1-2_DUP_chr1_145686997_145808272_DUP___au1860302_recal__f7ed87
  - chr1:145686997-145808272_GD_1q21.1-BP1-2_DUP_chr1_145686997_145808272_DUP___au2793201_recal_

## Start Review App

In [3]:
# Launch the interactive review widget on the manifest generated from the
# external reviewed-calls table. To review against the primary manifest
# instead, pass MANIFEST_FILE as the second argument.
# NOTE(review): the summary-table and statistics cells read MANIFEST_FILE,
# not EXTERNAL_MANIFEST - confirm this split is intended.
app = PlotReviewApp(
    PLOT_DIRECTORY,
    EXTERNAL_MANIFEST,
    gd_regions_df=gd_regions,
)
app.display()

VBox(children=(HTML(value='<h2>Genomic Disorder Plot Review</h2>'), HTML(value='<hr>'), HTML(value="\n        …

## Generate Summary Table

This section generates a summary table of the manually reviewed GD calls: one row per GD region and SV type, listing the samples recorded as carriers and as non-carriers.

In [4]:
# Build the summary of reviewed calls from the primary review manifest,
# report its size, and show it as the cell result.
summary_table = generate_summary_table(
    MANIFEST_FILE,
    gd_regions,
)
print(f"Generated summary table with {len(summary_table)} GD regions")
summary_table

Generated summary table with 18 GD regions


Unnamed: 0,chr,start,end,GD_ID,cluster_ID,SVTYPE,carriers,non_carriers
0,chr1,145686997,145808272,GD_1q21.1-BP1-2,,DEL,,__1_0058_003_recal__29c0aa
1,chr1,145686997,146048497,GD_1q21.1-BP1-3,,DEL,__1_0058_003_recal__29c0aa,
2,chr1,145808272,146048497,GD_1q21.1-BP2-3,,DEL,,__1_0058_003_recal__29c0aa
3,chr1,243124428,245154985,GD_1q43-q44,,DUP,,__03c16616__c52589
4,chr1,243124428,245154985,GD_1q43-q44,,DEL,,__sp0131913__e2eff3
5,chr10,46005406,49845537,GD_10q11.22-q11.23-AD,,DEL,"__sp0148883__35d430,__sp0149541__d62b9b,__sp01...",
6,chr10,46005406,49845537,GD_10q11.22-q11.23-AD,,DUP,"__03c16612__b82060,__03c16613__eefe18,__03c166...","__200905062_16__684179,__219_2349_0001__8fed5a..."
7,chr10,48181660,49845537,GD_10q11.22-q11.23-CD,,DEL,,"__sp0148883__35d430,__sp0149541__d62b9b,__sp01..."
8,chr10,48181660,50651802,GD_10q11.22-q11.23-CE,,DEL,,"__sp0148883__35d430,__sp0149541__d62b9b,__sp01..."
9,chr10,48181660,49845537,GD_10q11.22-q11.23-CD,,DUP,,"__03c16612__b82060,__03c16613__eefe18,__03c166..."


In [5]:
# Write the summary as a tab-separated file, leaving out the DataFrame index
summary_table.to_csv(
    OUTPUT_TABLE,
    sep="\t",
    index=False,
)
print(f"Summary table saved to {OUTPUT_TABLE}")

Summary table saved to /Users/markw/Work/talkowski/sv-pipe-testing/mw_gd/gatk-sv-utils/ReviewGenomicDisorders/review/plot_review_manifest.tsv


## Review Statistics

In [6]:
# Report review counts for the primary manifest (MANIFEST_FILE); the external
# manifest opened by the review app above is a different file.
display_statistics(MANIFEST_FILE)

Total reviews: 158
Correct: 36 (22.8%)
Incorrect: 121 (76.6%)
Unsure: 1 (0.6%)

Correct subtypes:
  typical: 18 (50.0%)
  mosaic: 18 (50.0%)
