In [1]:
from pathlib import Path
import numpy as np
import pandas as pd

In [2]:
# Input files. Every path below is built relative to `data_dir`.
data_dir = Path("../data/")
# Pango designation table, loaded into `pangodesig_df`.
pangodesig_file = data_dir / "bigtable_pangodesig.csv"
# sc2ts inputs.
sc2ts_events_file = data_dir / "pango_x_events.csv"   # NOTE: Final ARG.
sc2ts_recomb_file = data_dir / "recombinants.csv"   # NOTE: Final ARG.
# Result tables of the other methods that are compared below.
rhgisaid_file = data_dir / "bigtable_rhgisaid.csv"
rhnextstrain_file = data_dir / "bigtable_rhnextstrain.csv"
covrecomb_file = data_dir / "bigtable_covrecomb.csv"

In [3]:
# Load the Pango designation table and keep only the rows whose `type` is "simple".
pangodesig_df = (
    pd.read_csv(pangodesig_file)
    .loc[lambda frame: frame["type"] == "simple"]
    .reset_index(drop=True)
)
pangodesig_df.head(5)

Unnamed: 0,pango,github_issue,type,parent_left_pango,parent_right_pango,parents_extra,interval_left,interval_right,intervals_extra
0,XA,https://github.com/cov-lineages/pango-designat...,simple,B.1.177,B.1.1.7,,21254,21766,
1,XAA,https://github.com/cov-lineages/pango-designat...,simple,BA.1*,BA.2*,,8392,9345,
2,XAB,https://github.com/cov-lineages/pango-designat...,simple,BA.1*,BA.2*,,6515,8394,
3,XAD,https://github.com/cov-lineages/pango-designat...,simple,BA.2*,BA.1*,,26062,26530,
4,XAE,https://github.com/cov-lineages/pango-designat...,simple,BA.2*,BA.1*,,24505,26050,


In [4]:
# Read the events table and keep the three columns used in this notebook.
events_df = pd.read_csv(sc2ts_events_file)[
    ["pango", "closest_recombinant", "pango_samples"]
]
# Some Pango Xs have several entries in the events table; for those we keep
# only the entry with the largest number of Pango samples (done in the next cell).
events_df["pango"].value_counts().head()

pango
XAC    4
XM     4
XAD    2
XBB    2
XAM    1
Name: count, dtype: int64

In [5]:
# For each Pango X, keep the single row with the highest pango_samples, then
# drop rows whose closest_recombinant is -1. Both selections are evaluated
# against the frame as it was before this reassignment.
events_df = (
    events_df
    .loc[events_df.groupby("pango")["pango_samples"].idxmax()]
    .loc[events_df["closest_recombinant"] != -1]
    .reset_index(drop=True)
)
events_df

Unnamed: 0,pango,closest_recombinant,pango_samples
0,XA,122444,39
1,XAA,1058654,17
2,XAC,964555,9
3,XAD,964555,1
4,XAE,964555,9
5,XAF,1177107,1
6,XAG,1058654,6
7,XAL,1003220,3
8,XAM,1058654,21
9,XAN,1189192,7


In [6]:
# Related to Table 1.
# See https://github.com/jeromekelleher/sc2ts-paper/issues/580
# Pango X lineages in the "Type 1" group. This list is passed as
# `focal_pangos` to the comparison functions further down.
type1_events = [
    "XC",
    "XBR",
    "XA",
    "XS",
    "XL",
    "XBG",
    "XBD",
    "XQ",
    "XBB",
    "XM",
    "XBF",
    "XF",
    "XY",
    "XG",
    "XW",
]
print(f"Type 1 events: {len(type1_events)}")

# Pango X lineages in the "Type 2" group. Only its length is reported here.
type2_events = [
    "XAE",
    "XE",
    "XH",
    "XBH",
    "XBM",
    "XAF",
    "XJ",
]
print(f"Type 2 events: {len(type2_events)}")

Type 1 events: 15
Type 2 events: 7


In [7]:
# Load the sc2ts recombinants table and preview its first rows.
recomb_df = pd.read_csv(sc2ts_recomb_file)
recomb_df.head(5)

Unnamed: 0,recombinant,sample_id,num_descendant_samples,num_samples,distinct_sample_pango,interval_left,interval_right,num_mutations,Viridian_amplicon_scheme,Artic_primer_version,...,parent_mrca_pango,parent_mrca_scorpio,parent_mrca_time,parent_mrca_date,is_rebar_recombinant,parent_pangonet_distance,net_min_supporting_loci_lft,net_min_supporting_loci_rgt,net_min_supporting_loci_lft_rgt_ge_4,k1000_muts
0,1280342,ERR9939974,1,1,1,695,958,1,COVID-ARTIC-V4.1,.,...,B.1.1.529,Probable Omicron (Unassigned),957.98188,2020-07-09,False,5,2,16,False,8
1,663484,SRR20259474,1,1,1,510,1222,1,COVID-AMPLISEQ-V1,.,...,B.1.617.2,Delta (B.1.617.2-like),838.212323,2020-11-05,False,2,2,16,False,5
2,1356368,ERR10219711,2,1,1,695,1453,1,COVID-ARTIC-V4.1,.,...,B.1.1.529,Probable Omicron (Unassigned),957.98188,2020-07-09,False,4,1,16,False,5
3,1253364,ERR9848224,855,1,1,695,1627,1,COVID-ARTIC-V4.1,.,...,B.1.1.529,Probable Omicron (Unassigned),957.98188,2020-07-09,False,4,1,54,False,7
4,1279026,ERR9940192,662,1,1,695,1627,0,COVID-ARTIC-V4.1,.,...,B.1.1.529,Probable Omicron (Unassigned),957.98188,2020-07-09,False,3,2,8,False,6


In [8]:
# Left-join every Pango X event to the recombinants row whose `recombinant`
# value equals the event's `closest_recombinant`.
sc2ts_df = pd.merge(
    events_df,
    recomb_df,
    how="left",
    left_on="closest_recombinant",
    right_on="recombinant",
)
sc2ts_df.head(5)

Unnamed: 0,pango,closest_recombinant,pango_samples,recombinant,sample_id,num_descendant_samples,num_samples,distinct_sample_pango,interval_left,interval_right,...,parent_mrca_pango,parent_mrca_scorpio,parent_mrca_time,parent_mrca_date,is_rebar_recombinant,parent_pangonet_distance,net_min_supporting_loci_lft,net_min_supporting_loci_rgt,net_min_supporting_loci_lft_rgt_ge_4,k1000_muts
0,XA,122444,39,122444,ERR5308556,39,1,1,20411,21765,...,B.1,.,1120.01602,2020-01-28,True,4,19,26,True,16
1,XAA,1058654,17,1058654,ERR8691075,154,1,1,4322,5386,...,B.1.1.529,Probable Omicron (Unassigned),957.98188,2020-07-09,True,5,8,43,True,7
2,XAC,964555,9,964555,ERR8146303,253,1,1,24504,26060,...,B.1.1.529,Probable Omicron (Unassigned),957.98188,2020-07-09,False,4,43,5,True,6
3,XAD,964555,1,964555,ERR8146303,253,1,1,24504,26060,...,B.1.1.529,Probable Omicron (Unassigned),957.98188,2020-07-09,False,4,43,5,True,6
4,XAE,964555,9,964555,ERR8146303,253,1,1,24504,26060,...,B.1.1.529,Probable Omicron (Unassigned),957.98188,2020-07-09,False,4,43,5,True,6


#### Concordance in parental Pango lineages

In [9]:
from pangonet.pangonet import PangoNet

# Build the PangoNet object from the alias key and lineage notes files stored
# under `pangonet_dir`. `is_concordant` below uses it via `pango.uncompress`.
pangonet_dir = Path("../arg_postprocessing/pangonet_data/")
pango = PangoNet().build(
    alias_key=pangonet_dir / "alias_key.json",
    lineage_notes=pangonet_dir / "lineage_notes.txt"
)


def is_concordant(*, ref, qry):
    """Tell whether the query Pango lineage is at least as specific as the reference.

    A trailing "*" is removed from each name and the result is passed through
    `pango.uncompress`. The query is concordant when its uncompressed name is
    identical to the reference's, or begins with the reference's name
    followed by a ".".
    """
    def _expand(name):
        stripped = name[:-1] if name.endswith("*") else name
        return pango.uncompress(stripped)

    expanded_ref = _expand(ref)
    expanded_qry = _expand(qry)
    if expanded_qry == expanded_ref:
        return True
    return bool(expanded_qry.startswith(expanded_ref + "."))

2025-09-09 11:26:14,015 INFO:Creating aliases.
2025-09-09 11:26:14,017 INFO:Creating network.


In [10]:
def compute_parent_concordance(df, focal_pangos=None, skip_pangos=None):
    """Compare a method's parent lineages with the Pango designation table.

    Each row of `df` must provide `pango`, `parent_left_pango` and
    `parent_right_pango`. The reference row is looked up by `pango` in the
    module-level `pangodesig_df`; a Pango X counts as concordant only when
    `is_concordant` holds for both the left and the right parent.

    Args:
        df: Per-method results, one row per Pango X.
        focal_pangos: If given, only rows whose `pango` is in this collection
            are considered.
        skip_pangos: If given, rows whose `pango` is in this collection are
            ignored.

    Returns:
        A tuple `(concordant, discordant, perc_concordant, perc_discordant,
        num_total)` where the first two items are lists of Pango X names and
        `num_total` is the number of Pango Xs actually compared. When nothing
        was compared, both percentages are NaN instead of raising
        ZeroDivisionError.
    """
    list_pango_concordant = []
    list_pango_discordant = []
    list_pango_complex = []

    for row in df.itertuples():
        if (focal_pangos is not None) and (row.pango not in focal_pangos):
            continue
        if (skip_pangos is not None) and (row.pango in skip_pangos):
            continue
        # Look the reference row up once and reuse it below.
        truth_data = pangodesig_df[pangodesig_df.pango == row.pango]
        # Skip Pango Xs that have multiple breakpoints as per Pango designation.
        # NOTE: This assumes that pangodesig_df has only one-breakpoint Pango Xs.
        if len(truth_data) == 0:
            list_pango_complex.append(row.pango)
            continue
        truth_left_parent = truth_data.parent_left_pango.values[0]
        truth_right_parent = truth_data.parent_right_pango.values[0]
        is_parent_concordant = is_concordant(
            ref=truth_left_parent, qry=row.parent_left_pango
        ) and is_concordant(ref=truth_right_parent, qry=row.parent_right_pango)
        if is_parent_concordant:
            list_pango_concordant.append(row.pango)
        else:
            list_pango_discordant.append(row.pango)

    print(f"Complex: {list_pango_complex}")
    print(f"Discordant: {list_pango_discordant}")

    num_concordant = len(list_pango_concordant)
    num_discordant = len(list_pango_discordant)
    num_total = num_concordant + num_discordant
    if num_total > 0:
        perc_concordant = round((num_concordant / num_total) * 100, 1)
        perc_discordant = round((num_discordant / num_total) * 100, 1)
    else:
        # No Pango X was compared, so the percentages are undefined.
        perc_concordant = float("nan")
        perc_discordant = float("nan")
    print(f"Concordant: {num_concordant} ({perc_concordant}%)")
    print(f"Discordant: {num_discordant} ({perc_discordant}%)")

    return (
        list_pango_concordant,
        list_pango_discordant,
        perc_concordant,
        perc_discordant,
        num_total,
    )


In [11]:
# Pango Xs left out of the sc2ts parent-lineage comparison.
list_skip_pango = ["XAN", "XAV", "XAZ", "XBE"]

print(f"Pango Xs: {len(sc2ts_df)}")
(
    sc2ts_list_pango_concordant,
    sc2ts_list_pango_discordant,
    sc2ts_perc_concordant,
    sc2ts_perc_discordant,
    sc2ts_num_total,
) = compute_parent_concordance(
    sc2ts_df, focal_pangos=type1_events, skip_pangos=list_skip_pango
)

# Summary record for sc2ts; collected into the parent-lineage summary table.
sc2ts_parents_results = dict(
    method="sc2ts",
    num_concordant=len(sc2ts_list_pango_concordant),
    num_discordant=len(sc2ts_list_pango_discordant),
    perc_concordant=sc2ts_perc_concordant,
    perc_discordant=sc2ts_perc_discordant,
    num_total=sc2ts_num_total,
)

Pango Xs: 36
Complex: []
Discordant: ['XBB']
Concordant: 14 (93.3%)
Discordant: 1 (6.7%)


In [12]:
# Load the RecombinHunt-GISAID table, excluding Pango Xs that the method
# classified as having multiple breakpoints (type other than "simple").
rhgisaid_df = (
    pd.read_csv(rhgisaid_file)
    .loc[lambda frame: frame["type"] == "simple"]
    .reset_index(drop=True)
)

print(f"Are all Pango Xs in results? {all(rhgisaid_df['is_in_results'])}")

print(f"Pango Xs: {len(rhgisaid_df)}")
(
    rhgisaid_list_pango_concordant,
    rhgisaid_list_pango_discordant,
    rhgisaid_perc_concordant,
    rhgisaid_perc_discordant,
    rhgisaid_num_total,
) = compute_parent_concordance(rhgisaid_df, focal_pangos=type1_events)

# Summary record for this method; collected into the parent-lineage summary table.
rhgisaid_parents_results = dict(
    method="rhgisaid",
    num_concordant=len(rhgisaid_list_pango_concordant),
    num_discordant=len(rhgisaid_list_pango_discordant),
    perc_concordant=rhgisaid_perc_concordant,
    perc_discordant=rhgisaid_perc_discordant,
    num_total=rhgisaid_num_total,
)

Are all Pango Xs in results? True
Pango Xs: 37
Complex: []
Discordant: []
Concordant: 13 (100.0%)
Discordant: 0 (0.0%)


In [13]:
# Load the RecombinHunt-Nextstrain table, excluding Pango Xs that the method
# classified as having multiple breakpoints (type other than "simple").
rhnextstrain_df = (
    pd.read_csv(rhnextstrain_file)
    .loc[lambda frame: frame["type"] == "simple"]
    .reset_index(drop=True)
)

print(f"Are all Pango Xs in results? {all(rhnextstrain_df['is_in_results'])}")

print(f"Pango Xs: {len(rhnextstrain_df)}")
(
    rhnextstrain_list_pango_concordant,
    rhnextstrain_list_pango_discordant,
    rhnextstrain_perc_concordant,
    rhnextstrain_perc_discordant,
    rhnextstrain_num_total,
) = compute_parent_concordance(rhnextstrain_df, focal_pangos=type1_events)

# Summary record for this method; collected into the parent-lineage summary table.
rhnextstrain_parents_results = dict(
    method="rhnextstrain",
    num_concordant=len(rhnextstrain_list_pango_concordant),
    num_discordant=len(rhnextstrain_list_pango_discordant),
    perc_concordant=rhnextstrain_perc_concordant,
    perc_discordant=rhnextstrain_perc_discordant,
    num_total=rhnextstrain_num_total,
)

Are all Pango Xs in results? True
Pango Xs: 45
Complex: []
Discordant: ['XBB', 'XM']
Concordant: 12 (85.7%)
Discordant: 2 (14.3%)


In [14]:
# Load the CovRecomb table, excluding Pango Xs that the method classified as
# having multiple breakpoints (type other than "simple").
covrecomb_df = (
    pd.read_csv(covrecomb_file)
    .loc[lambda frame: frame["type"] == "simple"]
    .reset_index(drop=True)
)

print(f"Are all Pango Xs in results? {all(covrecomb_df['is_in_results'])}")

print(f"Pango Xs: {len(covrecomb_df)}")
(
    covrecomb_list_pango_concordant,
    covrecomb_list_pango_discordant,
    covrecomb_perc_concordant,
    covrecomb_perc_discordant,
    covrecomb_num_total,
) = compute_parent_concordance(covrecomb_df, focal_pangos=type1_events)

# Summary record for this method; collected into the parent-lineage summary table.
covrecomb_parents_results = dict(
    method="covrecomb",
    num_concordant=len(covrecomb_list_pango_concordant),
    num_discordant=len(covrecomb_list_pango_discordant),
    perc_concordant=covrecomb_perc_concordant,
    perc_discordant=covrecomb_perc_discordant,
    num_total=covrecomb_num_total,
)

Are all Pango Xs in results? True
Pango Xs: 21
Complex: []
Discordant: ['XBF']
Concordant: 7 (87.5%)
Discordant: 1 (12.5%)


In [15]:
# One record per method, in the order the methods were evaluated above.
summary = [sc2ts_parents_results, rhgisaid_parents_results]
summary += [rhnextstrain_parents_results, covrecomb_parents_results]
pd.DataFrame(data=summary)

Unnamed: 0,method,num_concordant,num_discordant,perc_concordant,perc_discordant,num_total
0,sc2ts,14,1,93.3,6.7,15
1,rhgisaid,13,0,100.0,0.0,13
2,rhnextstrain,12,2,85.7,14.3,14
3,covrecomb,7,1,87.5,12.5,8


#### Overlap of breakpoint intervals

In [16]:
def overlap_breakpoint_intervals(*, ref, qry):
    """Compare a query breakpoint interval with a reference interval.

    Both `ref` and `qry` are `(left, right)` pairs with `left < right`
    (checked with assertions).

    Returns:
        A 4-tuple of
        - whether the two intervals share any span (overlap size > 0),
        - the distance between them, floored at 0,
        - the overlap size divided by the reference size,
        - the query size divided by the reference size.
    """
    ref_left, ref_right = ref[0], ref[1]
    assert ref_left < ref_right
    qry_left, qry_right = qry[0], qry[1]
    assert qry_left < qry_right

    ref_span = ref_right - ref_left
    qry_span = qry_right - qry_left

    # Gap measured from the last position inside one interval (right - 1)
    # to the left edge of the other; only one of the two can be positive.
    gap_after_ref = qry_left - (ref_right - 1)
    gap_before_ref = ref_left - (qry_right - 1)
    dist = max(0, gap_after_ref, gap_before_ref)

    shared = max(0, min(ref_right, qry_right) - max(ref_left, qry_left))

    return (shared > 0, dist, shared / ref_span, qry_span / ref_span)


In [17]:
def compute_breakpoint_overlap(df, focal_pangos=None, skip_pangos=None):
    """Compare a method's breakpoint intervals with the Pango designation table.

    Each row of `df` must provide `pango`, `interval_left` and
    `interval_right`. The reference interval is looked up by `pango` in the
    module-level `pangodesig_df`; a Pango X counts as concordant when
    `overlap_breakpoint_intervals` reports an overlap with the reference.

    Args:
        df: Per-method results, one row per Pango X.
        focal_pangos: If given, only rows whose `pango` is in this collection
            are considered.
        skip_pangos: If given, rows whose `pango` is in this collection are
            ignored.

    Returns:
        A tuple `(concordant, discordant, perc_concordant, perc_discordant,
        dist_summary, num_total)`. `dist_summary` is the string
        "median (min, max)" of the distances of the discordant intervals, or
        "NA" when there are none. When nothing was compared, both percentages
        are NaN instead of raising ZeroDivisionError.
    """
    list_pango_concordant = []
    list_pango_discordant = []
    list_pango_discordant_dist = []
    list_pango_complex = []
    list_pango_unavail = []

    for row in df.itertuples():
        if focal_pangos is not None and row.pango not in focal_pangos:
            continue
        if (skip_pangos is not None) and (row.pango in skip_pangos):
            continue
        # Look the reference row up once and reuse it below.
        truth_data = pangodesig_df[pangodesig_df.pango == row.pango]
        # Skip Pango Xs that have multiple breakpoints as per Pango designation.
        # NOTE: This assumes that pangodesig_df has only one-breakpoint Pango Xs.
        if len(truth_data) == 0:
            list_pango_complex.append(row.pango)
            continue
        method_coords = (int(row.interval_left), int(row.interval_right))
        truth_left = truth_data.interval_left.values[0]
        truth_right = truth_data.interval_right.values[0]
        # A "-" in the designation table means no reference interval is recorded.
        if truth_left == '-' or truth_right == '-':
            list_pango_unavail.append(row.pango)
            continue
        truth_coords = (int(truth_left), int(truth_right))
        (
            is_overlap_with_ref, dist, _, _
        ) = overlap_breakpoint_intervals(ref=truth_coords, qry=method_coords)
        if is_overlap_with_ref:
            list_pango_concordant.append(row.pango)
        else:
            list_pango_discordant.append(row.pango)
            list_pango_discordant_dist.append(dist)
            print(row.pango, truth_coords, method_coords, dist)

    print(f"Complex: {list_pango_complex}")
    print(f"Discordant: {list_pango_discordant}")
    print(f"Unavailable: {list_pango_unavail}")

    num_concordant = len(list_pango_concordant)
    num_discordant = len(list_pango_discordant)
    num_total = num_concordant + num_discordant
    if num_total > 0:
        perc_concordant = round((num_concordant / num_total) * 100, 1)
        perc_discordant = round((num_discordant / num_total) * 100, 1)
    else:
        # No Pango X was compared, so the percentages are undefined.
        perc_concordant = float("nan")
        perc_discordant = float("nan")
    print(f"Concordant: {num_concordant} ({perc_concordant}%)")
    print(f"Discordant: {num_discordant} ({perc_discordant}%)")

    if list_pango_discordant_dist:
        median_dist = np.median(list_pango_discordant_dist)
        min_dist = np.min(list_pango_discordant_dist)
        max_dist = np.max(list_pango_discordant_dist)
        dist_summary = f"{median_dist} ({min_dist}, {max_dist})"
    else:
        # np.min / np.max raise ValueError on an empty sequence, so a method
        # with no discordant intervals gets an explicit placeholder instead.
        dist_summary = "NA"
    print(f"Distance summary: {dist_summary}")

    return (
        list_pango_concordant,
        list_pango_discordant,
        perc_concordant,
        perc_discordant,
        dist_summary,
        num_total,
    )


In [18]:
print(f"Pango Xs: {len(sc2ts_df)}")
(
    sc2ts_list_pango_concordant,
    sc2ts_list_pango_discordant,
    sc2ts_perc_concordant,
    sc2ts_perc_discordant,
    sc2ts_dist_summary,
    sc2ts_num_total,
) = compute_breakpoint_overlap(
    sc2ts_df,
    focal_pangos=type1_events,
    # Use the same exclusions as the sc2ts parent-lineage comparison so that
    # both columns of the final table describe the same set of Pango Xs.
    skip_pangos=list_skip_pango,
)

# Summary record for sc2ts; collected into the breakpoint summary table.
sc2ts_bkpts_results = {
    "method": "sc2ts",
    "num_concordant": len(sc2ts_list_pango_concordant),
    "num_discordant": len(sc2ts_list_pango_discordant),
    "perc_concordant": sc2ts_perc_concordant,
    "perc_discordant": sc2ts_perc_discordant,
    "summary_discordant_dist": sc2ts_dist_summary,
    "num_total": sc2ts_num_total,
}

Pango Xs: 36
XBB (22891, 22936) (22332, 22577) 315
XC (25999, 26003) (26768, 27390) 766
Complex: []
Discordant: ['XBB', 'XC']
Unavailable: []
Concordant: 13 (86.7%)
Discordant: 2 (13.3%)
Distance summary: 540.5 (315, 766)


In [19]:
# Reload the RecombinHunt-GISAID table, excluding Pango Xs that the method
# classified as having multiple breakpoints (type other than "simple").
rhgisaid_df = (
    pd.read_csv(rhgisaid_file)
    .loc[lambda frame: frame["type"] == "simple"]
    .reset_index(drop=True)
)

print(f"Are all Pango Xs in results? {all(rhgisaid_df['is_in_results'])}")

print(f"Pango Xs: {len(rhgisaid_df)}")
(
    rhgisaid_list_pango_concordant,
    rhgisaid_list_pango_discordant,
    rhgisaid_perc_concordant,
    rhgisaid_perc_discordant,
    rhgisaid_dist_summary,
    rhgisaid_num_total,
) = compute_breakpoint_overlap(rhgisaid_df, focal_pangos=type1_events)

# Summary record for this method; collected into the breakpoint summary table.
rhgisaid_bkpts_results = dict(
    method="rhgisaid",
    num_concordant=len(rhgisaid_list_pango_concordant),
    num_discordant=len(rhgisaid_list_pango_discordant),
    perc_concordant=rhgisaid_perc_concordant,
    perc_discordant=rhgisaid_perc_discordant,
    summary_discordant_dist=rhgisaid_dist_summary,
    num_total=rhgisaid_num_total,
)

Are all Pango Xs in results? True
Pango Xs: 37
XC (25999, 26003) (26766, 26768) 764
XF (5386, 6513) (4183, 4185) 1202
XL (6517, 8394) (6512, 6514) 4
Complex: []
Discordant: ['XC', 'XF', 'XL']
Unavailable: []
Concordant: 10 (76.9%)
Discordant: 3 (23.1%)
Distance summary: 764.0 (4, 1202)


In [20]:
# Reload the RecombinHunt-Nextstrain table, excluding Pango Xs that the method
# classified as having multiple breakpoints (type other than "simple").
rhnextstrain_df = (
    pd.read_csv(rhnextstrain_file)
    .loc[lambda frame: frame["type"] == "simple"]
    .reset_index(drop=True)
)

print(f"Are all Pango Xs in results? {all(rhnextstrain_df['is_in_results'])}")

print(f"Pango Xs: {len(rhnextstrain_df)}")
(
    rhnextstrain_list_pango_concordant,
    rhnextstrain_list_pango_discordant,
    rhnextstrain_perc_concordant,
    rhnextstrain_perc_discordant,
    rhnextstrain_dist_summary,
    rhnextstrain_num_total,
) = compute_breakpoint_overlap(rhnextstrain_df, focal_pangos=type1_events)

# Summary record for this method; collected into the breakpoint summary table.
rhnextstrain_bkpts_results = dict(
    method="rhnextstrain",
    num_concordant=len(rhnextstrain_list_pango_concordant),
    num_discordant=len(rhnextstrain_list_pango_discordant),
    perc_concordant=rhnextstrain_perc_concordant,
    perc_discordant=rhnextstrain_perc_discordant,
    summary_discordant_dist=rhnextstrain_dist_summary,
    num_total=rhnextstrain_num_total,
)

Are all Pango Xs in results? True
Pango Xs: 45
XBB (22891, 22936) (22331, 22333) 559
XBF (9864, 9868) (8703, 8705) 1160
XBG (22600, 22917) (19814, 19816) 2785
Complex: []
Discordant: ['XBB', 'XBF', 'XBG']
Unavailable: []
Concordant: 11 (78.6%)
Discordant: 3 (21.4%)
Distance summary: 1160.0 (559, 2785)


In [21]:
# Reload the CovRecomb table, excluding Pango Xs that the method classified as
# having multiple breakpoints (type other than "simple").
covrecomb_df = (
    pd.read_csv(covrecomb_file)
    .loc[lambda frame: frame["type"] == "simple"]
    .reset_index(drop=True)
)

print(f"Are all Pango Xs in results? {all(covrecomb_df['is_in_results'])}")

print(f"Pango Xs: {len(covrecomb_df)}")
(
    covrecomb_list_pango_concordant,
    covrecomb_list_pango_discordant,
    covrecomb_perc_concordant,
    covrecomb_perc_discordant,
    covrecomb_dist_summary,
    covrecomb_num_total,
) = compute_breakpoint_overlap(covrecomb_df, focal_pangos=type1_events)

# Summary record for this method; collected into the breakpoint summary table.
covrecomb_bkpts_results = dict(
    method="covrecomb",
    num_concordant=len(covrecomb_list_pango_concordant),
    num_discordant=len(covrecomb_list_pango_discordant),
    perc_concordant=covrecomb_perc_concordant,
    perc_discordant=covrecomb_perc_discordant,
    summary_discordant_dist=covrecomb_dist_summary,
    num_total=covrecomb_num_total,
)

Are all Pango Xs in results? True
Pango Xs: 21
XBB (22891, 22936) (22109, 22675) 217
XBF (9864, 9868) (23075, 26530) 13208
XC (25999, 26003) (26767, 27973) 765
XY (11539, 12881) (15240, 21619) 2360
Complex: []
Discordant: ['XBB', 'XBF', 'XC', 'XY']
Unavailable: []
Concordant: 4 (50.0%)
Discordant: 4 (50.0%)
Distance summary: 1562.5 (217, 13208)


In [22]:
# One record per method, in the order the methods were evaluated above.
summary = [sc2ts_bkpts_results, rhgisaid_bkpts_results]
summary += [rhnextstrain_bkpts_results, covrecomb_bkpts_results]
pd.DataFrame(data=summary)

Unnamed: 0,method,num_concordant,num_discordant,perc_concordant,perc_discordant,summary_discordant_dist,num_total
0,sc2ts,13,2,86.7,13.3,"540.5 (315, 766)",15
1,rhgisaid,10,3,76.9,23.1,"764.0 (4, 1202)",13
2,rhnextstrain,11,3,78.6,21.4,"1160.0 (559, 2785)",14
3,covrecomb,4,4,50.0,50.0,"1562.5 (217, 13208)",8


Print results as a LaTeX table

In [23]:
# Fixed lines that open the table: caption, label, column spec and header rows.
latex_header = [
    r'\begin{table}',
    r'\caption{Concordance among methods in characterizing Pango X lineages.}',
    r'\label{tab:method_concordance}',
    r'\begin{tabular}{lccc}',
    r'\toprule & \multicolumn{1}{c}{Parent lineages} & \multicolumn{2}{c}{Breakpoint intervals} \\',
    r'\cmidrule(lr){2-2} \cmidrule(lr){3-4}',
    r'Method & Concordant (\%) & Concordant (\%) & Distance (bases) \\',
    r'\midrule',
]

# One body row per method: (row label, parent results, breakpoint results).
latex_body = [
    (
        f"{label} & "
        f"{parents['num_concordant']} / {parents['num_total']} ({parents['perc_concordant']}) & "
        f"{bkpts['num_concordant']} / {bkpts['num_total']} ({bkpts['perc_concordant']}) & "
        f"{bkpts['summary_discordant_dist']}"
    ) + r'\\'
    for label, parents, bkpts in [
        ("Sc2ts", sc2ts_parents_results, sc2ts_bkpts_results),
        ("RecombinHunt-GISAID", rhgisaid_parents_results, rhgisaid_bkpts_results),
        ("RecombinHunt-Nextstrain", rhnextstrain_parents_results, rhnextstrain_bkpts_results),
        ("CovRecomb", covrecomb_parents_results, covrecomb_bkpts_results),
    ]
]

latex_footer = [r'\bottomrule', r'\end{tabular}', r'\end{table}']

# Lines are joined with newlines; there is no trailing newline after \end{table}.
latex_table = "\n".join(latex_header + latex_body + latex_footer)

print(latex_table)

\begin{table}
\caption{Concordance among methods in characterizing Pango X lineages.}
\label{tab:method_concordance}
\begin{tabular}{lccc}
\toprule & \multicolumn{1}{c}{Parent lineages} & \multicolumn{2}{c}{Breakpoint intervals} \\
\cmidrule(lr){2-2} \cmidrule(lr){3-4}
Method & Concordant (\%) & Concordant (\%) & Distance (bases) \\
\midrule
Sc2ts & 14 / 15 (93.3) & 13 / 15 (86.7) & 540.5 (315, 766)\\
RecombinHunt-GISAID & 13 / 13 (100.0) & 10 / 13 (76.9) & 764.0 (4, 1202)\\
RecombinHunt-Nextstrain & 12 / 14 (85.7) & 11 / 14 (78.6) & 1160.0 (559, 2785)\\
CovRecomb & 7 / 8 (87.5) & 4 / 8 (50.0) & 1562.5 (217, 13208)\\
\bottomrule
\end{tabular}
\end{table}
