<a href="https://colab.research.google.com/github/jeremyjyang/unmchp/blob/main/colab/CPT_compare.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# CPT code list comparison

Author: Jeremy Yang

Comparison of CPT code lists from (1) CMS, and (2) NM HCA.

In [1]:
import pandas as pd

## Read file A

HCA LoD #42 (Dec 20, 2024) Attachment A, Table 1

In [2]:
url = 'https://raw.githubusercontent.com/jeremyjyang/unmchp/refs/heads/main/data/Final-LOD-42-Final-Biomarker-Coverage_TABLE1-Biomarker-Testing-Codes.tsv'
try:
  dfA = pd.read_csv(url, sep='\t')
  display(dfA.head())
except Exception as e:
  print(f"An error occurred while reading the file from URL: {e}")

Unnamed: 0,Procedure Code,Description
0,0004M,Scoliosis dna alys
1,0006M,Onc hep gene risk classifier
2,0007M,Onc gastro 51 gene nomogram
3,0011M,Onc prst8 ca mrna 12 gen alg
4,0012M,Onc mrna 5 gen rsk urthl ca


In [4]:
print(f"Rows: {dfA.shape[0]}; Columns: {dfA.shape[1]}")
dfA.columns = ["Code", "Description"]
unique_codes_countA = dfA['Code'].nunique()
print(f"The number of unique values in the 'Code' column is: {unique_codes_countA}")

Rows: 611; Columns: 2
The number of unique values in the 'Code' column is: 589


## Find duplicate codes

In [5]:
duplicate_rowsA = dfA[dfA.duplicated(subset=['Code'], keep=False)]
if not duplicate_rowsA.empty:
  print("Rows with duplicate 'Code' values:")
  display(duplicate_rowsA.sort_values(by='Code'))
else:
  print("No duplicate values found in 'Code' column.")

Rows with duplicate 'Code' values:


Unnamed: 0,Code,Description
130,0215U,Rare ds xom dna alys ea comp
139,0215U,Rare ds xom dna alys ea comp
131,0216U,Neuro inh ataxia dna 12 com
140,0216U,Neuro inh ataxia dna 12 com
141,0217U,Neuro inh ataxia dna 51 gene
132,0217U,Neuro inh ataxia dna 51 gene
142,0218U,Neuro musc dys dmd seq alys
133,0218U,Neuro musc dys dmd seq alys
143,0219U,Nfct agt hiv gnrj seq alys
134,0219U,Nfct agt hiv gnrj seq alys


## Read file B

Billing and Coding: Molecular Pathology and Genetic Testing (A58917) (July 9, 2025)

https://www.cms.gov/medicare-coverage-database/view/article.aspx?articleId=58917&ver=83

In [7]:
url = 'https://raw.githubusercontent.com/jeremyjyang/unmchp/refs/heads/main/data/Article_-_Billing_and_Coding_Molecular_Pathology_and_Genetic_Testing_A58917_CODING-INFORMATION.tsv'
try:
    dfB = pd.read_csv(url, sep='\t')
    display(dfB.head())
except Exception as e:
    print(f"An error occurred while reading the file from URL: {e}")

Unnamed: 0,Code,Description
0,81105,Hpa-1 genotyping
1,81106,Hpa-2 genotyping
2,81107,Hpa-3 genotyping
3,81108,Hpa-4 genotyping
4,81109,Hpa-5 genotyping


In [8]:
print(f"Rows: {dfB.shape[0]}; Columns: {dfB.shape[1]}")
unique_codes_countB = dfB['Code'].nunique()
print(f"The number of unique values in the 'Code' column is: {unique_codes_countB}")

Rows: 553; Columns: 2
The number of unique values in the 'Code' column is: 553


## Find duplicate codes

Set B

In [9]:
duplicate_rowsB = dfB[dfB.duplicated(subset=['Code'], keep=False)]
if not duplicate_rowsB.empty:
  print("Rows with duplicate 'Code' values:")
  display(duplicate_rowsB.sort_values(by='Code'))
else:
  print("No duplicate values found in 'Code' column.")

No duplicate values found in 'Code' column.


## Compare code sets

A versus B

In [10]:
setA = set(dfA['Code'])
setB = set(dfB['Code'])

intersectionAB = setA.intersection(setB)
differenceAB = setA.difference(setB)
differenceBA = setB.difference(setA)

In [None]:
print(f"Number of unique codes in dfA: {len(setA)}")
print(f"Number of unique codes in dfB: {len(setB)}")
print(f"Number of codes in intersection: {len(intersectionAB)}")
print(f"Number of codes in dfA but not in dfB: {len(differenceAB)}")
print(f"Number of codes in dfB but not in dfA: {len(differenceBA)}")

Number of unique codes in dfA: 589
Number of unique codes in dfB: 553
Number of codes in intersection: 535
Number of codes in dfA but not in dfB: 54
Number of codes in dfB but not in dfA: 18


In [11]:
print(f"Intersection of codes ({len(intersectionAB)}): {', '.join(intersectionAB)}")

Intersection of codes (535): 81511, 0234U, 81408, 81179, 81121, 0010U, 0271U, 81259, 0239U, 81378, 0088U, 0101U, 81319, 81313, 81202, 81522, 0302U, 81383, 0197U, 0229U, 81529, 0131U, 81231, 81253, 81360, 0011M, 0265U, 0016U, 81333, 0175U, 81448, 0270U, 0212U, 81297, 81186, 81334, 81528, 0007M, 81320, 0133U, 81251, 81207, 81271, 81222, 81521, 0331U, 81168, 0084U, 81538, 81361, 0230U, 0141U, 81328, 81315, 0215U, 0049U, 81201, 81204, 81405, 0209U, 81518, 0293U, 81166, 81400, 81192, 81414, 81309, 81439, 0398U, 0138U, 81248, 81190, 81342, 81519, 0046U, 81440, 81546, 81357, 0177U, 0363U, 81520, 81188, 0343U, 0409U, 0203U, 81219, 0260U, 0273U, 81331, 0332U, 0433U, 0307U, 81402, 0198U, 0291U, 81539, 0179U, 81404, 81167, 81161, 81401, 0242U, 0118U, 81265, 0233U, 0199U, 0008U, 0267U, 0388U, 0339U, 81194, 81108, 81223, 81314, 0417U, 0137U, 81173, 81184, 0162U, 0285U, 0030U, 81170, 81422, 81226, 81403, 81288, 81418, 81341, 81411, 81303, 81246, 81363, 0299U, 0089U, 0194U, 0413U, 0237U, 81258, 81296

In [12]:
print(f"Codes in dfA but not in dfB ({len(differenceAB)}): {', '.join(differenceAB)}")

Codes in dfA but not in dfB (54): 0474U, 88264, 0469U, 88245, 88280, 88371, 0370U, G9143, 81433, 0487U, 0465U, 0374U, 88261, 0467U, 0369U, 0457U, 0396U, 88269, 0460U, 81438, 0020M, 0488U, 88372, 88274, 88267, 88249, 88285, 0459U, 0489U, 0475U, 0456U, 0345U, 0471U, 88271, 0486U, 88248, 81560, 0462U, 0472U, 88273, 88263, 0078U, 0468U, 88275, 88272, 88283, 0464U, 88262, 81436, 0458U, 0380U, 88289, 0461U, 0373U


In [13]:
print(f"Codes in dfB but not in dfA ({len(differenceBA)}): {', '.join(differenceBA)}")

Codes in dfB but not in dfA (18): 0481U, 0485U, 81463, 0246U, 81462, 81441, 81464, 81558, 81458, 81459, 0560U, 0478U, 81449, 81457, 81195, 0562U, 0494U, 0561U


## Read file C

Billing and Coding: Molecular Pathology and Genetic Testing (A58917) (October 18, 2024)

https://www.cms.gov/medicare-coverage-database/view/article.aspx?articleid=58917&ver=77&

In [14]:
url = 'https://raw.githubusercontent.com/jeremyjyang/unmchp/refs/heads/main/data/Article_-_Billing_and_Coding_Molecular_Pathology_and_Genetic_Testing_A58917_version_20241018_CODING-INFORMATION.tsv'
try:
    dfC = pd.read_csv(url, sep='\t')
    display(dfC.head())
except Exception as e:
    print(f"An error occurred while reading the file from URL: {e}")

Unnamed: 0,Code,Description
0,81105,Hpa-1 genotyping
1,81106,Hpa-2 genotyping
2,81107,Hpa-3 genotyping
3,81108,Hpa-4 genotyping
4,81109,Hpa-5 genotyping


In [15]:
print(f"Rows: {dfC.shape[0]}; Columns: {dfC.shape[1]}")
unique_codes_countC = dfC['Code'].nunique()
print(f"The number of unique values in the 'Code' column is: {unique_codes_countC}")

Rows: 556; Columns: 2
The number of unique values in the 'Code' column is: 556


## Find duplicate codes

Set C

In [16]:
duplicate_rowsC = dfC[dfC.duplicated(subset=['Code'], keep=False)]
if not duplicate_rowsC.empty:
  print("Rows with duplicate 'Code' values:")
  display(duplicate_rowsC.sort_values(by='Code'))
else:
  print("No duplicate values found in 'Code' column.")

No duplicate values found in 'Code' column.


## Compare code sets

A versus C

In [17]:
setC = set(dfC['Code'])
intersectionAC = setA.intersection(setC)
differenceAC = setA.difference(setC)
differenceCA = setC.difference(setA)

In [18]:
print(f"Number of unique codes in dfA: {len(setA)}")
print(f"Number of unique codes in dfC: {len(setC)}")
print(f"Number of codes in intersection: {len(intersectionAC)}")
print(f"Number of codes in dfA but not in dfC: {len(differenceAC)}")
print(f"Number of codes in dfC but not in dfA: {len(differenceCA)}")

Number of unique codes in dfA: 589
Number of unique codes in dfC: 556
Number of codes in intersection: 543
Number of codes in dfA but not in dfC: 46
Number of codes in dfC but not in dfA: 13


In [19]:
print(f"Intersection of codes ({len(intersectionAC)}): {', '.join(intersectionAC)}")

Intersection of codes (543): 81511, 0234U, 81408, 81179, 81121, 0010U, 0271U, 81259, 0239U, 81378, 0088U, 0101U, 81319, 81313, 81202, 81522, 0302U, 81383, 0197U, 0229U, 81529, 0131U, 81231, 81253, 81360, 0011M, 0265U, 0016U, 81333, 0175U, 81448, 0270U, 0212U, 81297, 81186, 81334, 81528, 0007M, 0369U, 81320, 0133U, 81251, 81207, 81271, 81222, 81521, 0331U, 81168, 0084U, 81538, 81361, 0230U, 0141U, 81328, 81315, 0215U, 0049U, 81201, 81204, 81405, 0209U, 81518, 0293U, 81166, 81400, 81192, 81414, 81309, 81433, 81439, 0398U, 0138U, 81248, 81190, 81342, 81519, 0046U, 81440, 81546, 81357, 0177U, 0363U, 81520, 81188, 0343U, 0409U, 0203U, 81219, 0260U, 0273U, 81331, 0332U, 0433U, 0307U, 81402, 0198U, 0291U, 81539, 0179U, 81404, 81167, 81161, 81401, 0242U, 0118U, 81265, 0233U, 0199U, 0008U, 0267U, 0388U, 0339U, 81194, 81108, 81223, 81314, 0417U, 0137U, 81173, 81184, 0162U, 0285U, 0030U, 81170, 81422, 81226, 81403, 81288, 81418, 81341, 81411, 81303, 81246, 81363, 0299U, 0089U, 0194U, 0413U, 0237U

In [20]:
print(f"Codes in dfA but not in dfC ({len(differenceAC)}): {', '.join(differenceAC)}")

Codes in dfA but not in dfC (46): 0474U, 88264, 0469U, 88245, 88280, 88371, G9143, 0487U, 0465U, 88261, 0467U, 0457U, 0396U, 88269, 0460U, 0020M, 0488U, 88372, 88274, 88267, 88249, 88285, 0459U, 0489U, 0475U, 0456U, 0345U, 0471U, 88271, 0486U, 88248, 81560, 0462U, 0472U, 88273, 88263, 0078U, 0468U, 88275, 88272, 88283, 0464U, 88262, 0458U, 88289, 0461U


In [21]:
print(f"Codes in dfC but not in dfA ({len(differenceCA)}): {', '.join(differenceCA)}")

Codes in dfC but not in dfA (13): 0485U, 0481U, 81463, 0246U, 81462, 81441, 81458, 81459, 0478U, 81449, 81457, 0494U, 81464
