In [None]:
import pandas as pd
from google.colab import drive
# Mount Google Drive at /content/drive; every file path used below lives under that mount point.
drive.mount('/content/drive')

Mounted at /content/drive


## Combine the yearly ICD-10 files into a single file

In [None]:
# Load the yearly ICD-10 code files (one CSV per year, 2015 through 2025).
## files were obtained from: https://www.cdc.gov/nchs/icd/icd-10-cm/files.html
ICD10_DIR = '/content/drive/MyDrive/Data/icd10'
ICD10_YEARS = range(2015, 2026)  # 2015 through 2025 inclusive

# One read per year, driven by ICD10_YEARS, instead of a hand-written call per
# file; the directory is spelled out once so it cannot drift between years.
icd10_by_year = {
    year: pd.read_csv(f'{ICD10_DIR}/icd10-{year}.csv') for year in ICD10_YEARS
}

# Keep the per-year variable names so cells that refer to them keep working.
(icd10_2015, icd10_2016, icd10_2017, icd10_2018, icd10_2019, icd10_2020,
 icd10_2021, icd10_2022, icd10_2023, icd10_2024, icd10_2025) = (
    icd10_by_year[year] for year in ICD10_YEARS
)

In [None]:
# Stack the yearly tables into one frame, newest year first (2025 down to 2015).
icd10_joined = pd.concat(
    objs=[
        icd10_2025,
        icd10_2024,
        icd10_2023,
        icd10_2022,
        icd10_2021,
        icd10_2020,
        icd10_2019,
        icd10_2018,
        icd10_2017,
        icd10_2016,
        icd10_2015,
    ],
)

In [None]:
# Keep a single row per value of "Code": the first occurrence is retained and
# every later repeat is dropped.

icd10_joined = icd10_joined.drop_duplicates(subset='Code', keep='first')

In [None]:
# Write the de-duplicated ICD-10 table to Drive, without the row index.
icd10_joined.to_csv(
    path_or_buf='/content/drive/MyDrive/Data/icd10/ICD10Diagnosis.csv',
    index=False,
)

## Process the CCSR lookup file

In [None]:
# CCSR lookup file for ICD10 regrouping
## look up was obtained from HCUP: https://hcup-us.ahrq.gov/toolssoftware/ccsr/dxccsr.jsp#download
# The previewed columns all hold codes or descriptions, so read every column
# as text instead of letting pandas infer a dtype per chunk.
# NOTE(review): the recorded output shows a warning raised from this read_csv
# call -- presumably a mixed-type DtypeWarning; confirm it no longer appears.
LU_CCSR = pd.read_csv(
    '/content/drive/MyDrive/Data/Input/DXCCSR_v2025-1.csv',
    dtype=str,
)
LU_CCSR.head()

  LU_CCSR = pd.read_csv('/content/drive/MyDrive/Data/Input/DXCCSR_v2025-1.csv')


Unnamed: 0,'ICD-10-CM CODE','ICD-10-CM CODE DESCRIPTION','Default CCSR CATEGORY IP','Default CCSR CATEGORY DESCRIPTION IP','Default CCSR CATEGORY OP','Default CCSR CATEGORY DESCRIPTION OP','CCSR CATEGORY 1','CCSR CATEGORY 1 DESCRIPTION','CCSR CATEGORY 2','CCSR CATEGORY 2 DESCRIPTION','CCSR CATEGORY 3','CCSR CATEGORY 3 DESCRIPTION','CCSR CATEGORY 4','CCSR CATEGORY 4 DESCRIPTION','CCSR CATEGORY 5','CCSR CATEGORY 5 DESCRIPTION','CCSR CATEGORY 6','CCSR CATEGORY 6 DESCRIPTION','Rationale for Default Assignment'
0,'A000',"Cholera due to Vibrio cholerae 01, biovar chol...",'DIG001',Intestinal infection,'DIG001',Intestinal infection,'DIG001',Intestinal infection,'INF003',Bacterial infections,' ',,' ',,' ',,' ',,06 Infectious conditions
1,'A001',"Cholera due to Vibrio cholerae 01, biovar eltor",'DIG001',Intestinal infection,'DIG001',Intestinal infection,'DIG001',Intestinal infection,'INF003',Bacterial infections,' ',,' ',,' ',,' ',,06 Infectious conditions
2,'A009',"Cholera, unspecified",'DIG001',Intestinal infection,'DIG001',Intestinal infection,'DIG001',Intestinal infection,'INF003',Bacterial infections,' ',,' ',,' ',,' ',,06 Infectious conditions
3,'A0100',"Typhoid fever, unspecified",'DIG001',Intestinal infection,'DIG001',Intestinal infection,'DIG001',Intestinal infection,'INF003',Bacterial infections,' ',,' ',,' ',,' ',,06 Infectious conditions
4,'A0101',Typhoid meningitis,'NVS001',Meningitis,'NVS001',Meningitis,'INF003',Bacterial infections,'NVS001',Meningitis,' ',,' ',,' ',,' ',,06 Infectious conditions


In [None]:
# Tidy the CCSR lookup: strip the literal single quotes that wrap the headers
# and values in the raw file, keep only the ICD-10 code with its first CCSR
# category and description, and rename those columns.
CCSR_COLUMN_RENAMES = {
    'ICD-10-CM CODE': 'ICD_CD',
    'CCSR CATEGORY 1': 'CCSR_CATEGORY_CD',
    'CCSR CATEGORY 1 DESCRIPTION': 'CCSR_CATEGORY_DESCRIPTION',
}

# Select and rename in one chain. rename() returns a new DataFrame, so the
# result is not a slice of the raw frame; an in-place rename on a column slice
# is what raises SettingWithCopyWarning.
LU_CCSR = (
    LU_CCSR
    .rename(columns=lambda name: name.strip("'"))
    .loc[:, list(CCSR_COLUMN_RENAMES)]
    .rename(columns=CCSR_COLUMN_RENAMES)
)

# Remove the wrapping quotes from the values of the kept text columns. The
# dtype check accepts both object columns and pandas' dedicated string dtype.
for column in LU_CCSR.columns:
  if pd.api.types.is_string_dtype(LU_CCSR[column].dtype):
    LU_CCSR[column] = LU_CCSR[column].str.strip("'")

LU_CCSR

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  LU_CCSR.rename(columns={'ICD-10-CM CODE':'ICD_CD', 'CCSR CATEGORY 1':'CCSR_CATEGORY_CD','CCSR CATEGORY 1 DESCRIPTION': 'CCSR_CATEGORY_DESCRIPTION'}, inplace=True)


Unnamed: 0,ICD_CD,CCSR_CATEGORY_CD,CCSR_CATEGORY_DESCRIPTION
0,A000,DIG001,Intestinal infection
1,A001,DIG001,Intestinal infection
2,A009,DIG001,Intestinal infection
3,A0100,DIG001,Intestinal infection
4,A0101,INF003,Bacterial infections
...,...,...,...
75233,Z9912,FAC012,Other specified encounters and counseling
75234,Z992,FAC025,Other specified status
75235,Z993,FAC025,Other specified status
75236,Z9981,FAC025,Other specified status


In [None]:
# Write the processed CCSR lookup to Drive, without the row index.
LU_CCSR.to_csv(
    path_or_buf='/content/drive/MyDrive/Data/Output/DXCCSR_v2025-1_processed.csv',
    index=False,
)