<a href="https://colab.research.google.com/github/david-c-brown/clinic_kpi_monthly/blob/main/cpt_analysis.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import pandas as pd
import numpy as np
import glob
import csv
from datetime import datetime

In [None]:
# Folder holding one export file per clinic (trailing slash required,
# because the glob pattern below is built by plain concatenation).
file_path = "path/to/folder/"

# Every entry in the folder is treated as a clinic export. Sorted so the
# rows of the output CSV come out in the same order on every run.
clinics = sorted(glob.glob(file_path + '*'))

# Maps the clinic code found in an export's file name to the display name
# used in the report.
clinic_mapping = {
    'CLNC1': 'Clinic 1',
    'CLINI2': 'Clinic 2',
    'Clinic3': 'Clinic 3',
    'Clinic_4': 'Clinic 4',
}

# The output file is stamped with the month and year of the run so that
# successive runs stay organized.
now = datetime.now()
month = now.strftime('%B').lower()
year = now.strftime('%Y')
filename = f'cpt_breakout_{month}_{year}.csv'

# Whitelist of columns kept from each clinic pivot: the provider column,
# the 'All' total column and the recognised CPT codes. There is a finite
# set of correct codes and an unbounded set of incorrect ones, so anything
# not listed here is dropped.
columns = ['PT', 'All', '97530', '97535', '97112', '97110', '97140', '97001',
           '97162', '97163', '97161', '97002', '97164', '98960', '97116', '97033',
           '97760', '97150', '97124', '97039', '97035', '97032', '97016', '97014',
           '97012', '97010', '95853', '95852', '95851', '95597', 'G0283', 'G0282',
           '20553', '97608', '92542', '95992', '29540', '97610', '64550', '29240',
           '29260', '29280', '29520', '29530', '29550', '29200', '29799', '20552',
           '97799', '20053', '97597', '97750', '97000', '97774', '97775', '97776',
           '97777', '97546', 'NC001']

In [None]:
def analyze_cpt(df):
    """Compute CPT billing KPIs for ``df`` and write them as one CSV row.

    ``df`` holds one row per visit with a numeric column per CPT code plus
    an ``'All'`` column carrying the row total. The row written is
    ``[clinic, provider, code mix, codes per eval, codes per non eval,
    re-eval per eval]``.

    The function reads three module-level names instead of taking them as
    arguments: ``writer`` (the ``csv`` writer), ``i`` (the clinic label)
    and ``g`` (the provider label). When ``g`` is not defined the row is
    labelled ``'Practice'``, i.e. it is the clinic-wide summary.

    Returns whatever ``writer.writerow`` returns.
    """
    # PT evaluation codes; 97164 is the re-evaluation code.
    eval_cols = ['97161', '97162', '97163']

    # Re-evaluations per evaluation: the mean, over the evaluation codes,
    # of (total 97164 units / total units of that code). Codes that were
    # never billed are left out of the mean; dividing by their zero total
    # would otherwise turn the whole figure into inf.
    code_totals = df[eval_cols + ['97164']].sum()
    billed_evals = code_totals[eval_cols]
    billed_evals = billed_evals[billed_evals > 0]
    if billed_evals.empty:
        reval = 0.0
    else:
        reval = float((code_totals['97164'] / billed_evals).mean())

    # Codes billed per evaluation visit. The `All <= 10` filter drops
    # total rows produced by a margins=True pivot.
    # NOTE(review): with exactly one evaluation visit the `> 1` guard
    # reports 0.0 -- confirm whether `> 0` was intended.
    eval_visits = df.loc[(df[eval_cols] == 1.0).any(axis=1) & (df['All'] <= 10)]
    if eval_visits.shape[0] > 1:
        total_eval = np.nansum(eval_visits['All']) / np.nansum(eval_visits[eval_cols])
    else:
        total_eval = 0.0

    # Codes billed per visit when no evaluation code is present. Falls
    # back to 0.0 (like the other metrics) when there are no such visits
    # instead of dividing by zero.
    non_eval_visits = df.loc[(df[eval_cols] == 0).all(axis=1)]
    if non_eval_visits.shape[0]:
        non_eval = non_eval_visits['All'].sum() / non_eval_visits.shape[0]
    else:
        non_eval = 0.0

    # High value billing code list.
    code_list = ['97530', '97535', '97112', '97001',
                 '97162', '97163', '97161', '97002',
                 '97164', '92542', '64550', '29280',
                 '29520', '29530', '29550', '29200',
                 '20552', '97799', '20053', '97750']

    # Share of all billed units that belong to the high value blend.
    total_units = df['All'].sum()
    code_mix = df[code_list].sum().sum() / total_units if total_units else 0.0

    # `g` only exists while the caller is looping over providers; its
    # absence marks the clinic-wide row. Only that lookup is guarded, so
    # genuine errors (bad data, a failed write) are no longer swallowed
    # and retried.
    try:
        provider = g
    except NameError:
        provider = 'Practice'

    return writer.writerow([i, provider, f'{code_mix:.0%}', f'{total_eval:.3}',
                            f'{non_eval:.3}', f'{reval:.0%}'])


In [None]:
def _clinic_label(path):
    """Return the report name for the clinic export file at ``path``."""
    # Strip the directory first, accepting both '/' and '\\' separators,
    # so an underscore in a folder name cannot truncate the clinic code
    # and the lookup also works on non-Windows paths.
    stem = path.replace('\\', '/').rsplit('/', 1)[-1]
    # Match whole mapping keys against the start of the file name; this
    # lets keys that themselves contain '_' (e.g. 'Clinic_4') be found.
    codes = [code for code in clinic_mapping
             if stem == code or stem.startswith(code + '_')]
    if codes:
        return clinic_mapping[max(codes, key=len)]
    # Unknown clinic: keep the external code (text before the first '_').
    return stem.split('_', 1)[0]


with open(filename, 'w', newline='') as file_object:
    # NOTE: `writer`, `i` and `g` are read as globals by analyze_cpt, so
    # these names must not be renamed here.
    writer = csv.writer(file_object)
    writer.writerow(['Clinic', 'PT','Code mix', 'Codes per eval', 'Codes per non eval', 'Re eval per Eval'])

    for i in clinics:
        # normally we would specify which file type, but files are inconsistently encoded
        df = pd.read_excel(i)

        # From here on `i` is the clinic's display name, not the path.
        i = _clinic_label(i)

        # Cleaning up columns with extraneous information.
        # NOTE(review): the '.' in '.KX' is an unescaped regex wildcard, so
        # any character followed by 'KX' is removed -- confirm intended.
        df['CPT'] = df['CPT'].replace({'GP:':'', ':59':'', '.KX':''}, regex=True)
        df['PT'] = df['PT'].replace({r'[A-Z]{2,3} \(': '', r'[A-Z]{2} \(': '', r'\)': ''}, regex=True)

        # One row per (provider, patient, date of service), one column per
        # CPT code holding the summed units; margins=True adds the 'All'
        # total row and column.
        pivot = np.round(pd.pivot_table(df, values='Units',
                                        index=['PT', 'Patient', 'Date of Service'],
                                        columns=['CPT'],
                                        aggfunc='sum',
                                        margins=True))

        # Keep only the whitelisted columns; codes absent from this clinic
        # become 0.
        clinic_df = pivot.reset_index().reindex(columns=columns).fillna(0)
        # This removes the total row while keeping the total column.
        clinic_df = clinic_df[clinic_df['PT'] != 'All']

        # Clinic-wide row: `g` is undefined at this point, which is what
        # makes analyze_cpt label the row 'Practice'.
        pt = clinic_df.PT.unique()
        analyze_cpt(clinic_df)

        # One row per individual provider.
        for g in pt:
            # NOTE(review): substring match -- a provider whose name is
            # contained in another provider's name also picks up that
            # provider's rows; confirm this is intended.
            pt_df = clinic_df.loc[(clinic_df['PT'].str.contains(g, regex=False))].copy()
            pt_df.loc[:, 'PT'] = g
            analyze_cpt(pt_df)
            # Unbind `g` so the next clinic's summary call sees it as
            # undefined again.
            del g