In [59]:
import polars as pl

In [104]:
# Load the sample CSV: skip its first line and treat "N/A" cells as null.
df = pl.read_csv(
    '170040_sample.csv',
    skip_rows=1,
    null_values=["N/A"],
)

In [105]:
# Group the column names by their 'IP' / 'OP' prefix.
ip_cols = [name for name in df.columns if name.startswith('IP')]
op_cols = [name for name in df.columns if name.startswith('OP')]

In [106]:
# Show the column names that start with 'IP'.
ip_cols

['IPPricingDetail',
 'IPPrice',
 'IPDiscountedCashPrice',
 'IPExpectedReimbursement',
 'IPNegotiatedMaximumPrice',
 'IPNegotiatedMinimumPrice']

In [107]:
# Show the column names that start with 'OP'.
op_cols

['OPPrice',
 'OPDiscountedCashPrice',
 'OPExpectedReimbursement',
 'OPNegotiatedMaximumPrice',
 'OPNegotiatedMinimumPrice']

Here's something we can do right away: after a quick look at the data, map `BillType` to a billing class.

In [108]:
# Preview five random rows; the fixed seed keeps the preview identical across re-runs.
df.sample(5, seed=0)

Procedure,CodeType,Code,RevCode,NDC,ProcedureDescription,Payer,Plan(s),IPPricingDetail,Quantity,IPPrice,IPDiscountedCashPrice,IPExpectedReimbursement,IPNegotiatedMaximumPrice,IPNegotiatedMinimumPrice,OPPrice,OPDiscountedCashPrice,OPExpectedReimbursement,OPNegotiatedMaximumPrice,OPNegotiatedMinimumPrice,BillType
str,str,str,str,str,str,str,str,str,i64,f64,f64,str,str,str,f64,f64,str,f64,f64,str
"""10061""","""CPT""","""10061""","""0450 - EMERGEN...",,"""HC 10061 I & D...","""CENTURY HEALTH...","""CENTURY HEALTH...",,1,0.0,0.0,,,,1010.0,202.0,"""27,959.07""",31453.98,132.01,"""HB"""
"""11421""","""CPT""","""11421""","""0761 - TREATME...",,"""EXC B9 LESION ...","""MEDICA MEDICAR...","""MEDICA PRIME S...",,1,0.0,0.0,,,,4115.98,823.196,,,,"""HB"""
"""11406""","""CPT""","""11406""","""0761 - TREATME...",,"""EXC B9 LESION ...","""COVENTRY [5032...","""COVENTRY ONE E...",,1,0.0,0.0,,,,31241.33,6248.266,"""7,527.50""",33283.52,482.72,"""HB"""
"""11402""","""CPT""","""11402""","""0761 - TREATME...",,"""Hp Exc, Ben Le...","""GALLAGHER BASS...","""GALLAGHER BASS...",,1,0.0,0.0,,,,4046.0,809.2,,,,"""HB"""
"""HCORR32720001""","""EAP""",""" C1750""","""0270 - MEDICAL...",,"""HC 70132 Cresc...","""VIA CHRISTI HO...","""VIA CHRISTI HO...","""FSC: 50401; BF...",1,21375.0,4275.0,,,,21375.0,4275.0,"""14,962.50""",19237.5,650.0,"""HB"""


In [109]:
# Derive a billing-class label from BillType:
#   'HB' -> 'facility', 'PB' -> 'professional', anything else -> null.
# The labels are wrapped in pl.lit() because current polars parses a bare
# string passed to then() as a column name, not as a string literal.
# NOTE: the output column keeps its existing spelling 'blling_class' (sic)
# because later cells select the column by that exact name.
df = df.with_columns([
    pl.when(pl.col('BillType') == 'HB').then(pl.lit('facility'))
    .when(pl.col('BillType') == 'PB').then(pl.lit('professional'))
    .alias('blling_class')
])

In [110]:
# Preview five random rows to check the new billing-class column;
# the fixed seed keeps the preview identical across re-runs.
df.sample(5, seed=0)

Procedure,CodeType,Code,RevCode,NDC,ProcedureDescription,Payer,Plan(s),IPPricingDetail,Quantity,IPPrice,IPDiscountedCashPrice,IPExpectedReimbursement,IPNegotiatedMaximumPrice,IPNegotiatedMinimumPrice,OPPrice,OPDiscountedCashPrice,OPExpectedReimbursement,OPNegotiatedMaximumPrice,OPNegotiatedMinimumPrice,BillType,blling_class
str,str,str,str,str,str,str,str,str,i64,f64,f64,str,str,str,f64,f64,str,f64,f64,str,str
"""10180""","""CPT""","""10180""","""0761 - TREATME...",,"""INCISION and D...","""PHP [503200005...","""FREEDOM NETWOR...",,1,0.0,0.0,,,,17364.07,3472.814,"""15,092.92""",18361.42,2330.89,"""HB""","""facility"""
"""11401""","""CPT""","""11401""","""0761 - TREATME...",,"""Hp Exc, Ben Le...","""HEALTH NET FED...","""TRICARE [50320...",,1,0.0,0.0,,,,4097.0,819.4,,,,"""HB""","""facility"""
"""11406""","""CPT""","""11406""","""0761 - TREATME...",,"""EXC B9 LESION ...","""AMERICAN HEALT...","""KANSAS HEALTH ...",,1,0.0,0.0,,,,31241.33,6248.266,"""1,452.35""",33283.52,482.72,"""HB""","""facility"""
"""10007""","""CPT""","""10007""",,,"""PR FINE NEEDLE...","""HUMANA MEDICAR...","""HUMANA COMMUNI...",,1,0.0,0.0,,,,39669.51,7933.902,,,,"""HB""","""facility"""
"""10160""","""CPT""","""10160""","""0361 - OPERATI...",,"""HC Fluid Asp (...","""AETNA [5032000...","""AETNA HMO/SELE...",,1,0.0,0.0,,,,4413.0,882.6,"""3,381.00""",3972.6,158.33,"""HB""","""facility"""


We can also extract the revenue code easily:

In [145]:
# Pull the four-digit revenue code out of RevCode values shaped like
# '0270 - MEDICAL...'; the captured digits are stored in 'rev_code'.
# A raw string is used so that \d and \s reach the regex engine untouched
# instead of being treated as (invalid) Python string escapes.
df = df.with_columns([
    pl.col('RevCode').str.extract(r'(\d{4})\s-').alias('rev_code')
])

Let's work with just the inpatient (IP) columns.

In [146]:
# Inpatient view: keep every column of df whose name is not in op_cols.
df_ip = df.select(
    [name for name in df.columns if name not in op_cols]
)

In [147]:
# Tag every row of the inpatient frame with the constant patient class 'inpatient'.
df_ip = df_ip.with_columns(
    pl.lit('inpatient').alias('patient_class')
)

In [148]:
# Show the first five rows of the inpatient frame.
df_ip.head(n=5)

Procedure,CodeType,Code,RevCode,NDC,ProcedureDescription,Payer,Plan(s),IPPricingDetail,Quantity,IPPrice,IPDiscountedCashPrice,IPExpectedReimbursement,IPNegotiatedMaximumPrice,IPNegotiatedMinimumPrice,BillType,blling_class,rev_code,patient_class
str,str,str,str,str,str,str,str,str,i64,f64,f64,str,str,str,str,str,str,str
"""HCORR32720001""","""EAP""",""" C1750""","""0270 - MEDICAL...",,"""HC 70132 Cresc...","""BCBS KC [50320...","""BCBS KC BLUE C...","""FSC: 50401; BF...",1,21375.0,4275.0,,,,"""HB""","""facility""","""0270""","""inpatient"""
"""HCORR32720001""","""EAP""",""" C1750""","""0270 - MEDICAL...",,"""HC 70132 Cresc...","""FIRST HEALTH [...","""FIRST HEALTH P...","""FSC: 50401; BF...",1,21375.0,4275.0,,,,"""HB""","""facility""","""0270""","""inpatient"""
"""HCORR32720001""","""EAP""",""" C1750""","""0270 - MEDICAL...",,"""HC 70132 Cresc...","""HUMANA MEDICAR...","""HUMANA COMMUNI...","""FSC: 50401; BF...",1,21375.0,4275.0,,,,"""HB""","""facility""","""0270""","""inpatient"""
"""HCORR32720001""","""EAP""",""" C1750""","""0270 - MEDICAL...",,"""HC 70132 Cresc...","""POINT COMFORT ...","""POINT COMFORT ...","""FSC: 50401; BF...",1,21375.0,4275.0,,,,"""HB""","""facility""","""0270""","""inpatient"""
"""HCORR32720001""","""EAP""",""" C1750""","""0270 - MEDICAL...",,"""HC 70132 Cresc...","""CIGNA [5032009...","""NFL DEDICATED ...","""FSC: 50401; BF...",1,21375.0,4275.0,,,,"""HB""","""facility""","""0270""","""inpatient"""


Now we need to split up the cash, gross, and min/max rates from the payer rates.

In [149]:
# List the inpatient frame's column names before splitting it up.
df_ip.columns

['Procedure',
 'CodeType',
 'Code',
 'RevCode',
 'NDC',
 'ProcedureDescription',
 'Payer',
 'Plan(s)',
 'IPPricingDetail',
 'Quantity',
 'IPPrice',
 'IPDiscountedCashPrice',
 'IPExpectedReimbursement',
 'IPNegotiatedMaximumPrice',
 'IPNegotiatedMinimumPrice',
 'BillType',
 'blling_class',
 'rev_code',
 'patient_class']

In [150]:
# Payer-level slice of the inpatient frame: the descriptive columns plus
# 'Payer', 'Plan(s)' and 'IPExpectedReimbursement'. The IPPrice,
# IPDiscountedCashPrice and negotiated min/max columns are not selected.
df_ip_payer = df_ip.select([
    'Procedure', 'CodeType', 'Code', 'RevCode', 'NDC',
    'ProcedureDescription', 'IPPricingDetail',
    'Payer', 'Plan(s)',
    'Quantity', 'IPExpectedReimbursement',
    'BillType', 'blling_class', 'rev_code', 'patient_class',
])

In [151]:
# Keep only rows that carry an expected reimbursement and expose that column
# under the generic name 'rate'. The result is only displayed; it is not
# assigned back to df_ip_payer.
df_ip_payer.filter(
    pl.col('IPExpectedReimbursement').is_not_null()
).rename({'IPExpectedReimbursement': 'rate'})

Procedure,CodeType,Code,RevCode,NDC,ProcedureDescription,IPPricingDetail,Payer,Plan(s),Quantity,rate,BillType,blling_class,rev_code,patient_class
str,str,str,str,str,str,str,str,str,i64,str,str,str,str,str


In [160]:
# Non-payer slice of the inpatient frame: the descriptive columns plus
# IPPrice, IPDiscountedCashPrice and the negotiated max/min price columns.
# 'Payer', 'Plan(s)' and 'IPExpectedReimbursement' are intentionally left out.
# NOTE(review): with the payer columns dropped, rows that differed only by
# payer may now be exact duplicates — confirm whether de-duplication is wanted.
df_ip_other = df_ip.select([
    'Procedure', 'CodeType', 'Code', 'RevCode', 'NDC',
    'ProcedureDescription', 'IPPricingDetail', 'Quantity',
    'IPPrice', 'IPDiscountedCashPrice',
    'IPNegotiatedMaximumPrice', 'IPNegotiatedMinimumPrice',
    'BillType', 'blling_class', 'rev_code', 'patient_class',
])

In [161]:
# Show the first five rows of the non-payer slice.
df_ip_other.head(n=5)

Procedure,CodeType,Code,RevCode,NDC,ProcedureDescription,IPPricingDetail,Quantity,IPPrice,IPDiscountedCashPrice,IPNegotiatedMaximumPrice,IPNegotiatedMinimumPrice,BillType,blling_class,rev_code,patient_class
str,str,str,str,str,str,str,i64,f64,f64,str,str,str,str,str,str
"""HCORR32720001""","""EAP""",""" C1750""","""0270 - MEDICAL...",,"""HC 70132 Cresc...","""FSC: 50401; BF...",1,21375.0,4275.0,,,"""HB""","""facility""","""0270""","""inpatient"""
"""HCORR32720001""","""EAP""",""" C1750""","""0270 - MEDICAL...",,"""HC 70132 Cresc...","""FSC: 50401; BF...",1,21375.0,4275.0,,,"""HB""","""facility""","""0270""","""inpatient"""
"""HCORR32720001""","""EAP""",""" C1750""","""0270 - MEDICAL...",,"""HC 70132 Cresc...","""FSC: 50401; BF...",1,21375.0,4275.0,,,"""HB""","""facility""","""0270""","""inpatient"""
"""HCORR32720001""","""EAP""",""" C1750""","""0270 - MEDICAL...",,"""HC 70132 Cresc...","""FSC: 50401; BF...",1,21375.0,4275.0,,,"""HB""","""facility""","""0270""","""inpatient"""
"""HCORR32720001""","""EAP""",""" C1750""","""0270 - MEDICAL...",,"""HC 70132 Cresc...","""FSC: 50401; BF...",1,21375.0,4275.0,,,"""HB""","""facility""","""0270""","""inpatient"""


In [162]:
# Reshape wide -> long: each of the four price columns becomes its own row,
# with the source column name in 'payer_name' and its value in 'rate'.
# Every other column is carried along as an identifier.
value_vars = [
    'IPPrice',
    'IPDiscountedCashPrice',
    'IPNegotiatedMaximumPrice',
    'IPNegotiatedMinimumPrice',
]
df_ip_other = df_ip_other.melt(
    id_vars=[name for name in df_ip_other.columns if name not in value_vars],
    value_vars=value_vars,
    variable_name='payer_name',
    value_name='rate',
)


In [163]:
# Show the first five rows of the melted (long-format) frame.
df_ip_other.head(n=5)

Procedure,CodeType,Code,RevCode,NDC,ProcedureDescription,IPPricingDetail,Quantity,BillType,blling_class,rev_code,patient_class,payer_name,rate
str,str,str,str,str,str,str,i64,str,str,str,str,str,str
"""HCORR32720001""","""EAP""",""" C1750""","""0270 - MEDICAL...",,"""HC 70132 Cresc...","""FSC: 50401; BF...",1,"""HB""","""facility""","""0270""","""inpatient""","""IPPrice""","""21375.0"""
"""HCORR32720001""","""EAP""",""" C1750""","""0270 - MEDICAL...",,"""HC 70132 Cresc...","""FSC: 50401; BF...",1,"""HB""","""facility""","""0270""","""inpatient""","""IPPrice""","""21375.0"""
"""HCORR32720001""","""EAP""",""" C1750""","""0270 - MEDICAL...",,"""HC 70132 Cresc...","""FSC: 50401; BF...",1,"""HB""","""facility""","""0270""","""inpatient""","""IPPrice""","""21375.0"""
"""HCORR32720001""","""EAP""",""" C1750""","""0270 - MEDICAL...",,"""HC 70132 Cresc...","""FSC: 50401; BF...",1,"""HB""","""facility""","""0270""","""inpatient""","""IPPrice""","""21375.0"""
"""HCORR32720001""","""EAP""",""" C1750""","""0270 - MEDICAL...",,"""HC 70132 Cresc...","""FSC: 50401; BF...",1,"""HB""","""facility""","""0270""","""inpatient""","""IPPrice""","""21375.0"""


In [164]:
# Translate the melted source-column name into a short rate category:
#   IPPrice                  -> 'gross'
#   IPDiscountedCashPrice    -> 'cash'
#   IPNegotiatedMaximumPrice -> 'max'
#   IPNegotiatedMinimumPrice -> 'min'
# Any other payer_name value yields null.
# The labels are wrapped in pl.lit() because current polars parses a bare
# string passed to then() as a column name, not as a string literal; the
# branches are chained instead of nested inside otherwise() for readability.
df_ip_other = df_ip_other.with_columns([
    pl.when(pl.col('payer_name') == "IPPrice").then(pl.lit('gross'))
    .when(pl.col('payer_name') == "IPDiscountedCashPrice").then(pl.lit('cash'))
    .when(pl.col('payer_name') == "IPNegotiatedMaximumPrice").then(pl.lit('max'))
    .when(pl.col('payer_name') == "IPNegotiatedMinimumPrice").then(pl.lit('min'))
    .alias('payer_category')
])

In [165]:
# Preview five random rows to check the new payer_category column;
# the fixed seed keeps the preview identical across re-runs.
df_ip_other.sample(5, seed=0)

Procedure,CodeType,Code,RevCode,NDC,ProcedureDescription,IPPricingDetail,Quantity,BillType,blling_class,rev_code,patient_class,payer_name,rate,payer_category
str,str,str,str,str,str,str,i64,str,str,str,str,str,str,str
"""11012""","""CPT""","""11012""",,,"""Dbrdmt fxand/d...",,1,"""HB""","""facility""",,"""inpatient""","""IPPrice""","""0.0""","""gross"""
"""0398T""","""CPT""","""0398T""",,,"""Mrgfus stereot...",,1,"""HB""","""facility""",,"""inpatient""","""IPDiscountedCa...","""0.0""","""cash"""
"""11401""","""CPT""","""11401""","""0761 - TREATME...",,"""Hp Exc, Ben Le...",,1,"""HB""","""facility""","""0761""","""inpatient""","""IPDiscountedCa...","""0.0""","""cash"""
"""HCORR32720001""","""EAP""",""" C1750""","""0270 - MEDICAL...",,"""HC 70132 Cresc...","""FSC: 50401; BF...",1,"""HB""","""facility""","""0270""","""inpatient""","""IPNegotiatedMa...",,"""max"""
"""HCORR32720001""","""EAP""",""" C1750""","""0270 - MEDICAL...",,"""HC 70132 Cresc...","""FSC: 50401; BF...",1,"""HB""","""facility""","""0270""","""inpatient""","""IPNegotiatedMi...",,"""min"""


In [168]:
# Discard melted rows that have no rate value.
df_ip_other = df_ip_other.filter(~pl.col('rate').is_null())

In [169]:
# Preview five random rows after the null rates were removed;
# the fixed seed keeps the preview identical across re-runs.
df_ip_other.sample(5, seed=0)

Procedure,CodeType,Code,RevCode,NDC,ProcedureDescription,IPPricingDetail,Quantity,BillType,blling_class,rev_code,patient_class,payer_name,rate,payer_category
str,str,str,str,str,str,str,i64,str,str,str,str,str,str,str
"""11406""","""CPT""","""11406""","""0761 - TREATME...",,"""EXC B9 LESION ...",,1,"""HB""","""facility""","""0761""","""inpatient""","""IPPrice""","""0.0""","""gross"""
"""11012""","""CPT""","""11012""",,,"""Dbrdmt fxand/d...",,1,"""HB""","""facility""",,"""inpatient""","""IPDiscountedCa...","""0.0""","""cash"""
"""11403""","""CPT""","""11403""","""0761 - TREATME...",,"""Hp Exc, Ben Le...",,1,"""HB""","""facility""","""0761""","""inpatient""","""IPPrice""","""0.0""","""gross"""
"""11043""","""CPT""","""11043""","""0361 - OPERATI...",,"""DEBRIDEMENT MU...",,1,"""HB""","""facility""","""0361""","""inpatient""","""IPDiscountedCa...","""0.0""","""cash"""
"""10160""","""CPT""","""10160""","""0361 - OPERATI...",,"""HC Fluid Asp (...",,1,"""HB""","""facility""","""0361""","""inpatient""","""IPDiscountedCa...","""0.0""","""cash"""
