In [71]:
import pandas as pd

In [341]:
#import csv
df_rx = pd.read_csv('RXPORTE_20170426_114606.csv')
df_prod = pd.read_csv('Product_Export_20170426_114857.csv')

#format dataframes before merging
df_rx['rxid'] = df_rx['Rx Number']
df_rx['date'] = pd.to_datetime(df_rx['Date Written'], format='%m%d%Y')
# df_rx['ndc'] = pd.np.abs(df_rx['Dispensed NDC #'].astype(int))
df_rx['ndc'] = df_rx['Dispensed NDC #']
df_rx['qty'] = df_rx['Refill Qty'].astype(int)
df_rx['days_supply'] = pd.to_numeric(df_rx['Days Supply'],errors='coerce').astype(int)


df_prod['gcsn'] = df_prod['dg-generic-code-seq-nbr']
df_prod['pkg_size'] = df_prod['dg-pkg-size-units']
df_prod['units'] = df_prod['dg-dispensing-units']
df_prod['desc'] = df_prod['dg-description']
df_prod['gen_desc'] = df_prod['dg-description-generic']
df_prod['ppc'] = df_prod['dg-preferred-product-code'].fillna(0)
df_prod['ndc'] = df_prod['dg-ndc']
df_prod['qoh'] = pd.to_numeric(df_prod['dg-qty-on-hand'],errors='coerce').fillna(0).astype(int)
df_prod['reorder_point'] = df_prod["dg-reorder-point"]
df_prod['reorder_qoh_desired'] = df_prod["dg-qty-on-hand-desired"]
df_prod['reorder_qty'] = df_prod["dg-qty-reorder"]


#trim product DF and merge with rx DF
df_all = df_rx.merge(df_prod,how='left',on='ndc')

#trim to relevent columns
df_final = df_all.loc[:,['rxid','date','desc','ndc','qty','days_supply','gcsn','pkg_size','units','gen_desc','ppc','qoh']]
df_final.loc[:,'qty_per_day'] = df_final.loc[:,'qty']/df_final.loc[:,'days_supply']
df_final.dropna(subset=['gcsn','ndc'],inplace=True)

# Perform aggregation

In [256]:
def agg_sum(df,cols,val_col='qty',func=sum):
    return df.groupby(cols + ['date'])[val_col].apply(func).unstack(level=cols).sort_index().sort_index(axis=1)

## First question:

For each item on formulary, i.e. each gcsn with a ppc set to 1 for SOME NDC, what is the:
1. Current preffered product info
    - package size (pkg_size)
    - unit (units)
2. Information on that product/gcsn: 
    - Number of Rx dispensed
    - total units dispense
3. PER RX statistic:
    - max qty
    - mean qty
    - quartiles (25%, 50%, 75%)
    - max qty/days_supply
    - mean qty/days_supply
3. PER CLINIC statistics:
    - Number of Rx dispensed
    - max qty
    - mean qty
    - quartiles (25%, 50%, 75%)

## 1. Prefferred Product info

In [322]:
df_ppc = df_prod.loc[df_prod.groupby('gcsn')['ppc'].idxmax()]
is_ppc_reorder  = ~(pd.isnull(df_prod["dg-qty-on-hand-desired"]) & pd.isnull(df_prod["dg-qty-reorder"])) & (df_prod["ppc"] == 1)
df_ppc = df_ppc.loc[is_ppc_reorder]
df_ppc = df_ppc[['ndc','gcsn','desc','pkg_size','units','reorder_point','reorder_qty','reorder_qoh_desired']]
df_ppc.set_index('gcsn', inplace=True)
df_ppc.columns = pd.MultiIndex.from_tuples([('pp_info',col) for col in df_ppc.columns])
print df_ppc.shape
df_ppc.head()

(254, 4)


Unnamed: 0_level_0,pp_info,pp_info,pp_info,pp_info
Unnamed: 0_level_1,ndc,desc,pkg_size,units
gcsn,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2
266.0,68382020000.0,AMIODARONE HCL 200 MG TABLEZYD,500,TA
287.0,50111030000.0,HYDRALAZINE 50 MG TABLET PLI,100,TA
346.0,228212800.0,CLONIDINE HCL 0.1 MG TABLETACT,500,TA
390.0,185061000.0,LISINOPRIL 10 MG TABLET SAN,1000,TA
391.0,185062000.0,LISINOPRIL 20 MG TABLET SAN,1000,TA


## 2. Information on GCSN

In [330]:
cnt = df_final.groupby('gcsn').date.count()
cnt.name = 'rx_count'
cnt=cnt.to_frame()
total = df_final.groupby('gcsn').qty.sum()
total.name = 'tot_units'
total=total.to_frame()
qoh = df_prod.groupby('gcsn').qoh.sum()
qoh.name = 'qoh'
qoh=qoh.to_frame()
df_gcsn_agg = cnt.join(total,how='outer')
df_gcsn_agg = df_gcsn_agg.join(qoh,how='outer').fillna(0).sort_values('rx_count')

df_gcsn_agg.columns = pd.MultiIndex.from_tuples([('gcsn_totals',col) for col in df_gcsn_agg.columns])
df_gcsn_agg.head()

Unnamed: 0_level_0,gcsn_totals,gcsn_totals,gcsn_totals
Unnamed: 0_level_1,rx_count,tot_units,qoh
gcsn,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2
23725.0,0.0,0.0,2
7284.0,0.0,0.0,15
57803.0,0.0,0.0,0
57802.0,0.0,0.0,0
33515.0,0.0,0.0,0


## 3. Per RX statistics
- max qty
- mean qty
- quartiles (25%, 50%, 75%)
- max qty/days_supply
- mean qty/days_supply

In [338]:
df_per_rx = df_final.groupby(['gcsn'])['qty','qty_per_day'].describe().unstack(-1)
to_keep = [('qty','mean'),('qty','25%'),('qty','50%'),('qty','75%'),('qty','max'),('qty_per_day','mean'),('qty_per_day','max')]
df_per_rx = df_per_rx[to_keep]
df_per_rx.columns.set_levels(levels=['units_per_rx','rx_units_per_day'], level=0,inplace=True)
df_per_rx.head()

Unnamed: 0_level_0,units_per_rx,units_per_rx,units_per_rx,units_per_rx,units_per_rx,rx_units_per_day,rx_units_per_day
Unnamed: 0_level_1,mean,25%,50%,75%,max,mean,max
gcsn,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2
266.0,60.0,30.0,60.0,90.0,90.0,1.0,1.0
346.0,180.0,180.0,180.0,180.0,180.0,2.0,2.0
390.0,71.029412,54.75,90.0,90.0,200.0,1.014706,2.0
391.0,77.941176,49.25,90.0,90.0,180.0,1.103268,2.0
392.0,73.126761,60.0,90.0,90.0,150.0,1.0,1.0


## 4. Per CLINIC statistics

- Number of Rx dispensed
- max qty
- mean qty
- quartiles (25%, 50%, 75%)

In [332]:
df_pc_cnt = df_final.groupby(['gcsn','date'])['qty'].count().unstack(-1).T.describe().T[['mean','max']]
df_pc_cnt.columns = ['rx_cnt_mean','rx_cnt_max']
df_pc_cnt.head()

Unnamed: 0_level_0,rx_cnt_mean,rx_cnt_max
gcsn,Unnamed: 1_level_1,Unnamed: 2_level_1
266.0,1.333333,2.0
346.0,1.0,1.0
390.0,1.307692,3.0
391.0,1.837838,6.0
392.0,1.690476,5.0


In [333]:
df_pc_rx = df_final.groupby(['gcsn','date'])['qty'].sum().unstack(-1).T.describe().T
df_pc_rx.drop('count',inplace=True,axis=1)
df_pc_rx.head()

Unnamed: 0_level_0,mean,std,min,25%,50%,75%,max
gcsn,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
266.0,80.0,17.320508,60.0,75.0,90.0,90.0,90.0
346.0,180.0,0.0,180.0,180.0,180.0,180.0,180.0
390.0,92.884615,56.782622,7.0,60.0,90.0,90.0,260.0
391.0,143.243243,80.066433,7.0,90.0,120.0,180.0,300.0
392.0,123.619048,75.776936,7.0,90.0,90.0,180.0,280.0


In [334]:
df_per_clinic = df_pc_rx.join(df_pc_cnt)
df_per_clinic.columns = pd.MultiIndex.from_tuples([('per_clinic',col) for col in df_per_clinic.columns])
df_per_clinic.head()

Unnamed: 0_level_0,per_clinic,per_clinic,per_clinic,per_clinic,per_clinic,per_clinic,per_clinic,per_clinic,per_clinic
Unnamed: 0_level_1,mean,std,min,25%,50%,75%,max,rx_cnt_mean,rx_cnt_max
gcsn,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2
266.0,80.0,17.320508,60.0,75.0,90.0,90.0,90.0,1.333333,2.0
346.0,180.0,0.0,180.0,180.0,180.0,180.0,180.0,1.0,1.0
390.0,92.884615,56.782622,7.0,60.0,90.0,90.0,260.0,1.307692,3.0
391.0,143.243243,80.066433,7.0,90.0,120.0,180.0,300.0,1.837838,6.0
392.0,123.619048,75.776936,7.0,90.0,90.0,180.0,280.0,1.690476,5.0


In [339]:
df_rx_info = df_ppc
df_rx_info = df_rx_info.join(df_gcsn_agg,how='left')
df_rx_info = df_rx_info.join(df_per_rx,how='left')
df_rx_info = df_rx_info.join(df_per_clinic,how='left')
df_rx_info.head()

Unnamed: 0,gcsn,266.0,287.0,346.0,390.0,391.0
pp_info,ndc,6.8382e+10,5.0111e+10,2.28213e+08,1.85061e+08,1.85062e+08
pp_info,desc,AMIODARONE HCL 200 MG TABLEZYD,HYDRALAZINE 50 MG TABLET PLI,CLONIDINE HCL 0.1 MG TABLETACT,LISINOPRIL 10 MG TABLET SAN,LISINOPRIL 20 MG TABLET SAN
pp_info,pkg_size,500,100,500,1000,1000
pp_info,units,TA,TA,TA,TA,TA
gcsn_totals,rx_count,4,0,2,34,68
gcsn_totals,tot_units,240,0,360,2415,5300
gcsn_totals,qoh,90,500,160,2410,3217
units_per_rx,mean,60,,180,71.0294,77.9412
units_per_rx,25%,30,,180,54.75,49.25
units_per_rx,50%,60,,180,90,90


In [340]:
df_rx_info.to_excel

Unnamed: 0_level_0,pp_info,pp_info,pp_info,pp_info,gcsn_totals,gcsn_totals,gcsn_totals,units_per_rx,units_per_rx,units_per_rx,...,rx_units_per_day,per_clinic,per_clinic,per_clinic,per_clinic,per_clinic,per_clinic,per_clinic,per_clinic,per_clinic
Unnamed: 0_level_1,ndc,desc,pkg_size,units,rx_count,tot_units,qoh,mean,25%,50%,...,max,mean,std,min,25%,50%,75%,max,rx_cnt_mean,rx_cnt_max
gcsn,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2,Unnamed: 12_level_2,Unnamed: 13_level_2,Unnamed: 14_level_2,Unnamed: 15_level_2,Unnamed: 16_level_2,Unnamed: 17_level_2,Unnamed: 18_level_2,Unnamed: 19_level_2,Unnamed: 20_level_2,Unnamed: 21_level_2
266.0,6.838202e+10,AMIODARONE HCL 200 MG TABLEZYD,500,TA,4.0,240.0,90,60.000000,30.00,60.0,...,1.000000,80.000000,17.320508,60.0,75.00,90.0,90.00,90.0,1.333333,2.0
287.0,5.011103e+10,HYDRALAZINE 50 MG TABLET PLI,100,TA,0.0,0.0,500,,,,...,,,,,,,,,,
346.0,2.282128e+08,CLONIDINE HCL 0.1 MG TABLETACT,500,TA,2.0,360.0,160,180.000000,180.00,180.0,...,2.000000,180.000000,0.000000,180.0,180.00,180.0,180.00,180.0,1.000000,1.0
390.0,1.850610e+08,LISINOPRIL 10 MG TABLET SAN,1000,TA,34.0,2415.0,2410,71.029412,54.75,90.0,...,2.000000,92.884615,56.782622,7.0,60.00,90.0,90.00,260.0,1.307692,3.0
391.0,1.850620e+08,LISINOPRIL 20 MG TABLET SAN,1000,TA,68.0,5300.0,3217,77.941176,49.25,90.0,...,2.000000,143.243243,80.066433,7.0,90.00,120.0,180.00,300.0,1.837838,6.0
392.0,3.782076e+08,LISINOPRIL 40 MG TABLET MYL,500,TA,71.0,5192.0,749,73.126761,60.00,90.0,...,1.000000,123.619048,75.776936,7.0,90.00,90.0,180.00,280.0,1.690476,5.0
393.0,1.855400e+08,LISINOPRIL 5 MG TABLET SAN,1000,TA,31.0,2406.0,1577,77.612903,30.00,90.0,...,3.000000,150.375000,169.712256,15.0,60.00,90.0,180.00,690.0,1.937500,7.0
475.0,4.359804e+10,NITROGLYCERIN 0.4 MG TABLETDR.,100,TA,13.0,1300.0,400,100.000000,100.00,100.0,...,3.333333,130.000000,67.494856,100.0,100.00,100.0,100.00,300.0,1.300000,3.0
564.0,5.910345e+08,VERAPAMIL 120 MG TABLET WAT,500,TA,0.0,0.0,685,,,,...,,,,,,,,,,
566.0,5.910343e+08,VERAPAMIL 80 MG TABLET WAT,1000,TA,0.0,0.0,100,,,,...,,,,,,,,,,
