In [376]:
import pandas as pd
import numpy as np

### Set Period

In [377]:
curr_month = (pd.to_datetime(('2024-03-01'))).strftime('%Y-%m-%d')
prior_month = (pd.to_datetime(curr_month) - pd.DateOffset(months=12)).strftime('%Y-%m-%d')
stop_date = (pd.to_datetime(curr_month) + pd.DateOffset(months=1) - pd.DateOffset(days=1)).strftime('%Y-%m-%d')
start_date = (pd.to_datetime(stop_date) - pd.DateOffset(months=12) + pd.DateOffset(days=1) ).strftime('%Y-%m-%d')
start_date_prior = (pd.to_datetime(start_date) - pd.DateOffset(months=12)).strftime('%Y-%m-%d')
stop_date_prior = (pd.to_datetime(stop_date) - pd.DateOffset(months=12)).strftime('%Y-%m-%d')

In [378]:
claims_current = pd.read_csv(f'Claims Data/claims_{curr_month.replace("-", "_")}.csv')
claims_prior = pd.read_csv(f'Claims Data/claims_{prior_month.replace("-", "_")}.csv')
elig_current = pd.read_csv(f'Elig Data/elig_{curr_month.replace("-", "_")}.csv')
elig_prior = pd.read_csv(f'Elig Data/elig_{prior_month.replace("-", "_")}.csv')
monthsdata = pd.read_csv(f'Elig Data/MonthsData_{curr_month.replace("-", "_")}.csv')

df_current = claims_current.copy()
df_prior = claims_prior.copy()
mm_current = elig_current.copy()
mm_prior = elig_prior.copy()
monthsdata = monthsdata.copy()

In [379]:
# Categorize Others
# Categorizes specific groups into an "Others" category
Others = ['Null',
          'Symptoms, signs and abnormal clinical and laboratory findings, not elsewhere classified',
          'Factors influencing health status and contact with health services',
          'Diseases of the skin and subcutaneous tissue',
          'External causes of morbidity',
          'Diseases of the ear and mastoid process',
          'Diseases of the eye and adnexa']
df_current.loc[df_current['group'].isin(Others), 'group'] = 'Others'
df_prior.loc[df_prior['group'].isin(Others), 'group'] = 'Others'

In [380]:
# Data Preprocessing
df_mm = pd.concat([mm_prior, mm_current])
df_events = pd.concat([df_prior, df_current])

df_mm['year'] = df_mm['year'].astype(str)
df_events['year'] = df_events['year'].astype(str)
df_mm = df_mm.rename(columns={'total_member_count': 'total_member_count'})

# Indexing
df_events['index'] = df_events['year'] + "-" + df_events['class']
df_events['index0'] = df_events['class'] + "-" + df_events['group']
df_events['index1'] = df_events['class'] + "-" + df_events['group'] + "-" + df_events['service_type']
df_mm['index'] = df_mm['year'] + "-" + df_mm['tenantid']

monthsdata.rename(columns={'division': 'tenantid'}, inplace=True)


In [381]:
# CREATION OF COMPARISON TABLES
df_current.rename(columns={'tag_tpa': 'carrier', 'tag_exchange':'exchange'}, inplace=True)
mm_current.loc[:, 'carrier'] = mm_current['carrier'].replace({'BCBS FL': 'bcbsfl', 'BCBS NC': 'bcbsnc'})
mm_current['carrier'] = mm_current['carrier'].str.lower()
monthsdata.loc[:, 'carrier'] = monthsdata['carrier'].replace({'BCBS FL': 'bcbsfl', 'BCBS NC': 'bcbsnc'})
monthsdata['carrier'] = monthsdata['carrier'].str.lower()
pmpm_agg_c = pd.merge(df_current, mm_current, how='inner', on=['tenantid', 'carrier', 'exchange', 'year', 'start_date', 'stop_date'])
pmpm_agg_c = pd.merge(pmpm_agg_c, monthsdata, how='inner', on=['tenantid','carrier', 'exchange'])
pmpm_agg_c = pmpm_agg_c[pmpm_agg_c['MaxMonth'] >= pmpm_agg_c['curr_month']]
pmpm_agg_c = pmpm_agg_c[pmpm_agg_c['months'] >= 30]


df_current_final = pmpm_agg_c

df_prior.rename(columns={'tag_tpa': 'carrier', 'tag_exchange':'exchange'}, inplace=True)
mm_prior.loc[:, 'carrier'] = mm_prior['carrier'].replace({'BCBS FL': 'bcbsfl', 'BCBS NC': 'bcbsnc'})
mm_prior['carrier'] = mm_prior['carrier'].str.lower()
tenantlist = pmpm_agg_c['tenantid'].unique()
pmpm_agg_p = tenantlist.tolist()
pmpm_agg_p = pd.DataFrame(pmpm_agg_p, columns=['tenantid'])
pmpm_agg_p = pd.merge(pmpm_agg_p, df_prior, how='inner', on=['tenantid'])
pmpm_agg_p = pd.merge(pmpm_agg_p, mm_prior, how='inner', on=['tenantid', 'carrier', 'exchange', 'year', 'start_date', 'stop_date'])
pmpm_agg_p = pd.merge(pmpm_agg_p, monthsdata, how='inner', on=['tenantid','carrier', 'exchange'])


# df_prior_final = pmpm_agg_p

pmpm_agg_c

Unnamed: 0,tenantid,carrier,exchange,class,group,service_type,counts,allowed,paid,claimants,...,stop_employee_count,mm_months,ee_months,total_rate,paying_rate,admin_expense,pooling_expense,other_expense,months,MaxMonth
0,100F-New York City Industrial T,aetna,TriNet III,Acute,acute_NonER,Office Visit,24,4756.85,3856.59,5,...,6,169,70,229120.60,228733.00,4647.16,1604.10,412.22,37,2024-03-01
1,100F-New York City Industrial T,aetna,TriNet III,Acute,acute_NonER,Outpatient,9,741.76,495.14,5,...,6,169,70,229120.60,228733.00,4647.16,1604.10,412.22,37,2024-03-01
2,100F-New York City Industrial T,aetna,TriNet III,Acute,acute_NonER,Rx,26,1008.79,855.06,5,...,6,169,70,229120.60,228733.00,4647.16,1604.10,412.22,37,2024-03-01
3,100F-New York City Industrial T,aetna,TriNet III,Chronic Condition,CKD complex,Office Visit,12,1624.76,1102.83,1,...,6,169,70,229120.60,228733.00,4647.16,1604.10,412.22,37,2024-03-01
4,100F-New York City Industrial T,aetna,TriNet III,Chronic Condition,CKD complex,Outpatient,7,3346.45,3323.98,1,...,6,169,70,229120.60,228733.00,4647.16,1604.10,412.22,37,2024-03-01
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
396975,"ZZY-Monzo, Inc",bsca,TriNet III,Chronic Condition,metabolic only,Rx,2,1.79,0.00,1,...,9,218,118,162288.54,162108.00,7640.04,6477.84,97.96,37,2024-03-01
396976,"ZZY-Monzo, Inc",kaiser,TriNet III,Acute,acute_NonER,Office Visit,1,63.00,63.00,1,...,2,25,25,9445.00,9061.13,0.00,0.00,0.00,37,2024-03-01
396977,"ZZY-Monzo, Inc",kaiser,TriNet III,Chronic Condition,diabetes only,Office Visit,7,741.50,686.50,1,...,2,25,25,9445.00,9061.13,0.00,0.00,0.00,37,2024-03-01
396978,"ZZY-Monzo, Inc",kaiser,TriNet III,Chronic Condition,diabetes only,Outpatient,5,500.18,484.05,1,...,2,25,25,9445.00,9061.13,0.00,0.00,0.00,37,2024-03-01


In [382]:
# # Current BoB Build

bob_current = pd.DataFrame(pmpm_agg_c.groupby(['tenantid','carrier', 'exchange']).agg({'mm_months': 'max', 'ee_months': 'max', 'allowed':'sum', 'paid': 'sum', 'claimants': 'sum', 'admin_expense':'max', 'pooling_expense': 'max', 'other_expense': 'max', 'total_rate': 'max'})).reset_index()
bob_current['admin'] = bob_current['admin_expense'] + bob_current['pooling_expense'] + bob_current['other_expense']
bob_current.drop(['admin_expense', 'pooling_expense', 'other_expense'], axis=1, inplace=True)

bob_prior = pd.DataFrame(pmpm_agg_p.groupby(['tenantid','carrier', 'exchange']).agg({'mm_months': 'max', 'ee_months': 'max', 'allowed':'sum', 'paid': 'sum', 'claimants':'sum', 'admin_expense':'max', 'pooling_expense': 'max', 'other_expense': 'max', 'total_rate': 'max'})).reset_index()
bob_prior['admin'] = bob_prior['admin_expense'] + bob_prior['pooling_expense'] + bob_prior['other_expense']
bob_prior.drop(['admin_expense', 'pooling_expense', 'other_expense'], axis=1, inplace=True)

pmpm_paid_bob = round((bob_current.paid.sum() ) / bob_current.mm_months.sum(),2) # Calculate BoB PMPM
pmpm_admin_bob = round((bob_current.admin.sum()) / bob_current.mm_months.sum(),2) # Calculate BoB PMPM
pmpm_bob = pmpm_paid_bob + pmpm_admin_bob
pepm_paid_bob = round((bob_current.paid.sum()) / bob_current.ee_months.sum(),2) # Calculate BoB PMPM
pepm_admin_bob = round((bob_current.admin.sum()) / bob_current.ee_months.sum(),2) # Calculate BoB PMPM
pepm_bob = pepm_paid_bob + pepm_admin_bob
TCR_paid_bob = round((bob_current.paid.sum()) / bob_current.total_rate.sum(), 2) # Calculate BoB PMPM
TCR_admin_bob = round((bob_current.admin.sum()) / bob_current.total_rate.sum(), 2) # Calculate BoB PMPM
TCR_bob = round((bob_current.paid.sum() + bob_current.admin.sum()) / bob_current.total_rate.sum(), 2) # Calculate BoB PMPM

# Prior BoB Build
p_pmpm_paid_bob = round((bob_prior.paid.sum() ) / bob_prior.mm_months.sum(),2) # Calculate BoB PMPM
p_pmpm_admin_bob = round((bob_prior.admin.sum()) / bob_prior.mm_months.sum(),2) # Calculate BoB PMPM
p_pmpm_bob = p_pmpm_paid_bob + p_pmpm_admin_bob
p_pepm_paid_bob = round((bob_prior.paid.sum()) / bob_prior.ee_months.sum(),2) # Calculate BoB PMPM
p_pepm_admin_bob = round((bob_prior.admin.sum()) / bob_prior.ee_months.sum(),2) # Calculate BoB PMPM
p_pepm_bob = p_pepm_paid_bob + p_pepm_admin_bob
p_TCR_paid_bob = round((bob_prior.paid.sum()) / bob_prior.total_rate.sum(), 2) # Calculate BoB PMPM
p_TCR_admin_bob = round((bob_prior.admin.sum()) / bob_prior.total_rate.sum(), 2) # Calculate BoB PMPM
p_TCR_bob = round((bob_prior.paid.sum() + bob_prior.admin.sum()) / bob_prior.total_rate.sum(), 2) # Calculate BoB PMPM

summary = {'pmpm_allowed_bob': [pmpm_bob],
            'pmpm_allowed_bob_p': [p_pmpm_bob],
            'pmpm_paid_bob': [pmpm_paid_bob],
            'pmpm_paid_bob_p': [p_pmpm_paid_bob],
            'pepm_allowed_bob': [pepm_bob],
            'pepm_allowed_bob_p': [p_pepm_bob],
            'pepm_paid_bob': [pepm_paid_bob],
            'pepm_paid_bob_p': [p_pepm_paid_bob],
            'TCR_allowed_bob': [TCR_bob],
            'TCR_allowed_bob_p': [p_TCR_bob],
            'TCR_paid_bob': [TCR_paid_bob],
            'TCR_paid_bob_p': [p_TCR_paid_bob]}
summary = pd.DataFrame(summary)

print(summary)

# filename = f'Outputs/BookOfBusiness_{curr_month}.csv'

# summary.to_csv(filename, index=False)

# print(f"Output saved to {filename}.")


   pmpm_allowed_bob  pmpm_allowed_bob_p  pmpm_paid_bob  pmpm_paid_bob_p  \
0            626.39              576.88         579.69            531.4   

   pepm_allowed_bob  pepm_allowed_bob_p  pepm_paid_bob  pepm_paid_bob_p  \
0           1256.71              1153.8        1163.01          1062.84   

   TCR_allowed_bob  TCR_allowed_bob_p  TCR_paid_bob  TCR_paid_bob_p  
0             0.88               0.82          0.82            0.75  


### Statements

In [383]:
dmca_claims_elig_c = pmpm_agg_c.copy()

non_recurring = ['Pregnancy, childbirth and the puerperium', 'Certain conditions originating in the perinatal period', 'Certain infectious and parasitic diseases', 'Injury, poisoning and certain other consequences of external causes']
non_recurring_c = dmca_claims_elig_c[dmca_claims_elig_c['group'].isin(non_recurring)]
non_recurring_c = non_recurring_c.groupby(['tenantid', 'carrier', 'exchange']).agg({'allowed': 'sum','paid': 'sum'}).reset_index()
non_recurring_c.rename(columns={'allowed': 'non_recurring_allowed', 'paid': 'non_recurring_paid'}, inplace=True)
transplants_c = pd.read_csv(f'Claims Data/Transplants_{curr_month.replace("-", "_")}.csv')
transplants_c = transplants_c.groupby(['tenantid', 'carrier', 'exchange']).agg({'amtallowed': 'sum', 'amtpaid':'sum'}).reset_index()
transplants_c.rename(columns={'amtallowed': 'non_recurring_allowed', 'amtpaid': 'non_recurring_paid'}, inplace=True)
non_recurring_c = pd.concat([non_recurring_c, transplants_c])
non_recurring_c = non_recurring_c.groupby(['tenantid', 'carrier', 'exchange']).agg({'non_recurring_allowed': 'sum', 'non_recurring_paid': 'sum'}).reset_index()

dmca_claims_elig_c = dmca_claims_elig_c.groupby(['tenantid', 'carrier', 'exchange','year', 'start_date', 'stop_date']).agg({
    'allowed': 'sum', 
    'paid': 'sum', 
    'start_member_count':'first', 
    'stop_member_count':'first', 
    'start_employee_count': 'first', 
    'stop_employee_count': 'first', 
    'mm_months':'first', 
    'ee_months': 'first', 
    'total_rate': 'first', 
    'admin_expense': 'first',
    'pooling_expense': 'first',
    'other_expense': 'first',
}).reset_index()

dmca_claims_elig_c = pd.merge(dmca_claims_elig_c, non_recurring_c, how='left', on=['tenantid', 'carrier', 'exchange'])
dmca_claims_elig_c['non_recurring_allowed'].fillna(0, inplace=True)
dmca_claims_elig_c['non_recurring_paid'].fillna(0, inplace=True)
dmca_claims_elig_c['allowed_adj'] = (dmca_claims_elig_c['allowed'] - dmca_claims_elig_c['non_recurring_allowed']).clip(lower=0)
dmca_claims_elig_c['paid_adj'] = (dmca_claims_elig_c['paid'] - dmca_claims_elig_c['non_recurring_paid']).clip(lower=0)

dmca_claims_elig_c['admin'] = dmca_claims_elig_c['admin_expense'] + dmca_claims_elig_c['pooling_expense'] + dmca_claims_elig_c['other_expense']
dmca_claims_elig_c.drop(['admin_expense', 'pooling_expense', 'other_expense'], axis=1, inplace=True)
dmca_claims_elig_c['avg_mbrs'] = dmca_claims_elig_c['mm_months'] / 12
dmca_claims_elig_c['turnover'] = 1 - abs((dmca_claims_elig_c['stop_member_count'] - dmca_claims_elig_c['start_member_count']) / dmca_claims_elig_c['start_member_count']).clip(upper=1)
dmca_claims_elig_c['credibility'] = (0.25*dmca_claims_elig_c['turnover'] + (dmca_claims_elig_c['avg_mbrs'] - dmca_claims_elig_c['turnover'])*0.01)/(1+(dmca_claims_elig_c['avg_mbrs'] - 1)*0.01)

dmca_claims_elig_p = pmpm_agg_p.copy()

non_recurring_p = dmca_claims_elig_p[dmca_claims_elig_p['group'].isin(non_recurring)]
non_recurring_p = non_recurring_p.groupby(['tenantid', 'carrier', 'exchange']).agg({'allowed': 'sum','paid': 'sum'}).reset_index()
non_recurring_p.rename(columns={'allowed': 'non_recurring_allowed', 'paid': 'non_recurring_paid'}, inplace=True)
transplants_p = pd.read_csv(f'Claims Data/Transplants_{prior_month.replace("-", "_")}.csv')
transplants_p = transplants_p.groupby(['tenantid', 'carrier', 'exchange']).agg({'amtallowed': 'sum', 'amtpaid':'sum'}).reset_index()
transplants_p.rename(columns={'amtallowed': 'non_recurring_allowed', 'amtpaid': 'non_recurring_paid'}, inplace=True)
non_recurring_p = pd.concat([non_recurring_p, transplants_p])
non_recurring_p = non_recurring_p.groupby(['tenantid', 'carrier', 'exchange']).agg({'non_recurring_allowed': 'sum', 'non_recurring_paid': 'sum'}).reset_index()

dmca_claims_elig_p = dmca_claims_elig_p.groupby(['tenantid', 'carrier', 'exchange','year', 'start_date', 'stop_date']).agg({
    'allowed': 'sum', 
    'paid': 'sum', 
    'start_member_count':'first', 
    'stop_member_count':'first', 
    'start_employee_count': 'first', 
    'stop_employee_count': 'first', 
    'mm_months':'first', 
    'ee_months': 'first', 
    'total_rate': 'first', 
    'admin_expense': 'first',
    'pooling_expense': 'first',
    'other_expense': 'first',
}).reset_index()

dmca_claims_elig_p = pd.merge(dmca_claims_elig_p, non_recurring_p, how='left', on=['tenantid', 'carrier', 'exchange'])
dmca_claims_elig_p['non_recurring_allowed'].fillna(0, inplace=True)
dmca_claims_elig_p['non_recurring_paid'].fillna(0, inplace=True)
dmca_claims_elig_p['allowed_adj'] = (dmca_claims_elig_p['allowed'] - dmca_claims_elig_p['non_recurring_allowed']).clip(lower=0)
dmca_claims_elig_p['paid_adj'] = (dmca_claims_elig_p['paid'] - dmca_claims_elig_p['non_recurring_paid']).clip(lower=0)

dmca_claims_elig_p['admin'] = dmca_claims_elig_p['admin_expense'] + dmca_claims_elig_p['pooling_expense'] + dmca_claims_elig_p['other_expense']
dmca_claims_elig_p.drop(['admin_expense', 'pooling_expense', 'other_expense'], axis=1, inplace=True)
dmca_claims_elig_p['avg_mbrs'] = dmca_claims_elig_p['mm_months'] / 12
dmca_claims_elig_p['turnover'] = 1 - abs((dmca_claims_elig_p['stop_member_count'] - dmca_claims_elig_p['start_member_count']) / dmca_claims_elig_p['start_member_count']).clip(upper=1)
dmca_claims_elig_p['credibility'] = (0.25*dmca_claims_elig_p['turnover'] + (dmca_claims_elig_p['avg_mbrs'] - dmca_claims_elig_p['turnover'])*0.01)/(1+(dmca_claims_elig_p['avg_mbrs'] - 1)*0.01)

dmca_claims_elig = pd.merge(dmca_claims_elig_c, dmca_claims_elig_p, how='outer', on=['tenantid', 'carrier', 'exchange'], suffixes=('_c', '_p'))
dmca_claims_elig['year_c'] = pd.to_datetime(curr_month).strftime('%Y')
dmca_claims_elig['year_p'] = pd.to_datetime(prior_month).strftime('%Y')
dmca_claims_elig['start_date_c'] = pd.to_datetime(start_date).strftime('%Y-%m-%d')
dmca_claims_elig['start_date_p'] = pd.to_datetime(start_date_prior).strftime('%Y-%m-%d')
dmca_claims_elig['stop_date_c'] = pd.to_datetime(stop_date).strftime('%Y-%m-%d')
dmca_claims_elig['stop_date_p'] = pd.to_datetime(stop_date_prior).strftime('%Y-%m-%d')
dmca_claims_elig.fillna(0, inplace=True)

dmca_claims_elig['pmpm_c'] = dmca_claims_elig['paid_adj_c'] / dmca_claims_elig['mm_months_c']
dmca_claims_elig['pmpm_p'] = dmca_claims_elig['paid_adj_p'] / dmca_claims_elig['mm_months_p']
dmca_claims_elig['bob_pmpm_c'] = pmpm_paid_bob
dmca_claims_elig['bob_pmpm_p'] = p_pmpm_paid_bob
dmca_claims_elig['bob_pmpm_trend'] = dmca_claims_elig['bob_pmpm_c'] / dmca_claims_elig['bob_pmpm_p'] - 1
dmca_claims_elig['current_cred_pmpm'] = (dmca_claims_elig['pmpm_c'] * dmca_claims_elig['credibility_c'] + dmca_claims_elig['bob_pmpm_c']* (1-dmca_claims_elig['credibility_c']))
dmca_claims_elig['prior_cred_pmpm'] = (dmca_claims_elig['pmpm_p'] * dmca_claims_elig['credibility_p'] + dmca_claims_elig['bob_pmpm_p']* (1-dmca_claims_elig['credibility_p']))
dmca_claims_elig['cred_trend'] = dmca_claims_elig['current_cred_pmpm']/dmca_claims_elig['prior_cred_pmpm'] - 1
dmca_claims_elig['potential_loss'] = round((dmca_claims_elig['paid_adj_c'].clip(lower=0) * (1-((dmca_claims_elig['bob_pmpm_trend'] + 1) / (dmca_claims_elig['cred_trend'] + 1)))),2)
dmca_claims_elig['potential_loss_pct'] = (dmca_claims_elig['potential_loss']/dmca_claims_elig['paid_adj_c'].clip(lower=0)).clip(lower=-0.5)
dmca_claims_elig['potential_loss_pct'].fillna(0, inplace=True)

#Risk Index
dmca_claims_elig['loss_pct_std_dev'] = np.std(dmca_claims_elig.potential_loss_pct.clip(lower=0))
dmca_claims_elig['pct_std_dev_ratio'] = dmca_claims_elig.potential_loss_pct/dmca_claims_elig.loss_pct_std_dev
dmca_claims_elig['loss_dol_std_dev'] = np.std(dmca_claims_elig.potential_loss.clip(lower=0))
dmca_claims_elig['dol_std_dev_ratio'] = dmca_claims_elig.potential_loss/dmca_claims_elig.loss_dol_std_dev
dmca_claims_elig['ratio_avg'] = (dmca_claims_elig.dol_std_dev_ratio + dmca_claims_elig.pct_std_dev_ratio) / 2
dmca_claims_elig['risk_index_setup'] = (2.5 + 1*dmca_claims_elig.ratio_avg).clip(upper=5)
dmca_claims_elig['risk_index'] = round(dmca_claims_elig.risk_index_setup.clip(lower=1), 1)
dmca_claims_elig.sort_values(by=['risk_index', 'potential_loss'], ascending=[False, False], inplace=True)
dmca_claims_elig['rank'] = dmca_claims_elig.reset_index().index + 1

dmca_claims_elig.drop(['pmpm_c', 'pmpm_p', 'credibility_c', 'credibility_p', 'avg_mbrs_c', 'avg_mbrs_p', 'turnover_c', 'turnover_p', 'loss_pct_std_dev', 'pct_std_dev_ratio', 'loss_dol_std_dev', 'dol_std_dev_ratio', 'ratio_avg', 'risk_index_setup', 'current_cred_pmpm', 'prior_cred_pmpm', 'cred_trend', 'potential_loss_pct'], axis=1, inplace=True)
# dmca_claims_elig = dmca_claims_elig[dmca_claims_elig['tenantid'] == 'U9J-SOPHiA GENETICS, Inc']
dmca_claims_elig

The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  dmca_claims_elig_c['non_recurring_allowed'].fillna(0, inplace=True)
The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  dmca_claims_elig_c['non_recurring_paid'].fillna(0, inplace=True)
The behavior will change in pandas 3.0. This inplace method will never work because the intermediat

Unnamed: 0,tenantid,carrier,exchange,year_c,start_date_c,stop_date_c,allowed_c,paid_c,start_member_count_c,stop_member_count_c,...,non_recurring_paid_p,allowed_adj_p,paid_adj_p,admin_p,bob_pmpm_c,bob_pmpm_p,bob_pmpm_trend,potential_loss,risk_index,rank
3684,"15N2-Axsome Therapeutics, Inc",empire,TriNet III,2024,2023-04-01,2024-03-31,9985703.73,9058972.85,922.0,1324.0,...,0.00,4314938.24,3900804.90,423890.54,579.69,531.4,0.090873,4278629.49,5.0,1
6625,GHS-Braeburn Inc,uhc,TriNet IV,2024,2023-04-01,2024-03-31,3529813.66,3304515.78,138.0,541.0,...,0.00,472042.10,443568.30,45023.03,579.69,531.4,0.090873,2206546.23,5.0,2
12302,"VEQ-Nutcracker Therapeutics, Inc",bsca,TriNet III,2024,2023-04-01,2024-03-31,3894482.03,3775488.29,120.0,92.0,...,0.00,1311530.07,1228347.68,72615.94,579.69,531.4,0.090873,2158432.24,5.0,3
5833,D7P-Zapier,bsca,TriNet III,2024,2023-04-01,2024-03-31,4055926.86,3687573.14,309.0,290.0,...,330465.95,1442941.98,1211776.58,196568.40,579.69,531.4,0.090873,2085711.29,5.0,4
9578,"OEG-Centiva Capital, LP",uhc,TriNet IV,2024,2023-04-01,2024-03-31,4938673.21,4528693.60,456.0,543.0,...,0.00,2227215.27,2016314.21,231660.38,579.69,531.4,0.090873,1867806.22,5.0,5
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
14997,"ZYX-PrivCo Media, LLC",bsca,TriNet III,2024,2023-04-01,2024-03-31,0.00,0.00,0.0,0.0,...,0.00,1.29,0.00,657.84,579.69,531.4,0.090873,,,15020
14998,"ZYX-PrivCo Media, LLC",kaiser,TriNet III,2024,2023-04-01,2024-03-31,0.00,0.00,0.0,0.0,...,0.00,30937.71,30524.31,0.00,579.69,531.4,0.090873,,,15021
15008,"ZZ5-Ace3 Group, LLC",kaiser,TriNet III,2024,2023-04-01,2024-03-31,0.00,0.00,0.0,0.0,...,0.00,835.17,835.17,0.00,579.69,531.4,0.090873,,,15022
15011,ZZ7-Surfside Solutions Inc,kaiser,TriNet III,2024,2023-04-01,2024-03-31,0.00,0.00,0.0,0.0,...,0.00,11992.42,11694.19,0.00,579.69,531.4,0.090873,,,15023


In [384]:

dmca_claims_elig_tenant_c = dmca_claims_elig_c.groupby(['tenantid','year', 'start_date', 'stop_date']).agg({
    'allowed': 'sum', 
    'paid': 'sum', 
    'start_member_count':'sum', 
    'stop_member_count':'sum', 
    'start_employee_count': 'sum', 
    'stop_employee_count': 'sum', 
    'mm_months':'sum', 
    'ee_months': 'sum', 
    'total_rate': 'sum', 
    'admin': 'sum',
    'non_recurring_allowed': 'sum',
    'non_recurring_paid': 'sum',
    'allowed_adj': 'sum',
    'paid_adj': 'sum',
}).reset_index()

dmca_claims_elig_tenant_c['avg_mbrs'] = dmca_claims_elig_tenant_c['mm_months'] / 12
dmca_claims_elig_tenant_c['turnover'] = 1 - abs((dmca_claims_elig_tenant_c['stop_member_count'] - dmca_claims_elig_tenant_c['start_member_count']) / dmca_claims_elig_tenant_c['start_member_count']).clip(upper=1)
dmca_claims_elig_tenant_c['credibility'] = (0.25*dmca_claims_elig_tenant_c['turnover'] + (dmca_claims_elig_tenant_c['avg_mbrs'] - dmca_claims_elig_tenant_c['turnover'])*0.01)/(1+(dmca_claims_elig_tenant_c['avg_mbrs'] - 1)*0.01)

dmca_claims_elig_tenant_p = dmca_claims_elig_p.groupby(['tenantid','year', 'start_date', 'stop_date']).agg({
    'allowed': 'sum', 
    'paid': 'sum', 
    'start_member_count':'sum', 
    'stop_member_count':'sum', 
    'start_employee_count': 'sum', 
    'stop_employee_count': 'sum', 
    'mm_months':'sum', 
    'ee_months': 'sum', 
    'total_rate': 'sum', 
    'admin': 'sum',
    'non_recurring_allowed': 'sum',
    'non_recurring_paid': 'sum',
    'allowed_adj': 'sum',
    'paid_adj': 'sum',

}).reset_index()


dmca_claims_elig_tenant_p['avg_mbrs'] = dmca_claims_elig_tenant_p['mm_months'] / 12
dmca_claims_elig_tenant_p['turnover'] = 1 - abs((dmca_claims_elig_tenant_p['stop_member_count'] - dmca_claims_elig_tenant_p['start_member_count']) / dmca_claims_elig_tenant_p['start_member_count']).clip(upper=1)
dmca_claims_elig_tenant_p['credibility'] = (0.25*dmca_claims_elig_tenant_p['turnover'] + (dmca_claims_elig_tenant_p['avg_mbrs'] - dmca_claims_elig_tenant_p['turnover'])*0.01)/(1+(dmca_claims_elig_tenant_p['avg_mbrs'] - 1)*0.01)

dmca_claims_elig_tenant = pd.merge(dmca_claims_elig_tenant_c, dmca_claims_elig_tenant_p, how='outer', on=['tenantid'], suffixes=('_c', '_p'))
dmca_claims_elig_tenant['year_c'] = pd.to_datetime(curr_month).strftime('%Y')
dmca_claims_elig_tenant['year_p'] = pd.to_datetime(prior_month).strftime('%Y')
dmca_claims_elig_tenant['start_date_c'] = pd.to_datetime(start_date).strftime('%Y-%m-%d')
dmca_claims_elig_tenant['start_date_p'] = pd.to_datetime(start_date_prior).strftime('%Y-%m-%d')
dmca_claims_elig_tenant['stop_date_c'] = pd.to_datetime(stop_date).strftime('%Y-%m-%d')
dmca_claims_elig_tenant['stop_date_p'] = pd.to_datetime(stop_date_prior).strftime('%Y-%m-%d')
dmca_claims_elig_tenant.fillna(0, inplace=True)

dmca_claims_elig_tenant['pmpm_c'] = dmca_claims_elig_tenant['paid_adj_c'] / dmca_claims_elig_tenant['mm_months_c']
dmca_claims_elig_tenant['pmpm_p'] = dmca_claims_elig_tenant['paid_adj_p'] / dmca_claims_elig_tenant['mm_months_p']
dmca_claims_elig_tenant['bob_pmpm_c'] = pmpm_paid_bob
dmca_claims_elig_tenant['bob_pmpm_p'] = p_pmpm_paid_bob
dmca_claims_elig_tenant['bob_pmpm_trend'] = dmca_claims_elig_tenant['bob_pmpm_c'] / dmca_claims_elig_tenant['bob_pmpm_p'] - 1
dmca_claims_elig_tenant['current_cred_pmpm'] = (dmca_claims_elig_tenant['pmpm_c'] * dmca_claims_elig_tenant['credibility_c'] + dmca_claims_elig_tenant['bob_pmpm_c']* (1-dmca_claims_elig_tenant['credibility_c']))
dmca_claims_elig_tenant['prior_cred_pmpm'] = (dmca_claims_elig_tenant['pmpm_p'] * dmca_claims_elig_tenant['credibility_p'] + dmca_claims_elig_tenant['bob_pmpm_p']* (1-dmca_claims_elig_tenant['credibility_p']))
dmca_claims_elig_tenant['cred_trend'] = dmca_claims_elig_tenant['current_cred_pmpm']/dmca_claims_elig_tenant['prior_cred_pmpm'] - 1
dmca_claims_elig_tenant['potential_loss'] = round((dmca_claims_elig_tenant['paid_adj_c'].clip(lower=0) * (1-((dmca_claims_elig_tenant['bob_pmpm_trend'] + 1) / (dmca_claims_elig_tenant['cred_trend'] + 1)))),2)
dmca_claims_elig_tenant['potential_loss_pct'] = (dmca_claims_elig_tenant['potential_loss']/dmca_claims_elig_tenant['paid_adj_c'].clip(lower=0)).clip(lower=-0.5)
dmca_claims_elig_tenant['potential_loss_pct'].fillna(0, inplace=True)

#Risk Index
dmca_claims_elig_tenant['loss_pct_std_dev'] = np.std(dmca_claims_elig_tenant.potential_loss_pct.clip(lower=0))
dmca_claims_elig_tenant['pct_std_dev_ratio'] = dmca_claims_elig_tenant.potential_loss_pct/dmca_claims_elig_tenant.loss_pct_std_dev
dmca_claims_elig_tenant['loss_dol_std_dev'] = np.std(dmca_claims_elig_tenant.potential_loss.clip(lower=0))
dmca_claims_elig_tenant['dol_std_dev_ratio'] = dmca_claims_elig_tenant.potential_loss/dmca_claims_elig_tenant.loss_dol_std_dev
dmca_claims_elig_tenant['ratio_avg'] = (dmca_claims_elig_tenant.dol_std_dev_ratio + dmca_claims_elig_tenant.pct_std_dev_ratio) / 2
dmca_claims_elig_tenant['risk_index_setup'] = (2.5 + 1*dmca_claims_elig_tenant.ratio_avg).clip(upper=5)
dmca_claims_elig_tenant['risk_index'] = round(dmca_claims_elig_tenant.risk_index_setup.clip(lower=1), 1)
dmca_claims_elig_tenant.sort_values(by=['risk_index', 'potential_loss'], ascending=[False, False], inplace=True)
dmca_claims_elig_tenant['rank'] = dmca_claims_elig_tenant.reset_index().index + 1

dmca_claims_elig_tenant.drop(['pmpm_c', 'pmpm_p', 'credibility_c', 'credibility_p', 'avg_mbrs_c', 'avg_mbrs_p', 'turnover_c', 'turnover_p', 'loss_pct_std_dev', 'pct_std_dev_ratio', 'loss_dol_std_dev', 'dol_std_dev_ratio', 'ratio_avg', 'risk_index_setup', 'current_cred_pmpm', 'prior_cred_pmpm', 'cred_trend', 'potential_loss_pct'], axis=1, inplace=True)

dmca_claims_elig_tenant = dmca_claims_elig_tenant[['tenantid', 'risk_index', 'rank']]

# dmca_claims_elig_tenant = dmca_claims_elig_tenant[dmca_claims_elig_tenant['tenantid'] == 'U9J-SOPHiA GENETICS, Inc']
dmca_claims_elig_tenant


The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  dmca_claims_elig_tenant['potential_loss_pct'].fillna(0, inplace=True)


Unnamed: 0,tenantid,risk_index,rank
2225,"15N2-Axsome Therapeutics, Inc",5.0,1
3393,D7P-Zapier,5.0,2
7490,"VEQ-Nutcracker Therapeutics, Inc",5.0,3
3918,GHS-Braeburn Inc,5.0,4
2771,"7A9-Viking River Cruises, Inc",5.0,5
...,...,...,...
5741,OD6-AAFMC LLC,,9237
5746,ODJ-Startle International Inc,,9238
6015,PBF-WJC LLC,,9239
6588,SQN-Alpha Entertainment Group Amer,,9240


In [385]:
final = pd.merge(dmca_claims_elig, dmca_claims_elig_tenant, how='left', on=['tenantid'], suffixes=('_carrier', '_tenant'))
final['business_risk_tenant'] = final['risk_index_tenant'].apply(lambda x: 'High' if x >= 4.0 else 'Medium' if x >= 2.5 else 'Low')
final['business_risk_carrier'] = final['risk_index_carrier'].apply(lambda x: 'High' if x >= 4.0 else 'Medium' if x >= 2.5 else 'Low')
final['group_size'] = final['stop_member_count_c'].apply(lambda x: '<100' if x < 100 else '100-500' if x < 500 else '500+')
final['group_size_tenant'] = final.groupby(['tenantid'])['stop_member_count_c'].transform('sum').apply(lambda x: '<100' if x < 100 else '100-500' if x < 500 else '500+')
final.sort_values(by=['risk_index_tenant', 'potential_loss'], ascending=[False, False], inplace=True)

final.to_csv(f'dmca_claims_elig_{curr_month.replace("-", "_")}.csv', index=False)
final

Unnamed: 0,tenantid,carrier,exchange,year_c,start_date_c,stop_date_c,allowed_c,paid_c,start_member_count_c,stop_member_count_c,...,bob_pmpm_trend,potential_loss,risk_index_carrier,rank_carrier,risk_index_tenant,rank_tenant,business_risk_tenant,business_risk_carrier,group_size,group_size_tenant
0,"15N2-Axsome Therapeutics, Inc",empire,TriNet III,2024,2023-04-01,2024-03-31,9985703.73,9058972.85,922.0,1324.0,...,0.090873,4278629.49,5.0,1,5.0,1,High,High,500+,500+
1,GHS-Braeburn Inc,uhc,TriNet IV,2024,2023-04-01,2024-03-31,3529813.66,3304515.78,138.0,541.0,...,0.090873,2206546.23,5.0,2,5.0,4,High,High,500+,500+
2,"VEQ-Nutcracker Therapeutics, Inc",bsca,TriNet III,2024,2023-04-01,2024-03-31,3894482.03,3775488.29,120.0,92.0,...,0.090873,2158432.24,5.0,3,5.0,3,High,High,<100,100-500
3,D7P-Zapier,bsca,TriNet III,2024,2023-04-01,2024-03-31,4055926.86,3687573.14,309.0,290.0,...,0.090873,2085711.29,5.0,4,5.0,2,High,High,100-500,500+
4,"OEG-Centiva Capital, LP",uhc,TriNet IV,2024,2023-04-01,2024-03-31,4938673.21,4528693.60,456.0,543.0,...,0.090873,1867806.22,5.0,5,5.0,6,High,High,500+,500+
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
14357,OD6-AAFMC LLC,kaiser,TriNet III,2024,2023-04-01,2024-03-31,493.67,430.33,1.0,1.0,...,0.090873,,,14358,,9237,Low,Low,<100,<100
14360,ODJ-Startle International Inc,aetna,TriNet III,2024,2023-04-01,2024-03-31,0.01,0.01,1.0,1.0,...,0.090873,,,14361,,9238,Low,Low,<100,<100
14421,PBF-WJC LLC,uhc,TriNet IV,2024,2023-04-01,2024-03-31,562.36,10.00,1.0,1.0,...,0.090873,,,14422,,9239,Low,Low,<100,<100
14505,SQN-Alpha Entertainment Group Amer,kaiser,TriNet III,2024,2023-04-01,2024-03-31,2114.45,2105.00,1.0,1.0,...,0.090873,,,14506,,9240,Low,Low,<100,<100


### Level 1

In [386]:
direction_dict_1 = {'True': 'increased', 'False': 'decreased'}
direction_dict_2 = {'True': 'above', 'False': 'below'}
direction_dict_3 = {'True': 'higher', 'False': 'lower'}
direction_dict_4 = {'True': 'an excess', 'False': 'a reduced'}
metric_dict_1 = {'per1000_change': 'per 1000 rate', 'pmpm_change': 'PMPM'}
class_dict_1 = {'Catastrophic': 'catastrophic', 'Acute': 'acute', 'Chronic Condition': 'chronic'}
group_count_dict_1 = {'1': 'specifically', '2': 'including'}

In [387]:
pmpm_agg_tenant_c = bob_current.copy()
pmpm_agg_tenant_c = pmpm_agg_tenant_c.groupby(['tenantid']).agg({'paid': 'sum', 'admin': 'sum', 'mm_months': 'sum', 'ee_months': 'sum', 'total_rate': 'sum'}).reset_index()
pmpm_agg_tenant_c['pmpm_c'] = (pmpm_agg_tenant_c['paid'] + pmpm_agg_tenant_c['admin']) / pmpm_agg_tenant_c['mm_months'] # Current PMPM by tenant
pmpm_agg_tenant_c['pepm_c'] = (pmpm_agg_tenant_c['paid'] + pmpm_agg_tenant_c['admin']) / pmpm_agg_tenant_c['ee_months'] # Current PEPM by tenant
pmpm_agg_tenant_c['TCR_c'] = (pmpm_agg_tenant_c['paid'] + pmpm_agg_tenant_c['admin']) / pmpm_agg_tenant_c['total_rate'] # Current TCR by tenant
pmpm_agg_tenant_c = pmpm_agg_tenant_c[['tenantid', 'paid', 'mm_months', 'pmpm_c', 'pepm_c', 'TCR_c']]

pmpm_agg_tenant_p = bob_prior.copy()
pmpm_agg_tenant_p = pmpm_agg_tenant_p.groupby(['tenantid']).agg({'paid': 'sum', 'admin': 'sum', 'mm_months': 'sum', 'ee_months': 'sum', 'total_rate': 'sum'}).reset_index()
pmpm_agg_tenant_p['pmpm_p'] = (pmpm_agg_tenant_p['paid'] + pmpm_agg_tenant_p['admin']) / pmpm_agg_tenant_p['mm_months'] # Current PMPM by tenant
pmpm_agg_tenant_p['pepm_p'] = (pmpm_agg_tenant_p['paid'] + pmpm_agg_tenant_p['admin']) / pmpm_agg_tenant_p['ee_months'] # Current PEPM by tenant
pmpm_agg_tenant_p['TCR_p'] = (pmpm_agg_tenant_p['paid'] + pmpm_agg_tenant_p['admin']) / pmpm_agg_tenant_p['total_rate'] # Current TCR by tenant
pmpm_agg_tenant_p = pmpm_agg_tenant_p[['tenantid', 'paid', 'mm_months', 'pmpm_p', 'pepm_p', 'TCR_p']]

pmpm_agg_tenant = pd.merge(pmpm_agg_tenant_c, pmpm_agg_tenant_p, how='outer', on=['tenantid'], suffixes=('_c','_p')) # Merge current and prior pmpm df
pmpm_agg_tenant.fillna(0, inplace=True)
pmpm_agg_tenant['pmpm_change_t'] = pmpm_agg_tenant['pmpm_c'] / pmpm_agg_tenant['pmpm_p'] -1 # Calculate pmpm change by tenant
pmpm_agg_tenant['pepm_change_t'] = pmpm_agg_tenant['pepm_c'] / pmpm_agg_tenant['pepm_p'] -1 # Calculate pepm change by tenant
pmpm_agg_tenant['TCR_change_t'] = pmpm_agg_tenant['TCR_c'] / pmpm_agg_tenant['TCR_p'] -1 # Calculate TCR change by tenant

pmpm_agg_tenant['members_change_t'] = pmpm_agg_tenant['mm_months_c'] / pmpm_agg_tenant['mm_months_p'] -1 # Calculate member change by tenant
pmpm_agg_tenant['direction'] = pmpm_agg_tenant['pmpm_change_t'] > 0 # Determines if the PMPM has increased or decreased compared to prior period
pmpm_agg_tenant.replace([np.inf, -np.inf], 1, inplace=True)


pmpm_agg_tenant

Unnamed: 0,tenantid,paid_c,mm_months_c,pmpm_c,pepm_c,TCR_c,paid_p,mm_months_p,pmpm_p,pepm_p,TCR_p,pmpm_change_t,pepm_change_t,TCR_change_t,members_change_t,direction
0,100F-New York City Industrial T,50918.66,169,340.722722,822.602000,0.251318,40503.92,169.0,285.387988,689.008143,0.210503,0.193893,0.193893,0.193893,0.000000,True
1,100G-Iron Park Capital Partners,322751.36,134,2451.061119,7638.190465,2.636878,580462.90,134.0,4705.620896,14664.027907,5.062358,-0.479121,-0.479121,-0.479121,0.000000,False
2,100J-Tamares Management LLC,89116.89,259,393.225985,1060.890938,0.391184,68060.01,259.0,313.375946,845.462187,0.311749,0.254806,0.254806,0.254806,0.000000,True
3,100M-Wellness Insight Technologies,705.30,32,96.075625,153.721000,0.136036,9039.73,32.0,397.245625,635.593000,0.562472,-0.758146,-0.758146,-0.758146,0.000000,False
4,"100P-Walking Fish Therapeutics, I",707329.40,762,965.018373,1950.514589,0.808073,260370.97,659.0,422.333460,835.789039,0.350732,1.284968,1.333740,1.303964,0.156297,True
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
9236,ZZJ-Atlantic Pictures LLC,86514.87,371,289.099650,491.999862,0.609832,65244.50,371.0,226.984825,386.290688,0.478806,0.273652,0.273652,0.273652,0.000000,True
9237,ZZK-Altman Inc,121860.44,60,2052.776333,10263.881667,6.196200,11782.60,60.0,231.027167,1155.135833,0.697344,7.885433,7.885433,7.885433,0.000000,True
9238,ZZM-Contextant LLC June 19,24101.28,169,171.625621,644.549556,0.543089,10460.81,169.0,81.851834,307.399111,0.259010,1.096784,1.096784,1.096784,0.000000,True
9239,"ZZN-Vibliome Operations, LLC",58204.51,216,319.756065,697.649596,0.421775,35450.30,216.0,215.548426,470.287475,0.284320,0.483453,0.483453,0.483453,0.000000,True


In [388]:
# Dynamic statement build
pmpm_agg_tenant['direction_pmpm_bob'] = pmpm_agg_tenant.pmpm_c > pmpm_bob # Determines if the PMPM is above or below the bob
pmpm_agg_tenant['direction_pepm_bob'] = pmpm_agg_tenant.pepm_c > pepm_bob # Determines if the PMPM is above or below the bob
pmpm_agg_tenant['direction_TCR_bob'] = pmpm_agg_tenant.TCR_c > TCR_bob # Determines if the PMPM is above or below the bob
pmpm_agg_tenant['pmpm_change_t'] = (round(pmpm_agg_tenant['pmpm_change_t']*100)).astype(str).str[:-2] + "%" # Convert the pmpm_change_t to string percentage
pmpm_agg_tenant['pepm_change_t'] = (round(pmpm_agg_tenant['pepm_change_t']*100)).astype(str).str[:-2] + "%" # Convert the pmpm_change_t to string percentage
pmpm_agg_tenant['TCR_change_t'] = (round(pmpm_agg_tenant['TCR_change_t']*100)).astype(str).str[:-2] + "%" # Convert the pmpm_change_t to string percentage
pmpm_agg_tenant['pmpm_to_bob'] = round((pmpm_agg_tenant.pmpm_c/pmpm_bob -1)*100).astype(str).str[:-2] + "%" # Convert the pmpm_to_bob to string percentage
pmpm_agg_tenant['pepm_to_bob'] = round((pmpm_agg_tenant.pepm_c/pepm_bob -1)*100).astype(str).str[:-2] + "%" # Convert the pmpm_to_bob to string percentage
pmpm_agg_tenant['TCR_to_bob'] = round((pmpm_agg_tenant.TCR_c/TCR_bob -1)*100).astype(str).str[:-2] + "%" # Convert the pmpm_to_bob to string percentage
pmpm_agg_tenant['l1_statement_pmpm_1'] = ("On a paid date basis, " + pmpm_agg_tenant['tenantid'] + "'s paid PMPM " + pmpm_agg_tenant['direction'].astype(str).map(direction_dict_1) + " " + pmpm_agg_tenant['pmpm_change_t'] + " YoY, from " + "$" + round(pmpm_agg_tenant['pmpm_p']).astype(str).str[:-2] + " to $" + round(pmpm_agg_tenant['pmpm_c']).astype(str).str[:-2])
pmpm_agg_tenant['l1_statement_pmpm_2'] = ("Compared to benchmarks, PMPMs are " + pmpm_agg_tenant.pmpm_to_bob + " " + pmpm_agg_tenant['direction_pmpm_bob'].astype(str).map(direction_dict_3) +  " than average.")
pmpm_agg_tenant['l1_statement_pepm_1'] = ("On a paid date basis, " + pmpm_agg_tenant['tenantid'] + "'s paid PEPM " + pmpm_agg_tenant['direction'].astype(str).map(direction_dict_1) + " " + pmpm_agg_tenant['pepm_change_t'] + " YoY, from " + "$" + round(pmpm_agg_tenant['pepm_p']).astype(str).str[:-2] + " to $" + round(pmpm_agg_tenant['pepm_c']).astype(str).str[:-2])
pmpm_agg_tenant['l1_statement_pepm_2'] = ("Compared to benchmarks, PEPMs are " + pmpm_agg_tenant.pepm_to_bob + " " + pmpm_agg_tenant['direction_pepm_bob'].astype(str).map(direction_dict_3) +  " than average.")
pmpm_agg_tenant['l1_statement_TCR_1'] = ("On a paid date basis, " + pmpm_agg_tenant['tenantid'] + "'s TCR " + pmpm_agg_tenant['direction'].astype(str).map(direction_dict_1) + " " + pmpm_agg_tenant['TCR_change_t'] + " YoY, from " + "$" + round(pmpm_agg_tenant['pmpm_p']).astype(str).str[:-2] + " to $" + round(pmpm_agg_tenant['pmpm_c']).astype(str).str[:-2])
pmpm_agg_tenant['l1_statement_TCR_2'] = ("Compared to benchmarks, TCR is " + pmpm_agg_tenant.pmpm_to_bob + " " + pmpm_agg_tenant['direction_TCR_bob'].astype(str).map(direction_dict_3) +  " than average.")

pmpm_agg_tenant

Unnamed: 0,tenantid,paid_c,mm_months_c,pmpm_c,pepm_c,TCR_c,paid_p,mm_months_p,pmpm_p,pepm_p,...,direction_TCR_bob,pmpm_to_bob,pepm_to_bob,TCR_to_bob,l1_statement_pmpm_1,l1_statement_pmpm_2,l1_statement_pepm_1,l1_statement_pepm_2,l1_statement_TCR_1,l1_statement_TCR_2
0,100F-New York City Industrial T,50918.66,169,340.722722,822.602000,0.251318,40503.92,169.0,285.387988,689.008143,...,False,-46%,-35%,-71%,"On a paid date basis, 100F-New York City Indus...","Compared to benchmarks, PMPMs are -46% lower t...","On a paid date basis, 100F-New York City Indus...","Compared to benchmarks, PEPMs are -35% lower t...","On a paid date basis, 100F-New York City Indus...","Compared to benchmarks, TCR is -46% lower than..."
1,100G-Iron Park Capital Partners,322751.36,134,2451.061119,7638.190465,2.636878,580462.90,134.0,4705.620896,14664.027907,...,True,291%,508%,200%,"On a paid date basis, 100G-Iron Park Capital P...","Compared to benchmarks, PMPMs are 291% higher ...","On a paid date basis, 100G-Iron Park Capital P...","Compared to benchmarks, PEPMs are 508% higher ...","On a paid date basis, 100G-Iron Park Capital P...","Compared to benchmarks, TCR is 291% higher tha..."
2,100J-Tamares Management LLC,89116.89,259,393.225985,1060.890938,0.391184,68060.01,259.0,313.375946,845.462187,...,False,-37%,-16%,-56%,"On a paid date basis, 100J-Tamares Management ...","Compared to benchmarks, PMPMs are -37% lower t...","On a paid date basis, 100J-Tamares Management ...","Compared to benchmarks, PEPMs are -16% lower t...","On a paid date basis, 100J-Tamares Management ...","Compared to benchmarks, TCR is -37% lower than..."
3,100M-Wellness Insight Technologies,705.30,32,96.075625,153.721000,0.136036,9039.73,32.0,397.245625,635.593000,...,False,-85%,-88%,-85%,"On a paid date basis, 100M-Wellness Insight Te...","Compared to benchmarks, PMPMs are -85% lower t...","On a paid date basis, 100M-Wellness Insight Te...","Compared to benchmarks, PEPMs are -88% lower t...","On a paid date basis, 100M-Wellness Insight Te...","Compared to benchmarks, TCR is -85% lower than..."
4,"100P-Walking Fish Therapeutics, I",707329.40,762,965.018373,1950.514589,0.808073,260370.97,659.0,422.333460,835.789039,...,False,54%,55%,-8%,"On a paid date basis, 100P-Walking Fish Therap...","Compared to benchmarks, PMPMs are 54% higher t...","On a paid date basis, 100P-Walking Fish Therap...","Compared to benchmarks, PEPMs are 55% higher t...","On a paid date basis, 100P-Walking Fish Therap...","Compared to benchmarks, TCR is 54% lower than ..."
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
9236,ZZJ-Atlantic Pictures LLC,86514.87,371,289.099650,491.999862,0.609832,65244.50,371.0,226.984825,386.290688,...,False,-54%,-61%,-31%,"On a paid date basis, ZZJ-Atlantic Pictures LL...","Compared to benchmarks, PMPMs are -54% lower t...","On a paid date basis, ZZJ-Atlantic Pictures LL...","Compared to benchmarks, PEPMs are -61% lower t...","On a paid date basis, ZZJ-Atlantic Pictures LL...","Compared to benchmarks, TCR is -54% lower than..."
9237,ZZK-Altman Inc,121860.44,60,2052.776333,10263.881667,6.196200,11782.60,60.0,231.027167,1155.135833,...,True,228%,717%,604%,"On a paid date basis, ZZK-Altman Inc's paid PM...","Compared to benchmarks, PMPMs are 228% higher ...","On a paid date basis, ZZK-Altman Inc's paid PE...","Compared to benchmarks, PEPMs are 717% higher ...","On a paid date basis, ZZK-Altman Inc's TCR inc...","Compared to benchmarks, TCR is 228% higher tha..."
9238,ZZM-Contextant LLC June 19,24101.28,169,171.625621,644.549556,0.543089,10460.81,169.0,81.851834,307.399111,...,False,-73%,-49%,-38%,"On a paid date basis, ZZM-Contextant LLC Jun...","Compared to benchmarks, PMPMs are -73% lower t...","On a paid date basis, ZZM-Contextant LLC Jun...","Compared to benchmarks, PEPMs are -49% lower t...","On a paid date basis, ZZM-Contextant LLC Jun...","Compared to benchmarks, TCR is -73% lower than..."
9239,"ZZN-Vibliome Operations, LLC",58204.51,216,319.756065,697.649596,0.421775,35450.30,216.0,215.548426,470.287475,...,False,-49%,-44%,-52%,"On a paid date basis, ZZN-Vibliome Operations,...","Compared to benchmarks, PMPMs are -49% lower t...","On a paid date basis, ZZN-Vibliome Operations,...","Compared to benchmarks, PEPMs are -44% lower t...","On a paid date basis, ZZN-Vibliome Operations,...","Compared to benchmarks, TCR is -49% lower than..."


### Level 2

In [389]:
# BoB PMPMs by Class
pmpm_class_bob_c = pd.DataFrame(pmpm_agg_c.groupby(['class'])['paid'].sum()).reset_index()
pmpm_class_members_c = pmpm_agg_tenant_c[['tenantid', 'mm_months']]
pmpm_class_members_p = pmpm_agg_tenant_c[['tenantid', 'mm_months']]
# current member count
pmpm_class_bob_c['members_c'] = pmpm_class_members_c['mm_months'].sum()
pmpm_class_bob_c['bob_class_pmpm'] = pmpm_class_bob_c['paid'] / pmpm_class_bob_c['members_c']
pmpm_class_bob_c = pmpm_class_bob_c.iloc[:, [0, 3]]

# BoB Util by Class
per1000_class_c = pd.DataFrame(pmpm_agg_c.groupby(['class']).agg({'counts': 'sum'})).reset_index()
per1000_class_claimants_c = (pd.DataFrame(pmpm_agg_c.groupby(['tenantid', 'class']).agg({'claimants': 'first'})).reset_index().
                             groupby(['class']).agg({'claimants': 'sum'}).reset_index())
per1000_class_c = pd.merge(per1000_class_c, per1000_class_claimants_c, how='left', on=['class'])  # Merge
per1000_class_c['members_c'] = pmpm_class_members_c['mm_months'].sum()
per1000_class_c['per1000claims_bob'] = per1000_class_c['counts'] / (per1000_class_c['members_c'] / 1000)
per1000_class_c['per1000claimants_bob'] = per1000_class_c['claimants'] / (per1000_class_c['members_c'] / 1000)
per1000_class_c = per1000_class_c.iloc[:, [0, 4, 5]]

# LEVEL 2 Comparison Table
pmpm_agg_class_c = pd.DataFrame(pmpm_agg_c.groupby(['tenantid', 'class']).agg({'paid': 'sum', 'claimants': 'first', 'counts': 'sum'})).reset_index()
pmpm_agg_class_c = pd.merge(pmpm_agg_class_c, pmpm_class_members_c, how='left', on=['tenantid'])  # Merge current and prior pmpm df
pmpm_agg_class_c['pmpm'] = pmpm_agg_class_c['paid'] / pmpm_agg_class_c['mm_months']
pmpm_agg_class_c['per1000claims'] = pmpm_agg_class_c['counts'] / (pmpm_agg_class_c['mm_months'] / 1000)
pmpm_agg_class_c['per1000claimants'] = pmpm_agg_class_c['claimants'] / (pmpm_agg_class_c['mm_months'] / 1000)
total_member_count_c = pmpm_class_members_c['mm_months'].sum()

pmpm_agg_class_p = pd.DataFrame(pmpm_agg_p.groupby(['tenantid', 'class']).agg({'paid': 'sum', 'claimants': 'first', 'counts': 'sum'})).reset_index()
pmpm_agg_class_p = pd.merge(pmpm_agg_class_p, pmpm_class_members_p, how='left', on=['tenantid'])  # Merge current and prior pmpm df
pmpm_agg_class_p['pmpm'] = pmpm_agg_class_p['paid'] / pmpm_agg_class_p['mm_months']
pmpm_agg_class_p['per1000claims'] = pmpm_agg_class_p['counts'] / (pmpm_agg_class_p['mm_months'] / 1000)
pmpm_agg_class_p['per1000claimants'] = pmpm_agg_class_p['claimants'] / (pmpm_agg_class_p['mm_months'] / 1000)
total_member_count_p = pmpm_class_members_p['mm_months'].sum()

pmpm_agg_class = pd.merge(pmpm_agg_class_c, pmpm_agg_class_p, how='outer', on=['tenantid', 'class'], suffixes=('_c', '_p'))  # Merge current and prior pmpm df
pmpm_agg_class = pd.merge(pmpm_agg_class, per1000_class_c, how='left', on=['class'])  # Merge current and prior pmpm df

# Create % Change Field
pmpm_agg_class.fillna(0, inplace=True)
pmpm_agg_class['pmpm_change'] = pmpm_agg_class['pmpm_c'] / pmpm_agg_class['pmpm_p'] - 1
pmpm_agg_class['per1000claims_change'] = pmpm_agg_class['per1000claims_c'] / pmpm_agg_class['per1000claims_p'] - 1
pmpm_agg_class['per1000claimants_change'] = pmpm_agg_class['per1000claimants_c'] / pmpm_agg_class['per1000claimants_p'] - 1
pmpm_agg_class['paid_diff'] = pmpm_agg_class['paid_c'] - pmpm_agg_class['paid_p']

pmpm_agg_class['year'] = pd.to_datetime(curr_month).strftime('%Y')
pmpm_agg_class['start_date'] = pd.to_datetime(start_date).strftime('%Y-%m-%d')
pmpm_agg_class['stop_date'] = pd.to_datetime(stop_date).strftime('%Y-%m-%d')

pmpm_agg_class.to_csv(f'Outputs/Level_22_{curr_month.replace("-", "_")}.csv', index=False)
pmpm_agg_class

Unnamed: 0,tenantid,class,paid_c,claimants_c,counts_c,mm_months_c,pmpm_c,per1000claims_c,per1000claimants_c,paid_p,...,per1000claimants_p,per1000claims_bob,per1000claimants_bob,pmpm_change,per1000claims_change,per1000claimants_change,paid_diff,year,start_date,stop_date
0,100F-New York City Industrial T,Acute,5206.79,5.0,59.0,169.0,30.809408,349.112426,29.585799,25091.18,...,71.005917,494.146424,9.944256,-0.792485,-0.584507,-0.583333,-19884.39,2024,2023-04-01,2024-03-31
1,100F-New York City Industrial T,Chronic Condition,45711.87,1.0,412.0,169.0,270.484438,2437.869822,5.917160,15412.74,...,5.917160,1353.889105,3.508767,1.965850,0.971292,0.000000,30299.13,2024,2023-04-01,2024-03-31
2,100G-Iron Park Capital Partners,Acute,101487.01,1.0,123.0,134.0,757.365746,917.910448,7.462687,85055.66,...,283.582090,494.146424,9.944256,0.193183,-0.741597,-0.973684,16431.35,2024,2023-04-01,2024-03-31
3,100G-Iron Park Capital Partners,Chronic Condition,221264.35,1.0,355.0,134.0,1651.226493,2649.253731,7.462687,495407.24,...,7.462687,1353.889105,3.508767,-0.553369,-0.760620,0.000000,-274142.89,2024,2023-04-01,2024-03-31
4,100J-Tamares Management LLC,Acute,29816.19,12.0,112.0,259.0,115.120425,432.432432,46.332046,12185.98,...,42.471042,494.146424,9.944256,1.446762,-0.170370,0.090909,17630.21,2024,2023-04-01,2024-03-31
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
20525,ZZM-Contextant LLC June 19,Chronic Condition,14358.48,1.0,85.0,169.0,84.961420,502.958580,5.917160,2916.01,...,5.917160,1353.889105,3.508767,3.924016,2.269231,0.000000,11442.47,2024,2023-04-01,2024-03-31
20526,"ZZN-Vibliome Operations, LLC",Acute,41330.10,1.0,121.0,216.0,191.343056,560.185185,4.629630,14332.54,...,69.444444,494.146424,9.944256,1.883655,0.315217,-0.933333,26997.56,2024,2023-04-01,2024-03-31
20527,"ZZN-Vibliome Operations, LLC",Chronic Condition,16874.41,1.0,162.0,216.0,78.122269,750.000000,4.629630,21117.76,...,4.629630,1353.889105,3.508767,-0.200938,0.361345,0.000000,-4243.35,2024,2023-04-01,2024-03-31
20528,"ZZY-Monzo, Inc",Acute,227523.30,2.0,420.0,500.0,455.046600,840.000000,4.000000,47463.65,...,2.000000,494.146424,9.944256,3.793633,0.640625,1.000000,180059.65,2024,2023-04-01,2024-03-31


In [390]:
# Create table to determine largest driver
pmpm_agg_class_skinny = pmpm_agg_class.iloc[:, [0, 1, 21, 6, 13, 18, 7, 14,19, 8, 15,20]]

pmpm_agg_class_skinny = pd.DataFrame(pmpm_agg_class_skinny.set_index(['tenantid', 'class', 'paid_diff', 'pmpm_c', 'pmpm_p', 'per1000claims_c', 'per1000claims_p', 'per1000claimants_c', 'per1000claimants_p']).stack()).rename(columns={0: "per_change"}).reset_index()
pmpm_agg_class_skinny['per_change'] = pmpm_agg_class_skinny.per_change.replace([np.inf, -np.inf], 1)  # This replaces per_change Infinity instances to 100% change

pmpm_agg_class_skinny = pmpm_agg_class_skinny.sort_values(['tenantid', 'paid_diff', 'per_change'],ascending=[True, False, False]).groupby(['tenantid', 'class']).head()
pmpm_agg_class_skinny.rename(columns={'level_9': 'level_2'}, inplace=True)
pmpm_agg_class_skinny = pmpm_agg_tenant[['tenantid', 'direction']].merge(pmpm_agg_class_skinny,how='outer')  # Pull in overall PMPM trend for sorting

pmpm_agg_class_skinny['sort'] = pmpm_agg_class_skinny.sort_values(['tenantid', 'paid_diff', 'per_change'], ascending=[True, False, False]).groupby(['tenantid']).cumcount() + 1  # Partition and create row numbers for sorting based on tenant's overall pmpm trend
pmpm_agg_class_skinny['sort2'] = np.where(pmpm_agg_class_skinny['direction'] == False, (-pmpm_agg_class_skinny['sort']), pmpm_agg_class_skinny['sort'])  # If direction = false, then make negative
pmpm_agg_class_skinny = pmpm_agg_class_skinny.sort_values(['tenantid', 'sort2'], ascending=True)  # Sort by ascending
pmpm_agg_class_skinny = pmpm_agg_class_skinny.groupby(['tenantid', 'class']).nth([0])  # Filter for first row only per partition
pmpm_agg_class_skinny = pd.merge(pmpm_agg_class_skinny, pmpm_class_bob_c, how='left', on='class')
pmpm_agg_class_skinny = pd.merge(pmpm_agg_class_skinny, per1000_class_c, how='left', on='class')
pmpm_agg_class_skinny_1 = pmpm_agg_class_skinny.groupby(['tenantid']).nth(0)  # Filter for first row only per partition
pmpm_agg_class_skinny_2 = pmpm_agg_class_skinny.groupby(['tenantid']).nth(1)  # Filter for first row only per partition
pmpm_agg_class_skinny_3 = pmpm_agg_class_skinny.groupby(['tenantid']).nth(2)  # Filter for first row only per partition
pmpm_agg_class_skinny_1 = pmpm_agg_class_skinny_1.iloc[:, [0, 1, 2, 3, 4, 5, 6, 7, 8, 9,10,11]]  # Clean up
pmpm_agg_class_skinny_2 = pmpm_agg_class_skinny_2.iloc[:, [0, 1, 2, 3, 4, 5, 6, 7, 8, 9,10,11]]  # Clean up
pmpm_agg_class_skinny_3 = pmpm_agg_class_skinny_3.iloc[:, [0, 1, 2, 3, 4, 5, 6, 7, 8, 9,10,11]]  # Clean up
pmpm_agg_class = pd.merge(pmpm_agg_class_skinny_1, pmpm_agg_class_skinny_2, how='left', on=['tenantid'], suffixes=('_1', '_2'))  # Final table creation
pmpm_agg_class_skinny_3 = pmpm_agg_class_skinny_3.add_suffix('_3')
pmpm_agg_class_skinny_3.rename(columns={'tenantid_3': 'tenantid', 'carrier_3': 'carrier', 'exchange_3': 'exchange'}, inplace=True)
pmpm_agg_class = pd.merge(pmpm_agg_class, pmpm_agg_class_skinny_3, how='left', on=['tenantid'])
pmpm_agg_class.fillna(0, inplace=True)

pmpm_agg_class

Unnamed: 0,tenantid,direction_1,class_1,paid_diff_1,pmpm_c_1,pmpm_p_1,per1000claims_c_1,per1000claims_p_1,per1000claimants_c_1,per1000claimants_p_1,...,class_3,paid_diff_3,pmpm_c_3,pmpm_p_3,per1000claims_c_3,per1000claims_p_3,per1000claimants_c_3,per1000claimants_p_3,level_2_3,per_change_3
0,100F-New York City Industrial T,True,Chronic Condition,30299.13,270.484438,91.199645,2437.869822,1236.686391,5.917160,5.917160,...,0,0.00,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0,0.000000
1,100G-Iron Park Capital Partners,False,Chronic Condition,-274142.89,1651.226493,3697.068955,2649.253731,11067.164179,7.462687,7.462687,...,0,0.00,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0,0.000000
2,100J-Tamares Management LLC,True,Acute,17630.21,115.120425,47.050116,432.432432,521.235521,46.332046,42.471042,...,0,0.00,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0,0.000000
3,100M-Wellness Insight Technologies,False,Chronic Condition,-7468.65,0.000000,233.395312,0.000000,1843.750000,0.000000,31.250000,...,0,0.00,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0,0.000000
4,"100P-Walking Fish Therapeutics, I",True,Chronic Condition,188634.72,470.638031,223.085906,1253.280840,1090.551181,1.312336,1.312336,...,Acute,115395.06,270.045367,118.608281,666.666667,931.758530,7.874016,30.183727,pmpm_change,1.276783
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
9236,ZZJ-Atlantic Pictures LLC,True,Chronic Condition,32667.86,146.021995,57.968464,1048.517520,660.377358,5.390836,5.390836,...,0,0.00,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0,0.000000
9237,ZZK-Altman Inc,True,Catastrophic,113850.21,1897.503500,0.000000,1500.000000,0.000000,16.666667,0.000000,...,Acute,-6715.47,14.953167,126.877667,150.000000,516.666667,33.333333,50.000000,per1000claimants_change,-0.333333
9238,ZZM-Contextant LLC June 19,True,Chronic Condition,11442.47,84.961420,17.254497,502.958580,153.846154,5.917160,5.917160,...,0,0.00,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0,0.000000
9239,"ZZN-Vibliome Operations, LLC",True,Acute,26997.56,191.343056,66.354352,560.185185,425.925926,4.629630,69.444444,...,0,0.00,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0,0.000000


In [391]:
# Dynamic statement build

pmpm_agg_class_skinny = pd.merge(pmpm_agg_class_skinny, pmpm_agg_class[['tenantid', 'class_1', 'per_change_1', 'class_2', 'per_change_2', 'class_3', 'per_change_3']], how='left',on=['tenantid'])
pmpm_agg_class_skinny['direction_class_1'] = pmpm_agg_class_skinny.per_change_1 > 0
pmpm_agg_class_skinny['direction_class_2'] = pmpm_agg_class_skinny.per_change_2 > 0
pmpm_agg_class_skinny['direction_class_3'] = pmpm_agg_class_skinny.per_change_3 > 0

pmpm_agg_class_skinny['per_change_1'] = (round(pmpm_agg_class_skinny['per_change_1'] * 100)).astype(str).str[:-2] + "%"
pmpm_agg_class_skinny['per_change_2'] = (round(pmpm_agg_class_skinny['per_change_2'] * 100)).astype(str).str[:-2] + "%"
pmpm_agg_class_skinny['per_change_3'] = (round(pmpm_agg_class_skinny['per_change_3'] * 100)).astype(str).str[:-2] + "%"
# 
pmpm_agg_class_skinny['l2_statement_1'] = ("From prior to current " + pmpm_agg_class_skinny['class_1'] + " PMPMs " + pmpm_agg_class_skinny['direction_class_1'].astype(str).map(direction_dict_1) + " " + pmpm_agg_class_skinny['per_change_1'])
pmpm_agg_class_skinny['l2_statement_2'] = ( np.where(pmpm_agg_class_skinny['class_2'] != 0,(", " +pmpm_agg_class_skinny['class_2'].astype(str) + " PMPMs " + pmpm_agg_class_skinny['direction_class_3'].astype(str).map(direction_dict_1) + " " + pmpm_agg_class_skinny['per_change_3'].astype(str)),''))
pmpm_agg_class_skinny['l2_statement_3'] = ( np.where(pmpm_agg_class_skinny['class_3'] != 0,(", and " +pmpm_agg_class_skinny['class_3'].astype(str) + " PMPMs " + pmpm_agg_class_skinny['direction_class_3'].astype(str).map(direction_dict_1) + " " + pmpm_agg_class_skinny['per_change_3'].astype(str)),''))
pmpm_agg_class_skinny['l2_statement'] = pmpm_agg_class_skinny['l2_statement_1'] + pmpm_agg_class_skinny['l2_statement_2'] + pmpm_agg_class_skinny['l2_statement_3']

pmpm_agg_class_skinny['year'] = pd.to_datetime(curr_month).strftime('%Y')
pmpm_agg_class_skinny['start_date'] = pd.to_datetime(start_date).strftime('%Y-%m-%d')
pmpm_agg_class_skinny['stop_date'] = pd.to_datetime(stop_date).strftime('%Y-%m-%d')

pmpm_agg_class_skinny.to_csv(f'Outputs/Level_2_{curr_month.replace("-", "_")}.csv', index=False)


### Level 3

In [392]:
# LEVEL 3 Table
pmpm_agg_group_c = pd.DataFrame(pmpm_agg_c.groupby(['tenantid', 'class', 'group']).agg({'paid': 'sum'})).reset_index()
pmpm_agg_group_c = pd.merge(pmpm_agg_group_c, pmpm_class_members_c, how='left', on=['tenantid'])  # Merge current and prior pmpm df
pmpm_agg_group_c['pmpm'] = pmpm_agg_group_c['paid'] / pmpm_agg_group_c['mm_months']

pmpm_agg_group_p = pd.DataFrame(pmpm_agg_p.groupby(['tenantid', 'class', 'group'])['paid'].sum()).reset_index()
pmpm_agg_group_p = pd.merge(pmpm_agg_group_p, pmpm_class_members_p, how='left', on=['tenantid'])  # Merge current and prior pmpm df
pmpm_agg_group_p['pmpm'] = pmpm_agg_group_p['paid'] / pmpm_agg_group_p['mm_months']
# 
pmpm_agg_group = pd.merge(pmpm_agg_group_c, pmpm_agg_group_p, how='outer', on=['tenantid', 'class', 'group'], suffixes=('_c','_p'))  # # Merge current and prior pmpm df
pmpm_agg_group = pmpm_agg_group.iloc[:, [0, 1, 2, 5, 8]]  # Reordering columns
pmpm_agg_group.fillna(0, inplace=True)
# # pmpm_agg_class_only = pmpm_agg_class.filter(['tenantid', 'class'], axis=1)  # Get list of class drivers for each tenantid
# # pmpm_agg_group = pd.merge(pmpm_agg_group, pmpm_agg_class_only, how='inner', on=['tenantid', 'class'])  # Filter for class driver only per tenantid
# pmpm_agg_group[["pmpm_c", "pmpm_p"]] = pmpm_agg_group[["pmpm_c", "pmpm_p"]].fillna(0)  # Force NaN to 0
pmpm_agg_group['per_change'] = pmpm_agg_group['pmpm_c'] / pmpm_agg_group['pmpm_p'] - 1  # Calculate the PMPM percent change
pmpm_agg_group = pmpm_agg_group.replace([np.inf], 1)  # Percent change from 0 to something forced to 1
pmpm_agg_group['num_change'] = (pmpm_agg_group['pmpm_c'] - pmpm_agg_group['pmpm_p']).astype(float)  # Calculate the PMPM change
pmpm_agg_group = pmpm_agg_tenant[['tenantid', 'direction']].merge(pmpm_agg_group, how='outer')  # Pull in overall PMPM trend for sorting
pmpm_agg_group['sort'] = pmpm_agg_group.sort_values(['tenantid', 'num_change'], ascending=False).groupby(['tenantid']).cumcount() + 1  # Partition and create row numbers for sorting based on tenant's overall pmpm trend
pmpm_agg_group = pmpm_agg_group.sort_values(['tenantid', 'sort'])  # Ensures proper sorting
pmpm_agg_group['sort2'] = np.where(pmpm_agg_group['direction'] == False, (-pmpm_agg_group['sort']), pmpm_agg_group['sort'])  # If direction = false, then make negative
pmpm_agg_group = pmpm_agg_group.sort_values(['tenantid', 'sort2'], ascending=True)  # Sort by ascending
pmpm_agg_group['drop'] = ((pmpm_agg_group['direction'] == False) & (pmpm_agg_group['num_change'] > 0)) | ((pmpm_agg_group['direction'] == True) & (pmpm_agg_group['num_change'] < 0))  # Identify instances where change is neg. when overall change is positive (vice-versa)
pmpm_agg_group = pmpm_agg_group.loc[pmpm_agg_group['drop'] == False]
pmpm_agg_group = pmpm_agg_group.iloc[:, [0, 1, 2, 3, 4, 5, 6, 7]]  # Clean up

# Selecting top 3 drivers
pmpm_agg_group_temp = pmpm_agg_group.iloc[:, [0, 7]]  # Creates table to calculate total PMPM change by group
pmpm_agg_group_temp = pd.DataFrame(pmpm_agg_group_temp.groupby(['tenantid'])['num_change'].sum())  # Calculates total PMPM change by group
pmpm_agg_group_temp.rename(columns={"num_change": "total_change"}, inplace=True)
pmpm_agg_group = pd.merge(pmpm_agg_group, pmpm_agg_group_temp, how='left', on=['tenantid'])  # Join into main table to calculate percent change of total change per tenantid
pmpm_agg_group['per_change_of_total'] = pmpm_agg_group['num_change'] / pmpm_agg_group['total_change']  # Calculate the percent change of total change per tenantid
pmpm_agg_group['per_change_of_total_rsum'] = pmpm_agg_group.groupby(['tenantid', 'class'])['per_change_of_total'].cumsum()  # Calculate the running total of the total change
# pmpm_agg_group['group'] = pmpm_agg_group['group'].str.capitalize()
pmpm_agg_group['group'] = pmpm_agg_group['group'].replace(['metabolic', 'only', 'complex', 'mental', 'health', 'cancer', 'other', 'single', 'acute_nonER', 'acute_ER', 'heart', 'disease', 'diabetes', 'lung'], 
                                                          ['Metabolic', 'Only', 'Complex', 'Mental', 'Health', 'Cancer', 'Other', 'Single', 'Acute Non-ER', 'Acute ER', 'Heart', 'Disease', 'diabetes', 'lung'], regex=True)
pmpm_agg_group['group_change'] = pmpm_agg_group['group'] + " (" + np.where(pmpm_agg_group.direction == True, '▲', '▼') + " " + round((pmpm_agg_group['per_change']*100)).astype(str).str[:-2] + "%)"

pmpm_agg_group['year'] = pd.to_datetime(curr_month).strftime('%Y')
pmpm_agg_group['start_date'] = pd.to_datetime(start_date).strftime('%Y-%m-%d')
pmpm_agg_group['stop_date'] = pd.to_datetime(stop_date).strftime('%Y-%m-%d')

pmpm_agg_group.to_csv(f'Outputs/Level_3_{curr_month.replace("-", "_")}.csv', index=False)

### Level 4

In [393]:
# LEVEL 4
pmpm_agg_service_type_c = pd.DataFrame(pmpm_agg_c.groupby(['tenantid', 'class', 'group', 'service_type'])['paid'].sum()).reset_index()  # Current PMPM by tenant/class
pmpm_agg_service_type_c = pd.merge(pmpm_agg_service_type_c, pmpm_class_members_c, how='left', on=['tenantid'])  # Merge current and prior pmpm df
pmpm_agg_service_type_c['pmpm'] = pmpm_agg_service_type_c['paid'] / pmpm_agg_service_type_c['mm_months']

pmpm_agg_service_type_p = pd.DataFrame(pmpm_agg_p.groupby(['tenantid', 'class', 'group', 'service_type'])['paid'].sum()).reset_index()  # Prior PMPM by tenant/class
pmpm_agg_service_type_p = pd.merge(pmpm_agg_service_type_p, pmpm_class_members_p, how='left', on=['tenantid'])  # Merge current and prior pmpm df
pmpm_agg_service_type_p['pmpm'] = pmpm_agg_service_type_p['paid'] / pmpm_agg_service_type_p['mm_months']

pmpm_agg_service_type = pd.merge(pmpm_agg_service_type_c, pmpm_agg_service_type_p, how='outer', on=['tenantid', 'class', 'group', 'service_type'], suffixes=('_c', '_p'))  # Merge current and prior pmpm df
pmpm_agg_service_type.fillna(0, inplace=True)

# Create % Change Field

pmpm_agg_service_type['pmpm_change'] = pmpm_agg_service_type['pmpm_c'] / pmpm_agg_service_type['pmpm_p'] -1
pmpm_agg_service_type.replace([np.inf, -np.inf], 1, inplace=True)
# pmpm_agg_class_only = pmpm_agg_group.filter(['tenantid', 'direction', 'class', 'group'], axis=1)  # Get list of class-group drivers for each tenantid

pmpm_agg_service_type = pd.merge(pmpm_agg_service_type, pmpm_agg_class[['tenantid', 'class_1', 'direction_1']], how='inner', left_on=['tenantid', 'class'], right_on=['tenantid', 'class_1'])  # Filter for class driver only per tenantid

pmpm_agg_group_class_only = pmpm_agg_group.filter(['tenantid', 'direction', 'class', 'group'], axis=1)  # Get list of class-group drivers for each tenantid
pmpm_agg_group_class_only['group'] = pmpm_agg_group_class_only['group'].str.lower()
pmpm_agg_service_type['group'] = pmpm_agg_service_type['group'].str.lower()
pmpm_agg_service_type = pd.merge(pmpm_agg_service_type, pmpm_agg_group_class_only, how='inner', on=['tenantid', 'class', 'group'])  # Filter for class driver only per tenantid

# Create % of Total Change Field
pmpm_agg_service_type_total = pmpm_agg_service_type
pmpm_agg_service_type_total['pmpm_change_total'] = pmpm_agg_service_type_total['pmpm_c'] - pmpm_agg_service_type_total['pmpm_p']
pmpm_agg_service_type_total = pd.DataFrame(pmpm_agg_service_type_total.groupby(['tenantid'])['pmpm_change_total'].sum()).reset_index()  # Get total change per group for the denominator of the pmpm_change_total_per calc

# Filter only for tenants that qualify to get a Level 4 statement

pmpm_agg_group_class_count = pd.DataFrame(pmpm_agg_group.groupby(['tenantid']).size()).reset_index()  # Count the # of class drivers per tenant
pmpm_agg_group_class_count = pmpm_agg_group_class_count.loc[pmpm_agg_group_class_count[0] == 1]  # Filter for groups with a single class driver only

pmpm_agg_service_type = pd.merge(pmpm_agg_service_type, pmpm_agg_group_class_count, how='left', on=['tenantid'])
pmpm_agg_service_type = pd.merge(pmpm_agg_service_type, pmpm_agg_service_type_total, how='inner', on=['tenantid'])
pmpm_agg_service_type['pmpm_change_total_per'] = pmpm_agg_service_type['pmpm_change_total_x'] / pmpm_agg_service_type['pmpm_change_total_y']
pmpm_agg_service_type['pmpm_change_total_per'] = np.where(pmpm_agg_service_type['direction_1'] == False, (-pmpm_agg_service_type['pmpm_change_total_per']), pmpm_agg_service_type['pmpm_change_total_per'])  # If direction = false, then make negative

pmpm_agg_service_type['sort'] = pmpm_agg_service_type.sort_values(['tenantid', 'pmpm_change_total_per'], ascending=False).groupby(['tenantid']).cumcount() + 1
pmpm_agg_service_type = pmpm_agg_service_type.sort_values(['tenantid', 'sort'])  # Ensures proper sorting
pmpm_agg_service_type['sort2'] = np.where(pmpm_agg_service_type['direction'] == False, (-pmpm_agg_service_type['sort']), pmpm_agg_service_type['sort']) # If direction = false, then make negative
pmpm_agg_service_type = pmpm_agg_service_type.sort_values(['tenantid', 'sort2'], ascending=True)  # Sort by ascending
pmpm_agg_service_type = pmpm_agg_service_type.groupby('tenantid').head(1)  # Filter for first record of each partition

# LEVEL IV STATEMENT
pmpm_agg_service_type['pmpm_change'] = pmpm_agg_service_type['pmpm_change'].replace(np.nan, 1)  # Ensures all 0 to something changes are flagged as 100% change
pmpm_agg_service_type['pmpm_change_str'] = round((pmpm_agg_service_type['pmpm_change']*100), 0).astype(str).str[:-2] + "%"
pmpm_agg_service_type['l4_statement'] = " related " + pmpm_agg_service_type['service_type'] + " (" + np.where(pmpm_agg_service_type.direction == True, '▲', '▼') + " " + pmpm_agg_service_type['pmpm_change_str'] + ") spend per member"
pmpm_agg_service_type_skinny = pmpm_agg_service_type.iloc[:, [0, 21]]

pmpm_agg_service_type_skinny['year'] = pd.to_datetime(curr_month).strftime('%Y')
pmpm_agg_service_type_skinny['start_date'] = pd.to_datetime(start_date).strftime('%Y-%m-%d')
pmpm_agg_service_type_skinny['stop_date'] = pd.to_datetime(stop_date).strftime('%Y-%m-%d')

pmpm_agg_service_type_skinny.to_csv(f'Outputs/Level_4_{curr_month.replace("-", "_")}.csv', index=False)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  pmpm_agg_service_type_skinny['year'] = pd.to_datetime(curr_month).strftime('%Y')
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  pmpm_agg_service_type_skinny['start_date'] = pd.to_datetime(start_date).strftime('%Y-%m-%d')
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  pmpm_agg_service_type_skinny['s

### Statements

In [394]:
def format_number(num):
    num_in_thousands = num / 1000
    formatted_num = f"{num_in_thousands:,.0f}K"
    return f"${formatted_num}"

In [395]:
statements_tenant = pmpm_agg_tenant.copy()
potential_loss = pd.DataFrame(final.groupby(['tenantid'])['potential_loss'].sum()).reset_index()
statements_tenant = statements_tenant[['tenantid', 'paid_c', 'direction_pmpm_bob', 'l1_statement_pmpm_1', 'l1_statement_pmpm_2', 'l1_statement_pepm_1', 'l1_statement_pepm_2', 'l1_statement_TCR_1', 'l1_statement_TCR_2']]
statements_tenant = pd.merge(statements_tenant, potential_loss, how='left', on=['tenantid'])
statements_tenant['potential_loss_formatted'] = statements_tenant['potential_loss'].apply(format_number)
statements_tenant['paid_formatted'] = statements_tenant['paid_c'].apply(format_number)
statements_tenant['l1_statement_3'] = ("During this period, " + statements_tenant['tenantid'] + " experienced " + statements_tenant['paid_formatted'].astype(str) + " in Medical and Rx claims.")
statements_tenant['l1_statement_4'] = ("This amounts to " + statements_tenant['direction_pmpm_bob'].astype(str).map(direction_dict_4) +" claims risk " + statements_tenant['direction_pmpm_bob'].astype(str).map(direction_dict_2) + " expected trend by roughly " + statements_tenant['potential_loss_formatted'] + ".")
CatClaimants = pmpm_agg_c.copy()
CatClaimants = CatClaimants[CatClaimants['class'] == 'Catastrophic']
CatClaimants = pd.DataFrame(CatClaimants.groupby(['tenantid'])['claimants'].first()).reset_index()
statements_tenant = pd.merge(statements_tenant, CatClaimants, how='left', on=['tenantid'])
statements_tenant.fillna(0, inplace=True)
statements_tenant.rename(columns={'claimants': 'CatClaimants'}, inplace=True)
statements_tenant['year'] = pd.to_datetime(curr_month).strftime('%Y')
statements_tenant['start_date'] = pd.to_datetime(start_date).strftime('%Y-%m-%d')
statements_tenant['stop_date'] = pd.to_datetime(stop_date).strftime('%Y-%m-%d')


# statements_class = pmpm_agg_class_skinny.copy()
# statements_class = statements_class[['tenantid', 'l2_statement']]
# statements_class.drop_duplicates(inplace=True)

# statements_group = pmpm_agg_group.copy()
# statements_group = statements_group[['tenantid', 'l2_statement']]

# statements_service = pmpm_agg_service_type.copy()
    # = pd.merge(pmpm_agg_group, pmpm_agg_tenant, how='inner', on=['tenantid'])
# statements = pd.merge(statements, pmpm_agg_service, how='outer', on=['tenantid'])
# statements['l4_statement'] = statements.l4_statement.fillna('')
# 
# statements = pd.merge(statements, TenantTotals, how='inner', on=['tenantid'])
# statements = statements.reset_index()
# 
# statements = pd.merge(statements, exchange, how='left', on=['tenantid'])
# statements = pd.merge(statements, carrier, how='left', on=['tenantid'])
# statements = statements.rename(columns={'tenantid': 'Account',
#                                         'current_member_count': 'Members'})
# statements['potential_loss'] = round(statements['potential_loss'],-4)
statements_tenant.to_csv(f'Outputs/statements_tenant_{curr_month.replace("-", "_")}.csv', index=False)
statements_tenant

Unnamed: 0,tenantid,paid_c,direction_pmpm_bob,l1_statement_pmpm_1,l1_statement_pmpm_2,l1_statement_pepm_1,l1_statement_pepm_2,l1_statement_TCR_1,l1_statement_TCR_2,potential_loss,potential_loss_formatted,paid_formatted,l1_statement_3,l1_statement_4,CatClaimants,year,start_date,stop_date
0,100F-New York City Industrial T,50918.66,False,"On a paid date basis, 100F-New York City Indus...","Compared to benchmarks, PMPMs are -46% lower t...","On a paid date basis, 100F-New York City Indus...","Compared to benchmarks, PEPMs are -35% lower t...","On a paid date basis, 100F-New York City Indus...","Compared to benchmarks, TCR is -46% lower than...",1336.22,$1K,$51K,"During this period, 100F-New York City Industr...",This amounts to a reduced claims risk below ex...,0.0,2024,2023-04-01,2024-03-31
1,100G-Iron Park Capital Partners,322751.36,True,"On a paid date basis, 100G-Iron Park Capital P...","Compared to benchmarks, PMPMs are 291% higher ...","On a paid date basis, 100G-Iron Park Capital P...","Compared to benchmarks, PEPMs are 508% higher ...","On a paid date basis, 100G-Iron Park Capital P...","Compared to benchmarks, TCR is 291% higher tha...",-109203.20,$-109K,$323K,"During this period, 100G-Iron Park Capital Par...",This amounts to an excess claims risk above ex...,0.0,2024,2023-04-01,2024-03-31
2,100J-Tamares Management LLC,89116.89,False,"On a paid date basis, 100J-Tamares Management ...","Compared to benchmarks, PMPMs are -37% lower t...","On a paid date basis, 100J-Tamares Management ...","Compared to benchmarks, PEPMs are -16% lower t...","On a paid date basis, 100J-Tamares Management ...","Compared to benchmarks, TCR is -37% lower than...",3826.50,$4K,$89K,"During this period, 100J-Tamares Management LL...",This amounts to a reduced claims risk below ex...,0.0,2024,2023-04-01,2024-03-31
3,100M-Wellness Insight Technologies,705.30,False,"On a paid date basis, 100M-Wellness Insight Te...","Compared to benchmarks, PMPMs are -85% lower t...","On a paid date basis, 100M-Wellness Insight Te...","Compared to benchmarks, PEPMs are -88% lower t...","On a paid date basis, 100M-Wellness Insight Te...","Compared to benchmarks, TCR is -85% lower than...",-77.63,$-0K,$1K,"During this period, 100M-Wellness Insight Tech...",This amounts to a reduced claims risk below ex...,0.0,2024,2023-04-01,2024-03-31
4,"100P-Walking Fish Therapeutics, I",707329.40,True,"On a paid date basis, 100P-Walking Fish Therap...","Compared to benchmarks, PMPMs are 54% higher t...","On a paid date basis, 100P-Walking Fish Therap...","Compared to benchmarks, PEPMs are 55% higher t...","On a paid date basis, 100P-Walking Fish Therap...","Compared to benchmarks, TCR is 54% lower than ...",229213.53,$229K,$707K,"During this period, 100P-Walking Fish Therapeu...",This amounts to an excess claims risk above ex...,1.0,2024,2023-04-01,2024-03-31
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
9236,ZZJ-Atlantic Pictures LLC,86514.87,False,"On a paid date basis, ZZJ-Atlantic Pictures LL...","Compared to benchmarks, PMPMs are -54% lower t...","On a paid date basis, ZZJ-Atlantic Pictures LL...","Compared to benchmarks, PEPMs are -61% lower t...","On a paid date basis, ZZJ-Atlantic Pictures LL...","Compared to benchmarks, TCR is -54% lower than...",3295.91,$3K,$87K,"During this period, ZZJ-Atlantic Pictures LLC ...",This amounts to a reduced claims risk below ex...,0.0,2024,2023-04-01,2024-03-31
9237,ZZK-Altman Inc,121860.44,True,"On a paid date basis, ZZK-Altman Inc's paid PM...","Compared to benchmarks, PMPMs are 228% higher ...","On a paid date basis, ZZK-Altman Inc's paid PE...","Compared to benchmarks, PEPMs are 717% higher ...","On a paid date basis, ZZK-Altman Inc's TCR inc...","Compared to benchmarks, TCR is 228% higher tha...",62714.27,$63K,$122K,"During this period, ZZK-Altman Inc experienced...",This amounts to an excess claims risk above ex...,1.0,2024,2023-04-01,2024-03-31
9238,ZZM-Contextant LLC June 19,24101.28,False,"On a paid date basis, ZZM-Contextant LLC Jun...","Compared to benchmarks, PMPMs are -73% lower t...","On a paid date basis, ZZM-Contextant LLC Jun...","Compared to benchmarks, PEPMs are -49% lower t...","On a paid date basis, ZZM-Contextant LLC Jun...","Compared to benchmarks, TCR is -73% lower than...",684.73,$1K,$24K,"During this period, ZZM-Contextant LLC June ...",This amounts to a reduced claims risk below ex...,0.0,2024,2023-04-01,2024-03-31
9239,"ZZN-Vibliome Operations, LLC",58204.51,False,"On a paid date basis, ZZN-Vibliome Operations,...","Compared to benchmarks, PMPMs are -49% lower t...","On a paid date basis, ZZN-Vibliome Operations,...","Compared to benchmarks, PEPMs are -44% lower t...","On a paid date basis, ZZN-Vibliome Operations,...","Compared to benchmarks, TCR is -49% lower than...",1725.47,$2K,$58K,"During this period, ZZN-Vibliome Operations, L...",This amounts to a reduced claims risk below ex...,0.0,2024,2023-04-01,2024-03-31
