In [2]:
import pandas as pd
import numpy as np

### Set Period

In [3]:
curr_month = (pd.to_datetime(('2023-11-01'))).strftime('%Y-%m-%d')
prior_month = (pd.to_datetime(curr_month) - pd.DateOffset(months=12)).strftime('%Y-%m-%d')
stop_date = (pd.to_datetime(curr_month) + pd.DateOffset(months=1) - pd.DateOffset(days=1)).strftime('%Y-%m-%d')
start_date = (pd.to_datetime(stop_date) - pd.DateOffset(months=12) + pd.DateOffset(days=1) ).strftime('%Y-%m-%d')
start_date_prior = (pd.to_datetime(start_date) - pd.DateOffset(months=12)).strftime('%Y-%m-%d')
stop_date_prior = (pd.to_datetime(stop_date) - pd.DateOffset(months=12)).strftime('%Y-%m-%d')

In [4]:
claims_current = pd.read_csv(f'Claims Data/claims_{curr_month.replace("-", "_")}.csv')
claims_prior = pd.read_csv(f'Claims Data/claims_{prior_month.replace("-", "_")}.csv')
elig_current = pd.read_csv(f'Elig Data/elig_{curr_month.replace("-", "_")}.csv')
elig_prior = pd.read_csv(f'Elig Data/elig_{prior_month.replace("-", "_")}.csv')
monthsdata = pd.read_csv(f'Elig Data/MonthsData_{curr_month.replace("-", "_")}.csv')

df_current = claims_current.copy()
df_prior = claims_prior.copy()
mm_current = elig_current.copy()
mm_prior = elig_prior.copy()
monthsdata = monthsdata.copy()

In [5]:
# Categorize Others
# Categorizes specific groups into an "Others" category
Others = ['Null',
          'Symptoms, signs and abnormal clinical and laboratory findings, not elsewhere classified',
          'Factors influencing health status and contact with health services',
          'Diseases of the skin and subcutaneous tissue',
          'External causes of morbidity',
          'Diseases of the ear and mastoid process',
          'Diseases of the eye and adnexa']
df_current.loc[df_current['group'].isin(Others), 'group'] = 'Others'
df_prior.loc[df_prior['group'].isin(Others), 'group'] = 'Others'

In [6]:
# Data Preprocessing
df_mm = pd.concat([mm_prior, mm_current])
df_events = pd.concat([df_prior, df_current])

df_mm['year'] = df_mm['year'].astype(str)
df_events['year'] = df_events['year'].astype(str)
df_mm = df_mm.rename(columns={'total_member_count': 'total_member_count'})

# Indexing
df_events['index'] = df_events['year'] + "-" + df_events['class']
df_events['index0'] = df_events['class'] + "-" + df_events['group']
df_events['index1'] = df_events['class'] + "-" + df_events['group'] + "-" + df_events['service_type']
df_mm['index'] = df_mm['year'] + "-" + df_mm['tenantid']

monthsdata.rename(columns={'division': 'tenantid'}, inplace=True)


In [7]:
# CREATION OF COMPARISON TABLES
df_current.rename(columns={'tag_tpa': 'carrier', 'tag_exchange':'exchange'}, inplace=True)
mm_current['carrier'] = mm_current['carrier'].str.lower()
monthsdata['carrier'] = monthsdata['carrier'].str.lower()
pmpm_agg_c = pd.merge(df_current, mm_current, how='inner', on=['tenantid', 'carrier', 'exchange', 'year', 'start_date', 'stop_date'])
pmpm_agg_c = pd.merge(pmpm_agg_c, monthsdata, how='inner', on=['tenantid','carrier', 'exchange'])
pmpm_agg_c = pmpm_agg_c[pmpm_agg_c['MaxMonth'] >= pmpm_agg_c['curr_month']]
pmpm_agg_c = pmpm_agg_c[pmpm_agg_c['months'] >= 30]
# pmpm_agg_c = pmpm_agg_c[pmpm_agg_c['stop_member_count'] >= 10]

df_current_final = pmpm_agg_c

df_prior.rename(columns={'tag_tpa': 'carrier', 'tag_exchange':'exchange'}, inplace=True)
mm_prior['carrier'] = mm_prior['carrier'].str.lower()
monthsdata['carrier'] = monthsdata['carrier'].str.lower()
tenantlist = pmpm_agg_c['tenantid'].unique()
pmpm_agg_p = tenantlist.tolist()
pmpm_agg_p = pd.DataFrame(pmpm_agg_p, columns=['tenantid'])
pmpm_agg_p = pd.merge(pmpm_agg_p, df_prior, how='inner', on=['tenantid'])
pmpm_agg_p = pd.merge(pmpm_agg_p, mm_prior, how='inner', on=['tenantid', 'carrier', 'exchange', 'year', 'start_date', 'stop_date'])
pmpm_agg_p = pd.merge(pmpm_agg_p, monthsdata, how='inner', on=['tenantid','carrier', 'exchange'])


df_prior_final = pmpm_agg_p
pmpm_agg_c

Unnamed: 0,tenantid,carrier,exchange,class,group,service_type,counts,allowed,paid,claimants,...,stop_employee_count,mm_months,ee_months,total_rate,paying_rate,admin_expense,pooling_expense,other_expense,months,MaxMonth
271,100F-New York City Industrial T,aetna,TriNet III,Acute,acute_NonER,Office Visit,43,1.079770e+04,9.020870e+03,6,...,5,178,71,235115.6,234794.00,10571.45,3447.15,858.25,37,2023-11-01
272,100F-New York City Industrial T,aetna,TriNet III,Acute,acute_NonER,Outpatient,21,1.829930e+03,1.390160e+03,6,...,5,178,71,235115.6,234794.00,10571.45,3447.15,858.25,37,2023-11-01
273,100F-New York City Industrial T,aetna,TriNet III,Acute,acute_NonER,Rx,33,1.209990e+03,8.946400e+02,6,...,5,178,71,235115.6,234794.00,10571.45,3447.15,858.25,37,2023-11-01
274,100F-New York City Industrial T,aetna,TriNet III,Chronic Condition,CKD complex,Office Visit,20,3.114200e+03,1.653200e+03,1,...,5,178,71,235115.6,234794.00,10571.45,3447.15,858.25,37,2023-11-01
275,100F-New York City Industrial T,aetna,TriNet III,Chronic Condition,CKD complex,Outpatient,9,3.430510e+03,3.323980e+03,1,...,5,178,71,235115.6,234794.00,10571.45,3447.15,858.25,37,2023-11-01
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
377674,"ZZY-Monzo, Inc",kaiser,TriNet III,Acute,acute_NonER,Outpatient,3,-7.105427e-15,-7.105427e-15,1,...,2,25,25,9977.0,9573.33,0.00,0.00,0.00,37,2023-11-01
377675,"ZZY-Monzo, Inc",kaiser,TriNet III,Chronic Condition,diabetes only,ER,1,3.217680e+03,3.117680e+03,1,...,2,25,25,9977.0,9573.33,0.00,0.00,0.00,37,2023-11-01
377676,"ZZY-Monzo, Inc",kaiser,TriNet III,Chronic Condition,diabetes only,Office Visit,10,1.187810e+03,1.097810e+03,1,...,2,25,25,9977.0,9573.33,0.00,0.00,0.00,37,2023-11-01
377677,"ZZY-Monzo, Inc",kaiser,TriNet III,Chronic Condition,diabetes only,Outpatient,7,5.853800e+02,5.627100e+02,1,...,2,25,25,9977.0,9573.33,0.00,0.00,0.00,37,2023-11-01


In [8]:
# # Current BoB Build

bob_current = pd.DataFrame(pmpm_agg_c.groupby(['tenantid','carrier', 'exchange']).agg({'mm_months': 'max', 'ee_months': 'max', 'allowed':'sum', 'paid': 'sum', 'claimants': 'sum', 'admin_expense':'max', 'pooling_expense': 'max', 'other_expense': 'max', 'total_rate': 'max'})).reset_index()
bob_current['admin'] = bob_current['admin_expense'] + bob_current['pooling_expense'] + bob_current['other_expense']
bob_current.drop(['admin_expense', 'pooling_expense', 'other_expense'], axis=1, inplace=True)

bob_prior = pd.DataFrame(pmpm_agg_p.groupby(['tenantid','carrier', 'exchange']).agg({'mm_months': 'max', 'ee_months': 'max', 'allowed':'sum', 'paid': 'sum', 'claimants':'sum', 'admin_expense':'max', 'pooling_expense': 'max', 'other_expense': 'max', 'total_rate': 'max'})).reset_index()
bob_prior['admin'] = bob_prior['admin_expense'] + bob_prior['pooling_expense'] + bob_prior['other_expense']
bob_prior.drop(['admin_expense', 'pooling_expense', 'other_expense'], axis=1, inplace=True)

pmpm_paid_bob = round((bob_current.paid.sum() ) / bob_current.mm_months.sum(),2) # Calculate BoB PMPM
pmpm_admin_bob = round((bob_current.admin.sum()) / bob_current.mm_months.sum(),2) # Calculate BoB PMPM
pmpm_bob = pmpm_paid_bob + pmpm_admin_bob
pepm_paid_bob = round((bob_current.paid.sum()) / bob_current.ee_months.sum(),2) # Calculate BoB PMPM
pepm_admin_bob = round((bob_current.admin.sum()) / bob_current.ee_months.sum(),2) # Calculate BoB PMPM
pepm_bob = pepm_paid_bob + pepm_admin_bob
TCR_paid_bob = round((bob_current.paid.sum()) / bob_current.total_rate.sum(), 2) # Calculate BoB PMPM
TCR_admin_bob = round((bob_current.admin.sum()) / bob_current.total_rate.sum(), 2) # Calculate BoB PMPM
TCR_bob = round((bob_current.paid.sum() + bob_current.admin.sum()) / bob_current.total_rate.sum(), 2) # Calculate BoB PMPM

# Prior BoB Build
p_pmpm_paid_bob = round((bob_prior.paid.sum() ) / bob_prior.mm_months.sum(),2) # Calculate BoB PMPM
p_pmpm_admin_bob = round((bob_prior.admin.sum()) / bob_prior.mm_months.sum(),2) # Calculate BoB PMPM
p_pmpm_bob = p_pmpm_paid_bob + p_pmpm_admin_bob
p_pepm_paid_bob = round((bob_prior.paid.sum()) / bob_prior.ee_months.sum(),2) # Calculate BoB PMPM
p_pepm_admin_bob = round((bob_prior.admin.sum()) / bob_prior.ee_months.sum(),2) # Calculate BoB PMPM
p_pepm_bob = p_pepm_paid_bob + p_pepm_admin_bob
p_TCR_paid_bob = round((bob_prior.paid.sum()) / bob_prior.total_rate.sum(), 2) # Calculate BoB PMPM
p_TCR_admin_bob = round((bob_prior.admin.sum()) / bob_prior.total_rate.sum(), 2) # Calculate BoB PMPM
p_TCR_bob = round((bob_prior.paid.sum() + bob_prior.admin.sum()) / bob_prior.total_rate.sum(), 2) # Calculate BoB PMPM

summary = {'pmpm_allowed_bob': [pmpm_bob],
            'pmpm_allowed_bob_p': [p_pmpm_bob],
            'pmpm_paid_bob': [pmpm_paid_bob],
            'pmpm_paid_bob_p': [p_pmpm_paid_bob],
            'pepm_allowed_bob': [pepm_bob],
            'pepm_allowed_bob_p': [p_pepm_bob],
            'pepm_paid_bob': [pepm_paid_bob],
            'pepm_paid_bob_p': [p_pepm_paid_bob],
            'TCR_allowed_bob': [TCR_bob],
            'TCR_allowed_bob_p': [p_TCR_bob],
            'TCR_paid_bob': [TCR_paid_bob],
            'TCR_paid_bob_p': [p_TCR_paid_bob]}
summary = pd.DataFrame(summary)

print(summary)

# filename = f'Outputs/BookOfBusiness_{curr_month}.csv'

# summary.to_csv(filename, index=False)

# print(f"Output saved to {filename}.")


   pmpm_allowed_bob  pmpm_allowed_bob_p  pmpm_paid_bob  pmpm_paid_bob_p  \
0            664.15              598.64         567.09            504.3   

   pepm_allowed_bob  pepm_allowed_bob_p  pepm_paid_bob  pepm_paid_bob_p  \
0           1335.62             1198.72        1140.43          1009.81   

   TCR_allowed_bob  TCR_allowed_bob_p  TCR_paid_bob  TCR_paid_bob_p  
0             0.94                0.9           0.8            0.76  


### Statements

In [9]:
dmca_claims_elig_c = pmpm_agg_c.copy()

non_recurring = ['Pregnancy, childbirth and the puerperium', 'Certain conditions originating in the perinatal period', 'Certain infectious and parasitic diseases', 'Injury, poisoning and certain other consequences of external causes']
non_recurring_c = dmca_claims_elig_c[dmca_claims_elig_c['group'].isin(non_recurring)]
non_recurring_c = non_recurring_c.groupby(['tenantid', 'carrier', 'exchange']).agg({'allowed': 'sum','paid': 'sum'}).reset_index()
non_recurring_c.rename(columns={'allowed': 'non_recurring_allowed', 'paid': 'non_recurring_paid'}, inplace=True)
transplants_c = pd.read_csv(f'Claims Data/Transplants_{curr_month.replace("-", "_")}.csv')
transplants_c = transplants_c.groupby(['tenantid', 'carrier', 'exchange']).agg({'amtallowed': 'sum', 'amtpaid':'sum'}).reset_index()
transplants_c.rename(columns={'amtallowed': 'non_recurring_allowed', 'amtpaid': 'non_recurring_paid'}, inplace=True)
non_recurring_c = pd.concat([non_recurring_c, transplants_c])
non_recurring_c = non_recurring_c.groupby(['tenantid', 'carrier', 'exchange']).agg({'non_recurring_allowed': 'sum', 'non_recurring_paid': 'sum'}).reset_index()

dmca_claims_elig_c = dmca_claims_elig_c.groupby(['tenantid', 'carrier', 'exchange','year', 'start_date', 'stop_date']).agg({
    'allowed': 'sum', 
    'paid': 'sum', 
    'start_member_count':'first', 
    'stop_member_count':'first', 
    'start_employee_count': 'first', 
    'stop_employee_count': 'first', 
    'mm_months':'first', 
    'ee_months': 'first', 
    'total_rate': 'first', 
    'admin_expense': 'first',
    'pooling_expense': 'first',
    'other_expense': 'first',
}).reset_index()

dmca_claims_elig_c = pd.merge(dmca_claims_elig_c, non_recurring_c, how='left', on=['tenantid', 'carrier', 'exchange'])
dmca_claims_elig_c['non_recurring_allowed'].fillna(0, inplace=True)
dmca_claims_elig_c['non_recurring_paid'].fillna(0, inplace=True)
dmca_claims_elig_c['allowed_adj'] = (dmca_claims_elig_c['allowed'] - dmca_claims_elig_c['non_recurring_allowed']).clip(lower=0)
dmca_claims_elig_c['paid_adj'] = (dmca_claims_elig_c['paid'] - dmca_claims_elig_c['non_recurring_paid']).clip(lower=0)

dmca_claims_elig_c['admin'] = dmca_claims_elig_c['admin_expense'] + dmca_claims_elig_c['pooling_expense'] + dmca_claims_elig_c['other_expense']
dmca_claims_elig_c.drop(['admin_expense', 'pooling_expense', 'other_expense'], axis=1, inplace=True)
dmca_claims_elig_c['avg_mbrs'] = dmca_claims_elig_c['mm_months'] / 12
dmca_claims_elig_c['turnover'] = 1 - abs((dmca_claims_elig_c['stop_member_count'] - dmca_claims_elig_c['start_member_count']) / dmca_claims_elig_c['start_member_count']).clip(upper=1)
dmca_claims_elig_c['credibility'] = (0.25*dmca_claims_elig_c['turnover'] + (dmca_claims_elig_c['avg_mbrs'] - dmca_claims_elig_c['turnover'])*0.01)/(1+(dmca_claims_elig_c['avg_mbrs'] - 1)*0.01)

dmca_claims_elig_p = pmpm_agg_p.copy()

non_recurring_p = dmca_claims_elig_p[dmca_claims_elig_p['group'].isin(non_recurring)]
non_recurring_p = non_recurring_p.groupby(['tenantid', 'carrier', 'exchange']).agg({'allowed': 'sum','paid': 'sum'}).reset_index()
non_recurring_p.rename(columns={'allowed': 'non_recurring_allowed', 'paid': 'non_recurring_paid'}, inplace=True)
transplants_p = pd.read_csv(f'Claims Data/Transplants_{prior_month.replace("-", "_")}.csv')
transplants_p = transplants_p.groupby(['tenantid', 'carrier', 'exchange']).agg({'amtallowed': 'sum', 'amtpaid':'sum'}).reset_index()
transplants_p.rename(columns={'amtallowed': 'non_recurring_allowed', 'amtpaid': 'non_recurring_paid'}, inplace=True)
non_recurring_p = pd.concat([non_recurring_p, transplants_p])
non_recurring_p = non_recurring_p.groupby(['tenantid', 'carrier', 'exchange']).agg({'non_recurring_allowed': 'sum', 'non_recurring_paid': 'sum'}).reset_index()

dmca_claims_elig_p = dmca_claims_elig_p.groupby(['tenantid', 'carrier', 'exchange','year', 'start_date', 'stop_date']).agg({
    'allowed': 'sum', 
    'paid': 'sum', 
    'start_member_count':'first', 
    'stop_member_count':'first', 
    'start_employee_count': 'first', 
    'stop_employee_count': 'first', 
    'mm_months':'first', 
    'ee_months': 'first', 
    'total_rate': 'first', 
    'admin_expense': 'first',
    'pooling_expense': 'first',
    'other_expense': 'first',
}).reset_index()

dmca_claims_elig_p = pd.merge(dmca_claims_elig_p, non_recurring_p, how='left', on=['tenantid', 'carrier', 'exchange'])
dmca_claims_elig_p['non_recurring_allowed'].fillna(0, inplace=True)
dmca_claims_elig_p['non_recurring_paid'].fillna(0, inplace=True)
dmca_claims_elig_p['allowed_adj'] = (dmca_claims_elig_p['allowed'] - dmca_claims_elig_p['non_recurring_allowed']).clip(lower=0)
dmca_claims_elig_p['paid_adj'] = (dmca_claims_elig_p['paid'] - dmca_claims_elig_p['non_recurring_paid']).clip(lower=0)

dmca_claims_elig_p['admin'] = dmca_claims_elig_p['admin_expense'] + dmca_claims_elig_p['pooling_expense'] + dmca_claims_elig_p['other_expense']
dmca_claims_elig_p.drop(['admin_expense', 'pooling_expense', 'other_expense'], axis=1, inplace=True)
dmca_claims_elig_p['avg_mbrs'] = dmca_claims_elig_p['mm_months'] / 12
dmca_claims_elig_p['turnover'] = 1 - abs((dmca_claims_elig_p['stop_member_count'] - dmca_claims_elig_p['start_member_count']) / dmca_claims_elig_p['start_member_count']).clip(upper=1)
dmca_claims_elig_p['credibility'] = (0.25*dmca_claims_elig_p['turnover'] + (dmca_claims_elig_p['avg_mbrs'] - dmca_claims_elig_p['turnover'])*0.01)/(1+(dmca_claims_elig_p['avg_mbrs'] - 1)*0.01)

dmca_claims_elig = pd.merge(dmca_claims_elig_c, dmca_claims_elig_p, how='outer', on=['tenantid', 'carrier', 'exchange'], suffixes=('_c', '_p'))
dmca_claims_elig['year_c'] = pd.to_datetime(curr_month).strftime('%Y')
dmca_claims_elig['year_p'] = pd.to_datetime(prior_month).strftime('%Y')
dmca_claims_elig['start_date_c'] = pd.to_datetime(start_date).strftime('%Y-%m-%d')
dmca_claims_elig['start_date_p'] = pd.to_datetime(start_date_prior).strftime('%Y-%m-%d')
dmca_claims_elig['stop_date_c'] = pd.to_datetime(stop_date).strftime('%Y-%m-%d')
dmca_claims_elig['stop_date_p'] = pd.to_datetime(stop_date_prior).strftime('%Y-%m-%d')
dmca_claims_elig.fillna(0, inplace=True)

dmca_claims_elig['pmpm_c'] = dmca_claims_elig['paid_adj_c'] / dmca_claims_elig['mm_months_c']
dmca_claims_elig['pmpm_p'] = dmca_claims_elig['paid_adj_p'] / dmca_claims_elig['mm_months_p']
dmca_claims_elig['bob_pmpm_c'] = pmpm_paid_bob
dmca_claims_elig['bob_pmpm_p'] = p_pmpm_paid_bob
dmca_claims_elig['bob_pmpm_trend'] = dmca_claims_elig['bob_pmpm_c'] / dmca_claims_elig['bob_pmpm_p'] - 1
dmca_claims_elig['current_cred_pmpm'] = (dmca_claims_elig['pmpm_c'] * dmca_claims_elig['credibility_c'] + dmca_claims_elig['bob_pmpm_c']* (1-dmca_claims_elig['credibility_c']))
dmca_claims_elig['prior_cred_pmpm'] = (dmca_claims_elig['pmpm_p'] * dmca_claims_elig['credibility_p'] + dmca_claims_elig['bob_pmpm_p']* (1-dmca_claims_elig['credibility_p']))
dmca_claims_elig['cred_trend'] = dmca_claims_elig['current_cred_pmpm']/dmca_claims_elig['prior_cred_pmpm'] - 1
dmca_claims_elig['potential_loss'] = round((dmca_claims_elig['paid_adj_c'].clip(lower=0) * (1-((dmca_claims_elig['bob_pmpm_trend'] + 1) / (dmca_claims_elig['cred_trend'] + 1)))),2)
dmca_claims_elig['potential_loss_pct'] = (dmca_claims_elig['potential_loss']/dmca_claims_elig['paid_adj_c'].clip(lower=0)).clip(lower=-0.5)
dmca_claims_elig['potential_loss_pct'].fillna(0, inplace=True)

#Risk Index
dmca_claims_elig['loss_pct_std_dev'] = np.std(dmca_claims_elig.potential_loss_pct.clip(lower=0))
dmca_claims_elig['pct_std_dev_ratio'] = dmca_claims_elig.potential_loss_pct/dmca_claims_elig.loss_pct_std_dev
dmca_claims_elig['loss_dol_std_dev'] = np.std(dmca_claims_elig.potential_loss.clip(lower=0))
dmca_claims_elig['dol_std_dev_ratio'] = dmca_claims_elig.potential_loss/dmca_claims_elig.loss_dol_std_dev
dmca_claims_elig['ratio_avg'] = (dmca_claims_elig.dol_std_dev_ratio + dmca_claims_elig.pct_std_dev_ratio) / 2
dmca_claims_elig['risk_index_setup'] = (2.5 + 1*dmca_claims_elig.ratio_avg).clip(upper=5)
dmca_claims_elig['risk_index'] = round(dmca_claims_elig.risk_index_setup.clip(lower=1), 1)
dmca_claims_elig.sort_values(by=['risk_index', 'potential_loss'], ascending=[False, False], inplace=True)
dmca_claims_elig['rank'] = dmca_claims_elig.reset_index().index + 1

dmca_claims_elig.drop(['pmpm_c', 'pmpm_p', 'credibility_c', 'credibility_p', 'avg_mbrs_c', 'avg_mbrs_p', 'turnover_c', 'turnover_p', 'loss_pct_std_dev', 'pct_std_dev_ratio', 'loss_dol_std_dev', 'dol_std_dev_ratio', 'ratio_avg', 'risk_index_setup', 'current_cred_pmpm', 'prior_cred_pmpm', 'cred_trend', 'potential_loss_pct'], axis=1, inplace=True)
# dmca_claims_elig = dmca_claims_elig[dmca_claims_elig['tenantid'] == 'U9J-SOPHiA GENETICS, Inc']
dmca_claims_elig

The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  dmca_claims_elig_c['non_recurring_allowed'].fillna(0, inplace=True)
The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  dmca_claims_elig_c['non_recurring_paid'].fillna(0, inplace=True)
The behavior will change in pandas 3.0. This inplace method will never work because the intermediat

Unnamed: 0,tenantid,carrier,exchange,year_c,start_date_c,stop_date_c,allowed_c,paid_c,start_member_count_c,stop_member_count_c,...,non_recurring_paid_p,allowed_adj_p,paid_adj_p,admin_p,bob_pmpm_c,bob_pmpm_p,bob_pmpm_trend,potential_loss,risk_index,rank
4623,"AR0-MariaDB USA, Inc",aetna,TriNet III,2023,2022-12-01,2023-11-30,3537551.04,3388465.99,151.0,76.0,...,0.0,1137968.79,1009182.27,136089.22,567.09,504.3,0.124509,2037192.68,5.0,1
5422,D7P-Zapier,bsca,TriNet III,2023,2022-12-01,2023-11-30,3776581.74,3474694.61,298.0,298.0,...,0.0,1250593.23,1049162.08,452091.64,567.09,504.3,0.124509,1368365.54,5.0,2
6361,GQ2-Soroban Capital Partners LP,uhc,TriNet IV,2023,2022-12-01,2023-11-30,2422607.10,2348088.83,99.0,100.0,...,0.0,773486.74,719104.34,131874.10,567.09,504.3,0.124509,1302820.12,5.0,3
4721,B50-Prolacta Bioscience,bsca,TriNet III,2023,2022-12-01,2023-11-30,3925154.07,3616392.61,373.0,378.0,...,0.0,2176326.59,1896164.64,413805.85,567.09,504.3,0.124509,1154712.66,5.0,4
12064,V8T-Assured Inc,aetna,TriNet III,2023,2022-12-01,2023-11-30,1700951.48,1640217.91,37.0,38.0,...,0.0,289980.84,255727.77,28395.04,567.09,504.3,0.124509,1090168.99,5.0,5
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
14962,"ZYX-PrivCo Media, LLC",kaiser,TriNet III,2023,2022-12-01,2023-11-30,0.00,0.00,0.0,0.0,...,0.0,9472.05,9166.99,0.00,567.09,504.3,0.124509,,,14986
14963,"ZYX-PrivCo Media, LLC",tufts,TriNet III,2023,2022-12-01,2023-11-30,0.00,0.00,0.0,0.0,...,0.0,315.17,314.07,0.00,567.09,504.3,0.124509,,,14987
14965,ZYZ-Head Count Inc,bsca,TriNet III,2023,2022-12-01,2023-11-30,0.00,0.00,0.0,0.0,...,0.0,1492.99,1304.98,1244.85,567.09,504.3,0.124509,,,14988
14976,ZZ7-Surfside Solutions Inc,kaiser,TriNet III,2023,2022-12-01,2023-11-30,0.00,0.00,0.0,0.0,...,0.0,1248.00,1248.00,0.00,567.09,504.3,0.124509,,,14989


In [179]:

dmca_claims_elig_tenant_c = dmca_claims_elig_c.groupby(['tenantid','year', 'start_date', 'stop_date']).agg({
    'allowed': 'sum', 
    'paid': 'sum', 
    'start_member_count':'sum', 
    'stop_member_count':'sum', 
    'start_employee_count': 'sum', 
    'stop_employee_count': 'sum', 
    'mm_months':'sum', 
    'ee_months': 'sum', 
    'total_rate': 'sum', 
    'admin': 'sum',
    'non_recurring_allowed': 'sum',
    'non_recurring_paid': 'sum',
    'allowed_adj': 'sum',
    'paid_adj': 'sum',
}).reset_index()

dmca_claims_elig_tenant_c['avg_mbrs'] = dmca_claims_elig_tenant_c['mm_months'] / 12
dmca_claims_elig_tenant_c['turnover'] = 1 - abs((dmca_claims_elig_tenant_c['stop_member_count'] - dmca_claims_elig_tenant_c['start_member_count']) / dmca_claims_elig_tenant_c['start_member_count']).clip(upper=1)
dmca_claims_elig_tenant_c['credibility'] = (0.25*dmca_claims_elig_tenant_c['turnover'] + (dmca_claims_elig_tenant_c['avg_mbrs'] - dmca_claims_elig_tenant_c['turnover'])*0.01)/(1+(dmca_claims_elig_tenant_c['avg_mbrs'] - 1)*0.01)

dmca_claims_elig_tenant_p = dmca_claims_elig_p.groupby(['tenantid','year', 'start_date', 'stop_date']).agg({
    'allowed': 'sum', 
    'paid': 'sum', 
    'start_member_count':'sum', 
    'stop_member_count':'sum', 
    'start_employee_count': 'sum', 
    'stop_employee_count': 'sum', 
    'mm_months':'sum', 
    'ee_months': 'sum', 
    'total_rate': 'sum', 
    'admin': 'sum',
    'non_recurring_allowed': 'sum',
    'non_recurring_paid': 'sum',
    'allowed_adj': 'sum',
    'paid_adj': 'sum',

}).reset_index()


dmca_claims_elig_tenant_p['avg_mbrs'] = dmca_claims_elig_tenant_p['mm_months'] / 12
dmca_claims_elig_tenant_p['turnover'] = 1 - abs((dmca_claims_elig_tenant_p['stop_member_count'] - dmca_claims_elig_tenant_p['start_member_count']) / dmca_claims_elig_tenant_p['start_member_count']).clip(upper=1)
dmca_claims_elig_tenant_p['credibility'] = (0.25*dmca_claims_elig_tenant_p['turnover'] + (dmca_claims_elig_tenant_p['avg_mbrs'] - dmca_claims_elig_tenant_p['turnover'])*0.01)/(1+(dmca_claims_elig_tenant_p['avg_mbrs'] - 1)*0.01)

dmca_claims_elig_tenant = pd.merge(dmca_claims_elig_tenant_c, dmca_claims_elig_tenant_p, how='outer', on=['tenantid'], suffixes=('_c', '_p'))
dmca_claims_elig_tenant['year_c'] = pd.to_datetime(curr_month).strftime('%Y')
dmca_claims_elig_tenant['year_p'] = pd.to_datetime(prior_month).strftime('%Y')
dmca_claims_elig_tenant['start_date_c'] = pd.to_datetime(start_date).strftime('%Y-%m-%d')
dmca_claims_elig_tenant['start_date_p'] = pd.to_datetime(start_date_prior).strftime('%Y-%m-%d')
dmca_claims_elig_tenant['stop_date_c'] = pd.to_datetime(stop_date).strftime('%Y-%m-%d')
dmca_claims_elig_tenant['stop_date_p'] = pd.to_datetime(stop_date_prior).strftime('%Y-%m-%d')
dmca_claims_elig_tenant.fillna(0, inplace=True)

dmca_claims_elig_tenant['pmpm_c'] = dmca_claims_elig_tenant['paid_adj_c'] / dmca_claims_elig_tenant['mm_months_c']
dmca_claims_elig_tenant['pmpm_p'] = dmca_claims_elig_tenant['paid_adj_p'] / dmca_claims_elig_tenant['mm_months_p']
dmca_claims_elig_tenant['bob_pmpm_c'] = pmpm_paid_bob
dmca_claims_elig_tenant['bob_pmpm_p'] = p_pmpm_paid_bob
dmca_claims_elig_tenant['bob_pmpm_trend'] = dmca_claims_elig_tenant['bob_pmpm_c'] / dmca_claims_elig_tenant['bob_pmpm_p'] - 1
dmca_claims_elig_tenant['current_cred_pmpm'] = (dmca_claims_elig_tenant['pmpm_c'] * dmca_claims_elig_tenant['credibility_c'] + dmca_claims_elig_tenant['bob_pmpm_c']* (1-dmca_claims_elig_tenant['credibility_c']))
dmca_claims_elig_tenant['prior_cred_pmpm'] = (dmca_claims_elig_tenant['pmpm_p'] * dmca_claims_elig_tenant['credibility_p'] + dmca_claims_elig_tenant['bob_pmpm_p']* (1-dmca_claims_elig_tenant['credibility_p']))
dmca_claims_elig_tenant['cred_trend'] = dmca_claims_elig_tenant['current_cred_pmpm']/dmca_claims_elig_tenant['prior_cred_pmpm'] - 1
dmca_claims_elig_tenant['potential_loss'] = round((dmca_claims_elig_tenant['paid_adj_c'].clip(lower=0) * (1-((dmca_claims_elig_tenant['bob_pmpm_trend'] + 1) / (dmca_claims_elig_tenant['cred_trend'] + 1)))),2)
dmca_claims_elig_tenant['potential_loss_pct'] = (dmca_claims_elig_tenant['potential_loss']/dmca_claims_elig_tenant['paid_adj_c'].clip(lower=0)).clip(lower=-0.5)
dmca_claims_elig_tenant['potential_loss_pct'].fillna(0, inplace=True)

#Risk Index
dmca_claims_elig_tenant['loss_pct_std_dev'] = np.std(dmca_claims_elig_tenant.potential_loss_pct.clip(lower=0))
dmca_claims_elig_tenant['pct_std_dev_ratio'] = dmca_claims_elig_tenant.potential_loss_pct/dmca_claims_elig_tenant.loss_pct_std_dev
dmca_claims_elig_tenant['loss_dol_std_dev'] = np.std(dmca_claims_elig_tenant.potential_loss.clip(lower=0))
dmca_claims_elig_tenant['dol_std_dev_ratio'] = dmca_claims_elig_tenant.potential_loss/dmca_claims_elig_tenant.loss_dol_std_dev
dmca_claims_elig_tenant['ratio_avg'] = (dmca_claims_elig_tenant.dol_std_dev_ratio + dmca_claims_elig_tenant.pct_std_dev_ratio) / 2
dmca_claims_elig_tenant['risk_index_setup'] = (2.5 + 1*dmca_claims_elig_tenant.ratio_avg).clip(upper=5)
dmca_claims_elig_tenant['risk_index'] = round(dmca_claims_elig_tenant.risk_index_setup.clip(lower=1), 1)
dmca_claims_elig_tenant.sort_values(by=['risk_index', 'potential_loss'], ascending=[False, False], inplace=True)
dmca_claims_elig_tenant['rank'] = dmca_claims_elig_tenant.reset_index().index + 1

dmca_claims_elig_tenant.drop(['pmpm_c', 'pmpm_p', 'credibility_c', 'credibility_p', 'avg_mbrs_c', 'avg_mbrs_p', 'turnover_c', 'turnover_p', 'loss_pct_std_dev', 'pct_std_dev_ratio', 'loss_dol_std_dev', 'dol_std_dev_ratio', 'ratio_avg', 'risk_index_setup', 'current_cred_pmpm', 'prior_cred_pmpm', 'cred_trend', 'potential_loss_pct'], axis=1, inplace=True)

dmca_claims_elig_tenant = dmca_claims_elig_tenant[['tenantid', 'risk_index', 'rank']]

# dmca_claims_elig_tenant = dmca_claims_elig_tenant[dmca_claims_elig_tenant['tenantid'] == 'U9J-SOPHiA GENETICS, Inc']
dmca_claims_elig_tenant


The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  dmca_claims_elig_tenant['potential_loss_pct'].fillna(0, inplace=True)


Unnamed: 0,tenantid,risk_index,rank
2585,"AR0-MariaDB USA, Inc",5.0,1
6043,SOJ-Payward Inc,5.0,2
3578,GQ2-Soroban Capital Partners LP,5.0,3
2723,"BTZ-Spinnaker Services, LLC",5.0,4
5191,NY6-Brandywine Group Advisors Inc,5.0,5
...,...,...,...
4522,KRA-Reltio Inc,1.0,8649
3634,GW8-Steward Partners Global Adviso,1.0,8650
1172,13A2-MTI Solutions Inc,,8651
7222,W4W-Rockin Jump San Diego,,8652


In [180]:
final = pd.merge(dmca_claims_elig, dmca_claims_elig_tenant, how='left', on=['tenantid'], suffixes=('_carrier', '_tenant'))
final['business_risk_tenant'] = final['risk_index_tenant'].apply(lambda x: 'High' if x >= 4.0 else 'Medium' if x >= 2.5 else 'Low')
final['business_risk_carrier'] = final['risk_index_carrier'].apply(lambda x: 'High' if x >= 4.0 else 'Medium' if x >= 2.5 else 'Low')
final['group_size'] = final['stop_member_count_c'].apply(lambda x: '<100' if x < 100 else '100-500' if x < 500 else '500+')
final['group_size_tenant'] = final.groupby(['tenantid'])['stop_member_count_c'].transform('sum').apply(lambda x: '<100' if x < 100 else '100-500' if x < 500 else '500+')
final.sort_values(by=['risk_index_tenant', 'potential_loss'], ascending=[False, False], inplace=True)

final.to_csv(f'dmca_claims_elig_{curr_month.replace("-", "_")}.csv', index=False)
final

Unnamed: 0,tenantid,carrier,exchange,year_c,start_date_c,stop_date_c,allowed_c,paid_c,start_member_count_c,stop_member_count_c,...,bob_pmpm_trend,potential_loss,risk_index_carrier,rank_carrier,risk_index_tenant,rank_tenant,business_risk_tenant,business_risk_carrier,group_size,group_size_tenant
0,"AR0-MariaDB USA, Inc",aetna,TriNet III,2023,2022-12-01,2023-11-30,3537551.04,3388465.99,151.0,76.0,...,0.124509,2037192.68,5.0,1,5.0,1,High,High,<100,100-500
1,D7P-Zapier,bsca,TriNet III,2023,2022-12-01,2023-11-30,3776581.74,3474694.61,298.0,298.0,...,0.124509,1368365.54,5.0,2,5.0,6,High,High,100-500,500+
2,GQ2-Soroban Capital Partners LP,uhc,TriNet IV,2023,2022-12-01,2023-11-30,2422607.10,2348088.83,99.0,100.0,...,0.124509,1302820.12,5.0,3,5.0,3,High,High,100-500,100-500
3,B50-Prolacta Bioscience,bsca,TriNet III,2023,2022-12-01,2023-11-30,3925154.07,3616392.61,373.0,378.0,...,0.124509,1154712.66,5.0,4,5.0,7,High,High,100-500,500+
4,V8T-Assured Inc,aetna,TriNet III,2023,2022-12-01,2023-11-30,1700951.48,1640217.91,37.0,38.0,...,0.124509,1090168.99,5.0,5,5.0,12,High,High,<100,<100
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
14950,ZM2-Very Great Inc,uhc,TriNet III,2023,2022-12-01,2023-11-30,0.00,0.00,0.0,0.0,...,0.124509,,,14951,1.0,8277,Low,Low,<100,<100
14977,ZRN-Circles,bsca,TriNet III,2023,2022-12-01,2023-11-30,0.00,0.00,0.0,0.0,...,0.124509,,,14978,1.0,8593,Low,Low,<100,100-500
12955,13A2-MTI Solutions Inc,kaiser,TriNet III,2023,2022-12-01,2023-11-30,551.00,278.00,1.0,1.0,...,0.124509,,,12956,,8651,Low,Low,<100,<100
14619,W4W-Rockin Jump San Diego,aetna,TriNet II,2023,2022-12-01,2023-11-30,1273.84,716.12,1.0,2.0,...,0.124509,,,14620,,8652,Low,Low,<100,<100


### Level 1

In [181]:
direction_dict_1 = {'True': 'increased', 'False': 'decreased'}
direction_dict_2 = {'True': 'above', 'False': 'below'}
direction_dict_3 = {'True': 'higher', 'False': 'lower'}
direction_dict_4 = {'True': 'an excess', 'False': 'a reduced'}
metric_dict_1 = {'per1000_change': 'per 1000 rate', 'pmpm_change': 'PMPM'}
class_dict_1 = {'Catastrophic': 'catastrophic', 'Acute': 'acute', 'Chronic Condition': 'chronic'}
group_count_dict_1 = {'1': 'specifically', '2': 'including'}

In [182]:
pmpm_agg_tenant_c = bob_current.copy()
pmpm_agg_tenant_c = pmpm_agg_tenant_c.groupby(['tenantid']).agg({'paid': 'sum', 'admin': 'sum', 'mm_months': 'sum', 'ee_months': 'sum', 'total_rate': 'sum'}).reset_index()
pmpm_agg_tenant_c['pmpm_c'] = (pmpm_agg_tenant_c['paid'] + pmpm_agg_tenant_c['admin']) / pmpm_agg_tenant_c['mm_months'] # Current PMPM by tenant
pmpm_agg_tenant_c['pepm_c'] = (pmpm_agg_tenant_c['paid'] + pmpm_agg_tenant_c['admin']) / pmpm_agg_tenant_c['ee_months'] # Current PEPM by tenant
pmpm_agg_tenant_c['TCR_c'] = (pmpm_agg_tenant_c['paid'] + pmpm_agg_tenant_c['admin']) / pmpm_agg_tenant_c['total_rate'] # Current TCR by tenant
pmpm_agg_tenant_c = pmpm_agg_tenant_c[['tenantid', 'paid', 'mm_months', 'pmpm_c', 'pepm_c', 'TCR_c']]

pmpm_agg_tenant_p = bob_prior.copy()
pmpm_agg_tenant_p = pmpm_agg_tenant_p.groupby(['tenantid']).agg({'paid': 'sum', 'admin': 'sum', 'mm_months': 'sum', 'ee_months': 'sum', 'total_rate': 'sum'}).reset_index()
pmpm_agg_tenant_p['pmpm_p'] = (pmpm_agg_tenant_p['paid'] + pmpm_agg_tenant_p['admin']) / pmpm_agg_tenant_p['mm_months'] # Current PMPM by tenant
pmpm_agg_tenant_p['pepm_p'] = (pmpm_agg_tenant_p['paid'] + pmpm_agg_tenant_p['admin']) / pmpm_agg_tenant_p['ee_months'] # Current PEPM by tenant
pmpm_agg_tenant_p['TCR_p'] = (pmpm_agg_tenant_p['paid'] + pmpm_agg_tenant_p['admin']) / pmpm_agg_tenant_p['total_rate'] # Current TCR by tenant
pmpm_agg_tenant_p = pmpm_agg_tenant_p[['tenantid', 'paid', 'mm_months', 'pmpm_p', 'pepm_p', 'TCR_p']]

pmpm_agg_tenant = pd.merge(pmpm_agg_tenant_c, pmpm_agg_tenant_p, how='outer', on=['tenantid'], suffixes=('_c','_p')) # Merge current and prior pmpm df
pmpm_agg_tenant.fillna(0, inplace=True)
pmpm_agg_tenant['pmpm_change_t'] = pmpm_agg_tenant['pmpm_c'] / pmpm_agg_tenant['pmpm_p'] -1 # Calculate pmpm change by tenant
pmpm_agg_tenant['pepm_change_t'] = pmpm_agg_tenant['pepm_c'] / pmpm_agg_tenant['pepm_p'] -1 # Calculate pepm change by tenant
pmpm_agg_tenant['TCR_change_t'] = pmpm_agg_tenant['TCR_c'] / pmpm_agg_tenant['TCR_p'] -1 # Calculate TCR change by tenant

pmpm_agg_tenant['members_change_t'] = pmpm_agg_tenant['mm_months_c'] / pmpm_agg_tenant['mm_months_p'] -1 # Calculate member change by tenant
pmpm_agg_tenant['direction'] = pmpm_agg_tenant['pmpm_change_t'] > 0 # Determines if the PMPM has increased or decreased compared to prior period
pmpm_agg_tenant.replace([np.inf, -np.inf], 1, inplace=True)


pmpm_agg_tenant

Unnamed: 0,tenantid,paid_c,mm_months_c,pmpm_c,pepm_c,TCR_c,paid_p,mm_months_p,pmpm_p,pepm_p,TCR_p,pmpm_change_t,pepm_change_t,TCR_change_t,members_change_t,direction
0,100F-New York City Industrial T,39611.69,178,306.115393,767.444225,0.231752,59973.03,179.0,418.155754,972.076364,0.297960,-0.267939,-0.210510,-0.222203,-0.005587,False
1,100G-Iron Park Capital Partners,531289.89,424,1410.948113,3519.070588,1.374658,502525.49,862.0,660.646868,1551.710082,0.651177,1.135707,1.267866,1.111036,-0.508121,True
2,100J-Tamares Management LLC,93513.08,255,478.629843,1271.360521,0.472303,183930.75,262.0,810.947634,1985.684860,0.965098,-0.409789,-0.359737,-0.510617,-0.026718,False
3,100M-Wellness Insight Technologies,2485.33,36,267.115000,400.672500,0.392816,23990.80,62.0,501.961452,723.758372,0.738109,-0.467858,-0.446400,-0.467807,-0.419355,False
4,"100P-Walking Fish Therapeutics, I",407821.10,730,622.735644,1238.683978,0.526353,194794.77,384.0,629.090339,1123.584605,0.536534,-0.010101,0.102439,-0.018976,0.901042,False
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
8648,ZZK-Altman Inc,112010.38,60,1943.465000,9717.325000,6.164877,12411.84,79.0,215.308354,548.689032,0.708236,8.026426,16.710077,7.704553,-0.240506,True
8649,ZZM-Contextant LLC June 19,26521.53,144,242.420972,872.715500,0.740123,8594.71,111.0,152.989189,628.955556,0.493198,0.584563,0.387563,0.500660,0.297297,True
8650,"ZZN-Vibliome Operations, LLC",46975.75,259,275.989730,610.951624,0.371002,40237.58,216.0,299.736898,674.408021,0.474059,-0.079227,-0.094092,-0.217392,0.199074,False
8651,ZZV-Purple Fete LLC,4345.70,56,191.684107,219.067551,0.226429,610.40,21.0,333.286190,333.286190,0.426301,-0.424866,-0.342704,-0.468852,1.666667,False


In [183]:
# Dynamic statement build
pmpm_agg_tenant['direction_pmpm_bob'] = pmpm_agg_tenant.pmpm_c > pmpm_bob # Determines if the PMPM is above or below the bob
pmpm_agg_tenant['direction_pepm_bob'] = pmpm_agg_tenant.pepm_c > pepm_bob # Determines if the PMPM is above or below the bob
pmpm_agg_tenant['direction_TCR_bob'] = pmpm_agg_tenant.TCR_c > TCR_bob # Determines if the PMPM is above or below the bob
pmpm_agg_tenant['pmpm_change_t'] = (round(pmpm_agg_tenant['pmpm_change_t']*100)).astype(str).str[:-2] + "%" # Convert the pmpm_change_t to string percentage
pmpm_agg_tenant['pepm_change_t'] = (round(pmpm_agg_tenant['pepm_change_t']*100)).astype(str).str[:-2] + "%" # Convert the pmpm_change_t to string percentage
pmpm_agg_tenant['TCR_change_t'] = (round(pmpm_agg_tenant['TCR_change_t']*100)).astype(str).str[:-2] + "%" # Convert the pmpm_change_t to string percentage
pmpm_agg_tenant['pmpm_to_bob'] = round((pmpm_agg_tenant.pmpm_c/pmpm_bob -1)*100).astype(str).str[:-2] + "%" # Convert the pmpm_to_bob to string percentage
pmpm_agg_tenant['pepm_to_bob'] = round((pmpm_agg_tenant.pepm_c/pepm_bob -1)*100).astype(str).str[:-2] + "%" # Convert the pmpm_to_bob to string percentage
pmpm_agg_tenant['TCR_to_bob'] = round((pmpm_agg_tenant.TCR_c/TCR_bob -1)*100).astype(str).str[:-2] + "%" # Convert the pmpm_to_bob to string percentage
pmpm_agg_tenant['l1_statement_pmpm_1'] = ("On a paid date basis, " + pmpm_agg_tenant['tenantid'] + "'s paid PMPM " + pmpm_agg_tenant['direction'].astype(str).map(direction_dict_1) + " " + pmpm_agg_tenant['pmpm_change_t'] + " YoY, from " + "$" + round(pmpm_agg_tenant['pmpm_p']).astype(str).str[:-2] + " to $" + round(pmpm_agg_tenant['pmpm_c']).astype(str).str[:-2])
pmpm_agg_tenant['l1_statement_pmpm_2'] = ("Compared to benchmarks, PMPMs are " + pmpm_agg_tenant.pmpm_to_bob + " " + pmpm_agg_tenant['direction_pmpm_bob'].astype(str).map(direction_dict_3) +  " than average.")
pmpm_agg_tenant['l1_statement_pepm_1'] = ("On a paid date basis, " + pmpm_agg_tenant['tenantid'] + "'s paid PEPM " + pmpm_agg_tenant['direction'].astype(str).map(direction_dict_1) + " " + pmpm_agg_tenant['pepm_change_t'] + " YoY, from " + "$" + round(pmpm_agg_tenant['pepm_p']).astype(str).str[:-2] + " to $" + round(pmpm_agg_tenant['pepm_c']).astype(str).str[:-2])
pmpm_agg_tenant['l1_statement_pepm_2'] = ("Compared to benchmarks, PEPMs are " + pmpm_agg_tenant.pepm_to_bob + " " + pmpm_agg_tenant['direction_pepm_bob'].astype(str).map(direction_dict_3) +  " than average.")
pmpm_agg_tenant['l1_statement_TCR_1'] = ("On a paid date basis, " + pmpm_agg_tenant['tenantid'] + "'s TCR " + pmpm_agg_tenant['direction'].astype(str).map(direction_dict_1) + " " + pmpm_agg_tenant['TCR_change_t'] + " YoY, from " + "$" + round(pmpm_agg_tenant['pmpm_p']).astype(str).str[:-2] + " to $" + round(pmpm_agg_tenant['pmpm_c']).astype(str).str[:-2])
pmpm_agg_tenant['l1_statement_TCR_2'] = ("Compared to benchmarks, TCR is " + pmpm_agg_tenant.pmpm_to_bob + " " + pmpm_agg_tenant['direction_TCR_bob'].astype(str).map(direction_dict_3) +  " than average.")

pmpm_agg_tenant

Unnamed: 0,tenantid,paid_c,mm_months_c,pmpm_c,pepm_c,TCR_c,paid_p,mm_months_p,pmpm_p,pepm_p,...,direction_TCR_bob,pmpm_to_bob,pepm_to_bob,TCR_to_bob,l1_statement_pmpm_1,l1_statement_pmpm_2,l1_statement_pepm_1,l1_statement_pepm_2,l1_statement_TCR_1,l1_statement_TCR_2
0,100F-New York City Industrial T,39611.69,178,306.115393,767.444225,0.231752,59973.03,179.0,418.155754,972.076364,...,False,-54%,-43%,-75%,"On a paid date basis, 100F-New York City Indus...","Compared to benchmarks, PMPMs are -54% lower t...","On a paid date basis, 100F-New York City Indus...","Compared to benchmarks, PEPMs are -43% lower t...","On a paid date basis, 100F-New York City Indus...","Compared to benchmarks, TCR is -54% lower than..."
1,100G-Iron Park Capital Partners,531289.89,424,1410.948113,3519.070588,1.374658,502525.49,862.0,660.646868,1551.710082,...,True,112%,163%,46%,"On a paid date basis, 100G-Iron Park Capital P...","Compared to benchmarks, PMPMs are 112% higher ...","On a paid date basis, 100G-Iron Park Capital P...","Compared to benchmarks, PEPMs are 163% higher ...","On a paid date basis, 100G-Iron Park Capital P...","Compared to benchmarks, TCR is 112% higher tha..."
2,100J-Tamares Management LLC,93513.08,255,478.629843,1271.360521,0.472303,183930.75,262.0,810.947634,1985.684860,...,False,-28%,-5%,-50%,"On a paid date basis, 100J-Tamares Management ...","Compared to benchmarks, PMPMs are -28% lower t...","On a paid date basis, 100J-Tamares Management ...","Compared to benchmarks, PEPMs are -5% lower th...","On a paid date basis, 100J-Tamares Management ...","Compared to benchmarks, TCR is -28% lower than..."
3,100M-Wellness Insight Technologies,2485.33,36,267.115000,400.672500,0.392816,23990.80,62.0,501.961452,723.758372,...,False,-60%,-70%,-58%,"On a paid date basis, 100M-Wellness Insight Te...","Compared to benchmarks, PMPMs are -60% lower t...","On a paid date basis, 100M-Wellness Insight Te...","Compared to benchmarks, PEPMs are -70% lower t...","On a paid date basis, 100M-Wellness Insight Te...","Compared to benchmarks, TCR is -60% lower than..."
4,"100P-Walking Fish Therapeutics, I",407821.10,730,622.735644,1238.683978,0.526353,194794.77,384.0,629.090339,1123.584605,...,False,-6%,-7%,-44%,"On a paid date basis, 100P-Walking Fish Therap...","Compared to benchmarks, PMPMs are -6% lower th...","On a paid date basis, 100P-Walking Fish Therap...","Compared to benchmarks, PEPMs are -7% lower th...","On a paid date basis, 100P-Walking Fish Therap...","Compared to benchmarks, TCR is -6% lower than ..."
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
8648,ZZK-Altman Inc,112010.38,60,1943.465000,9717.325000,6.164877,12411.84,79.0,215.308354,548.689032,...,True,193%,628%,556%,"On a paid date basis, ZZK-Altman Inc's paid PM...","Compared to benchmarks, PMPMs are 193% higher ...","On a paid date basis, ZZK-Altman Inc's paid PE...","Compared to benchmarks, PEPMs are 628% higher ...","On a paid date basis, ZZK-Altman Inc's TCR inc...","Compared to benchmarks, TCR is 193% higher tha..."
8649,ZZM-Contextant LLC June 19,26521.53,144,242.420972,872.715500,0.740123,8594.71,111.0,152.989189,628.955556,...,False,-63%,-35%,-21%,"On a paid date basis, ZZM-Contextant LLC Jun...","Compared to benchmarks, PMPMs are -63% lower t...","On a paid date basis, ZZM-Contextant LLC Jun...","Compared to benchmarks, PEPMs are -35% lower t...","On a paid date basis, ZZM-Contextant LLC Jun...","Compared to benchmarks, TCR is -63% lower than..."
8650,"ZZN-Vibliome Operations, LLC",46975.75,259,275.989730,610.951624,0.371002,40237.58,216.0,299.736898,674.408021,...,False,-58%,-54%,-61%,"On a paid date basis, ZZN-Vibliome Operations,...","Compared to benchmarks, PMPMs are -58% lower t...","On a paid date basis, ZZN-Vibliome Operations,...","Compared to benchmarks, PEPMs are -54% lower t...","On a paid date basis, ZZN-Vibliome Operations,...","Compared to benchmarks, TCR is -58% lower than..."
8651,ZZV-Purple Fete LLC,4345.70,56,191.684107,219.067551,0.226429,610.40,21.0,333.286190,333.286190,...,False,-71%,-84%,-76%,"On a paid date basis, ZZV-Purple Fete LLC's pa...","Compared to benchmarks, PMPMs are -71% lower t...","On a paid date basis, ZZV-Purple Fete LLC's pa...","Compared to benchmarks, PEPMs are -84% lower t...","On a paid date basis, ZZV-Purple Fete LLC's TC...","Compared to benchmarks, TCR is -71% lower than..."


### Level 2

In [184]:
# BoB PMPMs by Class
pmpm_class_bob_c = pd.DataFrame(pmpm_agg_c.groupby(['class'])['paid'].sum()).reset_index()
pmpm_class_members_c = pmpm_agg_tenant_c[['tenantid', 'mm_months']]
pmpm_class_members_p = pmpm_agg_tenant_c[['tenantid', 'mm_months']]
# current member count
pmpm_class_bob_c['members_c'] = pmpm_class_members_c['mm_months'].sum()
pmpm_class_bob_c['bob_class_pmpm'] = pmpm_class_bob_c['paid'] / pmpm_class_bob_c['members_c']
pmpm_class_bob_c = pmpm_class_bob_c.iloc[:, [0, 3]]

# BoB Util by Class
per1000_class_c = pd.DataFrame(pmpm_agg_c.groupby(['class']).agg({'counts': 'sum'})).reset_index()
per1000_class_claimants_c = (pd.DataFrame(pmpm_agg_c.groupby(['tenantid', 'class']).agg({'claimants': 'first'})).reset_index().
                             groupby(['class']).agg({'claimants': 'sum'}).reset_index())
per1000_class_c = pd.merge(per1000_class_c, per1000_class_claimants_c, how='left', on=['class'])  # Merge
per1000_class_c['members_c'] = pmpm_class_members_c['mm_months'].sum()
per1000_class_c['per1000claims_bob'] = per1000_class_c['counts'] / (per1000_class_c['members_c'] / 1000)
per1000_class_c['per1000claimants_bob'] = per1000_class_c['claimants'] / (per1000_class_c['members_c'] / 1000)
per1000_class_c = per1000_class_c.iloc[:, [0, 4, 5]]

# LEVEL 2 Comparison Table
pmpm_agg_class_c = pd.DataFrame(pmpm_agg_c.groupby(['tenantid', 'class']).agg({'paid': 'sum', 'claimants': 'first', 'counts': 'sum'})).reset_index()
pmpm_agg_class_c = pd.merge(pmpm_agg_class_c, pmpm_class_members_c, how='left', on=['tenantid'])  # Merge current and prior pmpm df
pmpm_agg_class_c['pmpm'] = pmpm_agg_class_c['paid'] / pmpm_agg_class_c['mm_months']
pmpm_agg_class_c['per1000claims'] = pmpm_agg_class_c['counts'] / (pmpm_agg_class_c['mm_months'] / 1000)
pmpm_agg_class_c['per1000claimants'] = pmpm_agg_class_c['claimants'] / (pmpm_agg_class_c['mm_months'] / 1000)
total_member_count_c = pmpm_class_members_c['mm_months'].sum()

pmpm_agg_class_p = pd.DataFrame(pmpm_agg_p.groupby(['tenantid', 'class']).agg({'paid': 'sum', 'claimants': 'first', 'counts': 'sum'})).reset_index()
pmpm_agg_class_p = pd.merge(pmpm_agg_class_p, pmpm_class_members_p, how='left', on=['tenantid'])  # Merge current and prior pmpm df
pmpm_agg_class_p['pmpm'] = pmpm_agg_class_p['paid'] / pmpm_agg_class_p['mm_months']
pmpm_agg_class_p['per1000claims'] = pmpm_agg_class_p['counts'] / (pmpm_agg_class_p['mm_months'] / 1000)
pmpm_agg_class_p['per1000claimants'] = pmpm_agg_class_p['claimants'] / (pmpm_agg_class_p['mm_months'] / 1000)
total_member_count_p = pmpm_class_members_p['mm_months'].sum()

pmpm_agg_class = pd.merge(pmpm_agg_class_c, pmpm_agg_class_p, how='outer', on=['tenantid', 'class'], suffixes=('_c', '_p'))  # Merge current and prior pmpm df
pmpm_agg_class = pd.merge(pmpm_agg_class, per1000_class_c, how='left', on=['class'])  # Merge current and prior pmpm df

# Create % Change Field
pmpm_agg_class.fillna(0, inplace=True)
pmpm_agg_class['pmpm_change'] = pmpm_agg_class['pmpm_c'] / pmpm_agg_class['pmpm_p'] - 1
pmpm_agg_class['per1000claims_change'] = pmpm_agg_class['per1000claims_c'] / pmpm_agg_class['per1000claims_p'] - 1
pmpm_agg_class['per1000claimants_change'] = pmpm_agg_class['per1000claimants_c'] / pmpm_agg_class['per1000claimants_p'] - 1
pmpm_agg_class['paid_diff'] = pmpm_agg_class['paid_c'] - pmpm_agg_class['paid_p']

pmpm_agg_class['year'] = pd.to_datetime(curr_month).strftime('%Y')
pmpm_agg_class['start_date'] = pd.to_datetime(start_date).strftime('%Y-%m-%d')
pmpm_agg_class['stop_date'] = pd.to_datetime(stop_date).strftime('%Y-%m-%d')

pmpm_agg_class.to_csv(f'Outputs/Level_22_{curr_month.replace("-", "_")}.csv', index=False)
pmpm_agg_class

Unnamed: 0,tenantid,class,paid_c,claimants_c,counts_c,mm_months_c,pmpm_c,per1000claims_c,per1000claimants_c,paid_p,...,per1000claimants_p,per1000claims_bob,per1000claimants_bob,pmpm_change,per1000claims_change,per1000claimants_change,paid_diff,year,start_date,stop_date
0,100F-New York City Industrial T,Acute,11305.67,6.0,97.0,178.0,63.515000,544.943820,33.707865,26142.24,...,73.033708,506.977520,9.907539,-0.567532,-0.259542,-0.538462,-14836.57,2023,2022-12-01,2023-11-30
1,100F-New York City Industrial T,Chronic Condition,28306.02,1.0,303.0,178.0,159.022584,1702.247191,5.617978,33830.79,...,11.235955,1353.478165,3.387718,-0.163306,0.578125,-0.500000,-5524.77,2023,2022-12-01,2023-11-30
2,100G-Iron Park Capital Partners,Acute,127701.65,1.0,240.0,424.0,301.183137,566.037736,2.358491,107192.06,...,2.358491,506.977520,9.907539,0.191335,-0.517103,0.000000,20509.59,2023,2022-12-01,2023-11-30
3,100G-Iron Park Capital Partners,Chronic Condition,403588.24,1.0,921.0,424.0,951.859057,2172.169811,2.358491,395333.43,...,2.358491,1353.478165,3.387718,0.020881,-0.284938,0.000000,8254.81,2023,2022-12-01,2023-11-30
4,100J-Tamares Management LLC,Acute,23367.45,11.0,99.0,255.0,91.637059,388.235294,43.137255,7938.93,...,47.058824,506.977520,9.907539,1.943400,-0.048077,-0.083333,15428.52,2023,2022-12-01,2023-11-30
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
19216,"ZZN-Vibliome Operations, LLC",Chronic Condition,17299.63,1.0,179.0,259.0,66.793938,691.119691,3.861004,24567.35,...,3.861004,1353.478165,3.387718,-0.295828,0.808081,0.000000,-7267.72,2023,2022-12-01,2023-11-30
19217,ZZV-Purple Fete LLC,Acute,3381.40,4.0,41.0,56.0,60.382143,732.142857,71.428571,610.40,...,35.714286,506.977520,9.907539,4.539646,0.708333,1.000000,2771.00,2023,2022-12-01,2023-11-30
19218,ZZV-Purple Fete LLC,Chronic Condition,964.30,1.0,21.0,56.0,17.219643,375.000000,17.857143,0.00,...,0.000000,1353.478165,3.387718,inf,inf,inf,964.30,2023,2022-12-01,2023-11-30
19219,"ZZY-Monzo, Inc",Acute,186849.44,1.0,370.0,474.0,394.197131,780.590717,2.109705,40205.26,...,21.097046,506.977520,9.907539,3.647388,0.209150,-0.900000,146644.18,2023,2022-12-01,2023-11-30


In [185]:
# Create table to determine largest driver
pmpm_agg_class_skinny = pmpm_agg_class.iloc[:, [0, 1, 21, 6, 13, 18, 7, 14,19, 8, 15,20]]

pmpm_agg_class_skinny = pd.DataFrame(pmpm_agg_class_skinny.set_index(['tenantid', 'class', 'paid_diff', 'pmpm_c', 'pmpm_p', 'per1000claims_c', 'per1000claims_p', 'per1000claimants_c', 'per1000claimants_p']).stack()).rename(columns={0: "per_change"}).reset_index()
pmpm_agg_class_skinny['per_change'] = pmpm_agg_class_skinny.per_change.replace([np.inf, -np.inf], 1)  # This replaces per_change Infinity instances to 100% change

pmpm_agg_class_skinny = pmpm_agg_class_skinny.sort_values(['tenantid', 'paid_diff', 'per_change'],ascending=[True, False, False]).groupby(['tenantid', 'class']).head()
pmpm_agg_class_skinny.rename(columns={'level_9': 'level_2'}, inplace=True)
pmpm_agg_class_skinny = pmpm_agg_tenant[['tenantid', 'direction']].merge(pmpm_agg_class_skinny,how='outer')  # Pull in overall PMPM trend for sorting

pmpm_agg_class_skinny['sort'] = pmpm_agg_class_skinny.sort_values(['tenantid', 'paid_diff', 'per_change'], ascending=[True, False, False]).groupby(['tenantid']).cumcount() + 1  # Partition and create row numbers for sorting based on tenant's overall pmpm trend
pmpm_agg_class_skinny['sort2'] = np.where(pmpm_agg_class_skinny['direction'] == False, (-pmpm_agg_class_skinny['sort']), pmpm_agg_class_skinny['sort'])  # If direction = false, then make negative
pmpm_agg_class_skinny = pmpm_agg_class_skinny.sort_values(['tenantid', 'sort2'], ascending=True)  # Sort by ascending
pmpm_agg_class_skinny = pmpm_agg_class_skinny.groupby(['tenantid', 'class']).nth([0])  # Filter for first row only per partition
pmpm_agg_class_skinny = pd.merge(pmpm_agg_class_skinny, pmpm_class_bob_c, how='left', on='class')
pmpm_agg_class_skinny = pd.merge(pmpm_agg_class_skinny, per1000_class_c, how='left', on='class')
pmpm_agg_class_skinny_1 = pmpm_agg_class_skinny.groupby(['tenantid']).nth(0)  # Filter for first row only per partition
pmpm_agg_class_skinny_2 = pmpm_agg_class_skinny.groupby(['tenantid']).nth(1)  # Filter for first row only per partition
pmpm_agg_class_skinny_3 = pmpm_agg_class_skinny.groupby(['tenantid']).nth(2)  # Filter for first row only per partition
pmpm_agg_class_skinny_1 = pmpm_agg_class_skinny_1.iloc[:, [0, 1, 2, 3, 4, 5, 6, 7, 8, 9,10,11]]  # Clean up
pmpm_agg_class_skinny_2 = pmpm_agg_class_skinny_2.iloc[:, [0, 1, 2, 3, 4, 5, 6, 7, 8, 9,10,11]]  # Clean up
pmpm_agg_class_skinny_3 = pmpm_agg_class_skinny_3.iloc[:, [0, 1, 2, 3, 4, 5, 6, 7, 8, 9,10,11]]  # Clean up
pmpm_agg_class = pd.merge(pmpm_agg_class_skinny_1, pmpm_agg_class_skinny_2, how='left', on=['tenantid'], suffixes=('_1', '_2'))  # Final table creation
pmpm_agg_class_skinny_3 = pmpm_agg_class_skinny_3.add_suffix('_3')
pmpm_agg_class_skinny_3.rename(columns={'tenantid_3': 'tenantid', 'carrier_3': 'carrier', 'exchange_3': 'exchange'}, inplace=True)
pmpm_agg_class = pd.merge(pmpm_agg_class, pmpm_agg_class_skinny_3, how='left', on=['tenantid'])
pmpm_agg_class.fillna(0, inplace=True)

pmpm_agg_class

Unnamed: 0,tenantid,direction_1,class_1,paid_diff_1,pmpm_c_1,pmpm_p_1,per1000claims_c_1,per1000claims_p_1,per1000claimants_c_1,per1000claimants_p_1,...,class_3,paid_diff_3,pmpm_c_3,pmpm_p_3,per1000claims_c_3,per1000claims_p_3,per1000claimants_c_3,per1000claimants_p_3,level_2_3,per_change_3
0,100F-New York City Industrial T,False,Acute,-14836.57,63.515000,146.866517,544.943820,735.955056,33.707865,73.033708,...,0,0.00,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0,0.000000
1,100G-Iron Park Capital Partners,True,Acute,20509.59,301.183137,252.811462,566.037736,1172.169811,2.358491,2.358491,...,0,0.00,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0,0.000000
2,100J-Tamares Management LLC,False,Catastrophic,-106548.84,0.000000,417.838588,0.000000,94.117647,0.000000,3.921569,...,Acute,15428.52,91.637059,31.133059,388.235294,407.843137,43.137255,47.058824,per1000claimants_change,-0.083333
3,100M-Wellness Insight Technologies,False,Chronic Condition,-22948.81,10.589722,648.056667,27.777778,2416.666667,27.777778,27.777778,...,0,0.00,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0,0.000000
4,"100P-Walking Fish Therapeutics, I",False,Acute,-8931.86,103.794671,116.030096,730.136986,908.219178,5.479452,24.657534,...,0,0.00,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0,0.000000
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
8648,ZZK-Altman Inc,True,Chronic Condition,106835.23,1796.261000,15.673833,1833.333333,250.000000,16.666667,16.666667,...,0,0.00,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0,0.000000
8649,ZZM-Contextant LLC June 19,True,Chronic Condition,12313.22,96.779931,11.271458,395.833333,76.388889,6.944444,6.944444,...,0,0.00,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0,0.000000
8650,"ZZN-Vibliome Operations, LLC",False,Chronic Condition,-7267.72,66.793938,94.854633,691.119691,382.239382,3.861004,3.861004,...,0,0.00,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0,0.000000
8651,ZZV-Purple Fete LLC,False,Chronic Condition,964.30,17.219643,0.000000,375.000000,0.000000,17.857143,0.000000,...,0,0.00,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0,0.000000


In [186]:
# Dynamic statement build

pmpm_agg_class_skinny = pd.merge(pmpm_agg_class_skinny, pmpm_agg_class[['tenantid', 'class_1', 'per_change_1', 'class_2', 'per_change_2', 'class_3', 'per_change_3']], how='left',on=['tenantid'])
pmpm_agg_class_skinny['direction_class_1'] = pmpm_agg_class_skinny.per_change_1 > 0
pmpm_agg_class_skinny['direction_class_2'] = pmpm_agg_class_skinny.per_change_2 > 0
pmpm_agg_class_skinny['direction_class_3'] = pmpm_agg_class_skinny.per_change_3 > 0

pmpm_agg_class_skinny['per_change_1'] = (round(pmpm_agg_class_skinny['per_change_1'] * 100)).astype(str).str[:-2] + "%"
pmpm_agg_class_skinny['per_change_2'] = (round(pmpm_agg_class_skinny['per_change_2'] * 100)).astype(str).str[:-2] + "%"
pmpm_agg_class_skinny['per_change_3'] = (round(pmpm_agg_class_skinny['per_change_3'] * 100)).astype(str).str[:-2] + "%"
# 
pmpm_agg_class_skinny['l2_statement_1'] = ("From prior to current " + pmpm_agg_class_skinny['class_1'] + " PMPMs " + pmpm_agg_class_skinny['direction_class_1'].astype(str).map(direction_dict_1) + " " + pmpm_agg_class_skinny['per_change_1'])
pmpm_agg_class_skinny['l2_statement_2'] = ( np.where(pmpm_agg_class_skinny['class_2'] != 0,(", " +pmpm_agg_class_skinny['class_2'].astype(str) + " PMPMs " + pmpm_agg_class_skinny['direction_class_3'].astype(str).map(direction_dict_1) + " " + pmpm_agg_class_skinny['per_change_3'].astype(str)),''))
pmpm_agg_class_skinny['l2_statement_3'] = ( np.where(pmpm_agg_class_skinny['class_3'] != 0,(", and " +pmpm_agg_class_skinny['class_3'].astype(str) + " PMPMs " + pmpm_agg_class_skinny['direction_class_3'].astype(str).map(direction_dict_1) + " " + pmpm_agg_class_skinny['per_change_3'].astype(str)),''))
pmpm_agg_class_skinny['l2_statement'] = pmpm_agg_class_skinny['l2_statement_1'] + pmpm_agg_class_skinny['l2_statement_2'] + pmpm_agg_class_skinny['l2_statement_3']

pmpm_agg_class_skinny['year'] = pd.to_datetime(curr_month).strftime('%Y')
pmpm_agg_class_skinny['start_date'] = pd.to_datetime(start_date).strftime('%Y-%m-%d')
pmpm_agg_class_skinny['stop_date'] = pd.to_datetime(stop_date).strftime('%Y-%m-%d')

pmpm_agg_class_skinny.to_csv(f'Outputs/Level_2_{curr_month.replace("-", "_")}.csv', index=False)


### Level 3

In [187]:
# LEVEL 3 Table
pmpm_agg_group_c = pd.DataFrame(pmpm_agg_c.groupby(['tenantid', 'class', 'group']).agg({'paid': 'sum'})).reset_index()
pmpm_agg_group_c = pd.merge(pmpm_agg_group_c, pmpm_class_members_c, how='left', on=['tenantid'])  # Merge current and prior pmpm df
pmpm_agg_group_c['pmpm'] = pmpm_agg_group_c['paid'] / pmpm_agg_group_c['mm_months']

pmpm_agg_group_p = pd.DataFrame(pmpm_agg_p.groupby(['tenantid', 'class', 'group'])['paid'].sum()).reset_index()
pmpm_agg_group_p = pd.merge(pmpm_agg_group_p, pmpm_class_members_p, how='left', on=['tenantid'])  # Merge current and prior pmpm df
pmpm_agg_group_p['pmpm'] = pmpm_agg_group_p['paid'] / pmpm_agg_group_p['mm_months']
# 
pmpm_agg_group = pd.merge(pmpm_agg_group_c, pmpm_agg_group_p, how='outer', on=['tenantid', 'class', 'group'], suffixes=('_c','_p'))  # # Merge current and prior pmpm df
pmpm_agg_group = pmpm_agg_group.iloc[:, [0, 1, 2, 5, 8]]  # Reordering columns
pmpm_agg_group.fillna(0, inplace=True)
# # pmpm_agg_class_only = pmpm_agg_class.filter(['tenantid', 'class'], axis=1)  # Get list of class drivers for each tenantid
# # pmpm_agg_group = pd.merge(pmpm_agg_group, pmpm_agg_class_only, how='inner', on=['tenantid', 'class'])  # Filter for class driver only per tenantid
# pmpm_agg_group[["pmpm_c", "pmpm_p"]] = pmpm_agg_group[["pmpm_c", "pmpm_p"]].fillna(0)  # Force NaN to 0
pmpm_agg_group['per_change'] = pmpm_agg_group['pmpm_c'] / pmpm_agg_group['pmpm_p'] - 1  # Calculate the PMPM percent change
pmpm_agg_group = pmpm_agg_group.replace([np.inf], 1)  # Percent change from 0 to something forced to 1
pmpm_agg_group['num_change'] = (pmpm_agg_group['pmpm_c'] - pmpm_agg_group['pmpm_p']).astype(float)  # Calculate the PMPM change
pmpm_agg_group = pmpm_agg_tenant[['tenantid', 'direction']].merge(pmpm_agg_group, how='outer')  # Pull in overall PMPM trend for sorting
pmpm_agg_group['sort'] = pmpm_agg_group.sort_values(['tenantid', 'num_change'], ascending=False).groupby(['tenantid']).cumcount() + 1  # Partition and create row numbers for sorting based on tenant's overall pmpm trend
pmpm_agg_group = pmpm_agg_group.sort_values(['tenantid', 'sort'])  # Ensures proper sorting
pmpm_agg_group['sort2'] = np.where(pmpm_agg_group['direction'] == False, (-pmpm_agg_group['sort']), pmpm_agg_group['sort'])  # If direction = false, then make negative
pmpm_agg_group = pmpm_agg_group.sort_values(['tenantid', 'sort2'], ascending=True)  # Sort by ascending
pmpm_agg_group['drop'] = ((pmpm_agg_group['direction'] == False) & (pmpm_agg_group['num_change'] > 0)) | ((pmpm_agg_group['direction'] == True) & (pmpm_agg_group['num_change'] < 0))  # Identify instances where change is neg. when overall change is positive (vice-versa)
pmpm_agg_group = pmpm_agg_group.loc[pmpm_agg_group['drop'] == False]
pmpm_agg_group = pmpm_agg_group.iloc[:, [0, 1, 2, 3, 4, 5, 6, 7]]  # Clean up

# Selecting top 3 drivers
pmpm_agg_group_temp = pmpm_agg_group.iloc[:, [0, 7]]  # Creates table to calculate total PMPM change by group
pmpm_agg_group_temp = pd.DataFrame(pmpm_agg_group_temp.groupby(['tenantid'])['num_change'].sum())  # Calculates total PMPM change by group
pmpm_agg_group_temp.rename(columns={"num_change": "total_change"}, inplace=True)
pmpm_agg_group = pd.merge(pmpm_agg_group, pmpm_agg_group_temp, how='left', on=['tenantid'])  # Join into main table to calculate percent change of total change per tenantid
pmpm_agg_group['per_change_of_total'] = pmpm_agg_group['num_change'] / pmpm_agg_group['total_change']  # Calculate the percent change of total change per tenantid
pmpm_agg_group['per_change_of_total_rsum'] = pmpm_agg_group.groupby(['tenantid', 'class'])['per_change_of_total'].cumsum()  # Calculate the running total of the total change
# pmpm_agg_group['group'] = pmpm_agg_group['group'].str.capitalize()
pmpm_agg_group['group'] = pmpm_agg_group['group'].replace(['metabolic', 'only', 'complex', 'mental', 'health', 'cancer', 'other', 'single', 'acute_nonER', 'acute_ER', 'heart', 'disease', 'diabetes', 'lung'], 
                                                          ['Metabolic', 'Only', 'Complex', 'Mental', 'Health', 'Cancer', 'Other', 'Single', 'Acute Non-ER', 'Acute ER', 'Heart', 'Disease', 'diabetes', 'lung'], regex=True)
pmpm_agg_group['group_change'] = pmpm_agg_group['group'] + " (" + np.where(pmpm_agg_group.direction == True, '▲', '▼') + " " + round((pmpm_agg_group['per_change']*100)).astype(str).str[:-2] + "%)"

pmpm_agg_group['year'] = pd.to_datetime(curr_month).strftime('%Y')
pmpm_agg_group['start_date'] = pd.to_datetime(start_date).strftime('%Y-%m-%d')
pmpm_agg_group['stop_date'] = pd.to_datetime(stop_date).strftime('%Y-%m-%d')

pmpm_agg_group.to_csv(f'Outputs/Level_3_{curr_month.replace("-", "_")}.csv', index=False)

### Level 4

In [188]:
# LEVEL 4
pmpm_agg_service_type_c = pd.DataFrame(pmpm_agg_c.groupby(['tenantid', 'class', 'group', 'service_type'])['paid'].sum()).reset_index()  # Current PMPM by tenant/class
pmpm_agg_service_type_c = pd.merge(pmpm_agg_service_type_c, pmpm_class_members_c, how='left', on=['tenantid'])  # Merge current and prior pmpm df
pmpm_agg_service_type_c['pmpm'] = pmpm_agg_service_type_c['paid'] / pmpm_agg_service_type_c['mm_months']

pmpm_agg_service_type_p = pd.DataFrame(pmpm_agg_p.groupby(['tenantid', 'class', 'group', 'service_type'])['paid'].sum()).reset_index()  # Prior PMPM by tenant/class
pmpm_agg_service_type_p = pd.merge(pmpm_agg_service_type_p, pmpm_class_members_p, how='left', on=['tenantid'])  # Merge current and prior pmpm df
pmpm_agg_service_type_p['pmpm'] = pmpm_agg_service_type_p['paid'] / pmpm_agg_service_type_p['mm_months']

pmpm_agg_service_type = pd.merge(pmpm_agg_service_type_c, pmpm_agg_service_type_p, how='outer', on=['tenantid', 'class', 'group', 'service_type'], suffixes=('_c', '_p'))  # Merge current and prior pmpm df
pmpm_agg_service_type.fillna(0, inplace=True)

# Create % Change Field

pmpm_agg_service_type['pmpm_change'] = pmpm_agg_service_type['pmpm_c'] / pmpm_agg_service_type['pmpm_p'] -1
pmpm_agg_service_type.replace([np.inf, -np.inf], 1, inplace=True)
# pmpm_agg_class_only = pmpm_agg_group.filter(['tenantid', 'direction', 'class', 'group'], axis=1)  # Get list of class-group drivers for each tenantid

pmpm_agg_service_type = pd.merge(pmpm_agg_service_type, pmpm_agg_class[['tenantid', 'class_1', 'direction_1']], how='inner', left_on=['tenantid', 'class'], right_on=['tenantid', 'class_1'])  # Filter for class driver only per tenantid

pmpm_agg_group_class_only = pmpm_agg_group.filter(['tenantid', 'direction', 'class', 'group'], axis=1)  # Get list of class-group drivers for each tenantid
pmpm_agg_group_class_only['group'] = pmpm_agg_group_class_only['group'].str.lower()
pmpm_agg_service_type['group'] = pmpm_agg_service_type['group'].str.lower()
pmpm_agg_service_type = pd.merge(pmpm_agg_service_type, pmpm_agg_group_class_only, how='inner', on=['tenantid', 'class', 'group'])  # Filter for class driver only per tenantid

# Create % of Total Change Field
pmpm_agg_service_type_total = pmpm_agg_service_type
pmpm_agg_service_type_total['pmpm_change_total'] = pmpm_agg_service_type_total['pmpm_c'] - pmpm_agg_service_type_total['pmpm_p']
pmpm_agg_service_type_total = pd.DataFrame(pmpm_agg_service_type_total.groupby(['tenantid'])['pmpm_change_total'].sum()).reset_index()  # Get total change per group for the denominator of the pmpm_change_total_per calc

# Filter only for tenants that qualify to get a Level 4 statement

pmpm_agg_group_class_count = pd.DataFrame(pmpm_agg_group.groupby(['tenantid']).size()).reset_index()  # Count the # of class drivers per tenant
pmpm_agg_group_class_count = pmpm_agg_group_class_count.loc[pmpm_agg_group_class_count[0] == 1]  # Filter for groups with a single class driver only

pmpm_agg_service_type = pd.merge(pmpm_agg_service_type, pmpm_agg_group_class_count, how='left', on=['tenantid'])
pmpm_agg_service_type = pd.merge(pmpm_agg_service_type, pmpm_agg_service_type_total, how='inner', on=['tenantid'])
pmpm_agg_service_type['pmpm_change_total_per'] = pmpm_agg_service_type['pmpm_change_total_x'] / pmpm_agg_service_type['pmpm_change_total_y']
pmpm_agg_service_type['pmpm_change_total_per'] = np.where(pmpm_agg_service_type['direction_1'] == False, (-pmpm_agg_service_type['pmpm_change_total_per']), pmpm_agg_service_type['pmpm_change_total_per'])  # If direction = false, then make negative

pmpm_agg_service_type['sort'] = pmpm_agg_service_type.sort_values(['tenantid', 'pmpm_change_total_per'], ascending=False).groupby(['tenantid']).cumcount() + 1
pmpm_agg_service_type = pmpm_agg_service_type.sort_values(['tenantid', 'sort'])  # Ensures proper sorting
pmpm_agg_service_type['sort2'] = np.where(pmpm_agg_service_type['direction'] == False, (-pmpm_agg_service_type['sort']), pmpm_agg_service_type['sort']) # If direction = false, then make negative
pmpm_agg_service_type = pmpm_agg_service_type.sort_values(['tenantid', 'sort2'], ascending=True)  # Sort by ascending
pmpm_agg_service_type = pmpm_agg_service_type.groupby('tenantid').head(1)  # Filter for first record of each partition

# LEVEL IV STATEMENT
pmpm_agg_service_type['pmpm_change'] = pmpm_agg_service_type['pmpm_change'].replace(np.nan, 1)  # Ensures all 0 to something changes are flagged as 100% change
pmpm_agg_service_type['pmpm_change_str'] = round((pmpm_agg_service_type['pmpm_change']*100), 0).astype(str).str[:-2] + "%"
pmpm_agg_service_type['l4_statement'] = " related " + pmpm_agg_service_type['service_type'] + " (" + np.where(pmpm_agg_service_type.direction == True, '▲', '▼') + " " + pmpm_agg_service_type['pmpm_change_str'] + ") spend per member"
pmpm_agg_service_type_skinny = pmpm_agg_service_type.iloc[:, [0, 21]]

pmpm_agg_service_type_skinny['year'] = pd.to_datetime(curr_month).strftime('%Y')
pmpm_agg_service_type_skinny['start_date'] = pd.to_datetime(start_date).strftime('%Y-%m-%d')
pmpm_agg_service_type_skinny['stop_date'] = pd.to_datetime(stop_date).strftime('%Y-%m-%d')

pmpm_agg_service_type_skinny.to_csv(f'Outputs/Level_4_{curr_month.replace("-", "_")}.csv', index=False)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  pmpm_agg_service_type_skinny['year'] = pd.to_datetime(curr_month).strftime('%Y')
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  pmpm_agg_service_type_skinny['start_date'] = pd.to_datetime(start_date).strftime('%Y-%m-%d')
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  pmpm_agg_service_type_skinny['s

### Statements

In [189]:
def format_number(num):
    num_in_thousands = num / 1000
    formatted_num = f"{num_in_thousands:,.0f}K"
    return f"${formatted_num}"

In [190]:
statements_tenant = pmpm_agg_tenant.copy()
potential_loss = pd.DataFrame(final.groupby(['tenantid'])['potential_loss'].sum()).reset_index()
statements_tenant = statements_tenant[['tenantid', 'paid_c', 'direction_pmpm_bob', 'l1_statement_pmpm_1', 'l1_statement_pmpm_2', 'l1_statement_pepm_1', 'l1_statement_pepm_2', 'l1_statement_TCR_1', 'l1_statement_TCR_2']]
statements_tenant = pd.merge(statements_tenant, potential_loss, how='left', on=['tenantid'])
statements_tenant['potential_loss_formatted'] = statements_tenant['potential_loss'].apply(format_number)
statements_tenant['paid_formatted'] = statements_tenant['paid_c'].apply(format_number)
statements_tenant['l1_statement_3'] = ("During this period, " + statements_tenant['tenantid'] + " experienced " + statements_tenant['paid_formatted'].astype(str) + " in Medical and Rx claims.")
statements_tenant['l1_statement_4'] = ("This amounts to " + statements_tenant['direction_pmpm_bob'].astype(str).map(direction_dict_4) +" claims risk " + statements_tenant['direction_pmpm_bob'].astype(str).map(direction_dict_2) + " expected trend by roughly " + statements_tenant['potential_loss_formatted'] + ".")
CatClaimants = pmpm_agg_c.copy()
CatClaimants = CatClaimants[CatClaimants['class'] == 'Catastrophic']
CatClaimants = pd.DataFrame(CatClaimants.groupby(['tenantid'])['claimants'].first()).reset_index()
statements_tenant = pd.merge(statements_tenant, CatClaimants, how='left', on=['tenantid'])
statements_tenant.fillna(0, inplace=True)
statements_tenant.rename(columns={'claimants': 'CatClaimants'}, inplace=True)
statements_tenant['year'] = pd.to_datetime(curr_month).strftime('%Y')
statements_tenant['start_date'] = pd.to_datetime(start_date).strftime('%Y-%m-%d')
statements_tenant['stop_date'] = pd.to_datetime(stop_date).strftime('%Y-%m-%d')


# statements_class = pmpm_agg_class_skinny.copy()
# statements_class = statements_class[['tenantid', 'l2_statement']]
# statements_class.drop_duplicates(inplace=True)

# statements_group = pmpm_agg_group.copy()
# statements_group = statements_group[['tenantid', 'l2_statement']]

# statements_service = pmpm_agg_service_type.copy()
    # = pd.merge(pmpm_agg_group, pmpm_agg_tenant, how='inner', on=['tenantid'])
# statements = pd.merge(statements, pmpm_agg_service, how='outer', on=['tenantid'])
# statements['l4_statement'] = statements.l4_statement.fillna('')
# 
# statements = pd.merge(statements, TenantTotals, how='inner', on=['tenantid'])
# statements = statements.reset_index()
# 
# statements = pd.merge(statements, exchange, how='left', on=['tenantid'])
# statements = pd.merge(statements, carrier, how='left', on=['tenantid'])
# statements = statements.rename(columns={'tenantid': 'Account',
#                                         'current_member_count': 'Members'})
# statements['potential_loss'] = round(statements['potential_loss'],-4)
statements_tenant.to_csv(f'Outputs/statements_tenant_{curr_month.replace("-", "_")}.csv', index=False)
statements_tenant

Unnamed: 0,tenantid,paid_c,direction_pmpm_bob,l1_statement_pmpm_1,l1_statement_pmpm_2,l1_statement_pepm_1,l1_statement_pepm_2,l1_statement_TCR_1,l1_statement_TCR_2,potential_loss,potential_loss_formatted,paid_formatted,l1_statement_3,l1_statement_4,CatClaimants,year,start_date,stop_date
0,100F-New York City Industrial T,39611.69,False,"On a paid date basis, 100F-New York City Indus...","Compared to benchmarks, PMPMs are -54% lower t...","On a paid date basis, 100F-New York City Indus...","Compared to benchmarks, PEPMs are -43% lower t...","On a paid date basis, 100F-New York City Indus...","Compared to benchmarks, TCR is -54% lower than...",-5864.61,$-6K,$40K,"During this period, 100F-New York City Industr...",This amounts to a reduced claims risk below ex...,0.0,2023,2022-12-01,2023-11-30
1,100G-Iron Park Capital Partners,531289.89,True,"On a paid date basis, 100G-Iron Park Capital P...","Compared to benchmarks, PMPMs are 112% higher ...","On a paid date basis, 100G-Iron Park Capital P...","Compared to benchmarks, PEPMs are 163% higher ...","On a paid date basis, 100G-Iron Park Capital P...","Compared to benchmarks, TCR is 112% higher tha...",98316.35,$98K,$531K,"During this period, 100G-Iron Park Capital Par...",This amounts to an excess claims risk above ex...,0.0,2023,2022-12-01,2023-11-30
2,100J-Tamares Management LLC,93513.08,False,"On a paid date basis, 100J-Tamares Management ...","Compared to benchmarks, PMPMs are -28% lower t...","On a paid date basis, 100J-Tamares Management ...","Compared to benchmarks, PEPMs are -5% lower th...","On a paid date basis, 100J-Tamares Management ...","Compared to benchmarks, TCR is -28% lower than...",1546.36,$2K,$94K,"During this period, 100J-Tamares Management LL...",This amounts to a reduced claims risk below ex...,0.0,2023,2022-12-01,2023-11-30
3,100M-Wellness Insight Technologies,2485.33,False,"On a paid date basis, 100M-Wellness Insight Te...","Compared to benchmarks, PMPMs are -60% lower t...","On a paid date basis, 100M-Wellness Insight Te...","Compared to benchmarks, PEPMs are -70% lower t...","On a paid date basis, 100M-Wellness Insight Te...","Compared to benchmarks, TCR is -60% lower than...",-599.69,$-1K,$2K,"During this period, 100M-Wellness Insight Tech...",This amounts to a reduced claims risk below ex...,0.0,2023,2022-12-01,2023-11-30
4,"100P-Walking Fish Therapeutics, I",407821.10,False,"On a paid date basis, 100P-Walking Fish Therap...","Compared to benchmarks, PMPMs are -6% lower th...","On a paid date basis, 100P-Walking Fish Therap...","Compared to benchmarks, PEPMs are -7% lower th...","On a paid date basis, 100P-Walking Fish Therap...","Compared to benchmarks, TCR is -6% lower than ...",-2477.02,$-2K,$408K,"During this period, 100P-Walking Fish Therapeu...",This amounts to a reduced claims risk below ex...,0.0,2023,2022-12-01,2023-11-30
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
8648,ZZK-Altman Inc,112010.38,True,"On a paid date basis, ZZK-Altman Inc's paid PM...","Compared to benchmarks, PMPMs are 193% higher ...","On a paid date basis, ZZK-Altman Inc's paid PE...","Compared to benchmarks, PEPMs are 628% higher ...","On a paid date basis, ZZK-Altman Inc's TCR inc...","Compared to benchmarks, TCR is 193% higher tha...",53291.32,$53K,$112K,"During this period, ZZK-Altman Inc experienced...",This amounts to an excess claims risk above ex...,0.0,2023,2022-12-01,2023-11-30
8649,ZZM-Contextant LLC June 19,26521.53,False,"On a paid date basis, ZZM-Contextant LLC Jun...","Compared to benchmarks, PMPMs are -63% lower t...","On a paid date basis, ZZM-Contextant LLC Jun...","Compared to benchmarks, PEPMs are -35% lower t...","On a paid date basis, ZZM-Contextant LLC Jun...","Compared to benchmarks, TCR is -63% lower than...",3290.03,$3K,$27K,"During this period, ZZM-Contextant LLC June ...",This amounts to a reduced claims risk below ex...,0.0,2023,2022-12-01,2023-11-30
8650,"ZZN-Vibliome Operations, LLC",46975.75,False,"On a paid date basis, ZZN-Vibliome Operations,...","Compared to benchmarks, PMPMs are -58% lower t...","On a paid date basis, ZZN-Vibliome Operations,...","Compared to benchmarks, PEPMs are -54% lower t...","On a paid date basis, ZZN-Vibliome Operations,...","Compared to benchmarks, TCR is -58% lower than...",-2998.72,$-3K,$47K,"During this period, ZZN-Vibliome Operations, L...",This amounts to a reduced claims risk below ex...,0.0,2023,2022-12-01,2023-11-30
8651,ZZV-Purple Fete LLC,4345.70,False,"On a paid date basis, ZZV-Purple Fete LLC's pa...","Compared to benchmarks, PMPMs are -71% lower t...","On a paid date basis, ZZV-Purple Fete LLC's pa...","Compared to benchmarks, PEPMs are -84% lower t...","On a paid date basis, ZZV-Purple Fete LLC's TC...","Compared to benchmarks, TCR is -71% lower than...",-101.67,$-0K,$4K,"During this period, ZZV-Purple Fete LLC experi...",This amounts to a reduced claims risk below ex...,0.0,2023,2022-12-01,2023-11-30
