In [1]:
import pandas as pd

In [2]:
import numpy as np

In [4]:
provider_df=pd.read_csv("provider_summary.csv")

In [8]:
print(f"loaded provider_df with {len(provider_df)} providers") 

loaded provider_df with 4943 providers


In [9]:
provider_df.head()

Unnamed: 0,Provider,total_claim,total_revenue,avg_claim,unique_patients,first_claim,last_claim,active_days,claims_per_day,days_since_lastclaim,PotentialFraud
0,PRV51001,16,6250,390.625,15,2009-01-04,2009-12-17,347,0.045977,14,No
1,PRV51003,56,25260,451.071429,54,2009-01-09,2009-12-20,345,0.16185,11,Yes
2,PRV51004,107,35460,331.401869,96,2009-01-03,2009-12-27,358,0.29805,4,No
3,PRV51005,903,230010,254.717608,435,2008-12-23,2009-12-30,372,2.420912,1,Yes
4,PRV51007,55,14080,256.0,49,2009-01-01,2009-12-23,356,0.154062,8,No


In [16]:
print("DISTRIBUTION ANALYSIS")
print("*"*50)
metrics=["total_claim","total_revenue","days_since_lastclaim","active_days"]
for metric in metrics:
    print(metric)
    print(f"MIN:{(provider_df[metric]).min()}")
    print(f"25th PERCENTILE:{(provider_df[metric]).quantile(0.25)}")
    print(f"MEADIAN:{(provider_df[metric]).median()}")
    print(f"75th PERCENTILE:{(provider_df[metric]).quantile(0.75)}")
    print(f"MAX:{(provider_df[metric]).max()}")

DISTRIBUTION ANALYSIS
**************************************************
total_claim
MIN:1
25th PERCENTILE:8.0
MEADIAN:24.0
75th PERCENTILE:66.0
MAX:6206
total_revenue
MIN:10
25th PERCENTILE:2090.0
MEADIAN:7110.0
75th PERCENTILE:19700.0
MAX:1808570
days_since_lastclaim
MIN:0
25th PERCENTILE:3.0
MEADIAN:12.0
75th PERCENTILE:39.0
MAX:381
active_days
MIN:0
25th PERCENTILE:286.0
MEADIAN:340.0
75th PERCENTILE:359.0
MAX:384


In [38]:
def assign_segment(row):
    if row['total_claim']>66 and row['total_revenue']>19700 and row['days_since_lastclaim']<30:
        return 'champion'
    elif row['total_claim']>24 and row['total_revenue']>7110 and row['days_since_lastclaim']<15:
        return 'rising stars'
    elif row['total_revenue']>15000 and row['days_since_lastclaim']>50:
        return 'dormant gaints'
    else:
        return 'standard'

In [39]:
provider_df['segment']=provider_df.apply(assign_segment,axis=1)

In [40]:
provider_df['segment'].value_counts()

segment
standard          2963
champion          1117
rising stars       861
dormant gaints       2
Name: count, dtype: int64

In [46]:
segment_profile=provider_df.groupby('segment').agg({
    'Provider':'count',
    'total_claim':['sum','mean'],
    'total_revenue':['sum','mean'],
    'unique_patients':['sum','mean'],
    'days_since_lastclaim':'mean',
    'active_days':'mean' 
}).round(0)

In [53]:
segment_profile

Unnamed: 0_level_0,Provider,total_claim,total_claim,total_revenue,total_revenue,unique_patients,unique_patients,days_since_lastclaim,active_days
Unnamed: 0_level_1,count,sum,mean,sum,mean,sum,mean,mean,mean
segment,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2
champion,1117,308238,276.0,91357180,81788.0,198440,178.0,3.0,365.0
dormant gaints,2,28,14.0,112500,56250.0,20,10.0,220.0,156.0
rising stars,861,42023,49.0,12236660,14212.0,33462,39.0,5.0,354.0
standard,2963,41867,14.0,11968710,4039.0,36171,12.0,58.0,259.0


In [52]:
total_rev=provider_df["total_revenue"].sum()
rev_by_seg=provider_df.groupby('segment')["total_revenue"].sum()
rev_pct_by_seg=(rev_by_seg/total_rev*100).round(1)
print(f"REVENUE PERCENTAGE BY EACH SEGMENT :\n{rev_pct_by_seg}")

REVENUE PERCENTAGE BY EACH SEGMENT :
segment
champion          79.0
dormant gaints     0.1
rising stars      10.6
standard          10.3
Name: total_revenue, dtype: float64


In [54]:
def assign_visits(segment):
    if segment =='champion':
        return 6
    elif segment =='rising stars':
        return 4
    elif segment =='dormant giants':
        return 3
    else:
        return 1

In [57]:
provider_df['visits_per_qtr']= provider_df['segment'].apply(assign_visits)

In [59]:
provider_df['quaterly_cost']=provider_df['visits_per_qtr']*200

In [73]:
roi_analysis=provider_df.groupby('segment').agg({
    'Provider':'count',
    'total_revenue':'sum',
    'quaterly_cost':'sum',
    'visits_per_qtr':['sum','mean']}).round(0)

In [74]:
roi_analysis

Unnamed: 0_level_0,Provider,total_revenue,quaterly_cost,visits_per_qtr,visits_per_qtr
Unnamed: 0_level_1,count,sum,sum,sum,mean
segment,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2
champion,1117,91357180,1340400,6702,6.0
dormant gaints,2,112500,400,2,1.0
rising stars,861,12236660,688800,3444,4.0
standard,2963,11968710,592600,2963,1.0


In [75]:
roi_analysis['returns']=(roi_analysis['total_revenue']/roi_analysis['quaterly_cost']).round(2)

In [76]:
roi_analysis

Unnamed: 0_level_0,Provider,total_revenue,quaterly_cost,visits_per_qtr,visits_per_qtr,returns
Unnamed: 0_level_1,count,sum,sum,sum,mean,Unnamed: 6_level_1
segment,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2
champion,1117,91357180,1340400,6702,6.0,68.16
dormant gaints,2,112500,400,2,1.0,281.25
rising stars,861,12236660,688800,3444,4.0,17.77
standard,2963,11968710,592600,2963,1.0,20.2


# FINAL TABLES


In [84]:
provider_df.head()

Unnamed: 0,Provider,total_claim,total_revenue,avg_claim,unique_patients,first_claim,last_claim,active_days,claims_per_day,days_since_lastclaim,PotentialFraud,segment,visits_per_qtr,cost_of_visit,quaterly_cost
0,PRV51001,16,6250,390.625,15,2009-01-04,2009-12-17,347,0.045977,14,No,standard,1,200,200
1,PRV51003,56,25260,451.071429,54,2009-01-09,2009-12-20,345,0.16185,11,Yes,rising stars,4,800,800
2,PRV51004,107,35460,331.401869,96,2009-01-03,2009-12-27,358,0.29805,4,No,champion,6,1200,1200
3,PRV51005,903,230010,254.717608,435,2008-12-23,2009-12-30,372,2.420912,1,Yes,champion,6,1200,1200
4,PRV51007,55,14080,256.0,49,2009-01-01,2009-12-23,356,0.154062,8,No,rising stars,4,800,800


In [85]:
segment_profile

Unnamed: 0_level_0,Provider,total_claim,total_claim,total_revenue,total_revenue,unique_patients,unique_patients,days_since_lastclaim,active_days
Unnamed: 0_level_1,count,sum,mean,sum,mean,sum,mean,mean,mean
segment,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2
champion,1117,308238,276.0,91357180,81788.0,198440,178.0,3.0,365.0
dormant gaints,2,28,14.0,112500,56250.0,20,10.0,220.0,156.0
rising stars,861,42023,49.0,12236660,14212.0,33462,39.0,5.0,354.0
standard,2963,41867,14.0,11968710,4039.0,36171,12.0,58.0,259.0


In [86]:
roi_analysis

Unnamed: 0_level_0,Provider,total_revenue,quaterly_cost,visits_per_qtr,visits_per_qtr,returns
Unnamed: 0_level_1,count,sum,sum,sum,mean,Unnamed: 6_level_1
segment,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2
champion,1117,91357180,1340400,6702,6.0,68.16
dormant gaints,2,112500,400,2,1.0,281.25
rising stars,861,12236660,688800,3444,4.0,17.77
standard,2963,11968710,592600,2963,1.0,20.2


In [89]:
provider_df.to_csv("provider_segmentated_final.csv",index=False)

In [91]:
print(f"Final dataset: {len(provider_df)} providers")
print(f"Columns: {provider_df.columns.tolist()}")

Final dataset: 4943 providers
Columns: ['Provider', 'total_claim', 'total_revenue', 'avg_claim', 'unique_patients', 'first_claim', 'last_claim', 'active_days', 'claims_per_day', 'days_since_lastclaim', 'PotentialFraud', 'segment', 'visits_per_qtr', 'cost_of_visit', 'quaterly_cost']
