## Exporting Data Slices

In [28]:
import pandas as pd
import os
import numpy as np

# Notebook-wide pandas display settings: cap printed frames at 40 rows and
# lift the limit on the number of columns shown.
display_options = {"display.max_rows": 40, "display.max_columns": None}
for option_name, option_value in display_options.items():
    pd.set_option(option_name, option_value)

### Import Datasets

In [29]:
# Root folder that holds the project's datasets. Set the MBS_DATASETS_DIR
# environment variable to run the notebook on another machine; when it is
# unset (or empty) the original local folder is used, exactly as before.
path = (
    os.environ.get("MBS_DATASETS_DIR")
    or r"/Users/patel/Documents/CF-Data Anaylst Course/portfolio_projects/mbs_analysis/datasets/"
)

# Load the combined 2014-22 service-level-1 dataset (per its file name) and
# print its column / dtype summary.
# NOTE: read_pickle can execute arbitrary code while unpickling, so only point
# this at pickle files produced by this project.
df_mbs_sa3_service_1_np = pd.read_pickle(
    os.path.join(
        path, "clean_datasets/2014-22_combined_service_1_categories_no_pivot.pkl"
    )
)
df_mbs_sa3_service_1_np.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 103592 entries, 0 to 229389
Data columns (total 52 columns):
 #   Column                          Non-Null Count   Dtype  
---  ------                          --------------   -----  
 0   Year                            103592 non-null  int64  
 1   StateTerritory                  103592 non-null  object 
 2   GeographicCode                  103592 non-null  object 
 3   GeographicAreaName              103592 non-null  object 
 4   GeographicGroup                 103592 non-null  object 
 5   ServiceLevel                    103592 non-null  object 
 6   Service                         103592 non-null  object 
 7   DemographicGroup                103592 non-null  object 
 8   MBS_per_100                     103592 non-null  float64
 9   No_of_patients                  103592 non-null  Int32  
 10  No_of_services                  103592 non-null  Int64  
 11  %_People_had_service            103592 non-null  float64
 12  Services_100_peo

In [30]:
# Count how many rows fall into each Population_demographic category.
population_demographic = df_mbs_sa3_service_1_np["Population_demographic"]
population_demographic.value_counts()

All persons    14860
Males          14858
Females        14857
25-44          14802
45-64          14762
65+            14738
0-24           14715
Name: Population_demographic, dtype: int64

In [31]:
# List every column name of the loaded dataset (the .info() output is long).
df_mbs_sa3_service_1_np.columns

Index(['Year', 'StateTerritory', 'GeographicCode', 'GeographicAreaName',
       'GeographicGroup', 'ServiceLevel', 'Service', 'DemographicGroup',
       'MBS_per_100', 'No_of_patients', 'No_of_services',
       '%_People_had_service', 'Services_100_people', 'Total_mbs_paid_$',
       'Total_provider_fees_$', 'ERP', 'key_x', 'Out_of_Pocket',
       'Out_of_pocket_cost_%', 'Out_of_pocket_cost_per_person',
       'No_of_service_per_person', 'Out_of_pocket_cost_per_service',
       'Patient_ERP_Flag', 'negative_income', 'no_income_or_not_applicable',
       'average_income_$5200', 'average_income_$13000',
       'average_income_$18200', 'average_income_$26000',
       'average_income_$36400', 'average_income_$46800',
       'average_income_$58500', 'average_income_$71500',
       'average_income_$91000', 'average_income_$130000',
       'average_income_$169000+', 'Population_demographic', 'Population',
       '%_out_of_pocket_by_$5200', '%_out_of_pocket_by_$13000',
       '%_out_of_pocket_

In [32]:
# Keep only the rows whose DemographicGroup is one of the two gender groups,
# then show the resulting (rows, columns) shape.
gender_groups = ["Males", "Females"]
is_gender_row = df_mbs_sa3_service_1_np["DemographicGroup"].isin(gender_groups)
df_mbs_sa3_gender = df_mbs_sa3_service_1_np.loc[is_gender_row]
df_mbs_sa3_gender.shape

(29715, 52)

In [33]:
# Roll df_mbs_sa3_gender up to one row per year, state/territory, service and
# demographic group, summing the count and dollar columns within each group.
group_keys = ["Year", "StateTerritory", "Service", "DemographicGroup"]
summed_columns = [
    "No_of_patients",
    "No_of_services",
    "Total_mbs_paid_$",
    "Total_provider_fees_$",
    "ERP",
]
df_mbs_sa3_gender_state = df_mbs_sa3_gender.groupby(
    by=group_keys, as_index=False
).agg(dict.fromkeys(summed_columns, "sum"))
df_mbs_sa3_gender_state

Unnamed: 0,Year,StateTerritory,Service,DemographicGroup,No_of_patients,No_of_services,Total_mbs_paid_$,Total_provider_fees_$,ERP
0,2014,ACT,Allied Health attendances (total),Females,62252,118779,8384876.0,10326728.0,180388
1,2014,ACT,Allied Health attendances (total),Males,43186,73376,4992689.0,5981999.0,178263
2,2014,ACT,Diagnostic Imaging (total),Females,65437,149416,18213343.0,25456047.0,180388
3,2014,ACT,Diagnostic Imaging (total),Males,42704,85077,11829864.0,15140564.0,178263
4,2014,ACT,GP attendances (total),Females,162985,943789,43097542.0,58052404.0,180388
...,...,...,...,...,...,...,...,...,...
715,2022,WA,GP attendances (total),Males,1170330,7689233,366405018.0,410156938.0,1376477
716,2022,WA,Nursing and Aboriginal Health Workers (total),Females,157781,387070,10944618.0,12585735.0,1373387
717,2022,WA,Nursing and Aboriginal Health Workers (total),Males,113162,226467,4466057.0,5043581.0,1376477
718,2022,WA,Specialist attendances (total),Females,388240,1082965,94179555.0,166710104.0,1350358


In [34]:
# Derive state-level rates and per-patient / per-service dollar figures from
# the aggregated totals. Every new column is computed only from the summed
# base columns, so re-running this cell yields the same result.
patients = df_mbs_sa3_gender_state["No_of_patients"]
services = df_mbs_sa3_gender_state["No_of_services"]
mbs_paid = df_mbs_sa3_gender_state["Total_mbs_paid_$"]
provider_fees = df_mbs_sa3_gender_state["Total_provider_fees_$"]

# Patients as a percentage of the ERP column.
df_mbs_sa3_gender_state["%_People_had_service"] = (
    patients / df_mbs_sa3_gender_state["ERP"] * 100
)

# Provider fees minus the amount MBS paid.
out_of_pocket = provider_fees - mbs_paid
df_mbs_sa3_gender_state["Out_of_Pocket"] = out_of_pocket
df_mbs_sa3_gender_state["Out_of_pocket_cost_per_person"] = out_of_pocket / patients
df_mbs_sa3_gender_state["No_of_service_per_person"] = services / patients

# Per-service dollar amounts. The previous denominator,
# No_of_patients * No_of_service_per_person, is just No_of_services rebuilt
# through a float round-trip; dividing by the service count directly avoids the
# extra rounding and the NaN that round-trip produces when the patient count is 0.
per_service_columns = {
    "Out_of_pocket_pp_per_service": out_of_pocket,
    "Total_Provider_pp_per_service": provider_fees,
    "MBS_Rebate_pp_per_service": mbs_paid,
}
for column_name, dollar_total in per_service_columns.items():
    df_mbs_sa3_gender_state[column_name] = dollar_total / services

df_mbs_sa3_gender_state.head(10)

Unnamed: 0,Year,StateTerritory,Service,DemographicGroup,No_of_patients,No_of_services,Total_mbs_paid_$,Total_provider_fees_$,ERP,%_People_had_service,Out_of_Pocket,Out_of_pocket_cost_per_person,No_of_service_per_person,Out_of_pocket_pp_per_service,Total_Provider_pp_per_service,MBS_Rebate_pp_per_service
0,2014,ACT,Allied Health attendances (total),Females,62252,118779,8384876.0,10326728.0,180388,34.510056,1941852.0,31.193407,1.908035,16.348445,86.940688,70.592243
1,2014,ACT,Allied Health attendances (total),Males,43186,73376,4992689.0,5981999.0,178263,24.226003,989310.0,22.908118,1.699069,13.482746,81.525281,68.042534
2,2014,ACT,Diagnostic Imaging (total),Females,65437,149416,18213343.0,25456047.0,180388,36.275695,7242704.0,110.682091,2.283357,48.473417,170.370288,121.896872
3,2014,ACT,Diagnostic Imaging (total),Males,42704,85077,11829864.0,15140564.0,178263,23.955616,3310700.0,77.526695,1.992249,38.91416,177.963069,139.048909
4,2014,ACT,GP attendances (total),Females,162985,943789,43097542.0,58052404.0,180388,90.352462,14954862.0,91.756063,5.790649,15.845557,61.509939,45.664383
5,2014,ACT,GP attendances (total),Males,138771,657560,29308427.0,38762072.0,178263,77.846216,9453645.0,68.124068,4.738454,14.376855,58.948342,44.571487
6,2014,ACT,Nursing and Aboriginal Health Workers (total),Females,3236,5707,154251.0,156243.0,179903,1.798747,1992.0,0.615575,1.763597,0.349045,27.377431,27.028386
7,2014,ACT,Nursing and Aboriginal Health Workers (total),Males,2360,3876,94782.0,97461.0,177236,1.331558,2679.0,1.135169,1.642373,0.691176,25.144737,24.45356
8,2014,ACT,Specialist attendances (total),Females,52544,141913,11996339.0,19184915.0,180388,29.128323,7188576.0,136.810597,2.700841,50.65481,135.187862,84.533052
9,2014,ACT,Specialist attendances (total),Males,39337,101079,8451509.0,13474790.0,178263,22.066834,5023281.0,127.69863,2.569566,49.696584,133.309491,83.612907


In [35]:
# Export the state / year / service-level-1 aggregates, split by gender
# (DemographicGroup = Males / Females), as a CSV file.
gender_state_csv = os.path.join(
    path, "clean_datasets/cleaned_csv/mbs_census_state_year_service_1_gender.csv"
)
# to_csv does not create missing folders, so make sure the target folder exists.
os.makedirs(os.path.dirname(gender_state_csv), exist_ok=True)
# NOTE(review): the default index=True also writes the row index as an unnamed
# first column - confirm downstream readers expect it before changing that.
df_mbs_sa3_gender_state.to_csv(gender_state_csv)