# 2025 NPS

## Set up

### Import packages

In [434]:
import pandas as pd
import numpy as np

### Upload files
* National Pain Survey 2025 responses
* [Australian Postcodes](https://www.matthewproctor.com/australian_postcodes)

In [435]:
# Load the coded survey export, then report its dimensions and preview the first rows.
survey_csv = 'ChronicPainAustralia_DATA_2025-06-03_1138.csv'
df = pd.read_csv(survey_csv)
print(df.shape)
df.head(n=5)

(4651, 259)


Unnamed: 0,record_id,cpa_nps_2025_timestamp,respondent_type,age,gender,gender_txt,state,postcode,income_weekly,first_nations,...,scm_fb,scm_inst,scm_tiktok,scm_tw,scm_yt,scm_pin,scm_reddit,scm_li,consent_marketing,cpa_nps_2025_complete
0,1,[not completed],connection,18-24,male,,NSW,2137.0,ns,no,...,,,,,,,,,,0
1,2,2025-05-05 14:38:00,peer,35-44,female,,TAS,,3000-3999,ns,...,2.0,2.0,2.0,2.0,2.0,2.0,2.0,2.0,1.0,2
2,3,2025-05-05 16:50:24,peer,55-64,male,,NSW,2540.0,2000-2499,ns,...,,,,,4.0,,,,0.0,2
3,4,[not completed],peer,65-74,female,,NSW,2067.0,0500-999,no,...,0.0,0.0,,,3.0,0.0,0.0,0.0,,0
4,5,2025-05-05 16:53:47,peer,55-64,female,,NSW,2113.0,0500-999,no,...,4.0,0.0,0.0,,2.0,0.0,1.0,1.0,1.0,2


In [436]:
# Load the label export (question text as headers, answer labels as values),
# then report its dimensions and preview the first rows.
labels_csv = 'ChronicPainAustralia_DATA_LABELS_2025-06-03_1139.csv'
df_labels = pd.read_csv(labels_csv)
print(df_labels.shape)
df_labels.head(n=5)

(4651, 259)


Unnamed: 0,Record ID,Survey Timestamp,Please select from the following,What is your age?,What gender do you identify with?,"If not listed, please specify",What State or Territory do you live in?,What is your postcode?,"What is your weekly household income, after tax?",Do you identify as a First Nations person?,...,Facebook,Instagram,TikTok,Twitter,YouTube,Pinterest,Reddit,LinkedIn,"Would you like to become a free member of Chronic Pain Australia and get email updates of the latest chronic pain advocacy, news, research and events?",Complete?
0,1,[not completed],I know someone living with pain (e.g. someone ...,18-24,Male,,NSW,2137.0,Prefer not to say,No,...,,,,,,,,,,Incomplete
1,2,2025-05-05 14:38:00,I am someone living with pain,35-44,Female,,TAS,,$3000 - $3999,Prefer not to say,...,Monthly,Monthly,Monthly,Monthly,Monthly,Monthly,Monthly,Monthly,Yes,Complete
2,3,2025-05-05 16:50:24,I am someone living with pain,55-64,Male,,NSW,2540.0,$2000 - $2499,Prefer not to say,...,,,,,Daily,,,,No,Complete
3,4,[not completed],I am someone living with pain,65-74,Female,,NSW,2067.0,$500 - $999,No,...,Never,Never,,,Weekly,Never,Never,Never,,Incomplete
4,5,2025-05-05 16:53:47,I am someone living with pain,55-64,Female,,NSW,2113.0,$500 - $999,No,...,Daily,Never,Never,,Monthly,Never,Less frequently,Less frequently,Yes,Complete


In [437]:
# Load the Australian postcode lookup table.
postcode_lu = pd.read_csv('australian_postcodes.csv')
print(postcode_lu.shape)
# Fixed seed so the preview shows the same rows on every run; without it the
# cell output changes each time the notebook is re-executed.
postcode_lu.sample(5, random_state=42)

(18526, 41)


Unnamed: 0,id,postcode,locality,state,long,lat,dc,type,status,sa3,...,altitude,chargezone,phn_code,phn_name,lgaregion,lgacode,electorate,electoraterating,sed_code,sed_name
9441,8547,3898,BINGO,VIC,147.464799,-37.066169,OMEO,Delivery Area,Updated 6-Feb-2020,20502.0,...,550.958069,V2,PHN205,Murray,Alpine,20110.0,Indi,Rural,26404.0,Ovens Valley (Northern Victoria)
14369,15201,5244,CHARLESTON,SA,138.946384,-34.938664,VERDUN DC,Delivery Area,Updated 6-Feb-2020,40703.0,...,,S2,PHN402,Country SA,Mid Murray,44210.0,Mayo,Rural,40040.0,Schubert
10368,11969,4285,VERESDALE,QLD,152.975131,-28.056979,BEAUDESERT,Delivery Area,Updated 6-Feb-2020,31104.0,...,92.215569,Q1,PHN303,Gold Coast,Logan,34590.0,Wright,Rural,30075.0,Scenic Rim
1820,3289,2324,RAYMOND TERRACE,NSW,152.332617,-32.623108,RAYMOND TERRACE EAST LPO,Delivery Area,Updated 6-Feb-2020,10801.0,...,21.19322,N2,PHN108,Hunter New England and Central Coast,Maitland,15050.0,Paterson,Provincial,10083.0,Upper Hunter
14810,21453,5353,PENRICE,SA,139.067325,-34.477462,Angaston,Delivery Area,Updated 4-Dec-2022,40703.0,...,357.647827,S2,PHN402,Country SA,Mid Murray,44210.0,Barker,Rural,40040.0,Schubert


### Pre-processing responses

**Label completion rates.**
1. Create a new field, `completion_rate`.
2. Label responses COMPLETED if they have a survey timestamp (i.e. the timestamp is not `[not completed]`).
3. Label the remaining responses PARTIAL COMPLETION if they have Respondent Type, Age, and either Gender or Postcode.
4. Label everything else INCOMPLETE.

In [438]:
# LABEL Completion Rates.
# The column is built in one pass with np.select instead of being seeded with
# np.nan and overwritten: a NaN seed creates a float64 column, and writing
# strings into it is deprecated (pandas >= 2.1 warns about incompatible dtypes).

# COMPLETED: any timestamp other than the '[not completed]' placeholder.
# NOTE: a missing (NaN) timestamp also compares unequal, so it is labelled 'Completed'.
is_completed = df['cpa_nps_2025_timestamp'] != '[not completed]'

# PARTIAL COMPLETION: Respondent Type and Age present, plus either Gender or Postcode.
has_type_and_age = df['respondent_type'].notna() & df['age'].notna()
has_gender_or_postcode = df['gender'].notna() | df['postcode'].notna()
is_partial = has_type_and_age & has_gender_or_postcode

# np.select uses the first condition that matches, so 'Completed' takes priority
# over 'Partial completion'; rows matching neither are labelled INCOMPLETE.
df['completion_rate'] = np.select(
    [is_completed, is_partial],
    ['Completed', 'Partial completion'],
    default='Incomplete',
)

In [439]:
# Count the responses in each completion category.
df['completion_rate'].value_counts()

Completed             3276
Partial completion     757
Incomplete             618
Name: completion_rate, dtype: int64

Drop incomplete responses

In [440]:
# Drop incomplete responses.
# .copy() makes the filtered frame independent of the original, so the in-place
# edits made to `df` in later cells do not raise SettingWithCopyWarning.
df = df.loc[df['completion_rate'] != 'Incomplete'].copy()
df.shape

(4033, 260)

In [441]:
# Recode the stray state value '8' as 'WA', then list the distinct states to confirm.
miscoded_state = df['state'].eq('8')
df.loc[miscoded_state, 'state'] = 'WA'
df['state'].unique()

array(['NSW', 'TAS', 'SA', 'QLD', 'VIC', 'ACT', 'WA', 'NT', nan],
      dtype=object)

### Postcode Lookup

In [442]:
# Keep only the lookup columns used downstream, then collapse to one row per
# (postcode, state) pair, retaining the last occurrence of each pair.
lookup_columns = [
    'postcode', 'state', 'region', 'electoraterating',
    'long', 'lat', 'lgaregion', 'electorate',
]
postcode_lu = (
    postcode_lu[lookup_columns]
    .drop_duplicates(subset=['postcode', 'state'], keep='last')
)
postcode_lu.shape

(3192, 8)

In [443]:
# Append Postcode Lookup.
print('df shape before:', df.shape)

# Cast postcode to a nullable integer and use 0 as the "no postcode" sentinel.
# assign() returns a new frame rather than writing into `df` in place, so this
# cell does not raise SettingWithCopyWarning when `df` is a filtered slice.
df = df.assign(postcode=df['postcode'].astype('Int64').fillna(0))

# Left join keeps every survey response. validate='many_to_one' fails fast if the
# lookup ever holds more than one row per (postcode, state), which would
# otherwise silently duplicate responses.
df = pd.merge(
    df,
    postcode_lu,
    on=['postcode', 'state'],
    how='left',
    validate='many_to_one',
)
print('df shape after:', df.shape)


df shape before: (4033, 260)
df shape after: (4033, 266)


## Who is in pain?

In [444]:
# Columns for the "Who is in pain?" section: respondent profile fields followed
# by the `dx___*` option columns and their free-text companion.
who_profile_cols = [
    'record_id', 'respondent_type', 'completion_rate',
    'age', 'gender', 'gender_txt',
    'state', 'postcode', 'income_weekly',
    'first_nations', 'lgbtq', 'multi_cultural',
]
who_dx_options = (
    'autism', 'adhd', 'dyscalculia', 'dyslexia', 'dyspraxia', 'epilepsy',
    'apd', 'id', 'ts', 'na', 'ns', 'nl',
)
who_dx_cols = [f'dx___{option}' for option in who_dx_options] + ['dx_txt']

df_who = df[who_profile_cols + who_dx_cols]

In [445]:
# Display the "Who is in pain?" subset (pandas truncates long frames in the rendered output).
df_who

Unnamed: 0,record_id,respondent_type,completion_rate,age,gender,gender_txt,state,postcode,income_weekly,first_nations,...,dx___dyslexia,dx___dyspraxia,dx___epilepsy,dx___apd,dx___id,dx___ts,dx___na,dx___ns,dx___nl,dx_txt
0,1,connection,Partial completion,18-24,male,,NSW,2137,ns,no,...,0,0,0,0,0,0,1,0,0,
1,2,peer,Completed,35-44,female,,TAS,0,3000-3999,ns,...,1,1,0,1,0,0,0,0,0,
2,3,peer,Completed,55-64,male,,NSW,2540,2000-2499,ns,...,0,0,0,0,0,0,1,0,0,
3,4,peer,Partial completion,65-74,female,,NSW,2067,0500-999,no,...,0,0,0,0,0,0,0,0,1,
4,5,peer,Completed,55-64,female,,NSW,2113,0500-999,no,...,0,0,0,0,0,0,1,0,0,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
4028,4645,peer,Partial completion,75-84,female,,TAS,7277,2500-2999,no,...,0,0,0,0,0,0,1,0,0,
4029,4646,peer,Partial completion,55-64,female,,VIC,3551,1500-1999,yes,...,0,0,0,0,0,0,1,0,0,
4030,4647,peer,Partial completion,55-64,female,,QLD,4670,0001-499,yes,...,0,0,1,0,0,0,0,0,0,
4031,4648,both,Partial completion,55-64,non_binary,,VIC,3011,0500-999,no,...,0,0,0,0,0,0,0,0,0,


## The Impact of Pain

In [432]:
# Columns for the "Impact of Pain" section, grouped by shared column prefix.
# Each `<prefix>___<option>` group is followed by its free-text `_txt` column.
impact_profile_cols = [
    'record_id', 'respondent_type', 'completion_rate',
    'age', 'gender', 'state', 'postcode', 'income_weekly',
]

impact_pain_type_options = (
    'back', 'neck', 'joint', 'migraines', 'widespread', 'osteoarthritis',
    'rheumatoid_arthritis', 'pelvic', 'abdominal', 'crps', 'nerve',
    'connective_tissue_disorder', 'na', 'nl',
)
impact_pain_type_cols = (
    [f'pn_type___{option}' for option in impact_pain_type_options] + ['pn_type_txt']
)

impact_single_cols = ['pn_impact', 'hc_dx_length', 'work']

impact_life_xp_options = (
    'mental_health', 'ptsd', 'dims', 'unable_work', 'low_productivity',
    'strain_financial', 'strain_family', 'strain_friends', 'unable_family',
    'less_intimacy', 'inactivity', 'changed_eating', 'less_independence',
    'unable_drive', 'na', 'nl',
)
impact_life_xp_cols = (
    [f'life_xp___{option}' for option in impact_life_xp_options] + ['life_xp_txt']
)

impact_sleep_options = (
    'reg_schedule', 'less_caffeine_alcohol', 'comfort', 'relaxation',
    'less_screen', 'medication', 'bed_mattress', 'na', 'nl',
)
impact_sleep_cols = (
    ['hc_sleep_tests']
    + [f'hc_sleep_strategies___{option}' for option in impact_sleep_options]
    + ['hc_sleep_strategies_txt']
)

impact_covid_cols = ['dx_covid_status', 'dx_covid_long', 'dx_covid_long_pn']

# Concatenation order matches the column order of the section's output frame.
df_impact = df[
    impact_profile_cols
    + impact_pain_type_cols
    + impact_single_cols
    + impact_life_xp_cols
    + impact_sleep_cols
    + impact_covid_cols
]

## Accessing Healthcare

In [433]:
# Columns for the "Accessing Healthcare" section, grouped by shared column prefix.
# `<prefix>___<option>` groups are generated from their option suffixes.
access_profile_cols = [
    'record_id', 'respondent_type', 'completion_rate',
    'age', 'gender', 'state', 'postcode',
]

access_pain_service_cols = ['hc_dx_length'] + [
    f'hc_xp_pn_{suffix}'
    for suffix in (
        'ref_type', 'wait', 'appt_type', 'appt_type_txt', 'appt_approp',
        'appt_comfort', 'appt_safe', 'appt_tailor', 'appt_txt',
        'appt_rec', 'appt_rec_txt',
    )
]

access_plan_cols = ['cd_mgmt_plan', 'mh_mgmt_plan']

access_appt_cols = [
    f'hc_appt___{option}'
    for option in ('unavailable', 'closed_books', 'no_new', 'no_chronic', 'na', 'nl')
] + ['hc_appt_txt']

access_care_cols = ['hc_pn_care_review'] + [
    f'hc_pn_care_modal_{suffix}'
    for suffix in ('inp_self', 'inp_supp', 'inp_hcp', 'tlh_self', 'tlh_supp', 'tlh_hcp', 'txt')
]

access_telehealth_cols = [
    'hc_tlh_location', 'hc_tlh_location_txt',
    'hc_tlh_opinion', 'hc_tlh_opinion_txt',
]

access_opioid_rx_cols = [
    f'hc_opioid_rx___{option}'
    for option in (
        'not_aware', 'no_change', 'choice_reduce', 'no_choice_reduce',
        'ref_physio_psych', 'ref_pn_program', 'ref_pn_clinic', 'na', 'nl',
    )
] + ['hc_opioid_rx_txt']

access_opioid_sub_cols = [
    f'hc_opioid_sub___{option}'
    for option in (
        'med_cannabis', 'antidepressant', 'anticonvulsants', 'sedatives',
        'antiinflammatory', 'na', 'nl',
    )
] + ['hc_opioid_sub_txt']

access_alt_mgmt_cols = [
    f'hc_pn_mgmt_alt___{option}'
    for option in (
        'alcohol', 'cigarettes', 'vaping', 'cannabis_nonmeds',
        'recr_substances', 'ns', 'na', 'nl',
    )
] + ['hc_pn_mgmt_alt_rec_txt', 'hc_pn_mgmt_alt_txt']

access_cannabis_cols = ['hc_pn_mgmt_cnb_meds_acs', 'hc_pn_mgmt_cnb_meds_help'] + [
    f'hc_pn_mgmt_cnb_meds_brr___{option}'
    for option in (
        'na', 'cost', 'limited_hc_prescriber', 'limited_availability',
        'limited_dispensaries', 'stigma', 'driving_restrictions',
        'side_effects', 'nl',
    )
] + ['pwp_ac_mc3_other']

# Concatenation order matches the column order of the section's output frame.
df_access = df[
    access_profile_cols
    + access_pain_service_cols
    + access_plan_cols
    + access_appt_cols
    + access_care_cols
    + access_telehealth_cols
    + access_opioid_rx_cols
    + access_opioid_sub_cols
    + access_alt_mgmt_cols
    + access_cannabis_cols
]

## Affording Healthcare

In [412]:
# List every column name in the working frame.
list(df.columns)

['record_id',
 'cpa_nps_2025_timestamp',
 'respondent_type',
 'age',
 'gender',
 'gender_txt',
 'state',
 'postcode',
 'income_weekly',
 'first_nations',
 'lgbtq',
 'multi_cultural',
 'dx___autism',
 'dx___adhd',
 'dx___dyscalculia',
 'dx___dyslexia',
 'dx___dyspraxia',
 'dx___epilepsy',
 'dx___apd',
 'dx___id',
 'dx___ts',
 'dx___na',
 'dx___ns',
 'dx___nl',
 'dx_txt',
 'pn_type___back',
 'pn_type___neck',
 'pn_type___joint',
 'pn_type___migraines',
 'pn_type___widespread',
 'pn_type___osteoarthritis',
 'pn_type___rheumatoid_arthritis',
 'pn_type___pelvic',
 'pn_type___abdominal',
 'pn_type___crps',
 'pn_type___nerve',
 'pn_type___connective_tissue_disorder',
 'pn_type___na',
 'pn_type___nl',
 'pn_type_txt',
 'pn_impact',
 'hc_dx_length',
 'work',
 'life_xp___mental_health',
 'life_xp___ptsd',
 'life_xp___dims',
 'life_xp___unable_work',
 'life_xp___low_productivity',
 'life_xp___strain_financial',
 'life_xp___strain_family',
 'life_xp___strain_friends',
 'life_xp___unable_family',
 '

In [410]:
# Show the coded gender next to the free-text answer for rows where free text was supplied.
has_gender_text = df['gender_txt'].notna()
df.loc[has_gender_text, ['gender', 'gender_txt']]

Unnamed: 0,gender,gender_txt
1657,non_binary,Agender
1833,non_binary,genderqueer
2635,non_binary,genderqueer (please note male and female are s...
2686,non_binary,transgender
3016,non_binary,Trans masc
4354,non_binary,Transgender


In [411]:
# Count respondents per gender code (value_counts omits missing values by default).
df['gender'].value_counts()

female        3339
male           425
non_binary      99
ns              20
Name: gender, dtype: int64