# 2025 National Pain Survey (NPS)

## Set up

### Import packages

In [84]:
import pandas as pd
import numpy as np

### Upload files
* National Pain Survey 2025 responses
* [Australian Postcodes](https://www.matthewproctor.com/australian_postcodes)

In [155]:
# Load the raw survey export into `df` and preview its first rows.
survey_csv = 'ChronicPainAustralia_DATA_2025-06-03_1138.csv'
df = pd.read_csv(survey_csv)
df.head()

Unnamed: 0,record_id,cpa_nps_2025_timestamp,respondent_type,age,gender,gender_txt,state,postcode,income_weekly,first_nations,...,scm_fb,scm_inst,scm_tiktok,scm_tw,scm_yt,scm_pin,scm_reddit,scm_li,consent_marketing,cpa_nps_2025_complete
0,1,[not completed],connection,18-24,male,,NSW,2137.0,ns,no,...,,,,,,,,,,0
1,2,2025-05-05 14:38:00,peer,35-44,female,,TAS,,3000-3999,ns,...,2.0,2.0,2.0,2.0,2.0,2.0,2.0,2.0,1.0,2
2,3,2025-05-05 16:50:24,peer,55-64,male,,NSW,2540.0,2000-2499,ns,...,,,,,4.0,,,,0.0,2
3,4,[not completed],peer,65-74,female,,NSW,2067.0,0500-999,no,...,0.0,0.0,,,3.0,0.0,0.0,0.0,,0
4,5,2025-05-05 16:53:47,peer,55-64,female,,NSW,2113.0,0500-999,no,...,4.0,0.0,0.0,,2.0,0.0,1.0,1.0,1.0,2


In [156]:
# Show (rows, columns) of the raw survey data as the cell's output value,
# matching how the shape is displayed after incomplete responses are dropped.
df.shape

(4651, 259)


In [157]:
# Load the Australian postcode lookup table into `postcode_lu` and display it.
postcode_csv = 'australian_postcodes.csv'
postcode_lu = pd.read_csv(postcode_csv)
postcode_lu

Unnamed: 0,id,postcode,locality,state,long,lat,dc,type,status,sa3,...,altitude,chargezone,phn_code,phn_name,lgaregion,lgacode,electorate,electoraterating,sed_code,sed_name
0,230,200,ANU,ACT,149.119000,-35.277700,,,Updated 3-Dec-2022,,...,,N2,,,Unincorporated ACT,89399.0,Durack,,,
1,21820,200,Australian National University,ACT,149.118900,-35.277700,,,Updated 3-Dec-2022,,...,,N2,,,Unincorporated ACT,89399.0,Durack,,,
2,232,800,DARWIN,NT,130.836680,-12.458684,,,Updated 3-Dec-2022,70101.0,...,,NT1,PHN701,Northern Territory,Darwin Waterfront Precinct,71150.0,Solomon,Inner Metropolitan,70022.0,Port Darwin
3,24049,800,DARWIN CITY,NT,130.836680,-12.458684,,,Updated 3-Dec-2022,70101.0,...,,NT1,PHN701,Northern Territory,Darwin Waterfront Precinct,71150.0,Solomon,Inner Metropolitan,70022.0,Port Darwin
4,233,801,DARWIN,NT,130.836680,-12.458684,,,Updated 3-Dec-2022,70101.0,...,,NT1,PHN701,,Darwin,71000.0,Lingiari,Rural,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
18521,11186,9013,BRISBANE,QLD,152.823141,-27.603479,CITY DC - BRISBANE,LVR,Updated 25-Mar-2020 SA3,30504.0,...,44.349792,Q1,PHN301,,Brisbane,31000.0,Griffith,,,
18522,11187,9015,BRISBANE,QLD,152.823141,-27.603479,CITY DC - BRISBANE,LVR,Updated 25-Mar-2020 SA3,30504.0,...,44.349792,Q1,PHN301,,Brisbane,31000.0,Griffith,,,
18523,11196,9464,NORTHGATE MC,QLD,153.074982,-27.397055,,,Updated 25-Mar-2020 SA3,30203.0,...,,Q1,PHN301,,Brisbane,31000.0,Griffith,,,
18524,11197,9726,GOLD COAST MC,QLD,153.412197,-28.008783,,,Updated 25-Mar-2020 SA3,30910.0,...,,Q1,PHN303,,Gold Coast,33430.0,McPherson,,,


### Label fields
* Completion Rate
    * Partial completion is when the survey was not completed but Respondent Type AND Age are not null, AND at least one of Gender OR Postcode is not null.

In [158]:
# LABEL Completion Rates.
# Each response receives exactly one label; the conditions are checked in
# order and the first match wins:
#   1. 'Completed'          - timestamp is anything other than '[not completed]'
#   2. 'Partial completion' - respondent type and age are present, plus at
#                             least one of gender or postcode
#   3. 'Incomplete'         - everything else
# NOTE(review): a missing (NaN) timestamp also compares unequal to
# '[not completed]' and is therefore labelled 'Completed' - confirm intended.
is_completed = df['cpa_nps_2025_timestamp'] != '[not completed]'
has_core_fields = df['respondent_type'].notna() & df['age'].notna()
has_gender_or_postcode = df['gender'].notna() | df['postcode'].notna()

# np.select builds the string labels in a single pass, so the column is never
# created as a float (NaN-filled) column that later has strings written into
# it - an incompatible-dtype assignment that newer pandas versions deprecate.
df['completion_rate'] = np.select(
    [is_completed, has_core_fields & has_gender_or_postcode],
    ['Completed', 'Partial completion'],
    default='Incomplete',
)

In [159]:
# Count how many responses fall under each completion label.
df['completion_rate'].value_counts()

Completed             3276
Partial completion     757
Incomplete             618
Name: completion_rate, dtype: int64

Drop incomplete responses

In [160]:
# Drop incomplete responses.
# Take an explicit copy so `df` is an independent frame rather than a filtered
# slice: later cells assign into it (state, postcode), which on a slice can
# raise SettingWithCopyWarning.
df = df.loc[df['completion_rate'] != 'Incomplete'].copy()
df.shape

(4033, 260)

In [161]:
# Recode the state value '8' to 'WA', then list the distinct states to verify.
recorded_as_8 = df['state'].eq('8')
df.loc[recorded_as_8, 'state'] = 'WA'
df['state'].unique()

array(['NSW', 'TAS', 'SA', 'QLD', 'VIC', 'ACT', 'WA', 'NT', nan],
      dtype=object)

### Postcode Lookup

In [None]:
# Cast postcode to pandas' nullable integer dtype so that missing postcodes
# stay missing instead of forcing the whole column to float.
nullable_int = pd.Int64Dtype()
df['postcode'] = df['postcode'].astype(nullable_int)


In [None]:
# Restrict the postcode lookup to the selected columns (order preserved).
# NOTE(review): the displayed lookup rows repeat postcodes (e.g. 200 and 800
# each appear twice); joining on postcode alone would duplicate survey rows -
# confirm de-duplication happens before any merge.
lookup_columns = [
    'postcode', 'state', 'region', 'electoraterating',
    'long', 'lat', 'lgaregion', 'electorate',
]
postcode_lu = postcode_lu[lookup_columns]

In [191]:
# Display the postcode lookup table.
postcode_lu

Unnamed: 0,postcode,state,region,electoraterating,long,lat,lgaregion,electorate
0,200,ACT,R1,,149.119000,-35.277700,Unincorporated ACT,Durack
1,200,ACT,R1,,149.118900,-35.277700,Unincorporated ACT,Durack
2,800,NT,R1,Inner Metropolitan,130.836680,-12.458684,Darwin Waterfront Precinct,Solomon
3,800,NT,R1,Inner Metropolitan,130.836680,-12.458684,Darwin Waterfront Precinct,Solomon
4,801,NT,R1,Rural,130.836680,-12.458684,Darwin,Lingiari
...,...,...,...,...,...,...,...,...
18521,9013,QLD,R1,,152.823141,-27.603479,Brisbane,Griffith
18522,9015,QLD,R1,,152.823141,-27.603479,Brisbane,Griffith
18523,9464,QLD,R1,,153.074982,-27.397055,Brisbane,Griffith
18524,9726,QLD,R2,,153.412197,-28.008783,Gold Coast,McPherson
