In [11]:
import pandas as pd

# Load the data
# Change path according to the repository
file_path = "/Communities.csv"
data = pd.read_csv(file_path)

# Inspect the data structure
print(data.head())
print(data.info())

                         Community Name                             Region  \
0                   Abbotsford (Suburb)  Northern and Western Metropolitan   
1                   Aberfeldie (Suburb)  Northern and Western Metropolitan   
2  Aireys Inlet - Fairhaven (Catchment)               Barwon-South Western   
3       Aireys Inlet - Fairhaven (Town)               Barwon-South Western   
4              Airport West (Catchment)  Northern and Western Metropolitan   

   Map reference Grid reference               Location  Population Density  \
0              4             C3   3km ENE of Melbourne         3082.440714   
1              4             B2    8km NW of Melbourne         2426.665450   
2              2             D4  108km SW of Melbourne            0.841522   
3              2             D4  105km SW of Melbourne          213.059443   
4              4             B1  11km NNW of Melbourne          210.819042   

   Travel time to GPO (minutes)  Distance to GPO (km)         

In [37]:
# Selecting the relevant columns for the analysis
key_columns = ['Community Name', 'Population Density', 'Public Hospitals', 'Private Hospitals', 
               'Community Health Centres', 'General Practice', 'Pharmacies', 
               'Aged Care (High Care)', 'Aged Care (Low Care)', 'Aged Care (SRS)', 
               '% change, 2007-2012, total', 'Distance to nearest public hospital with emergency department',
               'Presentations to emergency departments due to injury, %', 
               'Category 4 & 5 emergency department presentations, %']

# Subsetting the dataset to include only these columns
data_subset = data[key_columns]

# Display the subset of the data to verify
print(data_subset.head())

                         Community Name  Population Density  Public Hospitals  \
0                   Abbotsford (Suburb)         3082.440714                 0   
1                   Aberfeldie (Suburb)         2426.665450                 0   
2  Aireys Inlet - Fairhaven (Catchment)            0.841522                 0   
3       Aireys Inlet - Fairhaven (Town)          213.059443                 0   
4              Airport West (Catchment)          210.819042                 0   

   Private Hospitals  Community Health Centres  General Practice  Pharmacies  \
0                  0                         0                 4           1   
1                  0                         0                 1           2   
2                  0                         0                 0           0   
3                  0                         0                 1           0   
4                  0                         0                 1           1   

   Aged Care (High Care)  Aged C

In [39]:
# Drop rows with missing values in key healthcare and population columns
data_cleaned = data_subset.dropna(subset=['Population Density', 'Public Hospitals', 'Private Hospitals', 
                                          'Community Health Centres', '% change, 2007-2012, total',
                                          'Distance to nearest public hospital with emergency department',
                                          'Presentations to emergency departments due to injury, %',
                                          'Category 4 & 5 emergency department presentations, %'])

# Check for remaining missing values
print(data_cleaned.isnull().sum())

Community Name                                                   0
Population Density                                               0
Public Hospitals                                                 0
Private Hospitals                                                0
Community Health Centres                                         0
General Practice                                                 0
Pharmacies                                                       0
Aged Care (High Care)                                            0
Aged Care (Low Care)                                             0
Aged Care (SRS)                                                  0
% change, 2007-2012, total                                       0
Distance to nearest public hospital with emergency department    0
Presentations to emergency departments due to injury, %          0
Category 4 & 5 emergency department presentations, %             0
dtype: int64


In [41]:
# Normalize healthcare facility counts per 1000 residents using .loc[]
data_cleaned.loc[:, 'hospitals_per_1000'] = (data_cleaned['Public Hospitals'] + data_cleaned['Private Hospitals']) / data_cleaned['Population Density']
data_cleaned.loc[:, 'clinics_per_1000'] = data_cleaned['Community Health Centres'] / data_cleaned['Population Density']

# Use the existing '% change, 2007-2012, total' column for population growth
data_cleaned.loc[:, 'population_growth_%'] = data_cleaned['% change, 2007-2012, total']

# Display the key results
print(data_cleaned[['Community Name', 'hospitals_per_1000', 'clinics_per_1000', 'population_growth_%',
                    'Distance to nearest public hospital with emergency department',
                    'Presentations to emergency departments due to injury, %',
                    'Category 4 & 5 emergency department presentations, %']].head())

                    Community Name  hospitals_per_1000  clinics_per_1000  \
0              Abbotsford (Suburb)                 0.0               0.0   
1              Aberfeldie (Suburb)                 0.0               0.0   
3  Aireys Inlet - Fairhaven (Town)                 0.0               0.0   
4         Airport West (Catchment)                 0.0               0.0   
5            Airport West (Suburb)                 0.0               0.0   

   population_growth_%  \
0            13.834076   
1            -3.061077   
3            15.921325   
4             5.317718   
5             6.664256   

   Distance to nearest public hospital with emergency department  \
0                                           3.191000               
1                                           7.916626               
3                                          49.052066               
4                                          12.793518               
5                                          13.

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  data_cleaned.loc[:, 'hospitals_per_1000'] = (data_cleaned['Public Hospitals'] + data_cleaned['Private Hospitals']) / data_cleaned['Population Density']
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  data_cleaned.loc[:, 'clinics_per_1000'] = data_cleaned['Community Health Centres'] / data_cleaned['Population Density']
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-doc