# 6. People and Nature Survey Features


The People and Nature Survey for England gathers evidence and trend data through an online survey relating to people’s enjoyment of, access to, understanding of and attitudes to the natural environment, and its contributions to wellbeing.

https://www.gov.uk/government/collections/people-and-nature-survey-for-england

We collect relevant features from this data source for modelling. Specifically, we compute the mean dog occupancy (the share of recorded visits made with a dog) within the buffer area around each people-counter site.

People and Nature Survey for England - Year 2 - Quarter 1 to Quarter 4 data
https://www.gov.uk/government/statistics/the-people-and-nature-survey-for-england-year-2-annual-report-data-and-publications-april-2021-march-2022-official-statistics-main-findings

In [None]:
%load_ext autoreload
%autoreload 2

# All the variables are defined in the Config file
from model_config import *
from model_packages import *
from model_utils import *

In [None]:
# Move the working directory up one level so the relative paths used in the
# cells below resolve (presumably the notebook lives in a sub-folder of the
# project root — TODO confirm).
os.chdir(os.pardir)

# Survey Data Wrangling

In [None]:
# Open the Year 2 (Q1-Q4) survey workbook found under the `survey` folder
# and load its 'Data' sheet into a DataFrame.
df_y_2_survey = pd.ExcelFile(survey + 'PANS_Y2_Q1_Q4.xlsx')
df_y_2_survey_data = pd.read_excel(io=df_y_2_survey, sheet_name='Data')

In [None]:
# Data wrangling: derive the year and month of each visit.

# Parse Visit_Week from its string form; errors='coerce' turns anything that
# cannot be read as a date into NaT instead of raising.
df_y_2_survey_data['Visit_Week'] = pd.to_datetime(
    df_y_2_survey_data['Visit_Week'].astype(str), errors='coerce'
)

# Discard the rows without a usable visit date and renumber the index.
df_y_2_survey_data = (
    df_y_2_survey_data
    .loc[lambda frame: ~frame['Visit_Week'].isnull()]
    .reset_index(drop=True)
)

# Reduce the timestamp to a monthly period and expose it as 'Date'.
df_y_2_survey_data['Visit_Week'] = df_y_2_survey_data['Visit_Week'].dt.to_period('M')
df_y_2_survey_data = df_y_2_survey_data.rename(columns={'Visit_Week': 'Date'})

In [None]:
# Visualise the total No_Of_Visits recorded in each month of the data set.
(
    df_y_2_survey_data
    .groupby('Date')['No_Of_Visits']
    .sum()
    .plot()
)

In [None]:
# Keep only the columns named in `ftrs_selection`, then drop every row that
# lacks a visit count, a date or the coordinates of the visited location.
df_y_2_survey_data_sbset = (
    df_y_2_survey_data[ftrs_selection]
    .dropna(subset=['No_Of_Visits', 'Date', 'Visit_Latitude', 'Visit_Longitude'])
    .reset_index(drop=True)
)

## Intersect Survey data and buffer zones for people counters

In [None]:
# Get the in-land geometries of the buffer zones around each people-monitoring site.

# Load the world boundaries, keep the UK and project it ONCE to the metric
# CRS. The projected frame is reused for the overlay below rather than being
# re-projected again. Column access uses ['name'] so it cannot be confused
# with an attribute of the frame.
world = gpd.read_file(world_boundaries)
uk = world[world['name'] == 'U.K. of Great Britain and Northern Ireland'].to_crs(crs_mtr)
print(uk.crs)

# Load the buffer zones and keep only the 5 km buffers.
sites_df = gpd.read_file(data_folder + 'accessibility.shp')
sites_df = sites_df[sites_df['geom_type'] == '5km buffer'].reset_index(drop=True)

# Remove the 'area' column (a single, tolerant filter: no error if the column
# is absent), project the buffers to the metric CRS and intersect them with
# the UK boundary so only the in-land part of each buffer remains.
# NOTE(review): 'area' is presumably dropped because it would not describe
# the clipped geometry any more — confirm.
sites_df = (
    sites_df[[col for col in sites_df.columns if col != 'area']]
    .to_crs(crs_mtr)
    .overlay(uk, how='intersection')
)

# Visualise the clipped buffers on top of the UK outline (both in degrees).
ax = uk.to_crs(crs_deg).plot(alpha=0.1)
sites_df.to_crs(crs_deg).plot(ax=ax)

In [None]:
# Convert the survey data to a GeoDataFrame and match visits to counter buffers.

# Work on a copy so the cleaned survey subset itself is left untouched.
df = df_y_2_survey_data_sbset.copy()

# The easting/northing columns are not needed: the point geometry below is
# built from latitude/longitude.
del df['Visit_Easting']
del df['Visit_Northing']

df = df.rename(columns={'Visit_Longitude': 'Longitude',
                        'Visit_Latitude': 'Latitude'})

# Use the visit longitude/latitude to create point geometries (in degrees).
gdf = gpd.GeoDataFrame(
    df, geometry=gpd.points_from_xy(df.Longitude, df.Latitude)
).set_crs(crs_deg)

# Ensure the CRS of the survey points matches that of the buffer zones.
gdf = gdf.to_crs(crs_mtr)
print(sites_df.crs)
print(gdf.crs)

# Intersection between the buffers around the people-counter sites and the
# visits recorded in the People and Nature Survey.
# `predicate=` replaces the `op=` keyword, which geopandas deprecated in 0.10
# and removed in 1.0 (where `op=` raises a TypeError).
# The left join keeps every visit; visits outside all buffers have no
# 'counter' value and are then dropped.
visit_df_y2 = (
    gdf.sjoin(sites_df.to_crs(crs_mtr), how="left", predicate='intersects')
    .dropna(subset=['counter'])
    .to_crs(crs_deg)
)

In [None]:
# Visualisation: People and Nature Survey visits that fall inside the
# buffers around the people-counter sites.

# Counter locations are drawn as the centroids of the buffer polygons.
ax = sites_df.centroid.to_crs(crs_deg).plot(
    color='blue',
    marker='o',
    label='People counter locations',
)
# Alternative basemap, kept for reference:
# contextily.add_basemap(ax,crs=crs_deg,source=contextily.providers.OpenStreetMap.Mapnik)
visit_df_y2.plot(
    ax=ax,
    c='red',
    alpha=0.25,
    marker='+',
    label='People and Nature survey visits',
)
contextily.add_basemap(
    ax,
    crs=crs_deg,
    source=contextily.providers.CartoDB.Positron,
)
plt.legend(loc='lower right', frameon=True)
plt.axis('off')

# Write the figure to the outputs folder.
plt.savefig(
    "./outputs/counters_pnas.png",
    format='png',
    dpi=300,
    bbox_inches='tight',
)

In [None]:
# Renumber the matched visits from zero after the spatial filtering.
visit_df_y2 = visit_df_y2.reset_index(drop=True)

# Short list of survey (PNAS) features of interest.
# NOTE(review): this list is not referenced again in the cells shown here.
few_ftrs = [
    'Wave',
    'Date',
    'No_Of_Visits',
    'Latitude',
    'Longitude',
    'Dog',
    'Home_Rural_Urban_Asked',
    'Home_IMD_Decile',
    'Visit_IMD_Decile',
    'geometry',
]

# Investigate Dog Ownership data

In [None]:
# Dog occupancy is the feature of interest: it records whether the visitor
# had a dog with them. Encode it as 1 for 'Yes' and 0 for anything else.
# NOTE(review): every value other than 'Yes' (including a missing answer)
# is encoded as 0 — confirm this is intended.
visit_df_y2['Dog'] = (visit_df_y2['Dog'] == 'Yes') * 1

# Cast the numeric survey columns to float.
for _numeric_col in ('No_Of_Visits', 'Home_IMD_Decile', 'Visit_IMD_Decile'):
    visit_df_y2[_numeric_col] = visit_df_y2[_numeric_col].astype(float)

In [None]:
# Plot the mean No_Of_Visits per month over all visits matched to a counter site.
(
    visit_df_y2
    .groupby('Date')['No_Of_Visits']
    .mean()
    .plot(style='-o')
)

# Persist the matched visit records (one row per visit, with its month and
# counter site) for later use.
visit_df_y2.to_pickle(data_folder + 'nature_survey_time_series_data.pkl')

In [None]:
# Mean number of visits and mean dog occupancy for each counter site.
ftr = 'Dog'

# Attach the per-counter means to the buffer geometry of each site.
visits_with_dogs_df = sites_df[['counter', 'geometry']].merge(
    visit_df_y2.groupby('counter')[['No_Of_Visits', ftr]].mean().reset_index(),
    on=['counter'],
)

# Buffer area, measured in the metric CRS and scaled by 1e6
# (presumably m^2 -> km^2, as the column name suggests — confirm crs_mtr units).
visits_with_dogs_df['area_sq_km'] = (
    visits_with_dogs_df.to_crs(crs_mtr).geometry.area / 1e6
)

# Replace each buffer polygon by its centroid, computed before switching to
# degrees, then read the coordinates off the re-projected points.
visits_with_dogs_df['geometry'] = visits_with_dogs_df['geometry'].centroid
visits_with_dogs_df = visits_with_dogs_df.to_crs(crs_deg)
visits_with_dogs_df['latitude'] = visits_with_dogs_df.geometry.y
visits_with_dogs_df['longitude'] = visits_with_dogs_df.geometry.x

In [None]:
# Map of the counter sites: marker colour shows the mean number of visits,
# marker size shows the mean dog occupancy. The map is centred on the mean
# site coordinates.
fig = px.scatter_mapbox(
    visits_with_dogs_df,
    lat="latitude",
    lon="longitude",
    color="No_Of_Visits",
    size=ftr,
    color_continuous_scale="RdYlGn_r",
    center={
        "lat": visits_with_dogs_df['latitude'].mean(),
        "lon": visits_with_dogs_df['longitude'].mean(),
    },
    zoom=3.5,
    mapbox_style="carto-positron",
    hover_name="counter",
)
fig.update_layout(margin={"l": 0, "r": 0, "t": 30, "b": 10})
fig.show()

In [None]:
# Visualisation of number of visits and dog occupancy, averaged over time
# for each counter site.
visit_df_y2_agg = (
    visit_df_y2
    .groupby(['counter'])[['No_Of_Visits', 'Dog']]
    .mean()
    .reset_index()
)

df = visit_df_y2_agg

# Pairwise regression plots with a black fitted line.
g = sns.pairplot(
    df,
    kind='reg',
    height=2,
    plot_kws={'line_kws': {'color': 'black'}},
)
# Apply `r2` to every lower-triangle panel (presumably it annotates the
# panel with an R^2 value — confirm against its definition).
g.map_lower(r2)

# Hide the upper-triangle panels of the grid.
for hidden_ax in g.axes[np.triu_indices_from(g.axes, k=1)]:
    hidden_ax.set_visible(False)
plt.show()

## Save and export dog ownership data 

In [None]:
# Per-counter means with the 'Dog' column relabelled as 'Mean_dog_occupancy'.
# rename() returns a new frame, so visit_df_y2_agg itself is left unchanged.
df_mean_dog_occupncy = visit_df_y2_agg.rename(columns={'Dog': 'Mean_dog_occupancy'})

# Export the per-site dog occupancy table.
df_mean_dog_occupncy.to_pickle(data_folder + 'dog_occupancy_sites.pkl')