# Import relevant libraries

In [1]:
import numpy as np
import xarray as xr
import rasterio
%matplotlib inline
import matplotlib.pyplot as plt
import seaborn as sns
import glob
import os
import datetime
import pandas as pd
from rasterio import features
from rasterio_to_xarray import rasterio_to_xarray, xarray_to_rasterio, xarray_to_rasterio_by_band
import rasterstats
import fiona
from tqdm import tqdm
from dateutil.parser import parse
from rasterstats.io import read_features

In [None]:
# Register a dask progress bar so the progress of each part of the code can be seen.
# Profiler, ResourceProfiler and CacheProfiler are imported but not used in this cell.
from dask.diagnostics import Profiler, ResourceProfiler, CacheProfiler, ProgressBar
pbar = ProgressBar()
pbar.register()

# Read NetCDF files in to create dataset with all data in

In [2]:
# Open every NetCDF file matching the *PM25.nc pattern as one multi-file dataset
# and keep only its 'data' variable
PM25 = xr.open_mfdataset(r'C:\MAIACData\nc_monthly_daily\*PM25.nc')['data']

# Time-series analysis: extract data over one pixel
- Find the x and y location of the pixels within the image
- Extract the PM2.5 data over the whole time period for the pixels corresponding to each site
- Save the PM2.5 estimates out as a csv for each site
- Merge the PM2.5 timeseries data for the three sites based on the dimension 'time'
- Create summary statistics for each site
- Create a graph showing the variation in PM2.5 for all sites over time

In [None]:
# Use the affine information stored with the images to turn a site's Easting and
# Northing coordinates into the x and y location of its pixel.
# Site coordinates (Easting, Northing):
#   University of Southampton: 442389, 115316
#   Bolderwood: 424469, 108204
#   West Lulworth: 382611, 80604

a = rasterio.Affine.from_gdal(*PM25.attrs['affine'])
# Apply the inverted transform to the West Lulworth coordinates
~a * (382611, 80604)

In [None]:
# Reorder the PM2.5 data so that it is sorted along the time dimension
PM25 = PM25.isel(time=np.argsort(PM25['time']))

In [None]:
# Select PM2.5 data based on the pixel x and y coordinates and load the
# resulting time series into memory
# NOTE(review): x=1058, y=1073 is presumably the pixel found for West Lulworth
# in the affine cell above - confirm before reusing for another site
ts = PM25.isel(x=1058, y=1073).load()

In [None]:
# Display the extracted time series
ts

In [None]:
# Transform the single-pixel time series into a pandas dataframe
result = ts.to_dataframe()

In [None]:
# Remove nan values. dropna() returns a new dataframe rather than modifying
# `result` in place, so the result has to be assigned back for the rows to
# actually be removed before the data is saved.
result = result.dropna()

In [None]:
# Save data as CSV and repeat steps for other two sites
# (this path is the West Lulworth output; the later cells also read
# UniSoton.csv and Bolderwood.csv from the same folder)
result.to_csv(r'D:\Annies_Dissertation\Analysis\Timeseries\Westlulworth.csv')

### Merging timeseries data for the three sites

In [None]:
# Read in the University of Southampton time series, parsing the 'time' column
# as dates (the column is renamed in a later cell)
Unisoton = pd.read_csv(r'D:\Annies_Dissertation\Analysis\Timeseries\UniSoton.csv', parse_dates=['time'])

In [None]:
# Preview the first ten rows
Unisoton.head(10)

In [None]:
# Index the data by time, then name the PM2.5 column after the site
Unisoton = Unisoton.set_index('time')
Unisoton = Unisoton.rename(columns={'data': 'Uni_of_Soton'})

In [None]:
# Read in the Bolderwood time series, parsing the 'time' column as dates
Bolderwood = pd.read_csv(r'D:\Annies_Dissertation\Analysis\Timeseries\Bolderwood.csv', parse_dates=['time'])

In [None]:
# Preview the first ten rows
Bolderwood.head(10)

In [None]:
# Index the data by time, then name the PM2.5 column after the site
Bolderwood = Bolderwood.set_index('time')
Bolderwood = Bolderwood.rename(columns={'data': 'Bolderwood'})

In [None]:
# Join the two sites on their shared time index
result = Unisoton.merge(Bolderwood, left_index=True, right_index=True)

In [None]:
# Preview the first ten rows of the merged data
result.head(10)

In [None]:
# Read in the West Lulworth time series, parsing the 'time' column as dates
Westlulworth = pd.read_csv(r'D:\Annies_Dissertation\Analysis\Timeseries\Westlulworth.csv', parse_dates=['time'])

In [None]:
# View the first ten rows of the data
Westlulworth.head(10)

In [None]:
# Index the data by time, then name the PM2.5 column after the site
Westlulworth = Westlulworth.set_index('time')
Westlulworth = Westlulworth.rename(columns={'data': 'West_Lulworth'})

In [None]:
# Add the third site to the merged data, again joining on the time index
result_3 = result.merge(Westlulworth, left_index=True, right_index=True)

In [None]:
# Keep only the columns holding the PM2.5 information for the three pixels
result_3 = result_3.loc[:, ['Uni_of_Soton', 'Bolderwood', 'West_Lulworth']]

In [None]:
# Summary statistics for the University of Southampton pixel
result_3['Uni_of_Soton'].describe()

In [None]:
# Summary statistics for the Bolderwood pixel
result_3['Bolderwood'].describe()

In [None]:
# Summary statistics for the West Lulworth pixel
result_3['West_Lulworth'].describe()

In [None]:
# Save the merged data, dropping any rows that contain a missing value;
# `result_3` itself is left unchanged
result_3.dropna().to_csv(r'D:\Annies_Dissertation\Analysis\Timeseries\Timeseries.csv')

### Create a graph to show the variation in PM2.5 for the three sites over time

In [None]:
# Plot the PM2.5 time series of the three sites on one wide figure, label the
# axes and save the figure
result_3.plot(figsize=(20, 8))
plt.xlabel('Date')
plt.ylabel('PM2.5')
plt.savefig(r'D:\Annies_Dissertation\Analysis\Timeseries\Timeseries_2000_2014.jpeg')

In [None]:
# Read in the subset of the merged time series. 'time' is parsed as dates, as
# is done for the per-site files, so that the index set from this column is a
# date index rather than plain strings when the data is plotted.
result_3_sub = pd.read_csv(r'D:\Annies_Dissertation\Analysis\Timeseries\Timeseries_subset.csv', parse_dates=['time'])

In [None]:
# Preview the first five rows of the subset
result_3_sub.head(5)

In [None]:
# Index the subset by its 'time' column
result_3_sub = result_3_sub.set_index('time')

In [None]:
# Plot the subset time series on one wide figure, label the axes and save it
result_3_sub.plot(figsize=(20, 8))
plt.xlabel('Date')
plt.ylabel('PM2.5')
plt.savefig(r'D:\Annies_Dissertation\Analysis\Timeseries\Timeseries_2009_2014.jpeg')

## Overall PM2.5 average of every pixel

In [3]:
# Calculate the mean along the time dimension, keeping the attributes of PM25
# on the result (keep_attrs=True)
om = PM25.mean(dim='time', keep_attrs=True)

In [None]:
# Save the overall mean as a GeoTIFF. The mean computed from PM25 is stored in
# `om`; no variable called `overall_mean` is defined in the notebook.
xarray_to_rasterio(om, r'D:\Annies_Dissertation\Analysis\overall_mean.tif')

## PM2.5 Averages
### Monthly

In [None]:
# Calculate the mean over time for each calendar month by grouping the data on
# 'time.month', keeping the attributes
monthly_mean = PM25.groupby('time.month').mean(dim='time', keep_attrs=True)

In [None]:
# View the coordinates of the monthly means
monthly_mean.coords

In [None]:
# Check that the attributes are still attached to the data after the mean
monthly_mean.attrs

In [None]:
# Plot the data, one panel per month. The figure size is passed to the plot
# call itself: the bare name `figure` is not imported anywhere in the notebook,
# and a faceted xarray plot creates its own figure.
monthly_mean.plot(col='month', robust=True, figsize=(20, 12))

In [None]:
# Save each of the monthly means, using the given path as the filename prefix
# and 'month' as the dimension to split on
xarray_to_rasterio_by_band(monthly_mean, r'D:\Annies_Dissertation\Analysis\monthly\monthly_mean_', dim='month')

### Seasonal

In [8]:
# Calculate the mean over time for each season by grouping the data on
# 'time.season', keeping the attributes
seasonal_mean = PM25.groupby('time.season').mean(dim='time', keep_attrs=True)

In [None]:
# View the coordinates of the seasonal means
seasonal_mean.coords

In [None]:
# Plot the data, one panel per season. The figure size is passed to the plot
# call itself: the bare name `figure` is not imported anywhere in the notebook,
# and a faceted xarray plot creates its own figure.
seasonal_mean.plot(col='season', robust=True, figsize=(20, 12))

In [None]:
# Save each of the seasonal means, using the given path as the filename prefix
# and 'season' as the dimension to split on
xarray_to_rasterio_by_band(seasonal_mean, r'D:\Annies_Dissertation\Analysis\seasonal\seasonal_mean_', dim='season')

### Yearly

In [None]:
# Calculate the mean over time for each year by grouping the data on
# 'time.year', keeping the attributes
yearly_mean = PM25.groupby('time.year').mean(dim='time', keep_attrs=True)

In [None]:
# View the coordinates of the yearly means
yearly_mean.coords

In [None]:
# Plot the data, one panel per year. The figure size is passed to the plot
# call itself: the bare name `figure` is not imported anywhere in the notebook,
# and a faceted xarray plot creates its own figure.
yearly_mean.plot(col='year', robust=True, figsize=(20, 12))

In [None]:
# Save each of the yearly means, using the given path as the filename prefix
# and 'year' as the dimension to split on
xarray_to_rasterio_by_band(yearly_mean, r'D:\Annies_Dissertation\Analysis\yearly\yearly_mean_', dim='year')

## Standard deviation of PM2.5 for each pixel over time

In [None]:
# Use the std function to calculate the standard deviation of every pixel along
# the time dimension, keeping the attributes
std = PM25.std(dim = 'time', keep_attrs=True)

In [None]:
# View the coordinates of the standard deviation image
std.coords

In [None]:
# Plot the standard deviation image. `std` was reduced over the whole time
# dimension, so there is no 'year' dimension to facet on and it is drawn as a
# single panel. The figure size is passed to the plot call itself because the
# bare name `figure` is not imported anywhere in the notebook.
std.plot(robust=True, figsize=(20, 12))

In [None]:
# Save the standard deviation image as a GeoTIFF
xarray_to_rasterio(std, r'D:\Annies_Dissertation\Analysis\overall_std.tif')

# Count of values that aren't missing

In [None]:
# Use the count function to count, for every pixel, the observations along the
# time dimension that are not missing, keeping the attributes
observations = PM25.count(dim='time', keep_attrs=True)

In [None]:
# Save the observation count image as a GeoTIFF
xarray_to_rasterio(observations, r'D:\Annies_Dissertation\Analysis\observations_count.tif')

### Read in subset count image - just Wessex

In [None]:
# Use the subset observation count image
subset_count = rasterio_to_xarray(r'D:\Annies_Dissertation\Analysis\Clipped\obs_arc1.tif')

In [None]:
# Create a histogram of the observation counts in Wessex, label the x axis
# and save the figure
count_histo = subset_count.plot.hist()
plt.xlabel('Number of Observations')
plt.savefig(r'D:\Annies_Dissertation\Analysis\count_histogram_subset.jpeg')

## Buncefield fire

In [4]:
# Sort the PM2.5 data along the time dimension before selecting by date
PM25 = PM25.isel(time=np.argsort(PM25['time']))

In [7]:
# Select the PM2.5 data labelled with the date 2005-12-10
# NOTE(review): presumably chosen to capture the Buncefield fire - confirm the date
B_fire = PM25.sel(time='2005-12-10')

In [None]:
# Display the selected data
B_fire

In [None]:
# Save the selected data with one file per time step. A filename prefix plus a
# `dim` argument is how xarray_to_rasterio_by_band is called elsewhere in this
# notebook; xarray_to_rasterio is only ever given a complete .tif filename.
# NOTE(review): assumes B_fire still has a 'time' dimension after the
# selection - confirm against the data.
xarray_to_rasterio_by_band(B_fire, r'D:\Annies_Dissertation\Analysis\Specific_events\Buncefield\B_fire\B_fire_', dim='time')

## Number of observations each year greater than legislation
- The WHO (2006) suggested an annual guideline value of 10 ug/m3

In [None]:
# Reorder the PM2.5 data so that it is sorted by time
PM25 = PM25.isel(time=np.argsort(PM25['time']))

In [None]:
# Create a function to find pixels with PM2.5 concentrations greater than 10 
# and count the number of times this occurred during each year
def over_ten(x):
    """Count, along the 'time' dimension, how often x is greater than 10.

    x is compared against 10 and the resulting true/false values are summed
    over 'time', so the result is the number of exceedances.
    """
    exceedances = x > 10
    return exceedances.sum(dim='time')

In [None]:
# Select the years to analyse: the data whose time labels fall in the
# slice from '2010' to '2016'
After2010 = PM25.sel(time=slice('2010', '2016'))

In [None]:
# Group the data by year and apply over_ten to each year's data, giving the
# yearly count of values greater than 10
res = After2010.groupby('time.year').apply(over_ten)

In [None]:
# Attach the attributes of PM25 to the result
# NOTE(review): presumably needed so the image can be written out with its
# georeferencing - confirm against xarray_to_rasterio_by_band
res.attrs = PM25.attrs

In [None]:
# Display the yearly counts
res

In [None]:
# Save the yearly counts, using the given path as the filename prefix and
# 'year' as the dimension to split on
xarray_to_rasterio_by_band(res, r'D:\Annies_Dissertation\Analysis\legislation\count_over_10_', dim='year')

- EU Directive (2008): limit of 25 ug/m3, not to be exceeded more than 35 times in a calendar year by 2015

In [None]:
def over_25(x):
    """Count, along the 'time' dimension, how often x is greater than 25.

    x is compared against 25 and the resulting true/false values are summed
    over 'time', so the result is the number of exceedances.
    """
    return (x > 25).sum(dim='time')

In [None]:
# Group the data by year and apply over_25 to each year's data, giving the
# yearly count of values greater than 25
res_25 = After2010.groupby('time.year').apply(over_25)

In [None]:
# Attach the attributes of PM25 to the result
# NOTE(review): presumably needed so the image can be written out with its
# georeferencing - confirm against xarray_to_rasterio_by_band
res_25.attrs = PM25.attrs

In [None]:
# Save the yearly counts, using the given path as the filename prefix and
# 'year' as the dimension to split on
xarray_to_rasterio_by_band(res_25, r'D:\Annies_Dissertation\Analysis\legislation\count_over_25_', dim='year')