# Import relevant libraries/modules etc

In [1]:
import numpy as np
import xarray as xr
import rasterio
%matplotlib inline
import matplotlib.pyplot as plt
import seaborn as sns
import glob
import os
import datetime
import pandas as pd
from rasterio import features
from rasterio_to_xarray import rasterio_to_xarray, xarray_to_rasterio, xarray_to_rasterio_by_band
import rasterstats
import fiona
from tqdm import tqdm
from dateutil.parser import parse
from shapely.geometry import shape
from rasterstats.io import read_features

In [4]:
# Register a dask progress bar globally so that every subsequent dask
# computation in this notebook reports its progress.
from dask.diagnostics import (
    CacheProfiler,
    Profiler,
    ProgressBar,
    ResourceProfiler,
)

pbar = ProgressBar()
pbar.register()

# Read the NetCDF files into a single dataset containing all of the data

In [2]:
# Open every matching PM2.5 NetCDF file as one combined dataset and keep
# only its 'data' variable for the rest of the analysis.
pm25_dataset = xr.open_mfdataset(r'C:\MAIACData\nc_monthly_daily\*PM25.nc')
PM25 = pm25_dataset['data']

# Extract data over one pixel
- Find the x and y location of the pixels within the image
- Extract the PM2.5 data over the whole time period for the pixels corresponding to each site
- Save the PM2.5 estimates out as a csv for each site
- Merge the PM2.5 timeseries data for the three sites based on the dimension 'time'
- Create summary statistics for each site
- Create a graph showing the variation in PM2.5 for all sites over time

In [None]:
# Use the affine information stored with the images to convert a site's
# Easting/Northing coordinates into its x and y pixel location.
# Site coordinates (Easting, Northing):
#   University of Southampton: 442389, 115316
#   Bolderwood: 424469, 108204
#   West Lulworth: 382611, 80604

gdal_geotransform = PM25.attrs['affine']  # coefficients in the order from_gdal expects
a = rasterio.Affine.from_gdal(*gdal_geotransform)
# ~a is the inverse transform, so this maps map coordinates to pixel coordinates.
~a * (382611, 80604)

In [None]:
# Reorder the data chronologically along the 'time' dimension.
time_order = np.argsort(PM25.time)
PM25 = PM25.isel(time=time_order)

In [None]:
# Pull the full time series for a single pixel into memory.
# NOTE(review): the indices are hard-coded; presumably they are the rounded
# result of the inverse-affine lookup for West Lulworth - confirm.
ts = PM25.isel(y=1073, x=1058).load()

In [None]:
ts

In [None]:
# Convert the single-pixel time series into a pandas DataFrame for export.
result = ts.to_dataframe()

In [None]:
result.dropna()

In [None]:
# Save the site's time series. The earlier `result.dropna()` cell does not
# assign its output, so rows with missing PM2.5 values are still written here.
result.to_csv(r'D:\Annies_Dissertation\Analysis\Timeseries\Westlulworth.csv')

### Merging timeseries data for the three sites

In [2]:
# Load the University of Southampton pixel time series, parsing 'time' as dates.
Unisoton = pd.read_csv(
    r'D:\Annies_Dissertation\Analysis\Timeseries\UniSoton.csv',
    parse_dates=['time'],
)

In [3]:
Unisoton[:10]

Unnamed: 0,time,x,y,data
0,2000-02-25,442096.976259,116190.330929,10.683363
1,2000-02-26,442096.976259,116190.330929,
2,2000-02-27,442096.976259,116190.330929,
3,2000-02-28,442096.976259,116190.330929,16.978916
4,2000-02-29,442096.976259,116190.330929,
5,2000-03-01,442096.976259,116190.330929,
6,2000-03-02,442096.976259,116190.330929,
7,2000-03-03,442096.976259,116190.330929,16.339218
8,2000-03-04,442096.976259,116190.330929,16.877872
9,2000-03-05,442096.976259,116190.330929,


In [4]:
# Index by date, then label the PM2.5 column with the site name so the
# sites stay distinguishable after merging.
Unisoton = Unisoton.set_index('time')
Unisoton = Unisoton.rename(columns={'data': 'Uni_of_Soton'})

In [5]:
# Load the Bolderwood pixel time series, parsing 'time' as dates.
Bolderwood = pd.read_csv(
    r'D:\Annies_Dissertation\Analysis\Timeseries\Bolderwood.csv',
    parse_dates=['time'],
)

In [6]:
Bolderwood[:10]

Unnamed: 0,time,x,y,data
0,2000-02-25,423248.830598,108651.072665,4.444452
1,2000-02-26,423248.830598,108651.072665,15.886553
2,2000-02-27,423248.830598,108651.072665,
3,2000-02-28,423248.830598,108651.072665,15.319175
4,2000-02-29,423248.830598,108651.072665,
5,2000-03-01,423248.830598,108651.072665,
6,2000-03-02,423248.830598,108651.072665,
7,2000-03-03,423248.830598,108651.072665,
8,2000-03-04,423248.830598,108651.072665,
9,2000-03-05,423248.830598,108651.072665,15.506694


In [7]:
# Index by date, then label the PM2.5 column with the site name.
Bolderwood = Bolderwood.set_index('time')
Bolderwood = Bolderwood.rename(columns={'data': 'Bolderwood'})

In [8]:
# Join the two sites on their shared date index (default inner join).
result = Unisoton.merge(Bolderwood, left_index=True, right_index=True)

In [9]:
result[:10]

Unnamed: 0_level_0,x_x,y_x,Uni_of_Soton,x_y,y_y,Bolderwood
time,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
2000-02-25,442096.976259,116190.330929,10.683363,423248.830598,108651.072665,4.444452
2000-02-26,442096.976259,116190.330929,,423248.830598,108651.072665,15.886553
2000-02-27,442096.976259,116190.330929,,423248.830598,108651.072665,
2000-02-28,442096.976259,116190.330929,16.978916,423248.830598,108651.072665,15.319175
2000-02-29,442096.976259,116190.330929,,423248.830598,108651.072665,
2000-03-01,442096.976259,116190.330929,,423248.830598,108651.072665,
2000-03-02,442096.976259,116190.330929,,423248.830598,108651.072665,
2000-03-03,442096.976259,116190.330929,16.339218,423248.830598,108651.072665,
2000-03-04,442096.976259,116190.330929,16.877872,423248.830598,108651.072665,
2000-03-05,442096.976259,116190.330929,,423248.830598,108651.072665,15.506694


In [10]:
# Load the West Lulworth pixel time series, parsing 'time' as dates.
Westlulworth = pd.read_csv(
    r'D:\Annies_Dissertation\Analysis\Timeseries\Westlulworth.csv',
    parse_dates=['time'],
)

In [11]:
Westlulworth[:10]

Unnamed: 0,time,x,y,data
0,2000-02-25,381782.910143,81007.125695,21.285307
1,2000-02-26,381782.910143,81007.125695,
2,2000-02-27,381782.910143,81007.125695,
3,2000-02-28,381782.910143,81007.125695,
4,2000-02-29,381782.910143,81007.125695,
5,2000-03-01,381782.910143,81007.125695,
6,2000-03-02,381782.910143,81007.125695,
7,2000-03-03,381782.910143,81007.125695,
8,2000-03-04,381782.910143,81007.125695,23.822201
9,2000-03-05,381782.910143,81007.125695,


In [12]:
# Index by date, then label the PM2.5 column with the site name.
Westlulworth = Westlulworth.set_index('time')
Westlulworth = Westlulworth.rename(columns={'data': 'West_Lulworth'})

In [13]:
# Add the third site to the two-site table, again joining on the date index.
result_3 = result.merge(Westlulworth, left_index=True, right_index=True)

In [14]:
# Keep only the three PM2.5 columns, dropping the per-site x/y coordinates.
site_columns = ['Uni_of_Soton', 'Bolderwood', 'West_Lulworth']
result_3 = result_3[site_columns]

In [18]:
result_3.Uni_of_Soton.describe()



count    1263.000000
mean       13.716494
std         9.897066
min         0.000000
25%              NaN
50%              NaN
75%              NaN
max        76.848190
Name: Uni_of_Soton, dtype: float64

In [19]:
result_3.Bolderwood.describe()



count    1174.000000
mean       10.883351
std         9.309018
min         0.000000
25%              NaN
50%              NaN
75%              NaN
max        62.078014
Name: Bolderwood, dtype: float64

In [20]:
result_3.West_Lulworth.describe()



count    1293.000000
mean       16.029405
std        10.457219
min         0.000000
25%              NaN
50%              NaN
75%              NaN
max        95.888641
Name: West_Lulworth, dtype: float64

In [None]:
# Export the merged series. dropna() with its defaults removes any date on
# which at least one site has no value, so only dates with all three
# sites present are written; result_3 itself is left unchanged.
result_3.dropna().to_csv(r'D:\Annies_Dissertation\Analysis\Timeseries\Timeseries.csv')

In [None]:
result_3[:10]

### Create a graph to show the variation in PM2.5 for the three sites over time

In [None]:
# Line plot of PM2.5 at the three sites over the full record, saved as JPEG.
ax = result_3.plot(figsize=(20, 8))
ax.set_ylabel('PM2.5')
ax.set_xlabel('Date')
plt.savefig(r'D:\Annies_Dissertation\Analysis\Timeseries\Timeseries_2000_2014.jpeg')

In [None]:
# Load the subset of the merged time series. 'time' is parsed as dates,
# as in the other site CSV reads, so that the index plotted against the
# 'Date' axis below is a real datetime index rather than plain strings.
result_3_sub = pd.read_csv(
    r'D:\Annies_Dissertation\Analysis\Timeseries\Timeseries_subset.csv',
    parse_dates=['time'],
)

In [None]:
result_3_sub[:5]

In [None]:
# Use the 'time' column as the index so it becomes the x-axis when plotting.
# NOTE(review): this overwrites its own input, so re-running the cell without
# re-reading the CSV presumably fails because 'time' is no longer a column.
result_3_sub = result_3_sub.set_index('time')

In [None]:
# Line plot of PM2.5 at the three sites for the subset period, saved as JPEG.
ax = result_3_sub.plot(figsize=(20, 8))
ax.set_ylabel('PM2.5')
ax.set_xlabel('Date')
plt.savefig(r'D:\Annies_Dissertation\Analysis\Timeseries\Timeseries_2009_2014.jpeg')

# Analysis
## overall PM2.5 average of every pixel

In [None]:
# Mean PM2.5 of every pixel over the whole time period. The result is named
# `overall_mean` because that is the name the export cell below reads
# (it was previously assigned to `om`, which left `overall_mean` undefined).
# keep_attrs=True carries the data's attributes over to the result.
overall_mean = PM25.mean(dim='time', keep_attrs=True)

In [None]:
# Write the per-pixel overall mean to a GeoTIFF via the project helper.
# NOTE(review): this needs a variable called `overall_mean` to exist - confirm
# that the cell above defines it under that name.
xarray_to_rasterio(overall_mean, r'D:\Annies_Dissertation\Analysis\overall_mean.tif')

## PM2.5 Averages
### Monthly

In [None]:
# Average every pixel over all observations falling in each calendar month.
by_month = PM25.groupby('time.month')
monthly_mean = by_month.mean(dim='time', keep_attrs=True)

In [None]:
monthly_mean.coords

In [None]:
monthly_mean.attrs

In [None]:
# Map the monthly means, one panel per month. `figure` is not imported as a
# bare name in this notebook, so it is called through pyplot (`plt`).
# NOTE(review): the faceted xarray plot may create its own figure, in which
# case this figsize has no effect on it - confirm.
plt.figure(figsize=(20, 12))
monthly_mean.plot(col='month', robust=True)

In [None]:
# Export the monthly means with the project helper, splitting along 'month'.
# NOTE(review): presumably one raster is written per month with the given
# path used as a filename prefix - confirm against rasterio_to_xarray.
xarray_to_rasterio_by_band(monthly_mean, r'D:\Annies_Dissertation\Analysis\monthly\monthly_mean_', dim='month')

### Seasonal

In [None]:
# Average every pixel over all observations falling in each season.
by_season = PM25.groupby('time.season')
seasonal_mean = by_season.mean(dim='time', keep_attrs=True)

In [None]:
seasonal_mean.coords

In [None]:
# Map the seasonal means, one panel per season. `figure` is not imported as
# a bare name in this notebook, so it is called through pyplot (`plt`).
# NOTE(review): the faceted xarray plot may create its own figure, in which
# case this figsize has no effect on it - confirm.
plt.figure(figsize=(20, 12))
seasonal_mean.plot(col='season', robust=True)

In [None]:
# Export the seasonal means with the project helper, splitting along 'season'.
# NOTE(review): presumably one raster is written per season with the given
# path used as a filename prefix - confirm against rasterio_to_xarray.
xarray_to_rasterio_by_band(seasonal_mean, r'D:\Annies_Dissertation\Analysis\seasonal\seasonal_mean_', dim='season')

### Yearly

In [16]:
# Average every pixel over all observations falling in each calendar year.
by_year = PM25.groupby('time.year')
yearly_mean = by_year.mean(dim='time', keep_attrs=True)

In [19]:
yearly_mean.coords

Coordinates:
  * x        (x) float64 -9.476e+05 -9.464e+05 -9.451e+05 -9.439e+05 ...
  * y        (y) float64 1.429e+06 1.428e+06 1.427e+06 1.426e+06 1.424e+06 ...
  * year     (year) int64 2000 2001 2002 2003 2004 2005 2006 2007 2008 2009 ...

In [None]:
# Map the yearly means, one panel per year. `figure` is not imported as a
# bare name in this notebook, so it is called through pyplot (`plt`).
# NOTE(review): the faceted xarray plot may create its own figure, in which
# case this figsize has no effect on it - confirm.
plt.figure(figsize=(20, 12))
yearly_mean.plot(col='year', robust=True)

In [None]:
# Export the yearly means with the project helper, splitting along 'year'.
# NOTE(review): presumably one raster is written per year with the given
# path used as a filename prefix - confirm against rasterio_to_xarray.
xarray_to_rasterio_by_band(yearly_mean, r'D:\Annies_Dissertation\Analysis\yearly\yearly_mean_', dim='year')

## Standard deviation of AP for each pixel over time

In [None]:
std = PM25.groupby('time.year').std(dim = 'time', keep_attrs=True)

In [None]:
std = PM25.std(dim = 'time', keep_attrs=True)

In [None]:
std.coords

In [None]:
figure(figsize=(20, 12))
std.plot(col='year', robust=True)

In [None]:
# Write the per-pixel standard deviation held in `std` to a GeoTIFF.
# NOTE(review): the filename says "overall", so `std` should be the
# whole-period result rather than the per-year one at this point - confirm.
xarray_to_rasterio(std, r'D:\Annies_Dissertation\Analysis\overall_std.tif')

# Count of values that aren't missing

In [None]:
# Number of non-missing PM2.5 values available for each pixel across time.
observations = PM25.count(keep_attrs=True, dim='time')

In [None]:
# Write the per-pixel observation counts to a GeoTIFF via the project helper.
xarray_to_rasterio(observations, r'D:\Annies_Dissertation\Analysis\observations_count.tif')

## Read in the subset count image (Wessex only)

In [None]:
# Load the clipped observation-count raster through the project helper.
# NOTE(review): presumably obs_arc1.tif is the count image clipped to Wessex
# outside this notebook - confirm its provenance.
subset_count = rasterio_to_xarray(r'D:\Annies_Dissertation\Analysis\Clipped\obs_arc1.tif')

In [None]:
# Histogram of the per-pixel observation counts in the subset, saved as JPEG.
count_histo = subset_count.plot.hist()
plt.gca().set_xlabel('Number of Observations')
plt.savefig(r'D:\Annies_Dissertation\Analysis\count_histogram_subset.jpeg')

## Buncefield fire

In [None]:
# Reorder the data chronologically along 'time' before selecting by date.
time_order = np.argsort(PM25.time)
PM25 = PM25.isel(time=time_order)

In [None]:
# Select the PM2.5 data for 15 December 2005.
# NOTE(review): if selecting one exact date drops the 'time' dimension, the
# dim='time' export in the cell below may fail - confirm.
B_fire = PM25.sel(time='2005-12-15')

In [None]:
B_fire

In [None]:
# Export the selected day with the project helper, splitting along 'time'.
# NOTE(review): presumably one raster is written per time step with the given
# path used as a filename prefix - confirm against rasterio_to_xarray.
xarray_to_rasterio_by_band(B_fire, r'D:\Annies_Dissertation\Analysis\Specific_events\Buncefield\B_fire\B_fire_', dim='time')

## Number of observations each year greater than legislation
- EU Directive (2008): limit of 25 µg/m³, not to be exceeded more than 35 times in a calendar year, by 2015
- WHO (2006): suggested annual guideline value of 10 µg/m³

In [5]:
# Reorder the data chronologically along 'time' before slicing by date range.
time_order = np.argsort(PM25.time)
PM25 = PM25.isel(time=time_order)

In [12]:
def over_ten(x, threshold=10):
    """Count the time steps in which each value of ``x`` exceeds ``threshold``.

    Parameters
    ----------
    x : object supporting ``>`` and ``.sum(dim='time')``
        Data with a ``time`` dimension (an xarray DataArray in this notebook).
    threshold : number, optional
        Value that must be strictly exceeded. Defaults to 10, the WHO
        guideline value quoted in the section heading above.

    Returns
    -------
    The sum over ``time`` of the boolean mask ``x > threshold``, i.e. the
    number of exceedances along the ``time`` dimension.
    """
    exceeds_threshold = x > threshold
    return exceeds_threshold.sum(dim='time')

In [13]:
# Restrict the data to the label range '2010' to '2016'. Despite the name,
# 2010 itself is included (the grouped result below lists years 2010-2014).
After2010 = PM25.sel(time=slice('2010', '2016'))

In [21]:
# For each calendar year, count per pixel the observations above 10.
res = After2010.groupby('time.year').apply(over_ten)

In [22]:
# Reattach the source attributes ('affine' and 'crs' in the repr below).
# NOTE(review): presumably the export helper needs them to georeference the
# output - confirm against rasterio_to_xarray.
res.attrs = PM25.attrs

In [23]:
res

<xarray.DataArray 'data' (year: 5, y: 1162, x: 1240)>
dask.array<transpo..., shape=(5, 1162, 1240), dtype=int32, chunksize=(1, 1162, 1240)>
Coordinates:
  * x        (x) float64 -9.476e+05 -9.464e+05 -9.451e+05 -9.439e+05 ...
  * y        (y) float64 1.429e+06 1.428e+06 1.427e+06 1.426e+06 1.424e+06 ...
  * year     (year) int64 2010 2011 2012 2013 2014
Attributes:
    affine: [ -9.47639631e+05   1.25654304e+03   0.00000000e+00   1.42927781e+06
   0.00000000e+00  -1.25654304e+03]
    crs: +init=epsg:27700

In [24]:
# Export the exceedance counts, split along 'year'. The output below shows
# one export per year, each taking roughly 13-14 minutes.
xarray_to_rasterio_by_band(res, r'D:\Annies_Dissertation\Analysis\legislation\count_over_10_', dim='year')

[                                        ] | 0% Completed | 13min 27.3s

  return function(*args2, **kwargs)


[########################################] | 100% Completed | 13min 28.3s
Exported 2010
[########################################] | 100% Completed | 13min 12.8s
Exported 2011
[########################################] | 100% Completed | 13min 28.3s
Exported 2012
[########################################] | 100% Completed | 13min 48.8s
Exported 2013
[########################################] | 100% Completed | 14min  6.2s
Exported 2014


In [27]:
def over_25(x, threshold=25):
    """Count the time steps in which each value of ``x`` exceeds ``threshold``.

    Parameters
    ----------
    x : object supporting ``>`` and ``.sum(dim='time')``
        Data with a ``time`` dimension (an xarray DataArray in this notebook).
    threshold : number, optional
        Value that must be strictly exceeded. Defaults to 25, the EU
        Directive limit quoted in the section heading above.

    Returns
    -------
    The sum over ``time`` of the boolean mask ``x > threshold``, i.e. the
    number of exceedances along the ``time`` dimension.
    """
    exceeds_threshold = x > threshold
    return exceeds_threshold.sum(dim='time')

In [31]:
# For each calendar year, count per pixel the observations above 25.
res_25 = After2010.groupby('time.year').apply(over_25)

In [32]:
# Reattach the source attributes to the grouped result before exporting.
# NOTE(review): presumably the export helper needs them to georeference the
# output - confirm against rasterio_to_xarray.
res_25.attrs = PM25.attrs

In [33]:
# Export the exceedance counts, split along 'year'. The output below shows
# one export per year, each taking roughly 13-14 minutes.
xarray_to_rasterio_by_band(res_25, r'D:\Annies_Dissertation\Analysis\legislation\count_over_25_', dim='year')

[                                        ] | 0% Completed | 13min 26.3s

  return function(*args2, **kwargs)


[########################################] | 100% Completed | 13min 27.3s
Exported 2010
[########################################] | 100% Completed | 13min 50.5s
Exported 2011
[########################################] | 100% Completed | 13min 29.8s
Exported 2012
[########################################] | 100% Completed | 13min 12.7s
Exported 2013
[########################################] | 100% Completed | 13min 33.3s
Exported 2014
