# Import relevant libraries/modules etc

In [1]:
import numpy as np
import xarray as xr
import rasterio
%matplotlib inline
from matplotlib.pyplot import *
from glob import glob
import os
import datetime
import pandas as pd
from rasterio import features
from rasterio_to_xarray import rasterio_to_xarray, xarray_to_rasterio, xarray_to_rasterio_by_band
import rasterstats
import fiona
from tqdm import tqdm

from shapely.geometry import shape
from rasterstats.io import read_features

from dask.diagnostics import ProgressBar

In [2]:
# Create a dask ProgressBar and register it, so that dask computations run later
# in the notebook report their progress without needing an explicit `with` block.
from dask.diagnostics import Profiler, ResourceProfiler, CacheProfiler, ProgressBar
pbar = ProgressBar()
pbar.register()

# Read the NetCDF files into a single dataset containing all of the data

In [2]:
# Open every NetCDF file matching the *PM25.nc pattern as one multi-file dataset
# and keep only its 'data' variable as a DataArray.
PM25 = xr.open_mfdataset(r'C:\MAIACData\nc_monthly_daily\*PM25.nc')['data']

In [5]:
PM25

<xarray.DataArray 'data' (time: 5191, y: 1162, x: 1240)>
dask.array<concate..., shape=(5191, 1162, 1240), dtype=float32, chunksize=(30, 1162, 1240)>
Coordinates:
  * x        (x) float64 -9.476e+05 -9.464e+05 -9.451e+05 -9.439e+05 ...
  * y        (y) float64 1.429e+06 1.428e+06 1.427e+06 1.426e+06 1.424e+06 ...
  * time     (time) datetime64[ns] 2000-10-01 2000-10-02 2000-10-03 ...
Attributes:
    affine: [ -9.47639631e+05   1.25654304e+03   0.00000000e+00   1.42927781e+06
   0.00000000e+00  -1.25654304e+03]
    crs: +init=epsg:27700

# Extract data over one pixel

In [None]:
# Build an Affine transform from the GDAL-ordered coefficients stored in the array's
# 'affine' attribute, then apply its inverse (~a) to a coordinate pair to obtain the
# corresponding fractional (column, row) position in the grid.
# NOTE(review): the pair (382611, 80604) is presumably an easting/northing in the
# array's CRS — confirm against the site being extracted.
a = PM25.attrs['affine']
a = rasterio.Affine.from_gdal(*a)
~a * (382611, 80604)

In [None]:
# Reorder the array so its time axis is ascending before slicing by label.
PM25 = PM25.isel(time=np.argsort(PM25.time))

# Keep only the time steps whose labels fall in the '2009'..'2016' slice.
After2009 = PM25.sel(time=slice('2009', '2016'))

# Mean per month ('M' frequency) of that subset, keeping the original attrs.
# NOTE(review): this is the older resample(freq, dim=..., how=...) call form —
# confirm it matches the xarray version installed for this notebook.
monthly_data = After2009.resample('M', dim='time', how='mean', keep_attrs=True)

In [None]:
# Select the full time series at one grid cell (x index 1058, y index 1073)
# and load it into memory.
ts = PM25.isel(x=1058, y=1073).load()

In [None]:
ts

In [None]:
result = ts.to_dataframe()

In [None]:
# Display only: the return value of dropna() is not assigned, so `result` itself
# still contains any rows with missing values after this cell runs.
result.dropna()

In [None]:
result.to_csv(r'D:\Annies_Dissertation\Analysis\Timeseries\Westlulworth.csv')

## Merging time series data so a graph can be created

In [None]:
from dateutil.parser import parse

In [None]:
Unisoton = pd.read_csv(r'D:\Annies_Dissertation\Analysis\Timeseries\UniSoton.csv', parse_dates=['time'])

In [None]:
Unisoton[:10]

In [None]:
Unisoton = Unisoton.set_index('time').rename(columns={'data':'Unisoton_data'})

In [None]:
Bolderwood = pd.read_csv(r'D:\Annies_Dissertation\Analysis\Timeseries\Bolderwood.csv', parse_dates=['time'])

In [None]:
Bolderwood[:10]

In [None]:
Bolderwood = Bolderwood.set_index('time').rename(columns={'data':'Bolderwood_data'})

In [None]:
# Join the two site tables on their time index. pd.merge uses an inner join by
# default, so only timestamps present in both tables are kept.
# Note: this rebinds `result`, which earlier held the single-pixel DataFrame.
result = pd.merge(Unisoton, Bolderwood, left_index=True, right_index=True)

In [None]:
result[:10]

In [None]:
Westlulworth = pd.read_csv(r'D:\Annies_Dissertation\Analysis\Timeseries\Westlulworth.csv', parse_dates=['time'])

In [None]:
Westlulworth[:10]

In [None]:
Westlulworth = Westlulworth.set_index('time').rename(columns={'data':'Westlulworth_data'})

In [None]:
# Add the third site to the two-site table, again joining on the time index
# (inner join by default, so only shared timestamps remain).
result_3 = pd.merge(result, Westlulworth, left_index=True, right_index=True)

In [None]:
result_3[:10]

In [None]:
# Keep only the three per-site value columns, dropping every other merged column.
result_3 = result_3[['Unisoton_data', 'Bolderwood_data', 'Westlulworth_data']]

In [None]:
result_3.dropna().to_csv(r'D:\Annies_Dissertation\Analysis\Timeseries\Timeseries.csv')

In [None]:
result[:10]

# Group data
- merge multiple images per day using the maximum AOT estimates

In [None]:
PM25 = PM25.isel(time=np.argsort(PM25.time))

In [None]:
PM25.time

In [None]:
m = PM25.mean(dim='time', keep_attrs=True)

In [None]:
m = m.load()

In [None]:
figure(figsize=(20, 8))
m.plot()

In [None]:
from dask.dot import dot_graph

In [None]:
##dot_graph(, format='svg')

In [None]:
from dask.dot import to_graphviz

In [None]:
# Draw the dask task graph for the all-time mean.
# `m` was loaded into memory in an earlier cell, so its data no longer carries a
# dask graph; rebuild the lazy (uncomputed) mean here to get a graph to draw.
dot_graph(PM25.mean(dim='time', keep_attrs=True).data.dask)

In [None]:
# Write the dask task graph for the all-time mean to a PDF file.
# `m` was loaded into memory in an earlier cell, so its data no longer carries a
# dask graph; rebuild the lazy (uncomputed) mean here to get a graph to draw.
dot_graph(PM25.mean(dim='time', keep_attrs=True).data.dask, filename='robin2.pdf', format='pdf')

In [None]:
reordered_PM25 = PM25.isel(time=np.argsort(PM25.time))

In [None]:
# Collapse the time axis to daily ('D') steps, taking the maximum within each day.
# NOTE(review): older resample(freq, dim=..., how=...) call form — confirm it
# matches the installed xarray version.
Daily_PM25 = reordered_PM25.resample('D', dim='time', how='max')
# Drop the time steps for which every value is missing.
Daily_PM25 = Daily_PM25.dropna(dim='time', how='all')

In [None]:
Daily_PM25.time

In [None]:
Daily_PM25.attrs

# Analysis
## overall average of every pixel

The next four cells were added by Robin for profiling - ignore them for the moment :P

In [None]:
om = PM25.mean(dim='time', keep_attrs=True)

In [None]:
from dask.diagnostics import Profiler, ResourceProfiler, CacheProfiler

In [None]:
# Compute the mean while recording it with dask's task profiler and resource
# profiler (dt=0.25 sets the resource sampling interval); the two profiler
# objects are kept as `prof` and `rprof` for visualisation afterwards.
with Profiler() as prof, ResourceProfiler(dt=0.25) as rprof:
    om.load()

In [None]:
from dask.diagnostics import visualize
visualize([prof, rprof], show=True)

In [None]:
overall_mean = PM25.mean(dim='time', keep_attrs=True)

In [None]:
overall_mean.attrs

In [None]:
# Trigger the actual computation of the all-time mean, showing a progress bar
# while it runs.
with ProgressBar():
    res = overall_mean.load()

In [None]:
xarray_to_rasterio(overall_mean, r'D:\Annies_Dissertation\Analysis\overall_mean.tif')

## Averages
### Monthly

In [None]:
# Group the time steps by the month component of their timestamp and average
# each group over time, keeping the original attrs on the result.
monthly_mean = PM25.groupby('time.month').mean(dim='time', keep_attrs=True)

In [None]:
monthly_mean.coords

In [None]:
monthly_mean.attrs

In [None]:
# Plot the per-month means as a grid with one panel per month.
# The array is named `monthly_mean`; no variable called `monthly` exists in this notebook.
figure(figsize=(20, 12))
monthly_mean.plot(col='month', robust=True)

In [None]:
xarray_to_rasterio_by_band(monthly_mean, r'D:\Annies_Dissertation\Analysis\monthly\monthly_mean_', dim='month')

### Seasonal

In [None]:
seasonal_mean = PM25.groupby('time.season').mean(dim='time', keep_attrs=True)

In [None]:
seasonal_mean.coords

In [None]:
figure(figsize=(20, 12))
seasonal_mean.plot(col='season', robust=True)

In [None]:
xarray_to_rasterio_by_band(seasonal_mean, r'D:\Annies_Dissertation\Analysis\seasonal\seasonal_mean_', dim='season')

### Yearly

In [None]:
yearly_mean = PM25.groupby('time.year').mean(dim='time', keep_attrs=True)

In [None]:
yearly_mean.coords

In [None]:
figure(figsize=(20, 12))
yearly_mean.plot(col='year', robust=True)

In [None]:
xarray_to_rasterio_by_band(yearly_mean, r'D:\Annies_Dissertation\Analysis\yearly\yearly_mean_', dim='year')

## Standard deviation of AP for each pixel over time - not currently what I want to do!

In [None]:
# Standard deviation over time for each pixel, computed separately for each year.
# NOTE(review): the following cell assigns to `std` again, so this per-year
# result is replaced when the notebook is run from top to bottom.
std = PM25.groupby('time.year').std(dim = 'time', keep_attrs=True)

In [None]:
std = PM25.std(dim = 'time', keep_attrs=True)

In [None]:
std.coords

In [None]:
# Plot the standard-deviation map.
# At this point `std` is the all-time standard deviation (reduced over 'time' with
# no grouping), so it has no 'year' dimension to facet on and is drawn as one panel.
figure(figsize=(20, 12))
std.plot(robust=True)

In [None]:
xarray_to_rasterio(std, r'D:\Annies_Dissertation\Analysis\overall_std.tif')

# Count of values that aren't missing

In [3]:
# For each pixel, count how many time steps hold a non-missing value.
observations = PM25.count(dim='time', keep_attrs=True)

In [None]:
xarray_to_rasterio(observations, r'D:\Annies_Dissertation\Analysis\observations_count.tif')

In [None]:
# Histogram of the per-pixel observation counts, then save the current figure to disk.
count_histo = observations.plot.hist()
savefig(r'D:\Annies_Dissertation\Analysis\count_histogram.jpeg')

In [None]:
figure(figsize=(20, 12))
observations.plot(robust=True)