In [1]:
import numpy as np
import xarray as xr
import rasterio
%matplotlib inline
from matplotlib.pyplot import *
from glob import glob
import os
import datetime

import pandas as pd

from rasterio import features

from rasterio_to_xarray import rasterio_to_xarray, xarray_to_rasterio

import rasterstats
import fiona

from tqdm import tqdm

from shapely.geometry import shape
from rasterstats.io import read_features

# Dask progress bar for the lazy computations triggered further down.
# The import has to be live: ProgressBar is used on the next statement, and with
# the import commented out this cell fails with NameError on a fresh kernel.
from dask.diagnostics import ProgressBar

pbar = ProgressBar()
pbar.register()

# Unregistering straight after registering leaves the global progress bar
# switched off; remove this line to see progress for dask computations.
pbar.unregister()

In [2]:
# Open every NetCDF file matching the pattern as one dataset and pull out
# the variable named 'data'.
nc_pattern = r'C:\MAIACData\nc_monthly_daily\*.nc'
data = xr.open_mfdataset(nc_pattern)['data']

In [3]:
data

<xarray.DataArray 'data' (time: 5191, y: 1162, x: 1240)>
dask.array<concate..., shape=(5191, 1162, 1240), dtype=float32, chunksize=(30, 1162, 1240)>
Coordinates:
  * x        (x) float64 -9.476e+05 -9.464e+05 -9.451e+05 -9.439e+05 ...
  * y        (y) float64 1.429e+06 1.428e+06 1.427e+06 1.426e+06 1.424e+06 ...
  * time     (time) datetime64[ns] 2000-10-01 2000-10-02 2000-10-03 ...
Attributes:
    affine: [ -9.47639631e+05   1.25654304e+03   0.00000000e+00   1.42927781e+06
   0.00000000e+00  -1.25654304e+03]
    crs: +init=epsg:27700

In [4]:
# Crop the grid: keep positions from index 950 onwards along both x and y.
from_950 = slice(950, None)
subset = data.isel(x=from_950, y=from_950)

In [5]:
# Reorder the time axis into ascending order of the time coordinate.
time_order = np.argsort(subset.time)
subset = subset.isel(time=time_order)

In [10]:
# Restrict to the time steps labelled 2009 through 2016 (label-based slice).
study_period = slice('2009', '2016')
After2009 = subset.sel(time=study_period)

In [11]:
# Aggregate along time to monthly ('M') means, keeping the attrs (affine, crs).
# NOTE(review): this looks like the legacy xarray resample signature (freq, dim=,
# how=) - confirm it is still accepted by the installed xarray version.
monthly_data = After2009.resample(
    'M',
    dim='time',
    how='mean',
    keep_attrs=True,
)

In [12]:
monthly_data

<xarray.DataArray 'data' (time: 66, y: 212, x: 290)>
dask.array<transpo..., shape=(66, 212, 290), dtype=float32, chunksize=(1, 212, 290)>
Coordinates:
  * x        (x) float64 2.461e+05 2.473e+05 2.486e+05 2.498e+05 2.511e+05 ...
  * y        (y) float64 2.356e+05 2.343e+05 2.33e+05 2.318e+05 2.305e+05 ...
  * time     (time) datetime64[ns] 2009-01-31 2009-02-28 2009-03-31 ...
Attributes:
    affine: [ -9.47639631e+05   1.25654304e+03   0.00000000e+00   1.42927781e+06
   0.00000000e+00  -1.25654304e+03]
    crs: +init=epsg:27700

In [13]:
# From here on `data` refers to the monthly means, not the array loaded from disk.
data = monthly_data

In [None]:
# Zonal statistics: for every polygon in the shapefile, rasterize it onto the data
# grid, average the pixels it covers at each time step, and collect the result as
# one DataFrame column named after the polygon's LSOA11CD code.

# Shape (rows, cols) of a single time slice - the grid the polygons are burned into.
# Computed once, since it is the same for every polygon.
grid_shape = data.isel(time=0).shape

# List to store dataframes in
dfs = []

# The affine stored in attrs describes the full raster as loaded from disk: the
# attrs are unchanged after the spatial isel() crop, while the x/y coordinates of
# `data` now start elsewhere. Using it directly with the cropped shape would burn
# the polygons into the wrong place (off-grid), giving empty masks. Keep the pixel
# size / rotation terms and re-anchor the origin on the first x/y coordinate of
# the array we actually rasterize onto.
# This treats the x/y coordinates as upper-left pixel corners, which is what the
# printed full-raster coordinates suggest (they start at the affine origin) -
# NOTE(review): confirm against rasterio_to_xarray.
full_aff = rasterio.Affine.from_gdal(*data.attrs['affine'])
aff = rasterio.Affine(full_aff.a, full_aff.b, float(data['x'].values[0]),
                      full_aff.d, full_aff.e, float(data['y'].values[0]))

feats = read_features(r'D:\Annies_Dissertation\Data\Boundaries\LSOA_Wessex.shp')


# Loop over features (polygons) in the shapefile
for f in tqdm(feats):
    # Rasterize the polygon into an array: 1 inside the polygon, 0 elsewhere.
    # all_touched=True marks every pixel the polygon touches, not just those
    # whose centre falls inside it.
    rasterized_image = features.rasterize([(shape(f['geometry']), 1)],
                                          out_shape=grid_shape,
                                          transform=aff,
                                          fill=0,
                                          all_touched=True)

    # Extract from the xarray where the rasterized polygon is (NaN elsewhere)
    region = data.where(rasterized_image == 1)

    # Combine x and y into a new dimension called allpoints and calculate the mean over it
    # and then convert to a dataframe with an appropriate name
    res = region.stack(allpoints=['x', 'y']).mean(dim='allpoints').to_dataframe(name=f['properties']['LSOA11CD'])

    # Append to the list of data frames so we can concatenate them all at the end
    dfs.append(res)

# One column per LSOA; the per-polygon frames are aligned side by side on their index
stats = pd.concat(dfs, axis=1)

0it [00:00, ?it/s]

In [None]:
stats

In [None]:
# Discard rows in which every column is NaN.
stats = stats.dropna(axis=0, how='all')

In [None]:
# Reshape from wide (one column per LSOA) to long form: one row per
# (time, LSOA) pair with the value alongside, then drop rows containing NaN.
stats_long = pd.melt(
    stats.reset_index(),
    id_vars='time',
    var_name='LSOA',
)
melted_stats = stats_long.dropna()

In [None]:
melted_stats

In [None]:
# Add the calendar month of each row's timestamp as its own column.
melted_stats['month'] = melted_stats['time'].dt.month

In [None]:
# Add the calendar year of each row's timestamp as its own column.
melted_stats['year'] = melted_stats['time'].dt.year

In [None]:
melted_stats.head()