In [1]:
%matplotlib inline

In [2]:
import xarray as xr
import rasterio

# Formatting temperature data
- Reading in Temperature data
- Resampling temp data monthly
- Save monthly temp data as netcdf

In [None]:
data = xr.open_dataset('http://opendap.knmi.nl/knmi/thredds/dodsC/e-obs_0.25regular/tg_stderr_0.25deg_reg_v13.1.nc')

In [None]:
tg = data['tg']

In [None]:
tg.coords['latitude'].max(), tg.coords['latitude'].min()

In [None]:
tg.coords['longitude'].max(), tg.coords['longitude'].min()

In [None]:
tg.coords

In [None]:
aff = rasterio.Affine.from_gdal(-40.375, 0.25, 0.0, 25.375, 0.0, 0.25)

In [None]:
crs = '+init=epsg:4326'

In [None]:
tg.attrs['affine'] = aff.to_gdal()
tg.attrs['crs'] = crs

In [None]:
tg.coords

In [None]:
oneday = tg.sel(time='2009-01-01')

In [None]:
oneday.plot()

In [None]:
from rasterio_to_xarray import xarray_to_rasterio

In [None]:
import numpy as np

In [None]:
xarray_to_rasterio(oneday, 'OneDay5.tif')

In [None]:
%time monthly = tg.sel(time=slice('2009-01-01','2016-07-01')).resample('M', dim='time', how='mean')

In [None]:
tg.coords

In [None]:
uk = tg.sel(longitude=slice(-10,3), latitude=slice(50,54))

In [None]:
%time uk_monthly = uk.sel(time=slice('2009-01-01','2016-07-01')).resample('M', dim='time', how='mean')

In [None]:
uk_monthly.isel(time=0).plot()

In [None]:
uk_ds = uk_monthly.to_dataset(name='data')

In [None]:
uk_ds

In [None]:
uk_ds.to_netcdf(r'D:\Annies_Dissertation\Analysis\weather\UK_Temperature.nc')

# Rasterstats

In [3]:
import numpy as np
import xarray as xr
import rasterio
%matplotlib inline
from matplotlib.pyplot import *
from glob import glob
import os
import datetime

import pandas as pd

from rasterio import features

from rasterio_to_xarray import rasterio_to_xarray, xarray_to_rasterio

import rasterstats
import fiona

from tqdm import tqdm

from shapely.geometry import shape
from rasterstats.io import read_features


In [4]:
data = xr.open_mfdataset(r'D:\Annies_Dissertation\Analysis\weather\UK_Temperature.nc')['data']

In [5]:
# Image to rasterize the polygons in to
rasterized_image = np.zeros(data.isel(time=0).shape, dtype=np.int)

# List to store dataframes in
dfs = []

feats = read_features(r'D:\Annies_Dissertation\Data\Boundaries\LSOA_WGS.shp')

out_shape = data.isel(time=0).shape

In [6]:
data = data.load()

In [7]:
data

<xarray.DataArray 'data' (time: 84, latitude: 16, longitude: 52)>
array([[[        nan,         nan,         nan, ...,  1.40935481,
          1.42354836,  1.43129029],
        [        nan,         nan,         nan, ...,  1.466129  ,
          1.44580642,  1.41064513],
        [        nan,         nan,         nan, ...,  1.35774191,
          1.35903223,  1.39677416],
        ..., 
        [        nan,  1.47838706,  1.33387094, ...,         nan,
                 nan,         nan],
        [ 1.45516126,  1.40709674,  1.31580642, ...,         nan,
                 nan,         nan],
        [        nan,  1.42516126,  1.36225803, ...,         nan,
                 nan,         nan]],

       [[        nan,         nan,         nan, ...,  1.31035711,
          1.2960714 ,  1.28749997],
        [        nan,         nan,         nan, ...,  1.31214283,
          1.30035711,  1.30499997],
        [        nan,         nan,         nan, ...,  1.27821426,
          1.27785711,  1.30321426],


In [8]:
# Loop over features (polygons) in the shapefile
for f in tqdm(feats):
    # Rasterize the polygon into an array
    rasterized_image = features.rasterize([(shape(f['geometry']),1)],
                                          out_shape=out_shape,
                                          fill=0,
                                          all_touched=True)

    # Extract from the xarray where the rasterized polygon is
    region = data.where(rasterized_image == 1)
    
    # Combine x and y into a new dimension called allpoints and calculate the mean over it
    # and then convert to a dataframe with an appropriate name
    res = region.stack(allpoints=['longitude','latitude']).mean(dim='allpoints').to_dataframe(name=f['properties']['LSOA11CD'])
    
    # Append to the list of data frames so we can concatenate them all at the end
    dfs.append(res)
    
stats = pd.concat(dfs, axis=1)

2578it [00:17, 144.40it/s]


In [9]:
stats

Unnamed: 0_level_0,E01014869,E01014890,E01014891,E01015272,E01015273,E01015274,E01015275,E01015276,E01015277,E01015279,...,E01033241,E01033242,E01033283,E01033285,E01033286,E01033288,E01033380,E01033381,E01033383,E01033384
time,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2009-01-31,,,,,,,,,,,...,,,,,,,,,,
2009-02-28,,,,,,,,,,,...,,,,,,,,,,
2009-03-31,,,,,,,,,,,...,,,,,,,,,,
2009-04-30,,,,,,,,,,,...,,,,,,,,,,
2009-05-31,,,,,,,,,,,...,,,,,,,,,,
2009-06-30,,,,,,,,,,,...,,,,,,,,,,
2009-07-31,,,,,,,,,,,...,,,,,,,,,,
2009-08-31,,,,,,,,,,,...,,,,,,,,,,
2009-09-30,,,,,,,,,,,...,,,,,,,,,,
2009-10-31,,,,,,,,,,,...,,,,,,,,,,


In [11]:
stats.min()

E01014869   NaN
E01014890   NaN
E01014891   NaN
E01015272   NaN
E01015273   NaN
E01015274   NaN
E01015275   NaN
E01015276   NaN
E01015277   NaN
E01015279   NaN
E01015280   NaN
E01015281   NaN
E01015282   NaN
E01015283   NaN
E01015285   NaN
E01015286   NaN
E01015287   NaN
E01015288   NaN
E01015289   NaN
E01015290   NaN
E01015291   NaN
E01015292   NaN
E01015293   NaN
E01015294   NaN
E01015295   NaN
E01015296   NaN
E01015297   NaN
E01015298   NaN
E01015299   NaN
E01015300   NaN
             ..
E01032860   NaN
E01032870   NaN
E01032871   NaN
E01032877   NaN
E01032878   NaN
E01032879   NaN
E01032880   NaN
E01032881   NaN
E01032882   NaN
E01032883   NaN
E01032884   NaN
E01033078   NaN
E01033154   NaN
E01033156   NaN
E01033158   NaN
E01033159   NaN
E01033200   NaN
E01033229   NaN
E01033237   NaN
E01033239   NaN
E01033241   NaN
E01033242   NaN
E01033283   NaN
E01033285   NaN
E01033286   NaN
E01033288   NaN
E01033380   NaN
E01033381   NaN
E01033383   NaN
E01033384   NaN
dtype: float64