# Get some data in

## Import libraries

In [1]:
import numpy as np
import xarray as xr
import rasterio
%matplotlib inline
from matplotlib.pyplot import *
from glob import glob
import os
import datetime
import pandas as pd
from rasterio_to_xarray import rasterio_to_xarray, xarray_to_rasterio

## Function to read in the MAIAC files into a data array

In [2]:
def maiac_file_to_da(filename):
    """Load one MAIAC file as a data array tagged with its acquisition time.

    The file is read with ``rasterio_to_xarray``. The timestamp is sliced out
    of the file's base name (from character 17 up to 13 characters before the
    end) and parsed as year, day-of-year, hour and minute (``'%Y%j%H%M'``);
    the result is stored under the ``'time'`` coordinate of the returned
    array.
    """
    data_array = rasterio_to_xarray(filename)

    # The slice bounds are tied to the MAIAC file naming scheme.
    base_name = os.path.basename(filename)
    stamp_text = base_name[17:-13]
    data_array.coords['time'] = datetime.datetime.strptime(stamp_text, '%Y%j%H%M')

    return data_array

In [3]:
# Collect the paths of all projected 2014 MAIAC GeoTIFFs matching '*_proj.tif'.
files = glob(r'D:\Annies_Dissertation\MAIAC_Test\Projected\2014\*_proj.tif')

## Map the function over every element of the file list, creating a list of 2D arrays (one per file, each tagged with its date)

In [4]:
# Materialise the result: on Python 3 a bare `map` is a lazy, one-shot
# iterator, so it would be empty after the first pass over it and could not
# be indexed or inspected like the list its name promises.
list_of_das = list(map(maiac_file_to_da, files))

## Make the dataset 3D over the dimension time

In [5]:
# Stack the per-file 2D arrays along the 'time' dimension to get one 3D array.
MAIAC_AOT = xr.concat(list_of_das, 'time')

## Reorder by time, because the A and T files leave the time variable out of order and resampling won't work otherwise

In [6]:
# argsort gives the positions that put the time coordinate in ascending
# order; isel then reorders the whole array along 'time' accordingly.
reordered_MAIAC_AOT = MAIAC_AOT.isel(time=np.argsort(MAIAC_AOT.time))

## Load the data into dask

In [7]:
# Round-trip the time-sorted array through a NetCDF file so it can be
# reopened as a dask-backed array split into chunks of 10 time steps.
netcdf_path = r'D:\Annies_Dissertation\Methodology\Validation\AERONET\Test\test17.nc'
ds = reordered_MAIAC_AOT.to_dataset(name='data')
ds.to_netcdf(netcdf_path)
# Only one file is read, so no multi-file concatenation is needed:
# open_dataset with `chunks` yields the same chunked result, whereas
# open_mfdataset(..., concat_dim='time') is rejected by newer xarray
# releases unless combine='nested' is also passed.
all_data = xr.open_dataset(netcdf_path, chunks={'time': 10})
all_data = all_data['data']

In [8]:
# Inspect the dask chunk sizes along each dimension.
all_data.chunks

((10, 10, 3), (1162,), (1240,))

In [9]:
# Mean over the time dimension; the repr below shows the result is still a
# dask array.
all_data.mean(dim='time')

<xarray.DataArray 'data' (y: 1162, x: 1240)>
dask.array<mean_ag..., shape=(1162, 1240), dtype=float32, chunksize=(1162, 1240)>
Coordinates:
  * y        (y) float64 1.429e+06 1.428e+06 1.427e+06 1.426e+06 1.424e+06 ...
  * x        (x) float64 -9.476e+05 -9.464e+05 -9.451e+05 -9.439e+05 ...

In [None]:
# Check the Python type of the time-mean result...
type(all_data.mean(dim='time'))

# ...and the dtype of its values.
all_data.mean(dim='time').dtype

In [10]:
# Write the all-time mean out as a GeoTIFF. keep_attrs=True carries the
# array's attributes through the reduction (presumably xarray_to_rasterio
# needs them to georeference the output -- TODO confirm).
xarray_to_rasterio(all_data.mean(dim='time', keep_attrs=True), 
                   r'D:\Annies_Dissertation\Methodology\Validation\AERONET\Test\test_dask_overallmean17.tif')

  x = np.divide(x1, x2, out)


In [None]:
# Take the daily maximum of the *time-sorted* array: the raw concatenation in
# MAIAC_AOT is not in chronological order, and resampling needs sorted times.
Daily_MAIAC_AOT = reordered_MAIAC_AOT.resample('D', dim='time', how='max')

In [None]:
# Display the daily-maximum array.
Daily_MAIAC_AOT

# Get AOT values for all of the years for Chilbolton pixel from MAIAC data

- Find the location of the Chilbolton pixel
 - Get the OS coordinates from https://www.ordnancesurvey.co.uk/gps/transformation
 - Convert the latitude and longitude into an easting and northing
 - Put the easting and northing in, and use the image attributes to convert them into the projection that the images are in, to find the cell row and column
 - Extract the data for that pixel

In [None]:
# Affine transform stored in the array's attributes (presumably maps
# column/row indices to map coordinates -- TODO confirm).
a = reordered_MAIAC_AOT.attrs['affine']

In [None]:
# Apply the inverted transform to the Chilbolton easting/northing to get its
# fractional (column, row) position in the grid -- assumes `a` is an
# affine.Affine, where `~` inverts and `*` applies the transform; confirm.
~a * (439480.737, 138506.177)

In [None]:
# Pull the daily time series for one pixel, drop days with no value, and
# convert it to a DataFrame with a 'MAIAC_AOT' column. The hard-coded x/y
# indices are presumably the column/row from the inverse-affine lookup for
# Chilbolton -- verify if the grid changes.
Measurements = Daily_MAIAC_AOT.isel(x=1103, y=1027).dropna('time').to_dataframe(name='MAIAC_AOT')

In [None]:
# Save the pixel time series as CSV.
# NOTE(review): the file name says 2005 but the input files globbed earlier
# in this notebook are from the 2014 folder -- confirm the intended year.
Measurements.to_csv(r'D:\Annies_Dissertation\Methodology\Validation\Comparision\MAIAC\ALL_MAIAC_2005.csv')

- TODO: loop through all of the years' data and save the AOT measurements as a CSV (or append to an existing one, if that works better)

# Read in AERONET data file into pandas DataFrame

In [None]:
import pandas as pd

In [None]:
def read_aeronet(filename):
    """Read a given AERONET AOT data file, and return it as a dataframe.

    This returns a DataFrame containing the AERONET data, with the index
    (named 'times') set to the timestamp of the AERONET observations. Rows
    or columns consisting entirely of missing data are removed, the
    'Julian_Day' column is dropped, and 'Last_Processing_Date(dd/mm/yyyy)'
    is renamed to 'Last_Processing_Date'. All other columns are left as-is.

    Parameters
    ----------
    filename : str
        Path to the AERONET file. The first four lines are skipped and the
        next line is used as the column header. The first two columns must
        hold the date ('dd:mm:yyyy') and the time ('HH:MM:SS'); 'N/A'
        entries are treated as missing values.

    Returns
    -------
    pandas.DataFrame
        The observations, sorted by timestamp.

    Raises
    ------
    KeyError
        If the file has no 'Julian_Day' column.
    ValueError
        If a date/time pair does not match '%d:%m:%Y %H:%M:%S'.
    """
    aeronet = pd.read_csv(filename, skiprows=4, na_values=['N/A'])

    # Build the timestamp from the first two columns by hand. The previous
    # approach (pd.datetime plus read_csv's date_parser / dict-valued
    # parse_dates) relies on APIs that newer pandas has removed or deprecated.
    date_col, time_col = aeronet.columns[:2]
    stamps = aeronet[date_col].astype(str) + ' ' + aeronet[time_col].astype(str)
    aeronet['times'] = pd.to_datetime(stamps, format="%d:%m:%Y %H:%M:%S")

    aeronet = aeronet.drop([date_col, time_col], axis=1).set_index('times')
    del aeronet['Julian_Day']

    # Drop any rows that are all NaN and any cols that are all NaN
    # & then sort by the index
    an = (aeronet.dropna(axis=1, how='all')
                 .dropna(axis=0, how='all')
                 .rename(columns={'Last_Processing_Date(dd/mm/yyyy)': 'Last_Processing_Date'})
                 .sort_index())

    return an

In [None]:
# Load the Chilbolton AERONET file into a DataFrame via read_aeronet.
Aeronet = read_aeronet(r'D:\Annies_Dissertation\Methodology\Validation\Comparision\050101_140630_Chilbolton.lev20')

In [None]:
# Preview the first ten rows.
Aeronet[:10]