# Get some data in

## Set libraries

In [28]:
import numpy as np
import xarray as xr
import rasterio
%matplotlib inline
from matplotlib.pyplot import *
from glob import glob
import os
import datetime
import pandas as pd
from rasterio_to_xarray import rasterio_to_xarray, xarray_to_rasterio

## Function to read in the MAIAC files into a data array

In [29]:
def maiac_file_to_da(filename):
    """Load a single MAIAC raster and label it with its acquisition time.

    The timestamp is taken from the file's base name: the characters from
    index 17 up to 13 from the end are parsed as year, day-of-year, hour
    and minute (``%Y%j%H%M``).

    filename -- path of the raster file, passed to ``rasterio_to_xarray``.

    Returns whatever ``rasterio_to_xarray`` produced, with a ``time``
    coordinate holding the parsed ``datetime``.
    """
    raster = rasterio_to_xarray(filename)

    # Cut the date/time stamp out of the base name and parse it.
    stamp = os.path.basename(filename)[17:-13]
    raster.coords['time'] = datetime.datetime.strptime(stamp, '%Y%j%H%M')

    return raster

In [30]:
# Every projected GeoTIFF in the 2011 folder.
pattern_2011 = r'D:\MAIAC_Data\Europe\h00v01\Projected\2011\*_proj.tif'
files = glob(pattern_2011)

## Map runs the function on every element of the file list, creating a list of 2D arrays (one per file, each tagged with its date)

In [31]:
# Materialise the results: on Python 3 a bare map() is a one-shot iterator,
# so re-running a later cell that consumes it would find it empty.
list_of_das = list(map(maiac_file_to_da, files))

## Make the dataset 3D over the dimension time

In [32]:
# Join the per-file arrays along the 'time' dimension.
MAIAC_AOT = xr.concat(list_of_das, dim='time')

## Reorder by time - because the A and T mess the time variable up and resampling won't work otherwise

In [33]:
# Indices that put the time coordinate into ascending order.
time_order = np.argsort(MAIAC_AOT.time)
reordered_MAIAC_AOT = MAIAC_AOT.isel(time=time_order)

# Get AOT values for all of the years for Chilbolton pixel from MAIAC data

In [34]:
# Time series at the fixed pixel (x=1103, y=1027), with missing time steps
# removed, converted to a DataFrame with a 'MAIAC_AOT' column.
pixel_series = reordered_MAIAC_AOT.isel(x=1103, y=1027)
valid_series = pixel_series.dropna('time')
Measurements = valid_series.to_dataframe(name='MAIAC_AOT')

In [35]:
# Save the 2011 pixel time series as a CSV file.
csv_2011 = r'D:\Annies_Dissertation\Methodology\Validation\AERONET\Test\2011.csv'
Measurements.to_csv(csv_2011)

- Loop through all of the years' data and save the AOT measurements as a CSV (or append, if that is better)

# Put it all in a loop so it goes through each folder and produces the CSV file

In [None]:
# Extract the pixel time series from every yearly folder and append it to a
# single CSV file.
#
# `glob` here is the function imported with `from glob import glob`, so it is
# called directly (`glob.glob` would raise AttributeError). The folders are
# sorted so the years are appended in a predictable, ascending order.
folders = sorted(glob(r'D:\MAIAC_Data\Europe\h00v01\Projected\20*'))

for folder in folders:
    files = glob(r'{fname}\*_proj.tif'.format(fname=folder))
    if not files:
        # xr.concat cannot concatenate an empty sequence; skip such folders.
        print('Skipped {fname} (no *_proj.tif files)'.format(fname=folder))
        continue

    # Build a real list rather than a lazy map object.
    list_of_das = [maiac_file_to_da(path) for path in files]
    MAIAC_AOT = xr.concat(list_of_das, 'time')
    MAIAC_AOT.attrs.clear()

    # Put the time steps into ascending order before extracting the pixel.
    reordered_MAIAC_AOT = MAIAC_AOT.isel(time=np.argsort(MAIAC_AOT.time))
    Measurements = reordered_MAIAC_AOT.isel(x=1103, y=1027).dropna('time').to_dataframe(name='MAIAC_AOT')

    # Appended without a header row; re-running this cell adds the same rows
    # again, so delete Alldata.csv first when starting over.
    Measurements.to_csv(r'D:\Annies_Dissertation\Methodology\Validation\AERONET\Test\Alldata.csv', mode='a', header=False)
    print('Processed {fname}'.format(fname=folder))

# Read in AERONET data file into pandas DataFrame

import pandas as pd

def read_aeronet(filename):
    """Read a given AERONET AOT data file, and return it as a dataframe.

    The first four lines of the file are skipped and 'N/A' is treated as
    missing data. The first two columns are taken to be the date
    (``dd:mm:yyyy``) and the time (``hh:mm:ss``); they are combined into a
    single timestamp that becomes the index, named ``times``.

    The ``Julian_Day`` column is dropped if present. Rows or columns
    consisting entirely of missing data are removed, the column
    ``Last_Processing_Date(dd/mm/yyyy)`` is renamed to
    ``Last_Processing_Date``, and the result is sorted by timestamp. All
    other columns are left as-is.

    filename -- path of the AERONET data file.

    Raises ValueError if a date/time pair does not match the expected
    format.
    """
    aeronet = pd.read_csv(filename, skiprows=4, na_values=['N/A'])

    # Build the timestamps explicitly instead of relying on
    # pd.datetime / date_parser, which newer pandas versions no longer offer.
    date_col, time_col = aeronet.columns[:2]
    stamps = pd.to_datetime(
        aeronet[date_col].astype(str) + ' ' + aeronet[time_col].astype(str),
        format="%d:%m:%Y %H:%M:%S")

    aeronet = aeronet.drop(columns=[date_col, time_col])
    aeronet.index = pd.DatetimeIndex(stamps, name='times')
    aeronet = aeronet.drop(columns=['Julian_Day'], errors='ignore')

    # Drop any rows that are all NaN and any cols that are all NaN
    # & then sort by the index
    an = (aeronet.dropna(axis=1, how='all')
                 .dropna(axis=0, how='all')
                 .rename(columns={'Last_Processing_Date(dd/mm/yyyy)': 'Last_Processing_Date'})
                 .sort_index())

    return an

# Load the AERONET file and preview its first ten rows.
aeronet_path = r'D:\Annies_Dissertation\Methodology\Validation\Comparision\050101_140630_Chilbolton.lev20'
Aeronet = read_aeronet(aeronet_path)

Aeronet.head(10)