## Import libraries

In [5]:
import numpy as np
import xarray as xr
import rasterio
%matplotlib inline
from matplotlib.pyplot import *
from glob import glob
import os
import datetime
import glob
import pandas as pd
from rasterio_to_xarray import rasterio_to_xarray, xarray_to_rasterio

## Function to read a MAIAC file into a data array

In [6]:
def maiac_file_to_da(filename):
    """Read one MAIAC raster and attach the time encoded in its file name.

    The raster is loaded with ``rasterio_to_xarray``. The slice ``[17:-13]`` of
    the base file name is parsed with the format ``%Y%j%H%M`` (year, day of
    year, hour, minute) and stored as the ``time`` coordinate of the result.

    Parameters:
        filename: path of the raster file to read.

    Returns:
        The object returned by ``rasterio_to_xarray`` with ``coords['time']``
        set to the parsed ``datetime.datetime``.

    Raises:
        ValueError: from ``strptime`` when the sliced text does not match
            ``%Y%j%H%M``.
    """
    raster = rasterio_to_xarray(filename)

    base_name = os.path.basename(filename)
    # Fixed offsets: assumes the timestamp always occupies these positions in
    # the file name -- TODO confirm against the MAIAC naming scheme.
    stamp_text = base_name[17:-13]
    raster.coords['time'] = datetime.datetime.strptime(stamp_text, '%Y%j%H%M')

    return raster

# All projected GeoTIFFs across every four-character (year) sub-folder.
# The name `glob` refers to the module here (the later `import glob` rebinds
# the function imported by `from glob import glob`), so the function has to be
# called as glob.glob -- calling glob(...) raises TypeError.
files = glob.glob(r'D:\MAIAC_Data\Europe\h00v01\Projected\????\*_proj.tif')

In [10]:
# One folder per year: any four-character directory name under Projected.
# glob returns matches in arbitrary order, so sort to make the processing
# order (and the log output) deterministic.
folders = sorted(glob.glob(r'D:\MAIAC_Data\Europe\h00v01\Projected\????'))

In [7]:
# Optional: uncomment to process only the 2014 folder under the MAIAC_Test directory instead.
#folders = glob.glob(r'D:\Annies_Dissertation\MAIAC_Test\Projected\2014')

In [11]:
# Display the list of matched folders.
folders

['D:\\MAIAC_Data\\Europe\\h00v01\\Projected\\2000',
 'D:\\MAIAC_Data\\Europe\\h00v01\\Projected\\2001',
 'D:\\MAIAC_Data\\Europe\\h00v01\\Projected\\2002',
 'D:\\MAIAC_Data\\Europe\\h00v01\\Projected\\2003',
 'D:\\MAIAC_Data\\Europe\\h00v01\\Projected\\2004',
 'D:\\MAIAC_Data\\Europe\\h00v01\\Projected\\2005',
 'D:\\MAIAC_Data\\Europe\\h00v01\\Projected\\2006',
 'D:\\MAIAC_Data\\Europe\\h00v01\\Projected\\2007',
 'D:\\MAIAC_Data\\Europe\\h00v01\\Projected\\2008',
 'D:\\MAIAC_Data\\Europe\\h00v01\\Projected\\2009',
 'D:\\MAIAC_Data\\Europe\\h00v01\\Projected\\2010',
 'D:\\MAIAC_Data\\Europe\\h00v01\\Projected\\2011',
 'D:\\MAIAC_Data\\Europe\\h00v01\\Projected\\2012',
 'D:\\MAIAC_Data\\Europe\\h00v01\\Projected\\2013',
 'D:\\MAIAC_Data\\Europe\\h00v01\\Projected\\2014']

# Preview: list the *_proj.tif files found in each folder.
for folder in folders:
    # Raw string: in a normal string literal '\*' is an invalid escape
    # sequence (a warning today, an error in future Python versions).
    files = glob.glob(r'{fname}\*_proj.tif'.format(fname=folder))
    print('Processing {fname}'.format(fname=folder))
    print(files)

## For loop to read the MAIAC files in year by year and create one NetCDF file per year

### AOT

In [13]:
# For every folder: read all *_proj.tif files, stack them along 'time',
# sort by time and write the result to <folder name>_AOT.nc.
for folder in folders:
    files = glob.glob(r'{fname}\*_proj.tif'.format(fname=folder))
    if not files:
        # xr.concat raises on an empty sequence; skip this folder so that one
        # folder without matching files does not abort the remaining ones.
        print('No *_proj.tif files found in {fname}, skipping'.format(fname=folder))
        continue

    list_of_das = map(maiac_file_to_da, files)
    MAIAC_AOT = xr.concat(list_of_das, 'time')
    # glob gives no ordering guarantee, so reorder the stack by its time coordinate.
    reordered_MAIAC_AOT = MAIAC_AOT.isel(time=np.argsort(MAIAC_AOT.time))
    ds = reordered_MAIAC_AOT.to_dataset(name = 'data')
    # The output file is named after the folder's base name.
    ds.to_netcdf(r'D:\MAIAC_Data\Europe\h00v01\nc_files\{fol_name}_AOT.nc'.format(fol_name=os.path.basename(folder)))
    
    print('Processed {fname}'.format(fname=folder))

Processed D:\MAIAC_Data\Europe\h00v01\Projected\2000
Processed D:\MAIAC_Data\Europe\h00v01\Projected\2001
Processed D:\MAIAC_Data\Europe\h00v01\Projected\2002
Processed D:\MAIAC_Data\Europe\h00v01\Projected\2003
Processed D:\MAIAC_Data\Europe\h00v01\Projected\2004
Processed D:\MAIAC_Data\Europe\h00v01\Projected\2005
Processed D:\MAIAC_Data\Europe\h00v01\Projected\2006
Processed D:\MAIAC_Data\Europe\h00v01\Projected\2007
Processed D:\MAIAC_Data\Europe\h00v01\Projected\2008
Processed D:\MAIAC_Data\Europe\h00v01\Projected\2009
Processed D:\MAIAC_Data\Europe\h00v01\Projected\2010
Processed D:\MAIAC_Data\Europe\h00v01\Projected\2011
Processed D:\MAIAC_Data\Europe\h00v01\Projected\2012
Processed D:\MAIAC_Data\Europe\h00v01\Projected\2013
Processed D:\MAIAC_Data\Europe\h00v01\Projected\2014


### PM2.5

In [14]:
# For every folder: read all *_PM25.tif files, stack them along 'time',
# sort by time and write the result to <folder name>_PM25.nc.
# NOTE(review): the *_AOT variable names hold PM2.5 data in this cell; they
# are kept unchanged in case other cells rely on them.
for folder in folders:
    files = glob.glob(r'{fname}\*_PM25.tif'.format(fname=folder))
    if not files:
        # xr.concat raises on an empty sequence; skip this folder so that one
        # folder without matching files does not abort the remaining ones.
        print('No *_PM25.tif files found in {fname}, skipping'.format(fname=folder))
        continue

    list_of_das = map(maiac_file_to_da, files)
    MAIAC_AOT = xr.concat(list_of_das, 'time')
    # glob gives no ordering guarantee, so reorder the stack by its time coordinate.
    reordered_MAIAC_AOT = MAIAC_AOT.isel(time=np.argsort(MAIAC_AOT.time))
    ds = reordered_MAIAC_AOT.to_dataset(name = 'data')
    # The output file is named after the folder's base name.
    ds.to_netcdf(r'D:\MAIAC_Data\Europe\h00v01\nc_files\{fol_name}_PM25.nc'.format(fol_name=os.path.basename(folder)))
    
    print('Processed {fname}'.format(fname=folder))

Processed D:\MAIAC_Data\Europe\h00v01\Projected\2000
Processed D:\MAIAC_Data\Europe\h00v01\Projected\2001
Processed D:\MAIAC_Data\Europe\h00v01\Projected\2002
Processed D:\MAIAC_Data\Europe\h00v01\Projected\2003
Processed D:\MAIAC_Data\Europe\h00v01\Projected\2004
Processed D:\MAIAC_Data\Europe\h00v01\Projected\2005
Processed D:\MAIAC_Data\Europe\h00v01\Projected\2006
Processed D:\MAIAC_Data\Europe\h00v01\Projected\2007
Processed D:\MAIAC_Data\Europe\h00v01\Projected\2008
Processed D:\MAIAC_Data\Europe\h00v01\Projected\2009
Processed D:\MAIAC_Data\Europe\h00v01\Projected\2010
Processed D:\MAIAC_Data\Europe\h00v01\Projected\2011
Processed D:\MAIAC_Data\Europe\h00v01\Projected\2012
Processed D:\MAIAC_Data\Europe\h00v01\Projected\2013
Processed D:\MAIAC_Data\Europe\h00v01\Projected\2014


# Open every NetCDF file matching the pattern as a single dataset joined
# along 'time' (chunk size 10 along that dimension) and keep only its
# 'data' variable.
nc_pattern = r'D:\Annies_Dissertation\Methodology\Validation\AERONET\Test\*.nc'
all_data = xr.open_mfdataset(nc_pattern, chunks={'time':10}, concat_dim='time')['data']

## For loop to read the MAIAC data in, extract AOT for a specific pixel (Chilbolton) and export it as CSV

In [8]:
# For every folder: stack the *_proj.tif files along 'time', take the time
# series of the single pixel at x=1103, y=1027, drop missing values and
# append the rows to test.csv.
test_csv = r'D:\Annies_Dissertation\Methodology\Validation\AERONET\Test\test.csv'
for folder in folders:
    files = glob.glob(r'{fname}\*_proj.tif'.format(fname=folder))
    if not files:
        # xr.concat raises on an empty sequence; skip this folder so that one
        # folder without matching files does not abort the remaining ones.
        print('No *_proj.tif files found in {fname}, skipping'.format(fname=folder))
        continue

    list_of_das = map(maiac_file_to_da, files)
    MAIAC_AOT = xr.concat(list_of_das, 'time')
    MAIAC_AOT.attrs.clear()
    reordered_MAIAC_AOT = MAIAC_AOT.isel(time=np.argsort(MAIAC_AOT.time))
    Measurements = reordered_MAIAC_AOT.isel(x=1103, y=1027).dropna('time').to_dataframe(name='MAIAC_AOT')
    # Appending with the default header=True would repeat the column header
    # in the middle of the file for every folder; write it only when the file
    # is new or empty.
    write_header = not os.path.exists(test_csv) or os.path.getsize(test_csv) == 0
    Measurements.to_csv(test_csv, mode='a', header=write_header)
    print('Processed {fname}'.format(fname=folder))

Processed D:\MAIAC_Data\Europe\h00v01\Projected\2007


# Write the current `Measurements` frame to Alldata.csv, replacing any
# existing file (to_csv uses write mode by default).
# NOTE(review): `Measurements` is rebound on every pass of the extraction
# loop, so only the last processed folder ends up here -- confirm that this
# is intended.
alldata_csv = r'D:\Annies_Dissertation\Methodology\Validation\AERONET\Test\Alldata.csv'
Measurements.to_csv(alldata_csv)

In [None]:
    # Append the current `Measurements` frame to Alldata.csv.
    # file.write() only accepts text, so passing the DataFrame itself raises
    # TypeError; let pandas serialise into the open handle instead. The
    # `with` block closes the file even if writing fails.
    alldata_csv = r'D:\Annies_Dissertation\Methodology\Validation\AERONET\Test\Alldata.csv'
    # Only emit the column header when the file is new or empty, so appended
    # rows are not interrupted by a second header line.
    write_header = not os.path.exists(alldata_csv) or os.path.getsize(alldata_csv) == 0
    # newline='' is what pandas asks for when it is handed an open text file.
    with open(alldata_csv, 'a', newline='') as doc:
        Measurements.to_csv(doc, header=write_header)