## Set Libraries

In [1]:
import numpy as np
import xarray as xr
import rasterio
%matplotlib inline
from matplotlib.pyplot import *
from glob import glob
import os
import datetime
import glob
import pandas as pd
from rasterio_to_xarray import rasterio_to_xarray, xarray_to_rasterio

## Function to read in the MAIAC files into a data array
- create a function which reads in the files, determines the no data values, and creates a third dim 'time'
- create a list with all of the files to be put in the 3d array
- run the function on every element of list (map)

In [3]:
def maiac_file_to_da(filename):
    da = rasterio_to_xarray(filename)
    
    time_str = os.path.basename(filename)[17:-13]
    time_obj = datetime.datetime.strptime(time_str, '%Y%j%H%M')
    da.coords['time'] = time_obj
    
    return da

In [2]:
folders = glob.glob(r'D:\MAIAC_Data\Europe\h00v01\Projected\????')

In [3]:
folders

['D:\\MAIAC_Data\\Europe\\h00v01\\Projected\\2000',
 'D:\\MAIAC_Data\\Europe\\h00v01\\Projected\\2001',
 'D:\\MAIAC_Data\\Europe\\h00v01\\Projected\\2002',
 'D:\\MAIAC_Data\\Europe\\h00v01\\Projected\\2003',
 'D:\\MAIAC_Data\\Europe\\h00v01\\Projected\\2004',
 'D:\\MAIAC_Data\\Europe\\h00v01\\Projected\\2005',
 'D:\\MAIAC_Data\\Europe\\h00v01\\Projected\\2006',
 'D:\\MAIAC_Data\\Europe\\h00v01\\Projected\\2007',
 'D:\\MAIAC_Data\\Europe\\h00v01\\Projected\\2008',
 'D:\\MAIAC_Data\\Europe\\h00v01\\Projected\\2009',
 'D:\\MAIAC_Data\\Europe\\h00v01\\Projected\\2010',
 'D:\\MAIAC_Data\\Europe\\h00v01\\Projected\\2011',
 'D:\\MAIAC_Data\\Europe\\h00v01\\Projected\\2012',
 'D:\\MAIAC_Data\\Europe\\h00v01\\Projected\\2013',
 'D:\\MAIAC_Data\\Europe\\h00v01\\Projected\\2014']

## For loop to read MAIAC files in, year by year and create NetCDF files for each month of each year.
- Map runs function on every element of a list, creating a list of 2D arrays (the time)
- Make the dataset 3D over the dim time
- Reorder time because otherwise the resampling wont work
- Calculate daily median values 
- Group by month
- Save daily median images as a NetCDF for each month

### AOT

In [None]:
for folder in folders:
    files = glob.glob(r'{fname}\*_proj.tif'.format(fname=folder))
    list_of_das = map(maiac_file_to_da, files)
    MAIAC_AOT = xr.concat(list_of_das, 'time')
    reordered_MAIAC_AOT = MAIAC_AOT.isel(time=np.argsort(MAIAC_AOT.time))
    Daily_AOT = reordered_MAIAC_AOT.resample('D', dim='time', how='median')
    Daily_AOT = Daily_AOT.dropna(dim='time', how='all')
    print('Loaded all data for folder: {folder}'.format(folder=folder))
    g = Daily_AOT.groupby('time.month')
    
    for month, indices in g.groups.items():
        print('Processing month: {month}'.format(month=month))
        subset = Daily_AOT.isel(time=indices)
        sub_ds = subset.to_dataset(name = 'data')
        filename = r'D:\MAIAC_Data\Europe\h00v01\nc_monthly\{fol_name}_{month}_AOT.nc'.format(fol_name=os.path.basename(folder), month=month)
        sub_ds.to_netcdf(filename)
        print(filename)
        
        print('Processed {fname}'.format(fname=folder))

### PM2.5

In [10]:
for folder in folders:
    files = glob.glob(r'{fname}\*_PM25.tif'.format(fname=folder))
    list_of_das = map(maiac_file_to_da, files)
    MAIAC_PM25 = xr.concat(list_of_das, 'time')
    reordered_MAIAC_PM25 = MAIAC_PM25.isel(time=np.argsort(MAIAC_PM25.time))
    Daily_PM25 = reordered_MAIAC_PM25.resample('D', dim='time', how='median')
    Daily_PM25 = Daily_PM25.dropna(dim='time', how='all')
    print('Loaded all data for folder: {folder}'.format(folder=folder))
    g = Daily_PM25.groupby('time.month')
    
    for month, indices in g.groups.items():
        print('Processing month: {month}'.format(month=month))
        subset = Daily_PM25.isel(time=indices)
        print(subset.x)
        sub_ds = subset.to_dataset(name = 'data')
        filename = r'C:\MAIACData\nc_monthly_daily_new\{fol_name}_{month}_PM25.nc'.format(fol_name=os.path.basename(folder), month=month)
        sub_ds.to_netcdf(filename)
        print(filename)
        
        print('Processed {fname}'.format(fname=folder))

Loaded all data for folder: D:\MAIAC_Data\Europe\h00v01\Projected\2009
Processing month: 1
<xarray.DataArray 'x' (x: 1240)>
array([   0,    1,    2, ..., 1237, 1238, 1239])
Coordinates:
  * x        (x) int32 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 ...
C:\MAIACData\nc_monthly_daily_new\2009_1_PM25.nc
Processed D:\MAIAC_Data\Europe\h00v01\Projected\2009
Processing month: 2
<xarray.DataArray 'x' (x: 1240)>
array([   0,    1,    2, ..., 1237, 1238, 1239])
Coordinates:
  * x        (x) int32 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 ...
C:\MAIACData\nc_monthly_daily_new\2009_2_PM25.nc
Processed D:\MAIAC_Data\Europe\h00v01\Projected\2009
Processing month: 3
<xarray.DataArray 'x' (x: 1240)>
array([   0,    1,    2, ..., 1237, 1238, 1239])
Coordinates:
  * x        (x) int32 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 ...
C:\MAIACData\nc_monthly_daily_new\2009_3_PM25.nc
Processed D:\MAIAC_Data\Europe\h00v01\Projected\2009
Processing month: 4
<xarray.DataArray 