# Get some data in

## Set libraries

In [1]:
import numpy as np
import xarray as xr
import rasterio
%matplotlib inline
from matplotlib.pyplot import *
from glob import glob
import glob
import os
import datetime
import pandas as pd
from rasterio_to_xarray import rasterio_to_xarray, xarray_to_rasterio

## Function to read in the MAIAC files into a data array

In [2]:
def maiac_file_to_da(filename):
    da = rasterio_to_xarray(filename)
    
    time_str = os.path.basename(filename)[17:-13]
    time_obj = datetime.datetime.strptime(time_str, '%Y%j%H%M')
    da.coords['time'] = time_obj
    
    return da

In [None]:
# get a list of files
files = glob(r'D:\MAIAC_Data\Europe\h00v01\Projected\2011\*_proj.tif')
# Map runs function on every element of a list... creating a list of 2D arrays (the date)
list_of_das = map(maiac_file_to_da, files)
# Make the dataset 3D over the dimension time
MAIAC_AOT = xr.concat(list_of_das, 'time')
# Reorder time- because the A and T mess the time variable up and resampling wont work otherwise
reordered_MAIAC_AOT = MAIAC_AOT.isel(time=np.argsort(MAIAC_AOT.time))
# Get AOT values for all of the years for Chilbolton pixel from MAIAC data
Measurements = reordered_MAIAC_AOT.isel(x=1103, y=1027).dropna('time').to_dataframe(name='MAIAC_AOT')
Measurements.to_csv(r'D:\Annies_Dissertation\Methodology\Validation\AERONET\Test\2011.csv')

# Put all in loop so it goes through each folder and produces the csv file

In [7]:
folders = glob.glob(r'D:\MAIAC_Data\Europe\h00v01\Projected\20*')

In [8]:
for folder in folders:
    files = glob.glob(r'{fname}\*_proj.tif'.format(fname=folder))
    list_of_das = map(maiac_file_to_da, files)
    MAIAC_AOT = xr.concat(list_of_das, 'time')
    MAIAC_AOT.attrs.clear()
    reordered_MAIAC_AOT = MAIAC_AOT.isel(time=np.argsort(MAIAC_AOT.time))
    Measurements = reordered_MAIAC_AOT.isel(x=1103, y=1027).dropna('time').to_dataframe(name='MAIAC_AOT')
    Measurements.to_csv(r'D:\Annies_Dissertation\Methodology\Validation\AERONET\Test\Alldata.csv', mode='a')
    print('Processed {fname}'.format(fname=folder))

Processed D:\MAIAC_Data\Europe\h00v01\Projected\2007


In [2]:
MAIAC_Chil = pd.read_csv(r'D:\Annies_Dissertation\Methodology\Validation\AERONET\Test\Alldata_FINAL.csv')

In [21]:
MAIAC_Chil[:10]

Unnamed: 0_level_0,x,y,AOT,AOT_New,time
Time,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
25/02/2000 10:50,138808.1057,438327.3471,83,0.083,25/02/2000 10:50
26/02/2000 11:35,138808.1057,438327.3471,164,0.164,26/02/2000 11:35
12/03/2000 12:30,138808.1057,438327.3471,205,0.205,12/03/2000 12:30
15/03/2000 11:25,138808.1057,438327.3471,186,0.186,15/03/2000 11:25
22/03/2000 11:30,138808.1057,438327.3471,591,0.591,22/03/2000 11:30
06/04/2000 10:45,138808.1057,438327.3471,331,0.331,06/04/2000 10:45
07/04/2000 11:30,138808.1057,438327.3471,210,0.21,07/04/2000 11:30
08/04/2000 10:35,138808.1057,438327.3471,311,0.311,08/04/2000 10:35
08/04/2000 12:15,138808.1057,438327.3471,319,0.319,08/04/2000 12:15
10/04/2000 12:00,138808.1057,438327.3471,149,0.149,10/04/2000 12:00


In [19]:
MAIAC_Chil = MAIAC_Chil.set_index('Time')

In [4]:
MAIAC_Chil['AOT_New']= MAIAC_Chil.AOT/1000

In [20]:
MAIAC_Chil['time'] = MAIAC_Chil.index

In [22]:
MAIAC_Chil = MAIAC_Chil.drop_duplicates(subset='time')

In [23]:
MAIAC_Chil = MAIAC_Chil.sort_index()

# Read in AERONET data file into pandas DataFrame

In [6]:
import pandas as pd

In [8]:
def read_aeronet(filename):
    """Read a given AERONET AOT data file, and return it as a dataframe.
    
    This returns a DataFrame containing the AERONET data, with the index
    set to the timestamp of the AERONET observations. Rows or columns
    consisting entirely of missing data are removed. All other columns
    are left as-is.
    """
    dateparse = lambda x: pd.datetime.strptime(x, "%d:%m:%Y %H:%M:%S")
    aeronet = pd.read_csv(filename, skiprows=4, na_values=['N/A'],
                          parse_dates={'times':[0,1]},
                          date_parser=dateparse)

    aeronet = aeronet.set_index('times')
    del aeronet['Julian_Day']
    
    # Drop any rows that are all NaN and any cols that are all NaN
    # & then sort by the index
    an = (aeronet.dropna(axis=1, how='all')
                .dropna(axis=0, how='all')
                .rename(columns={'Last_Processing_Date(dd/mm/yyyy)': 'Last_Processing_Date'})
                .sort_index())

    return an

In [9]:
Aeronet = read_aeronet(r'D:\Annies_Dissertation\Methodology\Validation\AERONET\All_data050101_161231_Chilbolton.lev20')

In [13]:
Aeronet[:10]

Unnamed: 0_level_0,AOT_440
times,Unnamed: 1_level_1
2005-10-10 12:38:46,0.301443
2005-10-10 12:53:45,0.306848
2005-10-10 12:59:46,0.316427
2005-10-10 13:08:45,0.309554
2005-10-10 13:23:46,0.313247
2005-10-10 13:38:45,0.330463
2005-10-10 13:53:45,0.329825
2005-10-10 14:23:45,0.346456
2005-10-10 15:05:06,0.365371
2005-10-10 15:11:24,0.362893


In [12]:
Aeronet = Aeronet[['AOT_440']].dropna()

In [14]:
Aeronet.to_csv(r'D:\Annies_Dissertation\Methodology\Validation\AERONET\Test\Aeronet.csv', mode='w')

In [15]:
Aeronet_440 = pd.read_csv(r'D:\Annies_Dissertation\Methodology\Validation\AERONET\Aeronet440.csv')

In [29]:
Aeronet_440[:10]

Unnamed: 0_level_0,AOT_440
times,Unnamed: 1_level_1
10/10/2005 12:38,0.301443
10/10/2005 12:53,0.306848
10/10/2005 12:59,0.316427
10/10/2005 13:08,0.309554
10/10/2005 13:23,0.313247
10/10/2005 13:38,0.330463
10/10/2005 13:53,0.329825
10/10/2005 14:23,0.346456
10/10/2005 15:05,0.365371
10/10/2005 15:11,0.362893


In [28]:
Aeronet_440 = Aeronet_440.set_index('times')

In [31]:
Aeronet_440 = Aeronet_440.sort_index()

In [32]:
indexed = MAIAC_Chil.reindex(Aeronet_440.index, method='nearest', tolerance='10Min').dropna()

TypeError: unsupported operand type(s) for -: 'str' and 'str'

In [13]:
merged_inner = Aeronet_440.merge(MAIAC_Chil, left_on='times', right_on='Time', how='inner')

In [None]:
pd.merge(Aer)

In [15]:
merged_inner

Unnamed: 0,times,AOT_440,Time,x,y,AOT
0,09/11/2005 11:50,0.053741,09/11/2005 11:50,138808.1057,438327.3471,55
1,06/04/2006 13:15,0.093942,06/04/2006 13:15,138808.1057,438327.3471,41
2,07/06/2006 11:50,0.52293,07/06/2006 11:50,138808.1057,438327.3471,438
3,08/06/2006 12:20,0.456957,08/06/2006 12:20,138808.1057,438327.3471,449
4,10/06/2006 12:20,0.163951,10/06/2006 12:20,138808.1057,438327.3471,133
5,01/07/2006 12:40,0.163668,01/07/2006 12:40,138808.1057,438327.3471,103
6,07/09/2006 12:05,0.108979,07/09/2006 12:05,138808.1057,438327.3471,48
7,07/09/2006 13:50,0.12149,07/09/2006 13:50,138808.1057,438327.3471,63
8,21/09/2006 12:15,0.257615,21/09/2006 12:15,138808.1057,438327.3471,137
9,17/12/2006 12:20,0.126042,17/12/2006 12:20,138808.1057,438327.3471,28


In [18]:
merged_inner.to_csv(r'D:\Annies_Dissertation\Methodology\Validation\AERONET\Test\Aeronet_MAIAC_Merged.csv', mode='w')