# Get some data in

## Set libraries

In [3]:
import numpy as np
import xarray as xr
import rasterio
%matplotlib inline
from matplotlib.pyplot import *
from glob import glob
import glob
import os
import datetime
import pandas as pd
from rasterio_to_xarray import rasterio_to_xarray, xarray_to_rasterio

## Function to read in the MAIAC files into a data array

In [2]:
def maiac_file_to_da(filename):
    da = rasterio_to_xarray(filename)
    
    time_str = os.path.basename(filename)[17:-13]
    time_obj = datetime.datetime.strptime(time_str, '%Y%j%H%M')
    da.coords['time'] = time_obj
    
    return da

In [None]:
# get a list of files
files = glob(r'D:\MAIAC_Data\Europe\h00v01\Projected\2011\*_proj.tif')
# Map runs function on every element of a list... creating a list of 2D arrays (the date)
list_of_das = map(maiac_file_to_da, files)
# Make the dataset 3D over the dimension time
MAIAC_AOT = xr.concat(list_of_das, 'time')
# Reorder time- because the A and T mess the time variable up and resampling wont work otherwise
reordered_MAIAC_AOT = MAIAC_AOT.isel(time=np.argsort(MAIAC_AOT.time))
# Get AOT values for all of the years for Chilbolton pixel from MAIAC data
Measurements = reordered_MAIAC_AOT.isel(x=1103, y=1027).dropna('time').to_dataframe(name='MAIAC_AOT')
Measurements.to_csv(r'D:\Annies_Dissertation\Methodology\Validation\AERONET\Test\2011.csv')

# Put all in loop so it goes through each folder and produces the csv file

In [7]:
folders = glob.glob(r'D:\MAIAC_Data\Europe\h00v01\Projected\20*')

In [8]:
for folder in folders:
    files = glob.glob(r'{fname}\*_proj.tif'.format(fname=folder))
    list_of_das = map(maiac_file_to_da, files)
    MAIAC_AOT = xr.concat(list_of_das, 'time')
    MAIAC_AOT.attrs.clear()
    reordered_MAIAC_AOT = MAIAC_AOT.isel(time=np.argsort(MAIAC_AOT.time))
    Measurements = reordered_MAIAC_AOT.isel(x=1103, y=1027).dropna('time').to_dataframe(name='MAIAC_AOT')
    Measurements.to_csv(r'D:\Annies_Dissertation\Methodology\Validation\AERONET\Test\Alldata.csv', mode='a')
    print('Processed {fname}'.format(fname=folder))

Processed D:\MAIAC_Data\Europe\h00v01\Projected\2007


In [4]:
MAIAC_Chil = pd.read_csv(r'D:\Annies_Dissertation\Methodology\Validation\AERONET\Test\Alldata.csv')

In [5]:
MAIAC_Chil[:10]

Unnamed: 0,time,x,y,MAIAC_AOT
0,25/02/2000 10:50,138808.1057,438327.3471,83
1,26/02/2000 11:35,138808.1057,438327.3471,164
2,12/03/2000 12:30,138808.1057,438327.3471,205
3,15/03/2000 11:25,138808.1057,438327.3471,186
4,22/03/2000 11:30,138808.1057,438327.3471,591
5,06/04/2000 10:45,138808.1057,438327.3471,331
6,07/04/2000 11:30,138808.1057,438327.3471,210
7,08/04/2000 10:35,138808.1057,438327.3471,311
8,08/04/2000 12:15,138808.1057,438327.3471,319
9,10/04/2000 12:00,138808.1057,438327.3471,149


# Read in AERONET data file into pandas DataFrame

In [6]:
import pandas as pd

In [7]:
def read_aeronet(filename):
    """Read a given AERONET AOT data file, and return it as a dataframe.
    
    This returns a DataFrame containing the AERONET data, with the index
    set to the timestamp of the AERONET observations. Rows or columns
    consisting entirely of missing data are removed. All other columns
    are left as-is.
    """
    dateparse = lambda x: pd.datetime.strptime(x, "%d:%m:%Y %H:%M:%S")
    aeronet = pd.read_csv(filename, skiprows=4, na_values=['N/A'],
                          parse_dates={'times':[0,1]},
                          date_parser=dateparse)

    aeronet = aeronet.set_index('times')
    del aeronet['Julian_Day']
    
    # Drop any rows that are all NaN and any cols that are all NaN
    # & then sort by the index
    an = (aeronet.dropna(axis=1, how='all')
                .dropna(axis=0, how='all')
                .rename(columns={'Last_Processing_Date(dd/mm/yyyy)': 'Last_Processing_Date'})
                .sort_index())

    return an

In [12]:
Aeronet = read_aeronet(r'D:\Annies_Dissertation\Methodology\Validation\AERONET\All_data050101_161231_Chilbolton.lev20')

In [24]:
Aeronet[:10]

Unnamed: 0_level_0,AOT_1640,AOT_1020,AOT_870,AOT_675,AOT_500,AOT_440,AOT_380,AOT_340,Water(cm),%TripletVar_1640,...,%TripletVar_440,%TripletVar_380,%TripletVar_340,440-870Angstrom,380-500Angstrom,440-675Angstrom,500-870Angstrom,340-440Angstrom,Last_Processing_Date,Solar_Zenith_Angle
times,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2005-10-10 12:38:46,,0.079535,0.090636,0.143492,0.246959,0.301443,0.373063,0.43035,2.115728,,...,0.196419,0.181772,0.532137,1.776185,1.495202,1.757222,1.808187,1.368259,17/10/2006,58.758553
2005-10-10 12:53:45,,0.076091,0.088152,0.142735,0.25009,0.306848,0.381753,0.442439,2.15121,,...,0.268519,0.575421,0.669254,1.842931,1.532986,1.812417,1.88065,1.406191,17/10/2006,59.391479
2005-10-10 12:59:46,,0.080473,0.092602,0.148098,0.258315,0.316427,0.393984,0.45695,2.158919,,...,0.207976,0.198821,0.368519,1.816853,1.530241,1.797861,1.850624,1.412028,17/10/2006,59.692552
2005-10-10 13:08:45,,0.078049,0.090091,0.145124,0.252962,0.309554,0.385898,0.447138,2.045271,,...,0.259162,0.420704,0.170024,1.824157,1.531183,1.794243,1.862053,1.41339,17/10/2006,60.190763
2005-10-10 13:23:46,,0.07752,0.090136,0.145518,0.255085,0.313247,0.390174,0.452175,1.993235,,...,0.465213,0.445026,0.148857,1.840785,1.54041,1.815273,1.876348,1.410697,17/10/2006,61.149727
2005-10-10 13:38:45,,0.079958,0.094025,0.152432,0.268651,0.330463,0.413113,0.478949,1.90113,,...,0.20801,0.193379,0.396386,1.857612,1.559833,1.832172,1.893654,1.426591,17/10/2006,62.25673
2005-10-10 13:53:45,,0.080028,0.094143,0.152165,0.268086,0.329825,0.412917,0.479721,1.880605,,...,0.656559,0.769271,0.795513,1.853189,1.565855,1.831613,1.887726,1.439973,17/10/2006,63.506309
2005-10-10 14:23:45,,0.094797,0.109809,0.167836,0.284558,0.346456,0.428425,0.49393,1.868666,,...,0.695588,0.652941,0.543619,1.700525,1.483255,1.714417,1.718805,1.362961,17/10/2006,66.39197
2005-10-10 15:05:06,,0.100339,0.117901,0.179424,0.301536,0.365371,0.450319,0.515718,1.969662,,...,0.511912,0.196935,0.279,1.674118,1.454016,1.682888,1.69492,1.325517,17/10/2006,71.092634
2005-10-10 15:11:24,,0.106938,0.123496,0.182839,0.301331,0.362893,0.44584,0.509917,1.943579,,...,0.210172,0.648805,0.788035,1.596785,1.420574,1.621683,1.610606,1.308019,17/10/2006,71.871554


In [21]:
col_list = [' ', 'AOT_440']

In [22]:
Aeronet_440 = Aeronet[col_list]

KeyError: "[' '] not in index"

In [25]:
Aeronet_440 = Aeronet.drop(['AOT_1640', 'AOT_1020', 'AOT_870', 'AOT_675', 'AOT_555', 'AOT_551', 'AOT_500', 'AOT_490', 'AOT_443', 'AOT_412', 
                            'AOT_380', 'AOT_340', 'Water(cm)'], axis=1)

ValueError: labels ['AOT_555' 'AOT_551' 'AOT_490' 'AOT_443' 'AOT_412'] not contained in axis

In [26]:
Aeronet.to_csv(r'D:\Annies_Dissertation\Methodology\Validation\AERONET\Test\Aeronet.csv', mode='w')
#then delete columns in excel!!

In [17]:
Aeronet_440 = pd.read_csv(r'D:\Annies_Dissertation\Methodology\Validation\AERONET\Aeronet440.csv')

In [18]:
Aeronet_440[:10]

Unnamed: 0,times,AOT_440
0,10/10/2005 12:38,0.301443
1,10/10/2005 12:53,0.306848
2,10/10/2005 12:59,0.316427
3,10/10/2005 13:08,0.309554
4,10/10/2005 13:23,0.313247
5,10/10/2005 13:38,0.330463
6,10/10/2005 13:53,0.329825
7,10/10/2005 14:23,0.346456
8,10/10/2005 15:05,0.365371
9,10/10/2005 15:11,0.362893


In [21]:
merged_inner = AERONET_440.merge(MAIAC_Chil, left_on='times', right_on='time', how='inner')

NameError: name 'AERONET_440' is not defined