<center><h2><font color="red" size="+3">Reading Aeronet Data Files with Pandas</font></h2></center>

In [None]:
import datetime
import numpy as np
import pandas as pd
# Report the pandas version the kernel is running.
print('Using pandas version ',pd.__version__)

In [None]:
%matplotlib inline
import matplotlib.pyplot as plt
import seaborn as sns
# Apply seaborn's default styling to all matplotlib figures and override the
# default figure size to 11 x 4 via the 'figure.figsize' rc parameter.
sns.set(rc={'figure.figsize':(11, 4)})

### Reading the data

- The file contains 6 metadata rows (to be skipped).
- Values are separated by commas (`,`).

In [None]:
# NOTE(review): `url` is not referenced by the read_csv calls visible in this
# notebook, which open `filename` relative to the working directory. It also
# points at a GitHub "/blob/" page, which normally serves HTML rather than the
# raw file — confirm before using it to download data.
url = "https://github.com/astg606/py_materials/blob/master/aeronet/"
# Data file read by the cells below.
filename = "19930101_20190209_Dakar.ONEILL_lev20"

# Alternative input file; uncomment to use it instead of the one above.
#filename = "19930101_20190209_GSFC.lev20"

In [None]:
# Load the comma-separated table, skipping the 6 metadata rows at the top.
aero_df = pd.read_csv(
    filename,
    sep=",",
    skiprows=6,
)

In [None]:
# Show the frame as loaded, before any clean-up.
aero_df

Delete last column:

In [None]:
# Label of the right-most column (negative index counts from the end).
aero_df.columns[-1]

In [None]:
# Remove the right-most column from the frame, modifying it in place.
last_column = aero_df.columns[-1]
aero_df.drop(columns=last_column, inplace=True)

In [None]:
# Show the frame again after the last column has been dropped.
aero_df

In [None]:
# Print the column dtypes, non-null counts and memory usage.
# `info` must be *called*: a bare `aero_df.info` only echoes the bound method's
# repr (which embeds the whole frame) instead of producing the summary.
aero_df.info()

In [None]:
# Summary statistics, flipped so that each original column becomes a row.
aero_df.describe().T

#### Combine the first two columns into a `datetime` object

- We combine the first two columns (Date and Time) into a single `datetime` column that becomes the index of the DataFrame. This allows us to easily manipulate the data.
- Set all `-999` as missing values (`NaN`).

In [None]:
# Re-read the file, treating -999 as missing, and index the rows by timestamp.
#
# The timestamp is built explicitly with pd.to_datetime rather than through
# read_csv's `parse_dates={...}` / `date_parser=` / `squeeze=` arguments: those
# are deprecated or removed in pandas 2.x, so the old call fails on current
# pandas. This form works on old and new releases alike.

# Layout of the combined "<date> <time>" string built from the first two columns.
DATETIME_FORMAT = '%d:%m:%Y %H:%M:%S'

aero_raw = pd.read_csv(filename, skiprows=6, na_values=-999)

# The first two columns are addressed by position, so their exact header text
# does not matter.
date_col, time_col = aero_raw.columns[:2]
timestamps = pd.to_datetime(
    aero_raw[date_col].astype(str) + ' ' + aero_raw[time_col].astype(str),
    format=DATETIME_FORMAT,
)

# Replace the two text columns by a single DatetimeIndex named 'datetime'.
aero_df = aero_raw.drop(columns=[date_col, time_col])
aero_df.index = pd.DatetimeIndex(timestamps, name='datetime')

In [None]:
# Show the re-read frame, now indexed by the combined `datetime` column.
aero_df

#### Drop any rows that are all `NaN` and any cols that are all `NaN`

In [None]:
# First discard columns holding no data at all, then rows holding no data at all.
without_empty_columns = aero_df.dropna(axis="columns", how="all")
aero_df = without_empty_columns.dropna(axis="index", how="all")

In [None]:
# Show the frame after the all-NaN columns and rows have been removed.
aero_df

#### Select four columns to form a new DataFrame

In [None]:
# Remaining column labels as a plain Python list.
aero_df.columns.tolist()

In [None]:
# The four 500 nm columns to keep, in the order they should appear.
columns_500nm = [
    'Total_AOD_500nm[tau_a]',
    'Fine_Mode_AOD_500nm[tau_f]',
    'Coarse_Mode_AOD_500nm[tau_c]',
    'FineModeFraction_500nm[eta]',
]
new_df = aero_df[columns_500nm]

In [None]:
# Show the four-column subset.
new_df

Drop any column or row that is all `NaN`:

In [None]:
# Discard columns with no data at all, then rows with no data at all.
without_empty_columns = new_df.dropna(axis="columns", how="all")
new_df = without_empty_columns.dropna(axis="index", how="all")

Rename the columns:

In [None]:
# Give the columns short names, keyed by their original labels.
# A name-based mapping (rather than assigning a 4-item list to `.columns`) still
# works if an all-NaN column was dropped beforehand, does not depend on column
# order, and is a no-op when the cell is re-run.
# NOTE(review): the short names are kept exactly as the positional assignment
# produced them because later cells use them, but they read oddly against the
# source labels (e.g. 'aotc' is the *fine*-mode AOD) — confirm the intended naming.
new_df = new_df.rename(columns={
    'Total_AOD_500nm[tau_a]': 'aot',
    'Fine_Mode_AOD_500nm[tau_f]': 'aotc',
    'Coarse_Mode_AOD_500nm[tau_c]': 'faotf',
    'FineModeFraction_500nm[eta]': 'faotc',
})

In [None]:
# Summary statistics, one row per column.
new_df.describe().T

#### Determine the daily means:

In [None]:
# Group the observations into calendar-day bins and average each bin.
daily_bins = new_df.resample("D")
new_df_dm = daily_bins.mean()

In [None]:
# Show the daily means, before all-NaN rows/columns are dropped.
new_df_dm

In [None]:
# Discard columns with no data at all, then days with no data at all.
without_empty_columns = new_df_dm.dropna(axis="columns", how="all")
new_df_dm = without_empty_columns.dropna(axis="index", how="all")

In [None]:
# Show the daily means after all-NaN rows/columns have been dropped.
new_df_dm

Make various plots:

In [None]:
# Full daily-mean record, one panel per column (trailing ';' hides the repr).
new_df_dm.plot(subplots=True, linewidth=0.5);

In [None]:
# Daily means from 2012 through 2018, one line panel per column.
new_df_dm.loc['2012':'2018'].plot(subplots=True, linewidth=0.9);

In [None]:
# Same 2012-2018 window drawn as filled areas, one panel per column.
new_df_dm.loc['2012':'2018'].plot.area(subplots=True, linewidth=0);

In [None]:
import matplotlib.dates as mdates

# Daily means for 2012-2018, drawn as small grey dots in four stacked panels
# that share the x axis (one panel per column).
df = new_df_dm['2012':'2018']

fig, ax = plt.subplots(nrows=4, ncols=1, sharex=True)

for panel, column in zip(ax, ['aot', 'aotc', 'faotf', 'faotc']):
    panel.plot(df[column], marker='.',
               markersize=2, color='0.6', linestyle='None', label=column)
    #panel.xaxis.set_major_locator(mdates.YearLocator())
    panel.legend()
    panel.set_xlabel('Year')

plt.tight_layout()

#### Determine the annual means:

In [None]:
# Annual means, one row per calendar year, labelled with the year-end date.
# An explicit YearEnd offset is passed instead of the string alias "A": that
# alias is deprecated in pandas 2.2 in favour of "YE", whereas the offset object
# gives the same year-end bins on both older and newer pandas releases.
new_df_am = new_df.resample(pd.offsets.YearEnd()).mean()

In [None]:
# Show the annual means.
new_df_am

In [None]:
# Annual means as blue dashed lines, one panel per column.
new_df_am.plot(subplots=True, style='b--');

#### Extract the values for 2017 only

In [None]:
# `new_df_mm` was displayed here without ever being assigned, so the cell raised
# NameError on a fresh kernel. It is defined as the monthly means of `new_df`.
# NOTE(review): "monthly means" is inferred from the `_dm` (daily) / `_am`
# (annual) naming used for the other aggregates — confirm.
# A MonthEnd offset object is used so the call does not depend on the
# version-specific "M" / "ME" string aliases.
new_df_mm = new_df.resample(pd.offsets.MonthEnd()).mean()
new_df_mm

In [None]:
# Total AOD ('aot') from February through November 2010.
new_df.loc['2010-02':'2010-11', 'aot'].plot();

In [None]:
# All columns from February through November 2010, one panel each.
new_df.loc['2010-02':'2010-11'].plot(subplots=True);

In [None]:
# Keep only the rows whose timestamp falls in 2017.
in_2017 = new_df.index.year == 2017
df2017 = new_df[in_2017]

In [None]:
# Show the 2017 subset.
df2017

#### Extract the values for April 2017 only

In [None]:
# From the 2017 rows, keep only those in April (month 4).
in_april = df2017.index.month == 4
df201704 = df2017[in_april]

In [None]:
# Single-step alternative to the two filters above. Each comparison needs its
# own parentheses because `&` binds more tightly than `==`.
#df201704 = new_df[(new_df.index.year == 2017) & (new_df.index.month == 4)]

In [None]:
# Show the April 2017 subset.
df201704

In [None]:
# Scatter of the 'aotc' column against 'aot' for April 2017.
df201704.plot.scatter(x='aot', y='aotc');

#### Average daily values of April 2017

In [None]:
# Group the April 2017 observations into calendar-day bins and average each bin.
april_daily_bins = df201704.resample("D")
df201704_dm = april_daily_bins.mean()

In [None]:
# Show the April 2017 daily means.
df201704_dm

In [None]:
# Summary statistics of the April 2017 daily means, one row per column.
df201704_dm.describe().T