In [None]:
import sys
import csv
import pandas as pd
import numpy as np
import pytz
import glob
import matplotlib.pyplot as plt
import cartopy.crs as ccrs
import cartopy.feature as cfeature
import datetime

import bokeh
from bokeh.models import DatetimeTickFormatter
from bokeh.plotting import figure
from bokeh.layouts import gridplot, row, column
import bokeh.io
import bokeh.plotting

import holoviews as hv
# from holoviews.operation.datashader import datashade 
import holoviews.operation.datashader
hv.extension('bokeh')

bokeh.io.output_notebook()

### Check CH4 aircore
The goal is to compare CH4 from the AirCore samples used by TCCON against the 4x5 GEOS-Chem runs, to see whether the version of GEOS-Chem in use produces a satisfactory vertical distribution. Methane is the test case, before relying on the ethane/propane vertical profiles from GEOS-Chem. 

First, going to check a single file called the following on ccycle:  

```/oco2-data/tccon/analysis/jlaugh/Notebooks/TCCON/GGG2020-AICF/Data/aircores/v20201223/level2/icartt/AC_GMD007_202012171701_R0.ict ```

This file was downloaded to the local machine; the relevant variables were extracted and converted to CSV on 2/16/22. 

Questions
- do these files even have the right location (Lamont)? There are lots of possible files I could use 
- should I just pull a couple or use all of the files from the tccon directory 


FIRST, test how far off the time is when `Mid_UTC` is read directly as a UTC timestamp.  
For this first file, the header says it starts on 2020-12-17 and ends on 2020-12-18.



In [None]:
test = pd.read_csv('/Users/arianatribby/git/oklahoma_propane/data/tccon_aircore/AC_GMD007_202012171701_R0.ict.csv')

In [None]:
test.head()

In [None]:
# Interpret the raw Mid_UTC value as a POSIX timestamp to see how far off it is.
# `utcfromtimestamp` is deprecated since Python 3.12, so request a UTC-aware datetime explicitly.
datetime.datetime.fromtimestamp(test['Mid_UTC'].values[0], tz=datetime.timezone.utc)

Yup, very off. Need to use the start time in the file.

In [None]:
def converttodf(inputs, fill_threshold=-99998):
    """Combine AirCore CSV exports into one DataFrame with absolute UTC sample times.

    Parameters
    ----------
    inputs : str or iterable of str
        Path(s) of the CSV files to combine. Every file must have a ``Mid_UTC``
        column, which is added as seconds to 00:00 UTC of the flight date. The
        flight date is read from the file name, which must end in
        ``_<YYYYMMDD...>_<suffix>`` (e.g. ``AC_GMD007_202012171701_R0.ict.csv``).
    fill_threshold : float, optional
        Numeric values strictly below this threshold are replaced by NaN.
        The default (-99998) masks fill values such as -99999 while leaving
        negative longitudes alone. Note that a -9999 fill value is NOT masked
        by the default.

    Returns
    -------
    pandas.DataFrame
        Rows of all files stacked in input order, with the union of their
        columns (first-seen order), fill values masked, and a ``completetime``
        column of timezone-aware UTC datetimes.

    Raises
    ------
    ValueError
        If ``inputs`` is empty or a file name does not carry a YYYYMMDD date.
    """
    # Imported locally so the function keeps working even if a later notebook
    # cell rebinds the global name `datetime` (e.g. `from datetime import datetime`).
    import datetime as _dt

    if isinstance(inputs, str):
        inputs = [inputs]
    inputs = [str(path) for path in inputs]
    if not inputs:
        raise ValueError("converttodf needs at least one input file")

    frames = []
    complete_time_all_campaigns = []
    for filename in inputs:
        # The date token sits between the last two underscores of the name.
        parts = filename.rsplit("_", 2)
        date_token = parts[1][:8] if len(parts) == 3 else ""
        if len(date_token) != 8 or not date_token.isdigit():
            raise ValueError(
                f"cannot read a YYYYMMDD date from file name {filename!r}"
            )
        midnight_utc = _dt.datetime(
            int(date_token[0:4]), int(date_token[4:6]), int(date_token[6:8]),
            tzinfo=_dt.timezone.utc,
        )

        # Read each file once: it feeds both the combined table and the times.
        file_df = pd.read_csv(filename)
        frames.append(file_df)
        complete_time_all_campaigns.extend(
            midnight_utc + _dt.timedelta(seconds=seconds)
            for seconds in file_df['Mid_UTC'].to_list()
        )

    # Stack in memory instead of round-tripping through a scratch "out.csv".
    combined = pd.concat(frames, ignore_index=True, sort=False)

    # Mask fill values on numeric columns only, so text columns cannot break
    # the comparison.
    df_raw = combined.copy()
    numeric = combined.select_dtypes(include="number")
    for column in numeric.columns:
        df_raw[column] = numeric[column].mask(numeric[column] < fill_threshold)

    df_raw['completetime'] = np.array(complete_time_all_campaigns, dtype=object)

    return df_raw

In [None]:
path = "/Users/arianatribby/git/oklahoma_propane/data/tccon_aircore/AC_GMD007_202012171701_R0.ict.csv"
inputs = sorted(glob.glob(path))
print(path)
print(inputs)

In [None]:
df_raw = converttodf(inputs)

In [None]:
# https://stackoverflow.com/questions/35337299/python-datetime-to-float-with-millisecond-precision
# Convert the timezone-aware datetimes in `completetime` to POSIX seconds.
# The column is overwritten in place, so only convert while it still holds
# datetimes; this keeps the cell safe to re-run.
if not pd.api.types.is_numeric_dtype(df_raw['completetime']):
    res = [moment.timestamp() for moment in df_raw['completetime']]

    df_raw['completetime'] = res

In [None]:
df_raw.head()

In [None]:
# datetime(year, month, day[, hour[, minute[, second[, microsecond[,tzinfo]]]]])
# Sanity check: the first POSIX timestamp should map back to the flight date.
# `utcfromtimestamp` is deprecated since Python 3.12, so request a UTC-aware datetime explicitly.
datetime.datetime.fromtimestamp(df_raw['completetime'].values[0], tz=datetime.timezone.utc)

Looks good.

Now I want to know what locations this dataset contains. 

You can add in features with cartopy 
https://scitools.org.uk/cartopy/docs/latest/gallery/lines_and_polygons/features.html 
help(cfeature)
here is what is available: 
```
    BORDERS = <cartopy.feature.NaturalEarthFeature object>
    COASTLINE = <cartopy.feature.NaturalEarthFeature object>
    COLORS = {'land': array([0.9375  , 0.9375  , 0.859375]), 'land_alt1': ...
    LAKES = <cartopy.feature.NaturalEarthFeature object>
    LAND = <cartopy.feature.NaturalEarthFeature object>
    OCEAN = <cartopy.feature.NaturalEarthFeature object>
    RIVERS = <cartopy.feature.NaturalEarthFeature object>
    STATES = <cartopy.feature.NaturalEarthFeature object>
    absolute_import = _Feature((2, 5, 0, 'alpha', 1), (3, 0, 0, 'alpha', 0...
    division = _Feature((2, 2, 0, 'alpha', 2), (3, 0, 0, 'alpha', 0), 8192...
    print_function = _Feature((2, 6, 0, 'alpha', 2), (3, 0, 0, 'alpha', 0)...
```

Some tips for improving the clarity of your figure where you plot atom and hippo https://stackoverflow.com/questions/67508054/improve-resolution-of-cartopy-map

In [None]:
# from IPython.display import set_matplotlib_formats
# set_matplotlib_formats('svg')

# when activate that, also do this: 
# ax.coastlines(resolution='50m', color='black', linewidth=1)
# it makes the figures beautiful but also slow. 

In [None]:
# Global view of every sample location in df_raw on a plate carree map.
plate_carree = ccrs.PlateCarree()
ax = plt.axes(projection=plate_carree)
ax.stock_img()

track_style = dict(color='blue', linewidth=2, marker='o')
ax.plot(df_raw['LON'].values, df_raw['LAT'].values,
        transform=plate_carree, **track_style)
# ax.coastlines(resolution='50m', color='black', linewidth=1)
# NOTE: this 50m land feature is built here but not added to the axes in this cell.
land_50m = cfeature.NaturalEarthFeature(
    'physical', 'land', '50m',
    edgecolor='face',
    facecolor=cfeature.COLORS['land'],
)
ax.gridlines(crs=plate_carree, draw_labels=True,
             linewidth=2, color='gray', alpha=0.5, linestyle='--')

plt.show()

In [None]:
# Regional view (lon -110..-95, lat 33..44) of the df_raw track, with Lamont labelled.
plate_carree = ccrs.PlateCarree()
ax = plt.axes(projection=plate_carree)
ax.set_extent([-110, -95, 33, 44])

# Put a background image on for nice sea rendering.
ax.stock_img()
track_style = dict(color='blue', linewidth=2, marker='o')
ax.plot(df_raw['LON'].values, df_raw['LAT'].values,
        transform=plate_carree, **track_style)

# Create a feature for States/Admin 1 regions at 1:50m from Natural Earth
states_provinces = cfeature.NaturalEarthFeature(
    category='cultural',
    name='admin_1_states_provinces_lines',
    scale='50m',
    facecolor='none',
)

SOURCE = 'Natural Earth'
LICENSE = 'public domain'

for base_feature in (cfeature.LAND, cfeature.COASTLINE):
    ax.add_feature(base_feature)
ax.add_feature(states_provinces, edgecolor='gray')
ax.gridlines(crs=plate_carree, draw_labels=True,
             linewidth=2, color='gray', alpha=0.5, linestyle='--')

# Label the Lamont site; the text is right-aligned at the given lon/lat.
ax.text(-97.56, 36.69, 'Lamont',
        horizontalalignment='right',
        transform=ccrs.Geodetic())
plt.show()

So this first file is not what you are looking for. All of the data is outside of Lamont. 

I don't know how to search within files that are not nc files. So just trying to figure out what the headings mean for each file. 

```
AC_ROO002_202001301731_R0.ict 39.57759, -103.67529
AC_BIG002_201201141836_R0.ict 36.60038, -97.70751
AC_GMD001_201201151416_R0.ict 36.65505, -97.45487
AC_GMD001_201210241711_R0.ict 36.68544, -97.46158
AC_GMD002_201409161626_R0.ict 36.72353, -97.67488
AC_GMD003_201312171755_R0.ict 39.53062, -104.34730
AC_GMD004_201407160823_R0.ict 67.60209, 25.96934
AC_GMD007_201610261629_R0.ict 39.51848, -104.34775
AC_GMD008_201609201500_R0.ict 40.09800, -104.41011
AC_GMD008_202011161659_R0.ict 39.60709, -103.87532
AC_GMD009_201807301659_R0.ict 45.91471, -90.33893
AC_GMD009_202012171701_R0.ict 39.48189, -103.84781
AC_GMD010_201707141832_R0.ict 40.24480, -103.99572
AC_GMD010_201709261826_R0.ict 40.54869, -104.55073
AC_GMD010_201712121814_R0.ict 39.92086, -104.03193
AC_GMD010_201806191322_R0.ict 67.79564, 26.87452
AC_GMD010_201807171359_R0.ict 34.64097, -117.35879
AC_GMD010_201807172129_R0.ict 34.65950, -117.49775
AC_GMD010_201807181759_R0.ict 34.67067, -117.80473
AC_GMD010_201807251658_R0.ict 36.69109, -97.41193
AC_GMD010_201807311658_R0.ict 46.11343, -90.14798
AC_GMD010_201811291759_R0.ict 39.96087, -103.91843
AC_GMD010_201905141700_R0.ict 39.96019, -104.83448
AC_GMD010_201906120843_R0.ict 48.59355,  1.06712
AC_GMD010_201907111701_R0.ict 40.04709, -104.24512
AC_GMD010_201911051700_R0.ict 39.64377, -104.18397
AC_ROO002_202001301731_R0.ict 39.57759, -103.67529
AC_roo003_201912181801_R0.ict 40.10531, -104.11123
```



### Work with actual aircore file here
I think that I should focus on the recent files. 2018 is during atom which I have the gc simulations for. 

In [None]:
path = "/Users/arianatribby/git/oklahoma_propane/data/tccon_aircore/AC_GMD010_201807251658_R0.ict.csv"
inputs = sorted(glob.glob(path))

In [None]:
aircore_df = converttodf(inputs)

In [None]:
# https://stackoverflow.com/questions/35337299/python-datetime-to-float-with-millisecond-precision
# Convert the timezone-aware datetimes in `completetime` to POSIX seconds.
# The column is overwritten in place, so only convert while it still holds
# datetimes; this keeps the cell safe to re-run.
if not pd.api.types.is_numeric_dtype(aircore_df['completetime']):
    res = [moment.timestamp() for moment in aircore_df['completetime']]

    aircore_df['completetime'] = res

Double check the location

In [None]:
# Regional view (lon -110..-95, lat 33..44) of the AirCore track, with Lamont labelled.
plate_carree = ccrs.PlateCarree()
ax = plt.axes(projection=plate_carree)
ax.set_extent([-110, -95, 33, 44])

# Put a background image on for nice sea rendering.
ax.stock_img()
track_style = dict(color='blue', linewidth=2, marker='o')
ax.plot(aircore_df['LON'].values, aircore_df['LAT'].values,
        transform=plate_carree, **track_style)

# Create a feature for States/Admin 1 regions at 1:50m from Natural Earth
states_provinces = cfeature.NaturalEarthFeature(
    category='cultural',
    name='admin_1_states_provinces_lines',
    scale='50m',
    facecolor='none',
)

SOURCE = 'Natural Earth'
LICENSE = 'public domain'

for base_feature in (cfeature.LAND, cfeature.COASTLINE):
    ax.add_feature(base_feature)
ax.add_feature(states_provinces, edgecolor='gray')
ax.gridlines(crs=plate_carree, draw_labels=True,
             linewidth=2, color='gray', alpha=0.5, linestyle='--')

# Label the Lamont site; the text is right-aligned at the given lon/lat.
ax.text(-97.56, 36.69, 'Lamont',
        horizontalalignment='right',
        transform=ccrs.Geodetic())
plt.show()

Now plot the GEOS-Chem profile against the AirCore data. First, grab the GEOS-Chem output over Lamont. Note that the model output is not interpolated to the flight track; it is simply the model output over Oklahoma. 

In [None]:
gclamont = pd.read_csv('/Users/arianatribby/git/oklahoma_propane/data/geoschem_hpc_downloads/gc_Lamont_20180401_20180601.csv')

Convert from units of carbon

In [None]:
# Constants used by the height estimate below.
R = 286.13
g = 9.8

# Scale the species concentrations by 1e9; C2H6 and C3H8 are additionally
# divided by 2 and 3 respectively (conversion from units of carbon).
gclamont['ch4_ppb'] = gclamont['SpeciesConc_CH4'].values * 1e9
gclamont['c2h6_ppb'] = gclamont['SpeciesConc_C2H6'].values * 1e9 / 2
gclamont['c3h8_ppb'] = gclamont['SpeciesConc_C3H8'].values * 1e9 / 3

# height = -(R * T / g) * ln(P / 1013.25), using each row's own temperature.
pressure_ratio = gclamont['Met_PMID'] / 1013.25
gclamont['height'] = (-R * gclamont['Met_T'] / g) * np.log(pressure_ratio)


In [None]:
print(max(gclamont.height))
print(min(gclamont.height))
print(max(aircore_df.GPS_ALT))
print(min(aircore_df.GPS_ALT))

In [None]:
print(max(gclamont.Met_PMID))
print(min(gclamont.Met_PMID))
print(max(aircore_df.P))
print(min(aircore_df.P))

Filter out GC values less than 29hPa pressure. 

In [None]:
gclamont_rightalt = gclamont.loc[gclamont['Met_PMID'] > 29]

In [None]:
print(len(gclamont))
print(len(gclamont_rightalt))

Plot GC along with air core on a map to compare location

In [None]:
# AirCore track (blue) together with every 1000th GEOS-Chem row (red) on one map.
plate_carree = ccrs.PlateCarree()
ax = plt.axes(projection=plate_carree)
ax.set_extent([-110, -88, 28, 44])

# Put a background image on for nice sea rendering.
ax.stock_img()
track_style = dict(color='blue', linewidth=2, marker='o')
ax.plot(aircore_df['LON'].values, aircore_df['LAT'].values,
        transform=plate_carree, **track_style)

# Thin the GEOS-Chem rows (every 1000th) before plotting their locations.
gc_lon_thinned = gclamont_rightalt['lon'].values[::1000]
gc_lat_thinned = gclamont_rightalt['lat'].values[::1000]
ax.scatter(gc_lon_thinned, gc_lat_thinned,
           color='red', marker='o',
           transform=plate_carree)
# Create a feature for States/Admin 1 regions at 1:50m from Natural Earth
states_provinces = cfeature.NaturalEarthFeature(
    category='cultural',
    name='admin_1_states_provinces_lines',
    scale='50m',
    facecolor='none',
)

SOURCE = 'Natural Earth'
LICENSE = 'public domain'

for base_feature in (cfeature.LAND, cfeature.COASTLINE):
    ax.add_feature(base_feature)
ax.add_feature(states_provinces, edgecolor='gray')
ax.gridlines(crs=plate_carree, draw_labels=True,
             linewidth=2, color='gray', alpha=0.5, linestyle='--')

# Label the Lamont site; the text is right-aligned at the given lon/lat.
ax.text(-97.56, 36.69, 'Lamont',
        horizontalalignment='right',
        transform=ccrs.Geodetic())
plt.show()

Now, going to select one profile for every week during this campaign. Select the profile that took place during similar time of day as aircore. What time did the aircore take place?

In [None]:
# NOTE: this rebinds the notebook-level name `datetime` from the module (imported in the
# first cell) to the `datetime.datetime` class. Cells above that reach the class or
# `timedelta` through the module name will fail if re-run after this cell, unless the
# import cell is re-run first.
from datetime import datetime

In [None]:
# Convert POSIX seconds to timezone-aware UTC timestamps. A naive `fromtimestamp`
# call returns the machine's local time, so the printed time of day would depend
# on where the notebook is run.
aircore_df['datetime_complete'] = pd.to_datetime(aircore_df['completetime'], unit='s', utc=True)

In [None]:
print(aircore_df['datetime_complete'][0])
print(aircore_df['datetime_complete'][225])
print(aircore_df['datetime_complete'][len(aircore_df)-1])

Going to select one GC simulation time per week over the two months surrounding this measurement, picking times near noon. The problem is that the GEOS-Chem simulations occurred over the spring, while this AirCore flight is in the summer. 

In [None]:
# One model time per week; rows are kept when their `time` string matches exactly.
# NOTE(review): the first entry is 12:30 while the others are 11:30 - confirm this is intended.
weekly_noon_times = [
    '2018-04-01 12:30:00',
    '2018-04-07 11:30:00',
    '2018-04-14 11:30:00',
    '2018-04-21 11:30:00',
    '2018-04-28 11:30:00',
    '2018-05-05 11:30:00',
    '2018-05-12 11:30:00',
    '2018-05-19 11:30:00',
    '2018-05-26 11:30:00',
]
gclamont_weeklynoon = gclamont_rightalt.loc[gclamont_rightalt['time'].isin(weekly_noon_times)]

Now, separate geos chem data into 4 corners around lamont to more easily plot and see differences. 

In [None]:
# Boolean masks for the west/east and north/south sides of the box around Lamont.
gc_is_west = gclamont_weeklynoon['lon'] < -99.
gc_is_east = gclamont_weeklynoon['lon'] > -96.
gc_is_north = gclamont_weeklynoon['lat'] > 37.5
gc_is_south = gclamont_weeklynoon['lat'] < 35.

# Combine the masks into the four corner subsets.
gcupperleft = gclamont_weeklynoon.loc[gc_is_west & gc_is_north]
gcbottomleft = gclamont_weeklynoon.loc[gc_is_west & gc_is_south]
gcupperright = gclamont_weeklynoon.loc[gc_is_east & gc_is_north]
gcbottomright = gclamont_weeklynoon.loc[gc_is_east & gc_is_south]


In [None]:
# CH4 against potential temperature: four GEOS-Chem corner boxes plus the AirCore profile.
fh = 400
fw = 400
colors = bokeh.palettes.d3['Category20'][20]


p = bokeh.plotting.figure(frame_height=fh, frame_width=fw, title='')

# One glyph per GEOS-Chem corner box, drawn in legend order.
corner_series = (
    (gcupperleft, colors[1], 'NW Kansas'),
    (gcbottomleft, colors[2], 'SW ~OK'),
    (gcupperright, colors[3], 'NE Kansas'),
    (gcbottomright, colors[4], 'SE OK'),
)
for corner_df, corner_color, corner_label in corner_series:
    p.circle(corner_df['Met_THETA'].values,
             corner_df['ch4_ppb'].values,
             size=5, color=corner_color, legend_label=corner_label)

# AirCore observations are drawn last, on top of the model points.
p.circle(aircore_df['THETA'].values,
         aircore_df['CH4'].values,
         size=5, color=colors[6], legend_label='AirCore')

p.xaxis.axis_label = "theta (K)"
p.yaxis.axis_label = "CH4 (ppb)"
for plot_axis in (p.xaxis, p.yaxis):
    plot_axis.axis_label_text_font_size = "16pt"
    plot_axis.major_label_text_font_size = "15pt"
    plot_axis.major_tick_line_width = 3
p.axis.axis_label_text_font_style = 'bold'
p.legend.label_text_font_size = '14pt'
p.legend.location = "top_right"
# p.output_backend = "svg"
bokeh.io.show(p)

Fine. CH4 does not show a good relationship with theta, so check the region up to the tropopause instead: plot CH4 against altitude for theta < 400 K.

In [None]:
# CH4 against altitude, restricted to rows whose potential temperature is below 400.
fh = 400
fw = 400
colors = bokeh.palettes.d3['Category20'][20]


p = bokeh.plotting.figure(frame_height=fh, frame_width=fw, title='')

# (frame, theta column, altitude column, CH4 column, colour, legend label), in draw order.
profile_series = (
    (gcupperleft, 'Met_THETA', 'height', 'ch4_ppb', colors[1], 'NW Kansas'),
    (gcbottomleft, 'Met_THETA', 'height', 'ch4_ppb', colors[2], 'SW ~OK'),
    (gcupperright, 'Met_THETA', 'height', 'ch4_ppb', colors[3], 'NE Kansas'),
    (gcbottomright, 'Met_THETA', 'height', 'ch4_ppb', colors[4], 'SE OK'),
    (aircore_df, 'THETA', 'GPS_ALT', 'CH4', colors[6], 'AirCore'),
)
for frame, theta_col, alt_col, ch4_col, series_color, series_label in profile_series:
    # Apply the theta filter once and reuse it for both coordinates.
    below_400 = frame.loc[frame[theta_col] < 400]
    p.circle(below_400[alt_col].values,
             below_400[ch4_col].values,
             size=5, color=series_color, legend_label=series_label)

p.xaxis.axis_label = "altitude (m)"
p.yaxis.axis_label = "CH4 (ppb)"
for plot_axis in (p.xaxis, p.yaxis):
    plot_axis.axis_label_text_font_size = "16pt"
    plot_axis.major_label_text_font_size = "15pt"
    plot_axis.major_tick_line_width = 3
p.axis.axis_label_text_font_style = 'bold'
p.legend.label_text_font_size = '14pt'
p.legend.location = "bottom_left"
# p.output_backend = "svg"
bokeh.io.show(p)

Josh: GEOS-Chem is getting the methane fluxes right, but the profiles are wrong because the transport in MERRA is wrong. To test this, plot GEOS-Chem potential temperature against AirCore potential temperature. 

In [None]:
# Potential temperature against pressure: AirCore versus the NW GEOS-Chem corner box.
fh = 400
fw = 400
colors = bokeh.palettes.d3['Category20'][20]

p = bokeh.plotting.figure(frame_height=fh, frame_width=fw, title='')

# (pressure values, theta values, colour, legend label), in draw order.
theta_series = (
    (aircore_df['P'].values, aircore_df['THETA'].values, colors[1], 'air core'),
    (gcupperleft['Met_PMID'].values, gcupperleft['Met_THETA'].values, colors[0], 'geos chem'),
)
for pressure_values, theta_values, series_color, series_label in theta_series:
    p.circle(pressure_values,
             theta_values,
             size=5, color=series_color, legend_label=series_label)

p.xaxis.axis_label = "pressure"
p.yaxis.axis_label = "theta"

bokeh.io.show(p)