# Notebook Setup

In [1]:
# Detect the runtime: the IPython shell repr mentions 'google.colab' on Colab.
IN_COLLAB = 'google.colab' in str(get_ipython())

# TODO: CHANGE THESE PATHS BASED ON YOUR OWN LOCAL SETTINGS
if not IN_COLLAB:
    # Local / container checkout of the repository.
    MY_HOME_ABS_PATH = "/root/co2-flux-hourly-gpp-modeling"
else:
    # On Colab the repository is read from Google Drive, which is mounted here.
    MY_HOME_ABS_PATH = "/content/drive/MyDrive/W210/co2-flux-hourly-gpp-modeling"
    from google.colab import drive
    drive.mount('/content/drive/')

Mounted at /content/drive/


## Import Modules

In [2]:
required_packages = ['azure-storage-blob', 'kaleido', 'nbformat']  # Might need to restart kernel after installing nbformat

for p in required_packages: 
    try:
         __import__(p)
    except ImportError:
          %pip install {p} --quiet

[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m388.0/388.0 kB[0m [31m7.1 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m173.9/173.9 kB[0m [31m15.4 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m41.7/41.7 kB[0m [31m4.5 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m79.9/79.9 MB[0m [31m17.8 MB/s[0m eta [36m0:00:00[0m
[?25h

In [3]:
import os
os.chdir(MY_HOME_ABS_PATH)

import sys
import warnings
warnings.filterwarnings("ignore")
import copy
import json
from pathlib import Path
import numpy as np
import pandas as pd

# required plotly libs
import kaleido
import matplotlib.pyplot as plt
import plotly.express as px
from plotly.express.colors import sample_colorscale
from plotly.subplots import make_subplots
import plotly.graph_objects as go
import plotly.io as pio

# from pytorch_forecasting import TemporalFusionTransformer

from datetime import datetime
import gc
import pickle

# Load local custom modules: make the project's tool packages importable.
os.chdir(MY_HOME_ABS_PATH)
_tools_abs_dir = os.path.abspath("./code/src/tools")
if not IN_COLLAB:
    # Credentials folder plus the tools folder (relative and absolute forms).
    sys.path.extend(['./.cred', './code/src/tools', _tools_abs_dir])
else:
    # On Colab the tools folder is given priority over everything else.
    sys.path.insert(0, _tools_abs_dir)

from CloudIO.AzStorageClient import AzStorageClient

# pandas display: show every column and print floats with 5 decimals.
pd.set_option('display.max_columns', None)
pd.set_option('display.float_format', lambda x: '%.5f' % x)

## Define Local File System Constants

In [4]:
# Directory layout, all derived from the repository root.
root_dir = MY_HOME_ABS_PATH
tmp_dir = os.sep.join([root_dir, '.tmp'])
raw_data_dir = tmp_dir  # alias: raw data is read from the same .tmp folder
data_dir = os.sep.join([root_dir, 'data'])
img_dir = os.sep.join([data_dir, 'figures'])
cred_dir = os.sep.join([root_dir, '.cred'])
az_cred_file = os.sep.join([cred_dir, 'azblobcred.json'])
model_objects_dir = os.sep.join([root_dir, 'code/src/modeling/model_objects'])

# Plot (Paper)

In [5]:
# Number of encoder time steps shown on the x-axis (24 * 14 = 336).
ENCODER_LEN = 24*14

# Encoder feature-importance tables stored in the .tmp folder, one per panel.
_encoder_fi_files = [
    "encoder_fi_df_US-AR1_GPP_TFT_EN14_2010_7_15_12.csv",
    "encoder_fi_df_US-AR1_GPP_TFT_EN14_2010_1_15_12.csv",
    "encoder_fi_df_US-Bar_GPP_TFT_EN14_2010_7_15_12.csv",
    "encoder_fi_df_US-AR1_GPP_TFT_EN14_2010_7_15_12.csv",  # TODO: replace this file for NO-GPP-TFT
]
plot1_df, plot2_df, plot3_df, plot4_df = (
    pd.read_csv(tmp_dir + os.sep + csv_name) for csv_name in _encoder_fi_files
)
all_plot_df = [plot1_df, plot2_df, plot3_df, plot4_df]
print(f"Encoder Length({ENCODER_LEN})")


Encoder Length(336)


In [6]:
# Preview the first rows of the first loaded table (one row per encoder_index).
plot1_df.head()

Unnamed: 0,encoder_index,month,day,hour,gap_flag_month,gap_flag_hour,timestep_idx_global,TA_ERA,SW_IN_ERA,LW_IN_ERA,VPD_ERA,P_ERA,PA_ERA,EVI,NDVI,NIRv,b1,b2,b3,b4,b5,b6,b7,BESS-PAR,BESS-PARdiff,BESS-RSDN,CSIF-SIFdaily,PET,Ts,ESACCI-sm,NDWI,Percent_Snow,Fpar,Lai,LST_Day,LST_Night,relative_time_idx,GPP_NT_VUT_REF,encoder_attention
0,-336,0.00824,0.00884,0.00483,0.00572,0.1227,0.03552,0.17463,0.01586,0.01571,0.01915,0.03652,0.01978,0.01588,0.02013,0.01413,0.03135,0.00383,0.00358,0.02381,0.01519,0.01507,0.00351,0.02154,0.01526,0.01186,0.00916,0.02082,0.007,0.00216,0.01981,0.00243,0.04356,0.02447,0.01322,0.01689,0.11537,0.06247,0.01875
1,-335,0.00661,0.00818,0.00191,0.00324,0.01478,0.01119,0.0737,0.00824,0.00814,0.00992,0.01193,0.00602,0.00815,0.01234,0.00782,0.01427,0.00815,0.00146,0.01102,0.00749,0.0079,0.00154,0.00991,0.00357,0.00664,0.00666,0.00984,0.01967,0.00358,0.0086,0.00333,0.02068,0.00851,0.01017,0.00845,0.6138,0.02261,0.09956
2,-334,0.00998,0.00477,0.01837,0.01552,0.11011,0.03958,0.06942,0.02804,0.02018,0.01189,0.03987,0.02742,0.01843,0.00549,0.025,0.02021,0.00255,0.01885,0.11025,0.01764,0.01977,0.02443,0.02821,0.0217,0.01691,0.01322,0.02585,0.00188,0.00191,0.0359,0.00245,0.02637,0.03546,0.00891,0.01947,0.00736,0.09665,0.00025
3,-333,0.00796,0.0097,0.00285,0.00356,0.05088,0.02161,0.1322,0.01166,0.01211,0.01519,0.02153,0.01104,0.0121,0.02299,0.01103,0.02493,0.00709,0.00209,0.01736,0.01162,0.01157,0.00241,0.01547,0.00616,0.00933,0.00881,0.01528,0.01572,0.00308,0.01396,0.00322,0.03336,0.01462,0.01235,0.01359,0.37065,0.04091,0.01356
4,-332,0.00805,0.0066,0.0083,0.00631,0.16627,0.04215,0.13363,0.0172,0.01716,0.01765,0.04565,0.02511,0.01723,0.01342,0.01844,0.02774,0.00298,0.00627,0.0367,0.01552,0.01641,0.00605,0.02474,0.01825,0.01283,0.01027,0.02299,0.00387,0.00167,0.02417,0.00209,0.04153,0.03103,0.01203,0.01656,0.0423,0.08083,6e-05


In [11]:
# Paper figure: a 2x2 grid where each panel stacks the per-feature importance
# (primary y-axis) and overlays the encoder attention (secondary y-axis).
# NOTE(review): the subplot titles below say 2020 while the CSV file names
# loaded into all_plot_df contain "2010" -- confirm which year is correct.
features=['b4', 'TA_ERA', 'P_ERA', 'VPD_ERA', 'BESS-PAR', 'GPP_NT_VUT_REF']
xticks = list(range(-ENCODER_LEN, 0, 24))  # one tick every 24 encoder steps

# Column name -> label shown in the legend.
readable_feature_names = {
    'GPP_NT_VUT_REF': 'GPP',
    'BESS-PAR': 'BESS-PAR',
    #'ESACCI-sm': 'Soil Moisture',
    'b4': 'MODIS Band 4',
    'VPD_ERA': 'Vapor Pressure Deficit',
    'P_ERA': 'Precipitation ',
    'TA_ERA': 'Air Temperature',
}

# Select color scale from: https://plotly.com/python/builtin-colorscales/#builtin-sequential-color-scales
colors = sample_colorscale('tempo_r', np.linspace(0.2, 0.85, len(features)))

# Plot feature importance time-series
fig = make_subplots(rows=2, cols=2,
                    specs=[[{"secondary_y": True}, {"secondary_y": True}], [{"secondary_y": True}, {"secondary_y": True}]],
                    subplot_titles=('(a) GPP-TFT: US-AR1 (12PM, 7/15/2020)', '(b) GPP-TFT: US-AR1 (12PM, 1/15/2020)',
                                    '(c) GPP-TFT: US-Bar (12PM, 7/15/2020)', '(d) No-GPP-TFT: US-AR1 (12PM, 7/15/2020)'),
                    vertical_spacing = 0.2, horizontal_spacing = 0.125)

for p, plot_df in enumerate(all_plot_df):
    # Panels are filled row by row; plotly rows/cols are 1-based.
    r, c = divmod(p, 2)
    r, c = r + 1, c + 1

    # Only the first panel contributes legend entries, so each feature is listed once.
    show_legend = (r == 1 and c == 1)

    for i, f in enumerate(features):
        fig.add_trace(
            go.Scatter(x=plot_df['encoder_index'], y=plot_df[f],
                       name=f, mode='lines',
                       line_color = '#AAA', line_width = 1,
                       fillcolor = colors[i],
                       stackgroup='one', # define stack group
                       hovertemplate = '%{y:.4f}',
                       #groupnorm = "percent",
                       showlegend = show_legend,
                       ),
            row=r, col=c,
            secondary_y=False,
        )

# Update legend names to readable feature names. This must run before the
# attention traces are added: their name is not a key of readable_feature_names.
fig.for_each_trace(lambda t: t.update(name = readable_feature_names[t.name],
                                      legendgroup = readable_feature_names[t.name],
                                      #hovertemplate = t.hovertemplate.replace(t.name, readable_feature_names[t.name])
                )
)

for p, plot_df in enumerate(all_plot_df):
    r, c = divmod(p, 2)
    r, c = r + 1, c + 1
    # Add attention line
    fig.add_trace(
        go.Scatter(
            x=plot_df['encoder_index'], y=plot_df['encoder_attention'],
            mode='lines', line_color = 'white', line_width = 3,
            name = 'Attention',
            showlegend = False,
            hovertemplate = '%{y:.4e}',
        ),
        row=r, col=c,
        secondary_y=True
    )


# Other formatting

# Available plotly template/theme: https://plotly.com/python/templates/
# ['ggplot2', 'seaborn', 'simple_white', 'plotly', 'plotly_white', 'plotly_dark', 'presentation', 'xgridoff', 'ygridoff', 'gridon', 'none']
fig.update_layout(title={'text': "Feature Importance by Encoder Index",
                         'y':0.975,'x':0.5},
                  margin={"r":10,"t":90,"l":50,"b":50},
                  height = 500, width = 900,
                  legend={'title':{'text' :"Features"}, 'orientation':"h",
                          'y':1.15, 'tracegroupgap':0,
                          'font':{'size': 12}, 'traceorder':'reversed'
                          #'x': 0.1,  'itemwidth':30,
                          },
                  font = {'size':12},
                  hovermode="x unified", # or just "x"
                  template='seaborn')
fig.update_xaxes(title={'text': "Encoder Index: # of Time Step (Hour) Before Prediction",
                        'font_size': 12, 'standoff': 10},   #autorange='reversed',
                 tickvals=xticks,
                 rangeslider_visible=False, # show time sliders,
                 range=[-(24*7), 0]  # only the last 24*7 encoder steps are displayed
                )
fig.update_yaxes(title={'text': "Importance",  'font_size': 12, 'standoff':0},
                 range=[0, 1], secondary_y=False)
# Axis title spelling fixed (was "Avergae Attention").
fig.update_yaxes(title={'text': "Average Attention",  'font_size': 12, 'standoff':0}, showgrid=False, secondary_y=True,
                 range=[0, 0.1])
fig.update_annotations(font_size=12)

fig.show()

# Plot (Website)

In [None]:
# Website figure: a single interactive panel with stacked feature importance
# (primary y-axis) and the encoder attention (secondary y-axis).

# Select the table to plot explicitly. Previously this cell read `plot_df`,
# which only existed as the leftover loop variable of the paper-figure cell
# (i.e. the last entry of all_plot_df) and failed if that cell had not run.
# NOTE(review): plot1_df currently holds the same CSV as that last entry;
# confirm this is the intended site/date for the website figure.
plot_df = plot1_df

features=['GPP_NT_VUT_REF','VPD_ERA','P_ERA','TA_ERA', 'ESACCI-sm','BESS-PAR','b4'] #TODO: Put in preferred order
xticks = list(range(-ENCODER_LEN, 0, 24))  # one tick every 24 encoder steps

# Column name -> label shown in the legend.
readable_feature_names = {
    'GPP_NT_VUT_REF': 'GPP',
    'BESS-PAR': 'Photosynthetic Active Radiation',
    'ESACCI-sm': 'Soil Moisture',
    'b4': 'MODIS Band 4',
    'VPD_ERA': 'Vapor Pressure Deficit',
    'P_ERA': 'Precipitation ',
    'TA_ERA': 'Air Temperature',
}

# Select color scale from: https://plotly.com/python/builtin-colorscales/#builtin-sequential-color-scales
colors = sample_colorscale('tempo', np.linspace(0.2, 0.85, len(features)))

# Plot feature importance time-series
fig = make_subplots(specs=[[{"secondary_y": True}]])
for i, f in enumerate(features):
    fig.add_trace(
        go.Scatter(x=plot_df['encoder_index'], y=plot_df[f],
                   name=f, mode='lines',
                   line_color = '#AAA', line_width = 1,
                   fillcolor = colors[i],
                   stackgroup='one', # define stack group
                   hovertemplate = '%{y:.4f}'
                  ),
        secondary_y=False,
    )

# Update legend names to readable feature names. This must run before the
# attention trace is added: its name is not a key of readable_feature_names.
fig.for_each_trace(lambda t: t.update(name = readable_feature_names[t.name],
                                      legendgroup = readable_feature_names[t.name],
                                      #hovertemplate = t.hovertemplate.replace(t.name, readable_feature_names[t.name])
                 )
)

# Add attention line
fig.add_trace(
    go.Scatter(
        x=plot_df['encoder_index'], y=plot_df['encoder_attention'],
        mode='lines', line_color = 'white', line_width = 3,
        name = 'Average Attention',
        showlegend = False,
        hovertemplate = '%{y:.4e}'
    ),
    secondary_y=True
)

# Other formatting

# Available plotly template/theme: https://plotly.com/python/templates/
# ['ggplot2', 'seaborn', 'simple_white', 'plotly', 'plotly_white', 'plotly_dark', 'presentation', 'xgridoff', 'ygridoff', 'gridon', 'none']
fig.update_layout(title={'text': "Feature Importance by Time Steps Before Prediction", 'y':0.965,'x':0.5},
                  margin={"r":10,"t":75,"l":60,"b":50},
                  height = 500, width = 1000,
                  legend={ 'title':{'text' :"Features"}, 'orientation':"h",
                          'y':0.95, 'itemwidth':50,
                          #'x': 0.1,  'itemwidth':30,
                          },
                  hovermode="x unified", # or just "x"
                  template='plotly_white')
fig.update_xaxes(title={'text': "Encoder Index: # of Time Step (Hour) Before Prediction",
                        'font_size': 14, 'standoff': 0},   #autorange='reversed',
                 tickvals=xticks,
                 rangeslider_visible=True, # show time sliders
                )
fig.update_yaxes(title={'text': "Importance",  'font_size': 14, 'standoff':0}, secondary_y=False)
# Axis title spelling fixed (was "Avergae Attention").
fig.update_yaxes(title={'text': "Average Attention",  'font_size': 14, 'standoff':0}, showgrid=False, secondary_y=True)


# y-scaling button (removable)
showToggle = True
# Indices of the stacked feature traces; the attention trace (added last) is
# excluded so the restyle buttons only touch the stacked areas.
target_trace_ids = list(range(len(features)))
if showToggle:
    fig.update_layout(
        updatemenus=[
            dict(
                type = "buttons",
                direction = "left",
                buttons=[
                    dict(
                        args=[{"groupnorm": "percent", 'hovertemplate':'%{y:.2f}%'},target_trace_ids],
                        label="Relative(%)",  # spelling fixed (was "Relvative(%)")
                        method="restyle"
                    ),
                    dict(
                        args=[{"groupnorm": "", 'hovertemplate':'%{y:.4f}'},target_trace_ids],
                        label="Absolute",
                        method="restyle"
                    )
                ],
                pad={"r": 0, "t": 0},
                active=1,  # "Absolute" is the initially selected button
                x=-0.05, xanchor="left",
                y=1.3,
                font_size=10,
            ),
        ]
    )

fig.show()

In [None]:
# Restyle the figure built in the previous cell for embedding in the website:
# narrower canvas, dark template and transparent paper background.
MAX_WIDTH  = 740

fig.update_layout(title={'text': "Feature Importance by Time Steps Before Prediction", 'y':0.94,'x':0.5},
                  margin={"r":10,"t":150,"l":10,"b":50},
                  height = int(MAX_WIDTH*0.75), width = MAX_WIDTH,
                  legend={ 'tracegroupgap':0.5, 'traceorder':"reversed", 'y':1.025},
                  # 'title':{'text' :"Features"}, 'orientation':"h", 'x': 0.95, 'xanchor': "left", 'itemwidth':50,
                  hovermode="x unified", # or just "x"
                  hoverlabel = { 'bgcolor':"#333"},
                  template='plotly_dark')
fig.update_yaxes(title={'text': "Importance",  'font_size': 14, 'standoff':0}, secondary_y=False)
# Axis title spelling fixed (was "Avergae Attention").
fig.update_yaxes(title={'text': "Average Attention",  'font_size': 14, 'standoff':0}, showgrid=False, secondary_y=True)

# y-scaling button (removable); replaces the buttons defined in the previous
# cell with compact "%" / "#" labels. Reuses target_trace_ids from that cell.
showToggle = True
if showToggle:
    fig.update_layout(
        updatemenus=[
            dict(
                type = "buttons",
                direction = "left",
                buttons=[
                    dict(
                        args=[{"groupnorm": "percent", 'hovertemplate':'%{y:.2f}%'},target_trace_ids],
                        label="%",
                        method="restyle"
                    ),
                    dict(
                        args=[{"groupnorm": "", 'hovertemplate':'%{y:.4f}'},target_trace_ids],
                        label="#",
                        method="restyle"
                    )
                ],
                pad={"r": 0, "t": 0, "l":0, "b":0},
                active=1,  # "#" (absolute values) is the initially selected button
                x=-0.05, xanchor="left",
                y=1.5,
                font_color='teal',
                bgcolor="rgba(0,0,0,0)"
            ),
        ]
    )

# For dark background: make the paper transparent so the page color shows through
fig.update_layout(paper_bgcolor="rgba(0,0,0,0)", )

fig.show()

# Export to HTML
os.makedirs(img_dir, exist_ok=True)  # the figures folder may not exist on a fresh checkout
file_name = img_dir + os.sep + "FeatureImportance_plot.html" # TODO: Update name if there are multiple plots
#pio.write_html(fig, file = file_name, include_plotlyjs = 'cdn', include_mathjax='cdn')

# Write the page by hand instead of pio.write_html to avoid encoding issues on
# the negative sign. The file is written as UTF-8 and declares that charset so
# the browser decodes it the same way.
# (The unused `iframe` string was removed: it embedded raw JSON, which contains
# double quotes, inside a double-quoted HTML attribute.)
# NOTE(review): the "plotly-latest" CDN alias is documented as frozen at
# plotly.js v1.58.5; consider pinning the plotly.js version that matches the
# installed plotly package.
fig_json = fig.to_json()
with open(file_name, 'w', encoding='utf-8') as f:
    f.write('<html><head><meta charset="utf-8"><script src="https://cdn.plot.ly/plotly-latest.min.js"></script></head><body>')
    f.write('<div id="plot"></div>')
    f.write('<script>Plotly.newPlot("plot", {0});</script>'.format(fig_json))
    f.write('</body></html>')