In [1]:
# Standard Lib
from datetime import datetime
import time
import io
import os

# UI and Charts
import matplotlib.pyplot as plt
import matplotlib.colors as colors
import matplotlib.ticker as tick
import ipyvuetify as v
import ipywidgets as ipw
from IPython.display import FileLink
from IPython.display import Javascript
import matplotlib.patches as mpatches

# General Data Processing
import pandas as pd
import numpy as np
import numpy.ma as ma

# Modeling and ML
import sesd
from tqdm import tqdm
from IPython.display import clear_output
from statsmodels.tsa.arima.model import ARIMA
from sklearn.linear_model import LinearRegression

# Disable Warnings
import warnings

# Local packages
from cleaning import anomalies as anom
from cleaning import aggregation as agg
from cleaning import remove_nans

from models import sarima
from models import holt_winters
from models import simple_averaging
from models import CNN

# Install a blanket "ignore" filter so that no warning text is emitted into the widget output areas.
warnings.filterwarnings("ignore") #UI can get messed up if any warnings come up.

In [2]:
# Constants and option definitions

# Name of the timestamp column in uploaded CSVs; also used as the index label on export
time_col = 'dt'

# Render the ipyvuetify widgets with the light theme
v.theme.dark = False

# Aggregation levels (single-letter frequency codes passed to the aggregation helper)
AGG_LEVEL_DAY, AGG_LEVEL_MONTH, AGG_LEVEL_WEEK = "D", "M", "W"

# Number of observations that make up one year at each supported input frequency
DAYS_PER_YEAR, MONTHS_PER_YEAR, WEEKS_PER_YEAR = 365, 12, 52

# Date formats tried, in order, when parsing the time column of an uploaded file (first match wins).
# The first five entries were built from all of the different date formats that were in
# Duke FMD's original data - If you need more, you can add it here.
# The four-digit-year (%Y) variants come last on purpose: they are only reached when none of the
# original two-digit-year (%y) formats match, so files that parsed before still parse identically.
fmt_strings = [
    "%m/%d/%y %I:%M %p",
    "%y-%m-%d",
    "%y/%m/%d",
    "%y-%m",
    "%y/%m",
    "%m/%d/%Y %I:%M %p",
    "%Y-%m-%d",
    "%Y/%m/%d",
    "%Y-%m",
    "%Y/%m"
]

In [3]:
def _hidden_multiplier_field(label):
    """Build a numeric text field (initial value 3) that starts out hidden via its inline style."""
    return v.TextField(
        label=label,
        v_model=3,
        type='Number',
        outlined=True,
        style_='display: none',
    )


# Upload widget: one CSV file at a time
file_uploader = ipw.FileUpload(
    description="Click to Upload",
    multiple=False,  # True to accept multiple files upload else False
    accept='.csv',  # Accepted file extension e.g. '.txt', '.pdf', 'image/*', 'image/*,.pdf'
)

# Placeholder entry; the real items are read in from the uploaded file's column headers
series_picker_items = ["Please Upload Data"]
series_picker = v.Select(
    label="Utility/Building",
    items=series_picker_items,
    v_model=series_picker_items[0],
    disabled=True,
)

# User selects which anomaly detection methods (zero or more) to apply
anom_multipicker = v.Select(
    label='Data Cleaning',
    items=['Nonlocal IQR', 'Rolling Standard Deviation (SD)', 'SESD'],
    v_model=[],
    multiple=True,
    chips=True,
    outlined=True,
)

# One multiplier input per anomaly detection method
iqr_multiplier = _hidden_multiplier_field('IQR multiplier:')
sd_multiplier = _hidden_multiplier_field('SD multiplier:')
sesd_multiplier = _hidden_multiplier_field('SESD multiplier:')

def _icon_button(label, icon, color, **btn_kwargs):
    """Build a v.Btn with a left-aligned icon followed by a text label; extra kwargs go straight to v.Btn."""
    return v.Btn(
        color=color,
        class_='ma-2',
        children=[
            v.Icon(left=True, children=[icon]),
            label
        ],
        **btn_kwargs
    )


# Buttons to display / export the result of data cleaning
display_anom_btn = _icon_button('Display Data Cleaning', 'mdi-chart-timeline-variant', "success", disabled=True)
export_anom_btn = _icon_button('Export Cleaned Data', 'mdi-content-save', "primary", disabled=True)

# User selects what type of forecasting they would like to perform
model_picker = v.Select(
    label="Forecasting Model",
    items=[
        'Average of Previous Years',
        'SARIMA',
        'Holt-Winters Exponential Smoothing',
        'Convolutional Neural Network'
    ],
    v_model='Average of Previous Years',
)

# User selects the frequency of the given data
freq_data_picker = v.Select(
    label="Frequency of Input Data",
    items=[
        '15 Minute Data',
        'Daily',
        'Weekly',
        'Monthly'
    ],
    v_model='Daily',
)

# User enters how much data they would like to forecast for
forecast_len_field = v.TextField(
    label='Forecast Length (Months):',
    v_model=24,
    type='Number',
    outlined=True,
)

# Buttons to display the forecast and to export it to a csv
display_btn = _icon_button('Display Forecast', 'mdi-chart-timeline-variant', "success", disabled=True)
export_btn = _icon_button('Export Forecast CSV', 'mdi-content-save', "primary", disabled=True)

# Button to clear the plots currently on the screen (no `disabled` argument is passed for this one)
clear_btn = _icon_button('Clear Output', 'mdi-delete-forever', "error")

# Output areas: plots and status text, the upload caption, and the export download link
# NOTE(review): ipywidgets' Layout names this trait `align_items`; confirm the hyphenated key below
# is actually applied before relying on it for alignment.
figure_display_area = ipw.Output(layout={'align-items':'center'})
upload_caption_area = ipw.Output(layout={'align-items':'left'})
hidden_dl_link_area = ipw.Output()

# Right-hand column (9 of 12 grid columns): all the plots and output go here
display_col = v.Col(tag='div', cols=9, children=[figure_display_area])

upload_caption = ""

# Left-hand column (3 of 12 grid columns): every control, stacked top to bottom
controls_col = v.Col(
    tag='div',
    cols=3,
    children=[
        freq_data_picker,
        series_picker,
        anom_multipicker,
        iqr_multiplier,
        sd_multiplier,
        sesd_multiplier,
        display_anom_btn,
        export_anom_btn,
        model_picker,
        forecast_len_field,
        display_btn,
        export_btn,
        clear_btn,
    ],
)

# Controls and display side by side in a single row
full_display = v.Row(tag='div', fluid=True, children=[controls_col, display_col])

In [4]:
# Helper/business logic functions

def csv_download_link(df, csv_file_name):
    """
    Write `df` to `csv_file_name` (index included, labelled with time_col) and show a FileLink
    to that file inside hidden_dl_link_area.

    The FileLink is also kept in the module-level `fl`.
    """
    global fl

    df.to_csv(path_or_buf=csv_file_name, index_label=time_col, index=True)

    fl = FileLink(path=csv_file_name)
    with hidden_dl_link_area:
        display(fl)
    # TODO Cleanup old files, if that's important to anyone
    
def clean_data(forecast_key, anom_methods, freq_data, iqr_mult, sd_mult, sesd_mult):
    """
    Handles data cleaning and returns result along with the variables needed to plot the anomalies.

    Reads the uploaded data from the global util_data.

    Params:
        forecast_key: name of the column in util_data to clean
        anom_methods: list of anomaly detection method labels chosen in the UI (may be empty or None)
        freq_data: frequency label of the input data ('15 Minute Data', 'Daily', 'Weekly' or 'Monthly')
        iqr_mult, sd_mult, sesd_mult: multipliers forwarded to the anomaly detection routine

    Returns an 8-tuple:
        (cleaned_df, forecast_key, uncleaned_df, sesd_anoms, sd_anoms, iqr_anoms, iqr_upper_bound,
         percent_anomalies)
        When no anomaly detection method is selected, cleaned_df is the uncleaned frame and the last five
        entries are None.

    Raises ValueError if no data has been uploaded, the frequency label is unknown, or there are not
    more than two periods (years) of data.
    """
    # The cleaning buttons can be enabled before anything is uploaded, so fail with a readable message
    if util_data is None:
        raise ValueError("ERROR: Please upload data before running data cleaning.")

    # Get correct period based on the frequency the user gave (the letter frequency is not needed here)
    period, _ = get_period_freq(freq_data)
    if period is None:
        raise ValueError(f"ERROR: Unknown Frequency of Input Data: {freq_data!r}.")

    # Create a new dataframe with just the series of interest, indexed by time
    current_series_df = util_data[[time_col, forecast_key]].copy()
    current_series_df = current_series_df.set_index(time_col)

    # Aggregate data into daily if 15 min data
    if freq_data=='15 Minute Data':
        current_series_df = agg.aggregate(current_series_df, AGG_LEVEL_DAY)

    #remove the nans at the beginning of the data and make the rest of the nans 0
    #common for Duke's utility data to have nans at the beginning before the data starts actually being recorded
    current_series_df = remove_nans.drop_beg_nans_rest_0(current_series_df)

    #if the length of the data isn't more than 2 periods, throw error
    if len(current_series_df[current_series_df.columns[0]])<=2*period:
        raise ValueError(f"ERROR: Length of your data (not including blanks) for {forecast_key.strip()} must be greater than 2 years. You may also have the Input Frequency set wrong.")

    anom_methods = anom_methods or []
    doSesd = 'SESD' in anom_methods
    # The picker offers the long label; the short one is still accepted for backward compatibility
    doSD = 'Rolling Standard Deviation (SD)' in anom_methods or 'Rolling SD' in anom_methods
    doIQR = 'Nonlocal IQR' in anom_methods

    # Nothing selected: hand back the uncleaned series with no anomaly information
    if not (doSesd or doSD or doIQR):
        return current_series_df,forecast_key,current_series_df,None,None,None,None,None

    cleaned_current_series_df,sesd_anoms,sd_anoms,iqr_anoms,iqr_upper_bound,percent_anomalies=\
        anom.removeAnomaliesAndImpute(
        current_series_df,
        doSesd,
        doSD,
        doIQR,
        period,
        iqr_mult,
        sd_mult,
        sesd_mult
    )
    return cleaned_current_series_df,forecast_key,current_series_df,sesd_anoms,sd_anoms,iqr_anoms,iqr_upper_bound,\
                        percent_anomalies
    
def get_period_freq(freq_data):
    """
    Map the UI frequency label to a (period, freq) pair.

    period is the number of observations per year and freq is the single-character code ('D', 'W', or 'M').
    '15 Minute Data' is treated like 'Daily'. Any other label yields (None, None).
    """
    if freq_data in ('15 Minute Data', 'Daily'):
        return DAYS_PER_YEAR, AGG_LEVEL_DAY

    if freq_data=='Weekly':
        return WEEKS_PER_YEAR, AGG_LEVEL_WEEK

    if freq_data=='Monthly':
        return MONTHS_PER_YEAR, AGG_LEVEL_MONTH

    # Unrecognised label
    return None, None
    
def agg_data(cleaned_current_series_df):
    """
    Given a cleaned dataframe from the data cleaning function, aggregates that data to monthly.
    Can do this regardless of the frequency of the data (weekly, daily, and even monthly)
    We do this since we do all forecasting at the monthly level

    Returns the monthly dataframe.
    Raises ValueError when the aggregated 'data' column does not hold more than two years of months.
    """
    monthly_df = agg.aggregate(cleaned_current_series_df, AGG_LEVEL_MONTH)

    # Double check greater than 2 years of months after aggregation to monthly level
    if len(monthly_df['data'])<=2*MONTHS_PER_YEAR:
        raise ValueError('Length of your data (not including blanks) must be greater than 2 periods')

    return monthly_df

def create_forecast(model, monthly_df):
    """
    Creates the forecasting data with the requested model.

    Params:
        model: label of the forecasting model (one of the model_picker items)
        monthly_df: monthly aggregated dataframe to forecast from

    The forecast length (in months) is read from forecast_len_field.

    Returns (predictions, mape, mase, error_start_date, error_end_date).

    Raises ValueError if the forecast length is not a positive whole number, the model label is unknown,
    or SARIMA produced NaN predictions.
    """
    period = MONTHS_PER_YEAR
    freq = AGG_LEVEL_MONTH

    # The text field can hold an empty or non-numeric string, so validate before handing it to a model
    try:
        forecast_length = int(forecast_len_field.v_model)
    except (TypeError, ValueError):
        raise ValueError("ERROR: Forecast Length must be a whole number of months.")
    if forecast_length<=0:
        raise ValueError("ERROR: Forecast Length must be at least 1 month.")

    if model=='SARIMA':
        predictions,mape,mase,error_start_date,error_end_date=\
            sarima.forecast(data=monthly_df, freq=freq, period=period, numPointsPredict=forecast_length)

        #if predictions has a nan, then that means sarima model doesn't work for the given data (rarely happens)
        if np.isnan(predictions.values).any():
            raise ValueError("ERROR: SARIMA forecasting unable to be completed for this dataset.")

    elif model=='Average of Previous Years':
        #get the results from the simple averaging model
        predictions,mape,error_start_date,error_end_date=simple_averaging.forecast(data=monthly_df,period=period,\
                                                        freq=freq,numPointsPredict=forecast_length)
        mase = 1 # by definition, because this is the naive model

    elif model=='Holt-Winters Exponential Smoothing':
        #get the results from the holt winters model
        predictions,mape,mase,error_start_date,error_end_date=holt_winters.forecast(data=monthly_df,\
                            period=period,freq=freq,numPointsPredict=forecast_length)

    elif model=='Convolutional Neural Network':
        #get the results from the CNN model
        # NOTE(review): mase and mape are unpacked in the opposite order to the other models here;
        # confirm this matches the return order of CNN.predict_with_CNN.
        predictions,mase,mape,error_start_date,error_end_date=CNN.predict_with_CNN(monthly_df,\
                                                                                   forecast_length,\
                                                                                   verbose=False)
        # Convert np array to series with time index; the range starts at the last known date, which is
        # then dropped by the [1:] slice
        predictions = pd.Series(predictions.reshape((forecast_length,)))
        predictions.index = pd.date_range(start=monthly_df.index[-1] , periods= forecast_length + 1, freq = freq)[1:]

    else:
        # Without this branch an unknown label would surface as an UnboundLocalError on `predictions`
        raise ValueError(f"ERROR: Unknown forecasting model: {model!r}.")

    return predictions, mape, mase, error_start_date, error_end_date


def handle_output():
    """
    This function is run by every button handler that produces forecast output. This does the checking
    to see if the data needs to be cleaned and aggregated again, then always recomputes the forecast.

    Cleaning is redone when new data was uploaded or when any cleaning input changed since the last
    successful cleaning: the selected series, the anomaly detection methods, the input frequency, or one
    of the three multipliers. The result is cached in the global current_aggregated_data.

    Returns (predictions, mape, mase, error_start_date, error_end_date).
    """
    global current_forecast_key
    global current_forecast_ts
    global current_anom_methods
    global current_aggregated_data
    global current_forecasting_method
    global current_aggregation_level
    global new_upload
    global current_iqr_mult
    global current_sd_mult
    global current_sesd_mult

    # Snapshot the current UI settings. The method list is copied and sorted so it can be compared
    # without mutating (or aliasing) the widget's own list.
    selected_key = series_picker.v_model
    selected_methods = sorted(anom_multipicker.v_model or [])
    selected_frequency = freq_data_picker.v_model
    # Text fields may hold strings once edited, so normalise to float before comparing with the cache
    iqr_mult = float(iqr_multiplier.v_model)
    sd_mult = float(sd_multiplier.v_model)
    sesd_mult = float(sesd_multiplier.v_model)

    # The multiplier globals may not exist before the first successful run, so read them defensively
    module_state = globals()

    #boolean that checks if anything has changed with regard to data cleaning
    bool_changes_anomaly = (
        new_upload
        or current_aggregated_data is None
        or current_forecast_key != selected_key
        or sorted(current_anom_methods or []) != selected_methods
        or current_aggregation_level != selected_frequency
        or module_state.get("current_iqr_mult") != iqr_mult
        or module_state.get("current_sd_mult") != sd_mult
        or module_state.get("current_sesd_mult") != sesd_mult
    )

    #redo if changes to anomaly detection
    if bool_changes_anomaly:
        results = clean_data(selected_key, selected_methods, selected_frequency, iqr_mult, sd_mult, sesd_mult)
        cleaned_current_series_df = results[0] #only care about the first thing returned

        #aggregate to monthly level because that is how we do our forecasting
        current_aggregated_data = agg_data(cleaned_current_series_df)

        # Record what the cached data corresponds to, so a later forecasting failure cannot leave the
        # cache keys out of step with current_aggregated_data
        current_forecast_key = selected_key
        current_anom_methods = selected_methods
        current_aggregation_level = selected_frequency
        current_iqr_mult = iqr_mult
        current_sd_mult = sd_mult
        current_sesd_mult = sesd_mult
        new_upload = False

    #always just forecast again even if no changes
    with figure_display_area:
        print("Calculating Forecast...")

    predictions, mape, mase, error_start_date, error_end_date =\
        create_forecast(model_picker.v_model, current_aggregated_data)

    current_forecast_ts = predictions
    current_forecasting_method = model_picker.v_model

    return current_forecast_ts, mape, mase, error_start_date, error_end_date

In [5]:

# Graphics/UI and direct handler functions

def render():
    """
    Display the top-level widgets, in order: file uploader, upload caption, download link area,
    and the main controls/plot row.
    """
    top_level_widgets = (
        file_uploader,
        upload_caption_area,
        hidden_dl_link_area,
        full_display,
    )
    for top_level_widget in top_level_widgets:
        display(top_level_widget)

def on_change():
    """
    Re-displays the UI by calling render().

    NOTE(review): nothing in this notebook registers or calls this function - confirm it is still needed.
    """
    render()
    
    
# TODO split the data processing part of this out into something that gets called by this and
# the export handler.
def display_forecast_on_click(widget, event, data):
    """
    Click handler for the "Display Forecast" button.

    Gets the forecast from handle_output() and plots it. Any exception is reported as text in
    figure_display_area instead of being raised.
    """
    try:
        forecast_results = handle_output()
        show_forecast_plot(*forecast_results)
    except Exception as forecast_error:
        with figure_display_area:
            print("An error occurred during forecasting:\n", str(forecast_error))

def export_btn_on_click(widget, event, data):
    """
    Click handler for the "Export Forecast CSV" button.

    Runs handle_output() (which also stores into current_aggregated_data), appends the forecast to the
    monthly history, renames the "data" column to the selected series name, and creates a download link
    for the resulting CSV. Any exception is reported as text in figure_display_area.
    """
    try:
        # Only the predictions are needed for the export; the error metrics are ignored
        predictions = handle_output()[0]

        # Forecast dates continue monthly from the last historical date (which [1:] drops)
        dti = pd.date_range(current_aggregated_data.index[-1], periods=len(predictions)+1, freq='M')[1:]
        forecast_df = pd.DataFrame(index=dti, data={"data":predictions.to_numpy()})
        export_df = pd.concat([current_aggregated_data,forecast_df])

        #rename the data column to the forecast key
        export_df=export_df.rename(columns={"data":series_picker.v_model})

        now = datetime.now()
        time_str = now.strftime("%Y-%m-%d_%H-%M-%S")
        with figure_display_area:
            print("Creating download link above...")
        csv_download_link(export_df, f"forecast_{time_str}.csv")
    except Exception as e:
        with figure_display_area:
            print("An error occurred during forecasting:\n", str(e))
            
def display_anom_on_click(widget, event, data):
    """
    Click handler for the "Display Data Cleaning" button.

    Cleans the selected series with the current UI settings and plots the detected anomalies on top of
    the uncleaned series. Any exception is reported as text in figure_display_area.
    """
    try:
        (
            _cleaned_df,
            forecast_key,
            current_series_df,
            sesd_anoms,
            sd_anoms,
            iqr_anoms,
            iqr_upper_bound,
            percent_anomalies,
        ) = clean_data(
            series_picker.v_model,
            anom_multipicker.v_model,
            freq_data_picker.v_model,
            float(iqr_multiplier.v_model),
            float(sd_multiplier.v_model),
            float(sesd_multiplier.v_model),
        )

        show_anomalies_plot(
            forecast_key,
            current_series_df,
            sesd_anoms,
            sd_anoms,
            iqr_anoms,
            iqr_upper_bound,
            percent_anomalies,
        )
    except Exception as cleaning_error:
        with figure_display_area:
            print("An error occurred during anomaly detection:\n", str(cleaning_error))
            
def export_anom_btn_on_click(widget, event, data):
    """
    Click handler for the "Export Cleaned Data" button.

    Runs the clean data function with the current UI settings, renames the "data" column to the selected
    series name, and creates a download link for the resulting CSV. Any exception is reported as text in
    figure_display_area.
    """
    try:
        results=clean_data(series_picker.v_model,anom_multipicker.v_model,freq_data_picker.v_model,\
                           float(iqr_multiplier.v_model),float(sd_multiplier.v_model),float(sesd_multiplier.v_model))
        cleaned_current_series_df = results[0] #only care about the first thing returned

        #rename the data column to the forecast key
        cleaned_current_series_df=cleaned_current_series_df.rename(columns={"data":series_picker.v_model})

        now = datetime.now()
        time_str = now.strftime("%Y-%m-%d_%H-%M-%S")
        with figure_display_area:
            print("Creating download link above...")
        csv_download_link(cleaned_current_series_df, f"anom_detection_{time_str}.csv")
    except Exception as e:
        with figure_display_area:
            # This handler never forecasts, so report the failure against the step that actually ran
            print("An error occurred during data cleaning export:\n", str(e))
        
# Have to wrap the clear fn to take care of the param
def clear_btn_on_click(widget, event, data):
    """Click handler for "Clear Output": closes all matplotlib figures and empties the plot and link areas."""
    plt.close('all')
    for output_area in (figure_display_area, hidden_dl_link_area):
        output_area.clear_output()

In [6]:
# Functions for plotting

def y_fmt(tick_val, pos):
    """
    Tick formatter for the y axis (signature required by matplotlib's FuncFormatter).

    Values with a magnitude of at least one million are shown in millions ('2.5 M'), at least one thousand
    in thousands ('1.5 k'), and anything smaller as a compact number. Always returns a string.

    Params:
        tick_val: the tick value to format
        pos: the tick position (unused)
    """
    magnitude = abs(tick_val)
    if magnitude >= 1000000:
        val = int(tick_val)/1000000
        return f'{val} M'
    if magnitude >= 1000:
        val = int(tick_val) / 1000
        return f'{val} k'
    # Compact general format, e.g. 500.0 -> '500'
    return f'{tick_val:g}'

def show_forecast_plot(predictions,mape,mase,error_start_date,error_end_date):
    """
    Plots the training data followed by the forecast for the currently selected model.

    Params:
        predictions: the forecast values; must expose a date index (predictions.index is read for tick labels)
        mape: mean absolute percent error, shown in the legend
        mase: mean absolute scaled error, shown in the legend
        error_start_date, error_end_date: the date range the errors were computed over, shown in the legend

    Uses global variables current_aggregated_data, current_forecast_key, model_picker and figure_display_area.
    """
    with figure_display_area:
        #ts is the first column of the previous data and x is a range of integer positions used for graphing
        ts=current_aggregated_data[current_aggregated_data.columns[0]]
        # NOTE(review): the first forecast point is drawn at the same x position as the last training point.
        # This assumes predictions repeat the last observed month - confirm against the model outputs.
        x=np.asarray(range(len(predictions)+len(ts)-1)) #-1 because one point overlap
        x_start = 0
        x_end = len(x)-1
        x_len = x_end - x_start
        
        #create four tick positions (start, 1/3, 2/3, end) and label each with its date
        xt = [x_start, x_start + (x_len//3), x_start+(2*(x_len//3)), x_end]
        xtl=[]
        for curIndex in xt: 
            if curIndex<len(ts):
                #position falls inside the training data
                xtl.append(pd.Timestamp.date(current_aggregated_data.index[curIndex]))
            else:
                #position falls inside the forecast; +1 undoes the one point overlap
                xtl.append(pd.Timestamp.date(predictions.index[curIndex-len(ts)+1]))
            
        plt.figure(figsize=(12,8))
        labels_and_colors=[] #[label, color] pairs, kept for building the legend
        plt.title("%s forecasting: %s" % (model_picker.v_model,current_forecast_key),size=15)
        
        #plot the training data
        labels_and_colors.append(["Training Data", "blue"])
        plt.plot(x[0:len(ts)], ts, label=labels_and_colors[-1][0], color=labels_and_colors[-1][1], marker="o")
        
        #plot the forecast, starting at the x position of the last training point
        labels_and_colors.append(["Forecast", "orange"])
        plt.plot(x[len(ts)-1:], predictions, color=labels_and_colors[-1][1], label=labels_and_colors[-1][0], marker="o")
        
        #set the ticks, names and ranges for the x and y axis
        plt.xticks(xt, xtl, size=15)
        plt.xlim(x_start, x_end)
        plt.ylabel("Utility Usage", size=15)
        plt.xlabel("Date", size=15)
        plt.grid(alpha=.3)
        
        #set the legend outside the graph window and add an invisible entry whose label shows the error
        handles=[]
        for label_color_pair in labels_and_colors:
            handles.append(mpatches.Patch(color=label_color_pair[1], label=label_color_pair[0]))
        error="MAPE: %.2f%%\nMASE: %.2f\nError computed over:\n%s to %s"\
            % (mape,mase,error_start_date,error_end_date) 
        handles.append(mpatches.Patch(color='none', label=error))
        plt.legend(handles=handles, bbox_to_anchor=(1.02, 1), loc='upper left', borderaxespad=0., fontsize=12)
        ax = plt.gca()
        #abbreviate large y tick values using y_fmt
        ax.yaxis.set_major_formatter(tick.FuncFormatter(y_fmt))
        plt.show()
        
def show_anomalies_plot(forecast_key, cleaned_current_series_df, sesd_anoms, sd_anoms, iqr_anoms, iqr_upper_bound,\
                        percent_anomalies):
    """
    Plot the series together with the anomalies found by each selected detection method.

    Params:
        forecast_key: series name, used in the title
        cleaned_current_series_df: dataframe whose first column is the series to draw
        sesd_anoms, sd_anoms, iqr_anoms: boolean arrays marking anomalous points, or None when that
            method was not run
        iqr_upper_bound: values of the IQR upper boundary line (only used when iqr_anoms is not None)
        percent_anomalies: percentage of points marked anomalous, shown in the legend (skipped when None)

    Uses global variable figure_display_area
    """
    def _as_mask(flags):
        # None means the method was not run. Converting to a boolean array makes the masks safe to
        # test against None and to combine element-wise below.
        return None if flags is None else np.asarray(flags, dtype=bool)

    iqr_mask = _as_mask(iqr_anoms)
    sd_mask = _as_mask(sd_anoms)
    sesd_mask = _as_mask(sesd_anoms)

    with figure_display_area:
        #ts is the series being drawn and x is a range of integer positions used for graphing
        ts=cleaned_current_series_df[cleaned_current_series_df.columns[0]]
        values = ts.to_numpy()
        x = np.arange(len(ts))
        x_start = 0
        x_end = len(x)-1
        x_len = x_end - x_start
        labels_and_colors=[] #keep track of this for making legend

        #create four tick positions (start, 1/3, 2/3, end) and label each with its index value
        xt = [x_start, x_start + (x_len//3), x_start+(2*(x_len//3)), x_end]
        xtl = [cleaned_current_series_df.index[index] for index in xt]

        plt.figure(figsize=(12,8))
        plt.title("Anomalies for {}".format(forecast_key), size=20)
        plt.plot(x, ts, color="#7ea4ce", label="Original Usage") #original data

        def _scatter_anomalies(mask, label, color):
            # Draw the flagged points above the line and remember the entry for the legend
            labels_and_colors.append([label, color, None])
            plt.scatter(x[mask], values[mask], label=label, color=color, zorder = 5)

        #only plot a method's anomalies if its mask is not None, which means the user selected that method
        if iqr_mask is not None:
            labels_and_colors.append(["IQR upper boundary", "orange", "dashed"])
            plt.plot(x, iqr_upper_bound, label=labels_and_colors[-1][0], color=labels_and_colors[-1][1],\
                     linestyle=labels_and_colors[-1][2])
            _scatter_anomalies(iqr_mask, "IQR anomalies", "green")
        if sd_mask is not None:
            _scatter_anomalies(sd_mask, "SD anomalies", "red")
        if sesd_mask is not None:
            _scatter_anomalies(sesd_mask, "Seasonal anomalies", "blue")
        if sesd_mask is not None and sd_mask is not None:
            # Element-wise AND: only the points flagged by both methods. (Python's `and` on two
            # sequences just returns one of them, it does not intersect them.)
            _scatter_anomalies(np.logical_and(sesd_mask, sd_mask), "SD & Seasonal anomalies", "purple")

        #set the ticks, names and ranges for the x and y axis
        plt.xticks(xt, xtl, size=15)
        plt.xlim(x_start, x_end)
        plt.yticks(size=15)
        plt.ylim()
        plt.xlabel("Date", size=15)
        plt.ylabel("Utility Usage", size=15)
        plt.grid(alpha=.3)

        #set the legend outside the graph window and set the text inside it to show the percent anomalies
        handles=[]
        for label_color_pair in labels_and_colors:
            handles.append(mpatches.Patch(label=label_color_pair[0], color=label_color_pair[1],\
                                          linestyle=label_color_pair[2]))
        if percent_anomalies is not None:
            anomalies="Percent data marked\nanomalous: %.2f%%" % (percent_anomalies)
            handles.append(mpatches.Patch(color='none', label=anomalies))
        plt.legend(handles=handles, bbox_to_anchor=(1.02, 1), loc='upper left', borderaxespad=0., fontsize=12)
        ax = plt.gca()
        ax.yaxis.set_major_formatter(tick.FuncFormatter(y_fmt))
        plt.show()

In [7]:
# Data Reader
def _parse_time_column(raw_times):
    """
    Convert the raw time column to datetimes.

    Tries each format in fmt_strings in order (first match wins) and falls back to pandas' own format
    inference when none of them match. Raises if the values cannot be parsed at all.
    """
    for fmt in fmt_strings:
        try:
            return pd.to_datetime(raw_times, format=fmt)
        except (ValueError, TypeError):
            continue
    return pd.to_datetime(raw_times)


def file_uploader_on_change(change):
    """
    Observer for the upload widget: loads the uploaded CSV into the global util_data.

    On success the time column is converted to datetimes, the series picker is filled with the remaining
    column names, the forecast buttons are enabled and new_upload is set so cached results are recomputed.
    On failure a message is printed in upload_caption_area and the previously loaded data is left untouched.
    """
    global util_data
    global new_upload

    uploads = file_uploader.value
    # The observer also fires when the value is emptied; there is nothing to load in that case
    if not uploads:
        return

    # Use the most recently added entry so a single-name unpack cannot fail on leftover entries
    filename = list(uploads)[-1]
    uploaded_file = uploads[filename]
    size = uploaded_file["metadata"]["size"]

    upload_caption_area.clear_output()
    try:
        # Read the data in dataframe
        uploaded_df = pd.read_csv(io.BytesIO(uploaded_file["content"]))

        if time_col not in uploaded_df.columns:
            raise ValueError(f"the file has no '{time_col}' column")

        # Convert the time column to datetime
        uploaded_df[time_col] = _parse_time_column(uploaded_df[time_col])

        util_series_names = [n for n in uploaded_df.columns if n != time_col]
        if not util_series_names:
            raise ValueError(f"the file has no data columns besides '{time_col}'")
    except Exception as e:
        with upload_caption_area:
            print(f"Could not load {filename}:\n{e}")
        return

    util_data = uploaded_df
    file_uploader.description = filename

    # Set names for dropdown in the UI
    series_picker.items = util_series_names
    series_picker.v_model = util_series_names[0]
    series_picker.disabled = False
    display_btn.disabled = False
    export_btn.disabled = False
    new_upload = True

    # Print status
    with upload_caption_area:
        print(f"Loaded: {filename}\nSize: {size} Bytes")
        
        
def anom_multipicker_on_change(change):
    """
    Shows/Hides controls for anomaly detection tuning.

    Enables the data cleaning display/export buttons only while at least one method is selected, and
    shows each multiplier field only while its method is selected.

    Params:
        change: the change notification; change.new holds the new list of selected method labels
    """
    active_anom_types = change.new or []

    #if a type of anomaly detection has been selected, enable the buttons to display and export
    nothing_selected = not active_anom_types
    display_anom_btn.disabled = nothing_selected
    export_anom_btn.disabled = nothing_selected

    # The picker offers the long SD label; the short one is still accepted for backward compatibility
    sd_selected = 'Rolling Standard Deviation (SD)' in active_anom_types or 'Rolling SD' in active_anom_types

    visibility = (
        (iqr_multiplier, 'Nonlocal IQR' in active_anom_types),
        (sesd_multiplier, 'SESD' in active_anom_types),
        (sd_multiplier, sd_selected),
    )
    for multiplier_field, is_selected in visibility:
        multiplier_field.style_ = 'display: block' if is_selected else 'display: none'

In [8]:
# Configure on_click and on_change behaviors
display_btn.on_event('click', display_forecast_on_click)
clear_btn.on_event('click', clear_btn_on_click)
export_btn.on_event('click', export_btn_on_click)
display_anom_btn.on_event('click', display_anom_on_click)
export_anom_btn.on_event('click', export_anom_btn_on_click)
file_uploader.observe(file_uploader_on_change, 'value')
anom_multipicker.observe(anom_multipicker_on_change, 'v_model')

# Dummy globals: initial values for the state shared between the handlers
fl = None
util_data = None
current_forecast_key = ""
current_forecast_ts = None
current_aggregated_data = None
current_anom_methods = []
current_forecasting_method = None
current_aggregation_level = None
new_upload = False
# handle_output() compares against these before it ever assigns them, so they must exist up front
current_iqr_mult = None
current_sd_mult = None
current_sesd_mult = None

render()

FileUpload(value={}, accept='.csv', description='Click to Upload')

Output()

Output()

Row(children=[Col(children=[Select(items=['15 Minute Data', 'Daily', 'Weekly', 'Monthly'], label='Frequency of…