# Appendix: Visualization methods

In [23]:
import pandas as pd
import os
import param
import panel as pn
import holoviews as hv
from holoviews import opts
import pickle
hv.extension('bokeh')
pn.extension()

This notebook is a visualization tool used to explore how different imputation algorithms behaved under different amounts of artificial missing data.

In [9]:
# Load the series table; the first CSV column becomes the index and is
# parsed as dates
df = pd.read_csv(
    'dataset.csv',
    index_col=0,
    parse_dates=True,
)

# Preview the first ten rows
df.head(n=10)

Unnamed: 0_level_0,BlueIsland_2m,BlueIsland_5m,BlueIsland_10m,Ingomar_2m,Ingomar_5m,Ingomar_10m,Ingomar_15m,McNuttsIsland_2m,McNuttsIsland_5m,McNuttsIsland_10m,McNuttsIsland_15m,McNuttsIsland_20m,TaylorsRock_2m,TaylorsRock_5m,TaylorsRock_10m,TaylorsRock_15m,TaylorsRock_20m
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1
2018-05-15,8.634,7.762,6.811,7.664,6.993,6.22,5.637,6.504,6.295,5.991,5.737,4.492,7.141,6.778,6.217,5.922,5.871
2018-05-16,9.009,7.564,6.215,7.347,6.636,5.912,5.39,7.222,6.786,6.184,5.737,4.42,6.98,6.595,6.05,5.732,5.449
2018-05-17,8.074,7.188,6.466,7.621,7.072,6.544,6.034,7.828,7.398,6.245,5.606,4.322,7.346,6.994,6.574,6.328,6.092
2018-05-18,8.441,7.328,6.099,7.993,7.554,7.025,6.501,8.065,7.444,6.599,5.952,4.644,7.312,7.033,6.523,6.207,5.757
2018-05-19,7.649,6.877,6.142,8.18,7.809,7.421,6.901,7.883,7.127,5.962,5.392,4.141,7.667,7.398,7.056,6.856,6.507
2018-05-20,8.082,7.681,7.039,8.556,8.183,7.406,6.688,7.302,6.725,5.845,5.598,4.373,7.666,7.542,7.239,6.886,6.61
2018-05-21,8.571,7.653,6.779,8.404,7.307,5.634,4.955,7.261,6.624,5.652,5.296,4.049,7.382,6.728,5.544,5.016,4.883
2018-05-22,8.922,7.265,6.167,8.38,6.934,6.065,5.49,7.03,6.164,5.559,5.219,3.938,7.433,6.636,5.777,5.373,5.112
2018-05-23,8.388,6.944,6.015,8.658,7.572,6.378,5.783,6.911,6.367,5.631,5.004,3.683,7.333,6.969,6.488,6.214,5.823
2018-05-24,7.53,6.311,5.458,8.401,6.87,5.815,5.231,8.254,7.088,5.596,5.145,3.832,7.237,6.931,6.377,5.818,5.372


In [18]:
# Dropdown with one entry per column of the loaded frame
site_selector = pn.widgets.Select(
    options=df.columns.tolist(),
    name='Site',
)

def highlight_nan_regions(label, frame=df):
    """Plot one column as a curve with its runs of missing values shaded red.

    Parameters
    ----------
    label : str
        Column of ``frame`` to plot; also used as the curve label and title.
    frame : pandas.DataFrame, optional
        Table to read the column from. The default is bound once, when this
        function is defined, to the frame loaded from 'dataset.csv'. A later
        cell rebinds the global name ``df`` to the results table, so looking
        ``df`` up at call time would make every dropdown change after that
        cell has run fail with a KeyError.

    Returns
    -------
    holoviews.Overlay
        The curve overlaid with one ``hv.VSpan`` per run of NaN values.
    """
    series = frame[label]

    # Identify NaN regions: walk the mask once and record each run of
    # consecutive missing values as a (start, end) pair of index labels
    is_nan = series.isna()
    nan_ranges = []
    current_start = None

    for date, missing in is_nan.items():
        if missing and current_start is None:
            # First missing value of a new run
            current_start = date
        elif not missing and current_start is not None:
            # The run ends at the first valid value that follows it
            nan_ranges.append((current_start, date))
            current_start = None
    if current_start is not None:
        # The series ends inside a run: close it at the last index label
        nan_ranges.append((current_start, series.index[-1]))

    # Create shaded regions
    spans = [
        hv.VSpan(start, end).opts(color='red', alpha=0.2)
        for start, end in nan_ranges
    ]

    curve = hv.Curve(series, label=label).opts(
        width=900, height=250, tools=['hover', 'box_zoom', 'pan', 'wheel_zoom'],
        show_grid=True, title=label
    )

    return curve * hv.Overlay(spans)
    
# Re-evaluate the plot function whenever the dropdown value changes
interactive_plot = hv.DynamicMap(
    pn.bind(highlight_nan_regions, site_selector)
)

# Selector on top, live plot below it, then a short caption
pn.Column(
    site_selector,
    interactive_plot,
    'Highlighted regions show gaps in each series',
)

# Visualize experiment results

In [33]:
# NOTE: from here on `df` refers to the experiment results table, not the
# series table loaded from 'dataset.csv' earlier in the notebook.
df = pd.read_csv('results.csv')

plots = []
for metric in ('MAE', 'RMSE'):
    # One scatter per imputation strategy, keyed by imputer name
    scatter = hv.NdOverlay({
        name: hv.Scatter(
            df.loc[df['imputer_name'] == name],
            kdims='missing_fraction',
            vdims=metric,
            label=name,
        ).opts(size=8)
        for name in df['imputer_name'].unique()
    }).opts(
        title=f'{metric} vs Missing Fraction by Imputation Strategy',
        xlabel='Missing Fraction (%)',
        ylabel=metric,
        width=800,
        height=400,
        legend_position='right',
    )
    plots.append(scatter)

# Stack the per-metric plots in a single column
hv.Layout(plots).cols(1)

## Results (interactive dashboard)

This is the same information as the above pair of plots but presented using the `param` library.

In [34]:
class ResultsExplorer(param.Parameterized):
    """Scatter of one error metric against missing fraction for one imputer.

    Parameters
    ----------
    imputer
        One of the distinct ``imputer_name`` values found in the results
        table ``df`` when the class is defined; defaults to the first one.
    metric
        Either 'MAE' or 'RMSE'; defaults to 'MAE'.
    """
    # param.Selector replaces param.ObjectSelector, which param documents as
    # deprecated; both accept the `default=` / `objects=` keywords used here.
    imputer = param.Selector(default=df['imputer_name'].unique()[0],
                             objects=list(df['imputer_name'].unique()))
    metric = param.Selector(default='MAE', objects=['MAE', 'RMSE'])

    @param.depends('imputer', 'metric')
    def view(self):
        """Return the scatter for the currently selected imputer and metric.

        Reads the global results table ``df`` each time it is called.
        """
        subset = df[df['imputer_name'] == self.imputer]
        return hv.Scatter(subset, 'missing_fraction', self.metric).opts(
            title=f'{self.metric} vs Missing Fraction ({self.imputer})',
            xlabel='Missing Fraction (%)',
            ylabel=self.metric,
            size=8,
            alpha=0.7,
            width=800,
            height=400
        )

explorer = ResultsExplorer()

In [28]:
# Render the explorer's parameters as widgets, choosing the widget type
# for each parameter explicitly
controls = pn.Param(
    explorer.param,
    show_name=False,
    widgets=dict(
        imputer=pn.widgets.Select,
        metric=pn.widgets.RadioButtonGroup,
    ),
)

# Fixed-width control column on the left, live plot on the right
dashboard = pn.Row(
    pn.Column(pn.pane.Markdown("### Controls"), controls, width=250),
    pn.Column(explorer.view),
)

dashboard

### Specific experiments

This visualization shows the results from a particular combination of imputation algorithm and artificial gaps. It is useful for understanding how different algorithms compare to each other.

In [35]:
# Load every pickled experiment result into a dictionary keyed by file name.
# NOTE(security): pickle.load can execute arbitrary code, so `results_dir`
# must only contain files written by a trusted run of the experiments.
results_dir = 'results'
data = {}
# os.listdir returns names in arbitrary order; sorting keeps the order of the
# experiment dropdown, and therefore its initial selection, the same on
# every run and every machine.
for fname in sorted(os.listdir(results_dir)):
    if fname.endswith('.pkl'):
        with open(os.path.join(results_dir, fname), 'rb') as f:
            data[fname] = pickle.load(f)

# Widgets
# NOTE(review): `imputer_selector` is a bare param Parameter rather than a
# Panel widget, and nothing visible in this notebook reads it -- confirm
# whether it is still needed.
imputer_selector = param.ObjectSelector(
    default=df['imputer_name'].unique()[0],
    objects=list(df['imputer_name'].unique()),
)
experiment_selector = pn.widgets.Select(name='Experiment', options=list(data))
column_selector = pn.widgets.Select(name='Column', options=[])


def update_columns(event=None):
    """Refill the column dropdown from the selected experiment's 'df' frame.

    ``event`` is the value passed by the watcher and is not used, which lets
    the function also be called directly to set the initial options.
    """
    observed = data[experiment_selector.value]['df']
    column_selector.options = observed.columns.tolist()


# Keep the column list in sync with the chosen experiment, and fill it once
# for the initial selection
experiment_selector.param.watch(update_columns, 'value')
update_columns()

# Plot function
@pn.depends(experiment_selector, column_selector)
def overlay_plot(experiment, column):
    """Overlay the true, imputed, and observed series of one column.

    Parameters
    ----------
    experiment
        Key into the ``data`` dictionary selecting one loaded result entry.
    column
        Name of the column to draw from each frame in that entry.

    Returns
    -------
    An ``hv.Overlay`` holding one curve per frame that contains ``column``
    and, when the entry records a gap for that column, a shaded rectangle
    spanning the gap.
    """
    entry = data[experiment]

    # Frame key -> (legend label, line colour), listed in drawing order
    styles = {
        'df_true': ('True', 'green'),
        'df_imputed': ('Imputed', 'orange'),
        'df': ('Observed', 'gray'),
    }
    curves = [
        hv.Curve(
            (entry[key].index, entry[key][column]),
            'Time',
            'Value',
            label=legend,
        ).opts(color=colour)
        for key, (legend, colour) in styles.items()
        if key in entry and column in entry[key].columns
    ]
    overlay = hv.Overlay(curves)

    # Add gap highlight from internal 'gaps' key
    gaps = entry['gaps'] if 'gaps' in entry else {}
    if column in gaps:
        observed = entry['df']
        # The gap is stored as a pair of positions into the observed index
        first, last = gaps[column]
        left = observed.index[first]
        right = observed.index[last]
        # Vertical extent of the box follows the observed value range
        bottom = observed[column].min()
        top = observed[column].max()
        gap_box = hv.Rectangles([(left, bottom, right, top)]).opts(
            fill_color='lightgray',
            fill_alpha=0.5,
            line_alpha=0,
            tools=[],
        )
        overlay = overlay * gap_box

    return overlay.opts(
        title=f"{column} across Observed, True, and Imputed",
        width=800,
        height=400,
        legend_position='right',
        tools=['hover'],
    )
# Layout: both selectors side by side, the live plot below them, then a
# caption explaining the shaded box (caption spelling corrected:
# "artifical" -> "artificial")
dashboard = pn.Column(
    pn.Row(experiment_selector, column_selector),
    overlay_plot,
    "Gray box indicates an artificial gap."
)

dashboard