In [None]:
import warnings
from argparse import ArgumentParser
import pandas as pd
import pickle

import subprocess
import sys
import os

from datasets import Priv_NAMES as DATASET_NAMES
from datasets import get_private_dataset
from models import get_all_models, get_model
from utils.Server import train
from utils.Toolbox_analysis import create_latent_df, process_latent_df
from utils.Toolbox_visualization import format_latent_dict, load_and_scale_data, combine_latents, plot_latent_heatmap, plot_time_series_and_latents
warnings.simplefilter(action='ignore', category=FutureWarning)

def parse_args():
    """Build the experiment configuration from command-line flags.

    Unknown flags are ignored (``parse_known_args``), so extra arguments that
    this parser does not define do not abort parsing.

    Boolean flags accept ``true/false``, ``yes/no``, ``t/f``, ``y/n`` and
    ``1/0`` (case-insensitive). ``--domains`` accepts a JSON object such as
    ``'{"Graeme": 5}'``.

    Returns:
        argparse.Namespace: the parsed options. When ``parti_num`` is left at
        ``None`` it is replaced by the sum of the ``domains`` values.
    """
    import json
    from argparse import ArgumentTypeError

    def _str2bool(value):
        # ``type=bool`` applies bool() to the raw string, so every non-empty
        # value -- including "False" -- used to be parsed as True.
        if isinstance(value, bool):
            return value
        text = str(value).strip().lower()
        if text in ('true', 't', 'yes', 'y', '1'):
            return True
        # The empty string stays falsy, as it was with ``type=bool``.
        if text in ('false', 'f', 'no', 'n', '0', ''):
            return False
        raise ArgumentTypeError(f'expected a boolean value, got {value!r}')

    def _parse_domains(value):
        # ``type=dict`` cannot turn a command-line string into a mapping.
        if isinstance(value, dict):
            return value
        try:
            parsed = json.loads(value)
        except ValueError as exc:
            raise ArgumentTypeError(f'--domains must be a JSON object ({exc})')
        if not isinstance(parsed, dict):
            raise ArgumentTypeError('--domains must be a JSON object mapping domain name to participant count')
        return parsed

    parser = ArgumentParser(description='You Only Need Me', allow_abbrev=False)
    parser.add_argument('--device_id', type=int, default=0, help='The Device Id for Experiment')
    parser.add_argument('--run_simulation', type=_str2bool, default=True,
                        help='Whether to run the simulation (true/false)')
    parser.add_argument('--detect_anomalies', type=_str2bool, default=False)
    parser.add_argument('--generate_viz', type=_str2bool, default=True,
                        help='Creates and saves interactive visualizations')

    # Communication - epochs
    parser.add_argument('--communication_epoch', type=int, default=3,
                        help='The Communication Epoch in Federated Learning')
    parser.add_argument('--local_epoch', type=int, default=3, help='The Local Epoch for each Participant')

    # Participants info
    parser.add_argument('--parti_num', type=int, default=None, help='The Number for Participants. If "None" will be setted as the sum of values described in --domain')
    parser.add_argument('--online_ratio', type=float, default=1, help='The Ratio for Online Clients')

    # Data parameter
    parser.add_argument('--dataset', type=str, default='fl_leaks', choices=DATASET_NAMES, help='Which scenario to perform experiments on.')
    parser.add_argument('--experiment_id', type=str, default='Pipeline_Full', help='Experiment identifier')
    parser.add_argument('--extra_coments', type=str, default='proto_month', help='Aditional info')
    parser.add_argument('--domains', type=_parse_domains, default={
                                                        'Graeme': 5,
                                                        # 'Balerma': 3,
                                                        },
                        help='Domains and respective number of participants.')

    ## Time series preprocessing
    parser.add_argument('--interval_agg', type=int, default=2 * 60 ** 2,
                        help='Agregation interval (seconds) of time series')
    parser.add_argument('--window_size', type=int, default=84, help='Rolling window length')

    # Model (AER) parameters
    parser.add_argument('--input_size', type=int, default=5, help='Number of sensors')  # TODO: adapt to the dataset
    parser.add_argument('--output_size', type=int, default=5, help='Shape output - dense layer')
    parser.add_argument('--lstm_units', type=int, default=30,
                        help='Number of LSTM units (the latent space will have dimension 2 times bigger')

    # Federated parameters
    parser.add_argument('--model', type=str, default='fpl', help='Federated Model name.', choices=get_all_models()) #fedavg

    parser.add_argument('--structure', type=str, default='homogeneity')

    parser.add_argument('--pri_aug', type=str, default='weak',  # weak strong
                        help='Augmentation for Private Data')
    parser.add_argument('--learning_decay', type=_str2bool, default=False, help='The Option for Learning Rate Decay')
    parser.add_argument('--averaging', type=str, default='weight', help='The Option for averaging strategy')

    parser.add_argument('--infoNCET', type=float, default=0.02, help='The InfoNCE temperature')
    parser.add_argument('--T', type=float, default=0.05, help='The Knowledge distillation temperature')
    parser.add_argument('--weight', type=int, default=1, help='The Weigth for the distillation loss')

    # parse_known_args: tolerate arguments this parser does not declare.
    args, unknown = parser.parse_known_args()

    if args.parti_num is None:
        args.parti_num = sum(args.domains.values())

    return args

# Parse the configuration once; later cells read it through the module-level `args`.
args = parse_args()

In [None]:
# Aggregation-interval token that is part of the results file name.
agg_int = 2

# Run identifier:
# <experiment>_<communication epochs>_<local epochs>_<agg interval>_<window>_<comment>
results_id = '_'.join(
    str(part)
    for part in (
        args.experiment_id,
        args.communication_epoch,
        args.local_epoch,
        agg_int,
        args.window_size,
        args.extra_coments,
    )
)

results_path = "results/results_" + results_id + ".pkl"
latent_path = "results/" + results_id + ".pkl"


# NOTE(review): pickle.load can execute arbitrary code; only open files produced by this project.
with open(latent_path, mode='rb') as f:
    latent_dfs = pickle.load(f)

# Helpers from utils.Toolbox_visualization; the return value of
# format_latent_dict is not used here.
format_latent_dict(latent_dfs)
scaled_df = load_and_scale_data(id_experiment=args.experiment_id, id_network='Graeme')

In [None]:
# Both helpers are imported from utils.Toolbox_visualization; whatever
# plot_latent_heatmap returns is this cell's displayed output.
df_combined = combine_latents(latent_dfs)
plot_latent_heatmap(df_combined, results_id)

In [None]:
# Combined time-series / latent view, called with an explicit batch_temporal of 2.
plot_time_series_and_latents(df_combined, scaled_df, results_id, batch_temporal=2)

In [None]:
import os
import pandas as pd
import altair as alt
from sklearn.preprocessing import MinMaxScaler

# Use Altair's "vegafusion" data transformer for the charts below.
alt.data_transformers.enable("vegafusion")

batch_temporal=2  # initial brush width in days (fed to pd.Timedelta below)
epoch_tgt = args.communication_epoch - 1
# --- Prepare data ---

# Long format: one row per (timestamp, feature), so each feature is one line.
melted_df = scaled_df.melt(id_vars='timestamp', var_name='feature', value_name='value')
melted_df['timestamp'] = pd.to_datetime(melted_df['timestamp'])
melted_df['month'] = melted_df['timestamp'].dt.month

# Month column used by the dropdown filter on the latent chart
df_combined['month'] = df_combined['timestamp'].dt.month
# df['month'] = df['hour'].apply(lambda x: x - 12 if x >= 12 else x)

# Offset values per feature (2 units apart) so the lines are stacked
unique_features = melted_df['feature'].unique()
offset_dict = {feature: i * 2 for i, feature in enumerate(unique_features)}
melted_df['offset_value'] = melted_df.apply(
    lambda row: row['value'] + offset_dict[row['feature']], axis=1
)

# --- Selections ---
start_ts = melted_df['timestamp'].min()
end_ts = start_ts + pd.Timedelta(days=batch_temporal)
date_range = (start_ts.to_pydatetime(), end_ts.to_pydatetime())

brush = alt.selection_interval(encodings=['x'], value={'x': date_range})
latent_selection = alt.selection_point(fields=['timestamp'], value=melted_df['timestamp'].min())

# Fixed: the options were read from `df`, which no earlier cell defines
# (NameError on a fresh kernel). The frame charted below is `df_combined`.
hour_options = [None] + sorted(df_combined['month'].unique().tolist())
hour_selection = alt.selection_point(
    name="Select Month", 
    fields=['month'], 
    bind=alt.binding_select(options=hour_options, name='Month: '),
    value=None
)

# --- Time Series Plot Setup ---
base = alt.Chart(melted_df).mark_line().encode(
    x=alt.X('timestamp:T', title='Time'),
    y=alt.Y('offset_value:Q', title='Offset Scaled Value'),
    color=alt.Color('feature:N', title='Feature')
).properties(width=500)

# Black dots on the samples that match the month dropdown
highlighted_points = alt.Chart(melted_df).mark_circle(color='black', size=5).encode(
    x='timestamp:T',
    y='offset_value:Q',
    tooltip=['feature:N', 'timestamp:T']
).transform_filter(
    hour_selection
)

# Upper panel: x-domain follows the brush drawn on the lower overview panel
upper = (base + highlighted_points).encode(
    x=alt.X('timestamp:T', scale=alt.Scale(domain=brush))
).properties(
    height=200
)

lower = base.properties(height=60).add_params(brush)

time_series_chart = upper & lower

# --- Latent Space Plot with Month Filtering ---

epoch_slider = alt.binding_range(min=df_combined['epoch'].min(),
                                 max=df_combined['epoch'].max(),
                                 step=1,
                                 name='Epoch: ')
epoch_select = alt.selection_point (fields=['epoch'], bind=epoch_slider, value = 0)


# Fixed axis domains (data range padded by 3) so the view does not rescale
# when the filters change.
x_range = [int(df_combined['latent_1'].min()) - 3, int(df_combined['latent_1'].max()) + 3]
y_range = [int(df_combined['latent_2'].min()) - 3, int(df_combined['latent_2'].max()) + 3]

latent = alt.Chart(df_combined).mark_rect().encode(
    x=alt.X('latent_1:Q', bin=alt.Bin(maxbins=75), scale=alt.Scale(domain=x_range), title="Latent x"),
    y=alt.Y('latent_2:Q', bin=alt.Bin(maxbins=75), scale=alt.Scale(domain=y_range), title="Latent y"),
    color=alt.Color('label:N', scale=alt.Scale(scheme='tableau20'), title='Density'),
    tooltip=['label:N', 'timestamp:T', 'month:O']
).properties(
    width=400,
    height=300,
    title="Latent Space (highlight by month)"
).add_params(
    latent_selection,
    hour_selection,
    epoch_select
).transform_filter(
    brush
).transform_filter(
    hour_selection
).transform_filter(
    epoch_select
).interactive()

# --- Final Layout ---
final_plot = latent | time_series_chart

# final_plot.save(f'results/imgs/Series_{results_id}_proto_month.html')
final_plot

In [None]:
# Step 1: Combine all epoch dataframes of the 'Baseline' case into one DataFrame
df_all = []

for epoch, df_epoch in latent_dfs['Baseline'].items():
    for method in ['pca_scl', 'umap_scl']:
        df = df_epoch[method].copy()
        df['timestamp'] = pd.to_datetime(df['timestamp'])  # Ensure timestamp format
        df['epoch'] = epoch  # Add epoch column
        df['method'] = method
        df_all.append(df)

df_combined = pd.concat(df_all, ignore_index=True)
df_combined['month'] = df_combined['timestamp'].dt.month

# Step 2: Create selection slider
epoch_slider = alt.binding_range(min=df_combined['epoch'].min(),
                                 max=df_combined['epoch'].max(),
                                 step=1,
                                 name='Epoch: ')
epoch_select = alt.selection_point (fields=['epoch'], bind=epoch_slider, value = 0)

# This dropdown filters on 'month'. It used to be named and labelled "Hour",
# which mislabelled the widget. The variable names are kept because later
# cells reassign them.
hour_options = [None] + sorted(df_combined['month'].unique().tolist())
hour_selection = alt.selection_point(
    name="Select Month", 
    fields=['month'], 
    bind=alt.binding_select(options=hour_options, name='Month: '),
    value=None
)

method_options = df_combined['method'].unique().tolist()
method_selection = alt.selection_point(
    name="Select method", 
    fields=['method'], 
    bind=alt.binding_select(options=method_options, name='Method: '),
    value=method_options[0]
)

# Selection for color field
color_field_selection = alt.selection_point(
    name='ColorBy',
    fields=['key'],
    bind=alt.binding_select(options=['label', 'month'], name='Color by: '),
    value='label'
)

# Fold label and month into key/value pairs; the 'Color by' dropdown then
# keeps only the rows whose key matches, so 'value' carries the chosen field.
folded = alt.Chart(df_combined).transform_filter(
    epoch_select & hour_selection & method_selection
).transform_fold(
    ['label', 'month'],
    as_=['key', 'value']
).transform_filter(
    color_field_selection
).mark_rect().encode(
    x=alt.X('latent_1:Q', bin=alt.Bin(maxbins=100), title="Latent x"),
    y=alt.Y('latent_2:Q', bin=alt.Bin(maxbins=100), title="Latent y"),
    color=alt.Color('value:N', scale=alt.Scale(scheme='tableau20'), title='Color'),
    tooltip=['label:N', 'month:Q', 'epoch:Q']
).add_params(
    epoch_select,
    hour_selection,
    method_selection,
    color_field_selection
).properties(
    width=350,
    height=350,
    title="Interactive Latent Space Heatmap (Color-coded)"
).interactive()

# NOTE(review): `controls` is built but never displayed in this cell.
controls = alt.Chart(df_combined).mark_point().encode().add_params(
    epoch_select,
    hour_selection,
    method_selection,
    color_field_selection
).properties(
    title="Controls"
)

# folded.save(f'results/imgs/Heat_{results_id}_proto_month.html')

folded

In [None]:
import pandas as pd
import pickle
import numpy as np
import matplotlib.pyplot as plt
from sklearn.preprocessing import MinMaxScaler

import altair as alt
# Use Altair's "vegafusion" data transformer for the charts below.
alt.data_transformers.enable("vegafusion")

from IPython.display import display, HTML

# Inject CSS that positions Vega's widget form (`form.vega-bindings`)
# absolutely at the top-right corner of its container.
display(HTML("""
<style>
form.vega-bindings {
  position: absolute;
  right: 0px;
  top: 0px;
}
</style>
"""))

In [None]:
# --- Experiment selection ---
id_network = 'Graeme'
id_experiment = 'PIPELINE_PATTERNS'

comm_epoch = 8
local_epoch = 3

agg_interval = 2
window_size = 84
results_id = f'{id_network}_{id_experiment}_{agg_interval}_{window_size}'

# Later used as a number of days (pd.Timedelta(days=batch_temporal)).
# Presumably agg_interval is in hours, making this one window in days -- TODO confirm.
batch_temporal = (agg_interval * window_size) / 24

# The network token is now derived from id_network instead of a hard-coded
# 'graeme', so changing id_network above also changes the file that is read.
file_path = (
    f'results/exp_latente_df_{id_network.lower()}_{id_experiment}'
    f'_{comm_epoch}_{local_epoch}_{agg_interval}_{window_size}_proto_month.pkl'
)

# file_path = f'results/exp_latente_df_{id_network.lower()}_{agg_interval}_{window_size}_proto_month_30_old.pkl'
# NOTE(review): pickle.load can execute arbitrary code; only open files produced by this project.
with open(file_path, 'rb') as file:
    latent_dfs = pickle.load(file)

In [None]:
# Prefix every 2-D projection (keys containing 'pca' or 'umap') with the first
# four columns of that epoch's 'latent_space' table and name the two
# projection columns 'latent_1' / 'latent_2'.
for case in latent_dfs.values():
    for epoch in case.values():
        date_cols = epoch['latent_space'].iloc[:, :4].reset_index(drop=True)
        formatted_columns = date_cols.columns.tolist() + ['latent_1', 'latent_2']
        for space in epoch.keys():
            if ('pca' in space) or ('umap' in space):
                projection = epoch[space]
                # Idempotence guard: re-running the cell used to prepend the
                # columns again and fail on the column-count mismatch.
                if projection.columns.tolist() == formatted_columns:
                    continue
                # Align by position: concat(axis=1) joins on the index, so a
                # non-default index on the projection would misalign the rows.
                epoch[space] = pd.concat(
                    [date_cols, projection.reset_index(drop=True)], axis=1
                )
                epoch[space].columns = formatted_columns

In [None]:
# Load the baseline CSV for client A
client_A = pd.read_csv(f'datasets/leaks/{id_network}/{id_experiment}/ClientA_Baseline.csv')

# Timestamps are read as epoch seconds (unit='s')
timestamps = pd.to_datetime(client_A['timestamp'], unit='s')

# Scale every feature to [-1, 1]
scaler = MinMaxScaler(feature_range=(-1, 1))

# Select the features by name, not by position, so the result does not
# depend on 'timestamp' being the first CSV column.
feature_columns = client_A.columns.drop('timestamp')
scaled_values = scaler.fit_transform(client_A[feature_columns])

# Back to a DataFrame with the original feature names
scaled_df = pd.DataFrame(scaled_values, columns=feature_columns)

# Add timestamp as the first column
scaled_df.insert(0, 'timestamp', timestamps)

# Preview the first rows
scaled_df.head(6)

In [None]:
# Step 1: stack every (epoch, projection method) table of the 'Baseline' case
df_all = []

for epoch, df_epoch in latent_dfs['Baseline'].items():
    for method in ('pca_scl', 'umap_scl'):
        # assign() works on a copy, so the stored table is left untouched
        df = df_epoch[method].assign(
            timestamp=lambda frame: pd.to_datetime(frame['timestamp']),
            epoch=epoch,
            method=method,
        )
        df_all.append(df)

df_combined = pd.concat(df_all, ignore_index=True)
# Values of 12 and above are reduced by 12; lower values are kept as they are
df_combined['hour_filter'] = df_combined['hour'].map(
    lambda hour: hour if hour < 12 else hour - 12
)
df_combined['month'] = df_combined['timestamp'].dt.month

# Step 2: widgets
epoch_slider = alt.binding_range(
    name='Epoch: ',
    step=1,
    min=df_combined['epoch'].min(),
    max=df_combined['epoch'].max(),
)
epoch_select = alt.selection_point(value=0, bind=epoch_slider, fields=['epoch'])

# A leading None adds an empty entry to the dropdown
hour_options = [None, *sorted(df_combined['hour_filter'].unique().tolist())]
hour_dropdown = alt.binding_select(name='Hour: ', options=hour_options)
hour_selection = alt.selection_point(
    value=None, bind=hour_dropdown, fields=['hour_filter'], name="Select Hour"
)

method_options = df_combined['method'].unique().tolist()
method_dropdown = alt.binding_select(name='Method: ', options=method_options)
method_selection = alt.selection_point(
    value=method_options[0], bind=method_dropdown, fields=['method'], name="Select method"
)

# Which folded field drives the colour
color_dropdown = alt.binding_select(name='Color by: ', options=['label', 'hour_filter', 'month'])
color_field_selection = alt.selection_point(
    value='label', bind=color_dropdown, fields=['key'], name='ColorBy'
)

# Step 3: chart. Filter by the widgets, fold the candidate colour fields into
# key/value rows, then keep only the rows of the field chosen in 'Color by'.
filtered_and_folded = (
    alt.Chart(df_combined)
    .transform_filter(epoch_select & hour_selection & method_selection)
    .transform_fold(['label', 'hour_filter', 'month'], as_=['key', 'value'])
    .transform_filter(color_field_selection)
)

folded = (
    filtered_and_folded
    .mark_rect()
    .encode(
        x=alt.X('latent_1:Q', title="Latent x", bin=alt.Bin(maxbins=100)),
        y=alt.Y('latent_2:Q', title="Latent y", bin=alt.Bin(maxbins=100)),
        color=alt.Color('value:N', title='Color', scale=alt.Scale(scheme='tableau20')),
        tooltip=['label:N', 'hour_filter:Q', 'epoch:Q'],
    )
    .add_params(epoch_select, hour_selection, method_selection, color_field_selection)
    .properties(
        title="Interactive Latent Space Heatmap (Color-coded)",
        width=350,
        height=350,
    )
    .interactive()
)

# Mark-less chart that only carries the widgets (not displayed by this cell)
controls = (
    alt.Chart(df_combined)
    .mark_point()
    .encode()
    .add_params(epoch_select, hour_selection, method_selection, color_field_selection)
    .properties(title="Controls")
)

# folded.save(f'results/imgs/Heat_{results_id}_proto_hour.html')

folded

In [None]:
def plot_latent_heatmap(df_combined, results_id):
    """Build the interactive binned heatmap of the 2-D latent space.

    The chart is returned, not written to disk (the save call is commented out).

    :param df_combined: frame read through the columns 'epoch', 'hour_filter',
        'month', 'method', 'label', 'latent_1' and 'latent_2'; the tooltip
        additionally references 'hour'.
    :param results_id: only referenced by the commented-out save path.
    :return: the Altair chart with epoch slider and hour / month / method /
        colour-field dropdowns attached.
    """

    def _dropdown(field, options, label, default):
        # Point selection on one field, bound to an HTML <select> widget.
        return alt.selection_point(
            fields=[field],
            bind=alt.binding_select(options=options, name=label),
            value=default,
        )

    def _choices_with_blank(column):
        # Sorted distinct values, preceded by None as an empty entry.
        return [None] + sorted(df_combined[column].unique().tolist())

    epochs = df_combined['epoch']
    epoch_select = alt.selection_point(
        fields=['epoch'],
        bind=alt.binding_range(min=epochs.min(), max=epochs.max(), step=1, name='Epoch: '),
        value=0,
    )

    hour_selection = _dropdown('hour_filter', _choices_with_blank('hour_filter'), 'Hour: ', None)
    month_selection = _dropdown('month', _choices_with_blank('month'), 'Month: ', None)

    method_options = df_combined['method'].unique().tolist()
    method_selection = _dropdown('method', method_options, 'Method: ', method_options[0])

    color_field_selection = _dropdown('key', ['label', 'hour_filter', 'month'], 'Color by: ', 'label')

    # Filter by the widgets, fold the candidate colour fields into key/value
    # rows and keep only the rows of the field chosen in 'Color by'.
    prepared = (
        alt.Chart(df_combined)
        .transform_filter(epoch_select & month_selection & hour_selection & method_selection)
        .transform_fold(['label', 'hour_filter', 'month'], as_=['key', 'value'])
        .transform_filter(color_field_selection)
    )

    heatmap = prepared.mark_rect().encode(
        x=alt.X('latent_1:Q', title="Latent x", bin=alt.Bin(maxbins=100)),
        y=alt.Y('latent_2:Q', title="Latent y", bin=alt.Bin(maxbins=100)),
        color=alt.Color('value:N', title='Color', scale=alt.Scale(scheme='tableau20')),
        tooltip=['label:N', 'month:Q', 'hour:Q', 'epoch:Q'],
    )
    heatmap = heatmap.add_params(
        epoch_select, month_selection, hour_selection, method_selection, color_field_selection
    )
    heatmap = heatmap.properties(
        title="Interactive Latent Space Heatmap (Color-coded)",
        width=350,
        height=350,
    )
    folded = heatmap.interactive()

    # folded.save(f'results/imgs/Heat_{results_id}_proto.html')
    return folded

In [None]:
# combine_latents is imported from utils.Toolbox_visualization; the two
# derived columns below are the ones plot_latent_heatmap filters on.
df_combined = combine_latents(latent_dfs)
# Values of 12 and above are reduced by 12; lower values are kept as they are
df_combined['hour_filter'] = df_combined['hour'].map(
    lambda hour: hour if hour < 12 else hour - 12
)
df_combined['month'] = df_combined['timestamp'].dt.month

plot = plot_latent_heatmap(df_combined=df_combined, results_id=results_id)

plot

In [None]:
def plot_time_series_and_latents(df_combined, scaled_df, results_id, batch_temporal=14):
    """Build the linked view of the stacked time series and the latent heatmap.

    The chart is returned; nothing is written to disk.

    :param df_combined: latent frame read through the columns 'epoch',
        'hour_filter', 'month', 'method', 'label', 'latent_1' and 'latent_2';
        the tooltip additionally references 'hour'.
    :param scaled_df: wide frame with a 'timestamp' column; every other column
        is drawn as one line.
    :param results_id: unused; kept so existing calls keep working.
    :param batch_temporal: initial width of the time brush, in days.
    :return: Altair chart ``latent | (zoomed series & overview series)``.
    """
    melted_df = scaled_df.melt(id_vars='timestamp', var_name='feature', value_name='value')
    melted_df['timestamp'] = pd.to_datetime(melted_df['timestamp'])
    melted_df['month'] = melted_df['timestamp'].dt.month

    melted_df['hour'] = melted_df['timestamp'].dt.hour
    # dt.hour is 0..23, so "subtract 12 when >= 12" is the same as modulo 12.
    melted_df['hour_filter'] = melted_df['hour'] % 12

    # Offset each feature by 2 units so the lines are stacked. This is done
    # column-wise: a row-wise apply(axis=1) is slow and returns an empty
    # DataFrame (breaking the assignment) when there are no rows.
    unique_features = melted_df['feature'].unique()
    offset_dict = {feature: i * 2 for i, feature in enumerate(unique_features)}
    melted_df['offset_value'] = melted_df['value'] + melted_df['feature'].map(offset_dict)

    # --- Selections ---
    start_ts = melted_df['timestamp'].min()
    end_ts = start_ts + pd.Timedelta(days=batch_temporal)
    brush = alt.selection_interval(encodings=['x'], value={'x': (start_ts, end_ts)})
    latent_selection = alt.selection_point(fields=['timestamp'], value=melted_df['timestamp'].min())

    # A leading None adds an empty entry to each dropdown
    hour_options = [None] + sorted(df_combined['hour_filter'].unique().tolist())
    hour_selection = alt.selection_point(
        name="Select Hour:",
        fields=['hour_filter'],
        bind=alt.binding_select(options=hour_options, name='Hour: '),
        value=None)

    month_options = [None] + sorted(df_combined['month'].unique().tolist())
    month_selection = alt.selection_point(
        name="Select Month:",
        fields=['month'],
        bind=alt.binding_select(options=month_options, name='Month: '),
        value=None)

    method_options = df_combined['method'].unique().tolist()
    method_selection = alt.selection_point(fields=['method'], bind=alt.binding_select(options=method_options, name='Method: '), value=method_options[0])

    # --- Time Series Chart ---
    base = alt.Chart(melted_df).mark_line().encode(
        x='timestamp:T',
        y='offset_value:Q',
        color='feature:N'
    ).properties(width=500)

    # Black dots on the samples that match the month / hour dropdowns
    points = alt.Chart(melted_df).mark_circle(color='black', size=5).encode(
        x='timestamp:T',
        y='offset_value:Q',
        tooltip=['feature:N', 'timestamp:T']
    ).transform_filter(month_selection & hour_selection)

    # Upper panel: x-domain follows the brush drawn on the lower overview panel
    upper = (base + points).encode(
        x=alt.X('timestamp:T', scale=alt.Scale(domain=brush))
    ).properties(height=200)

    lower = base.properties(height=60).add_params(brush)

    time_series_chart = upper & lower

    # --- Latent Space Chart ---
    epoch_slider = alt.binding_range(min=df_combined['epoch'].min(),
                                     max=df_combined['epoch'].max(),
                                     step=1,
                                     name='Epoch: ')
    epoch_select = alt.selection_point(fields=['epoch'], bind=epoch_slider, value=0)

    color_field_selection = alt.selection_point(fields=['key'], bind=alt.binding_select(options=['label', 'hour_filter', 'month'], name='Color by: '), value='label')

    # Filter by brush and widgets, fold the candidate colour fields into
    # key/value rows, then keep only the rows of the field chosen in 'Color by'.
    latent = alt.Chart(df_combined).transform_filter(
        brush & epoch_select & month_selection & hour_selection & method_selection
    ).transform_fold(
        ['label', 'hour_filter', 'month'], as_=['key', 'value']
    ).transform_filter(
        color_field_selection
    ).mark_rect().encode(
        x=alt.X('latent_1:Q', bin=alt.Bin(maxbins=100), title="Latent x"),
        y=alt.Y('latent_2:Q', bin=alt.Bin(maxbins=100), title="Latent y"),
        color=alt.Color('value:N', scale=alt.Scale(scheme='tableau20'), title='Color'),
        tooltip=['label:N', 'month:Q', 'hour:Q', 'epoch:Q']
    ).add_params(
        latent_selection, epoch_select, month_selection, hour_selection, method_selection, color_field_selection
    ).properties(
        width=350,
        height=350,
        title="Interactive Latent Space Heatmap (Color-coded)"
    ).interactive()

    # --- Layout ---
    final_plot = latent | time_series_chart
    return final_plot


# batch_temporal is not passed, so the function's default of 14 applies here.
plot = plot_time_series_and_latents(df_combined, scaled_df, results_id)

plot

In [None]:
# Step 1: Combine all epoch dataframes of the 'Baseline' case into one DataFrame
df_all = []

for epoch, df_epoch in latent_dfs['Baseline'].items():
    for method in ['pca_scl', 'umap_scl']:
        df = df_epoch[method].copy()
        df['timestamp'] = pd.to_datetime(df['timestamp'])  # Ensure timestamp format
        df['epoch'] = epoch  # Add epoch column
        df['method'] = method
        df_all.append(df)

df_combined = pd.concat(df_all, ignore_index=True)
df_combined['month'] = df_combined['timestamp'].dt.month

# Step 2: Create selection slider
epoch_slider = alt.binding_range(min=df_combined['epoch'].min(),
                                 max=df_combined['epoch'].max(),
                                 step=1,
                                 name='Epoch: ')
epoch_select = alt.selection_point (fields=['epoch'], bind=epoch_slider, value = 0)

# This dropdown filters on 'month'. It used to be named and labelled "Hour",
# which mislabelled the widget in the saved HTML. The variable names are kept
# because later cells reassign them.
hour_options = [None] + sorted(df_combined['month'].unique().tolist())
hour_selection = alt.selection_point(
    name="Select Month", 
    fields=['month'], 
    bind=alt.binding_select(options=hour_options, name='Month: '),
    value=None
)

method_options = df_combined['method'].unique().tolist()
method_selection = alt.selection_point(
    name="Select method", 
    fields=['method'], 
    bind=alt.binding_select(options=method_options, name='Method: '),
    value=method_options[0]
)

# Selection for color field
color_field_selection = alt.selection_point(
    name='ColorBy',
    fields=['key'],
    bind=alt.binding_select(options=['label', 'month'], name='Color by: '),
    value='label'
)

# Fold label and month into key/value pairs; the 'Color by' dropdown then
# keeps only the rows whose key matches, so 'value' carries the chosen field.
folded = alt.Chart(df_combined).transform_filter(
    epoch_select & hour_selection & method_selection
).transform_fold(
    ['label', 'month'],
    as_=['key', 'value']
).transform_filter(
    color_field_selection
).mark_rect().encode(
    x=alt.X('latent_1:Q', bin=alt.Bin(maxbins=100), title="Latent x"),
    y=alt.Y('latent_2:Q', bin=alt.Bin(maxbins=100), title="Latent y"),
    color=alt.Color('value:N', scale=alt.Scale(scheme='tableau20'), title='Color'),
    tooltip=['label:N', 'month:Q', 'epoch:Q']
).add_params(
    epoch_select,
    hour_selection,
    method_selection,
    color_field_selection
).properties(
    width=350,
    height=350,
    title="Interactive Latent Space Heatmap (Color-coded)"
).interactive()

# NOTE(review): `controls` is built but never displayed or saved in this cell.
controls = alt.Chart(df_combined).mark_point().encode().add_params(
    epoch_select,
    hour_selection,
    method_selection,
    color_field_selection
).properties(
    title="Controls"
)

# Create the output folder first; save() fails if it does not exist.
os.makedirs('results/imgs', exist_ok=True)
folded.save(f'results/imgs/Heat_{results_id}_proto_month.html')

# folded

# folded

In [None]:
# --- Prepare data ---
# Long format: one row per (timestamp, feature), so each feature is one line.
melted_df = scaled_df.melt(id_vars='timestamp', var_name='feature', value_name='value')
melted_df['timestamp'] = pd.to_datetime(melted_df['timestamp'])
melted_df['hour'] = melted_df['timestamp'].dt.hour
melted_df['hour_filter'] = melted_df['hour'].apply(lambda x: x - 12 if x >= 12 else x)

# Filter to a specific epoch. If the requested epoch is absent, the filter
# returns an empty frame and int(nan) below raises, so fall back to the last
# available epoch and say so.
target_epoch = 8
if target_epoch not in df_combined['epoch'].values:
    fallback_epoch = df_combined['epoch'].max()
    warnings.warn(f'epoch {target_epoch} not found in df_combined; using epoch {fallback_epoch} instead')
    target_epoch = fallback_epoch
df = df_combined[df_combined['epoch'] == target_epoch].copy()
df['hour_filter'] = df['hour'].apply(lambda x: x - 12 if x >= 12 else x)

# Offset values per feature (2 units apart) so the lines are stacked
unique_features = melted_df['feature'].unique()
offset_dict = {feature: i * 2 for i, feature in enumerate(unique_features)}
melted_df['offset_value'] = melted_df.apply(
    lambda row: row['value'] + offset_dict[row['feature']], axis=1
)

# --- Selections ---
start_ts = melted_df['timestamp'].min()
end_ts = start_ts + pd.Timedelta(days=batch_temporal)
date_range = (start_ts.to_pydatetime(), end_ts.to_pydatetime())

brush = alt.selection_interval(encodings=['x'], value={'x': date_range})
latent_selection = alt.selection_point(fields=['timestamp'], value=melted_df['timestamp'].min())

# A leading None adds an empty entry to the dropdown
hour_options = [None] + sorted(df['hour_filter'].unique().tolist())
hour_selection = alt.selection_point(
    name="Select Hour", 
    fields=['hour_filter'], 
    bind=alt.binding_select(options=hour_options, name='Hour: '),
    value=None
)

# --- Time Series Plot Setup ---
base = alt.Chart(melted_df).mark_line().encode(
    x=alt.X('timestamp:T', title='Time'),
    y=alt.Y('offset_value:Q', title='Offset Scaled Value'),
    color=alt.Color('feature:N', title='Feature')
).properties(width=500)

# Black dots on the samples that match the hour dropdown
highlighted_points = alt.Chart(melted_df).mark_circle(color='black', size=5).encode(
    x='timestamp:T',
    y='offset_value:Q',
    tooltip=['feature:N', 'timestamp:T']
).transform_filter(
    hour_selection
)

# Upper panel: x-domain follows the brush drawn on the lower overview panel
upper = (base + highlighted_points).encode(
    x=alt.X('timestamp:T', scale=alt.Scale(domain=brush))
).properties(
    height=200
)

lower = base.properties(height=60).add_params(brush)

time_series_chart = upper & lower

# --- Latent Space Plot with Hour Filtering ---
# Fixed axis domains (data range padded by 3) so the view does not rescale
# when the filters change.
x_range = [int(df['latent_1'].min()) - 3, int(df['latent_1'].max()) + 3]
y_range = [int(df['latent_2'].min()) - 3, int(df['latent_2'].max()) + 3]

latent = alt.Chart(df).mark_rect().encode(
    x=alt.X('latent_1:Q', bin=alt.Bin(maxbins=50), scale=alt.Scale(domain=x_range), title="Latent x"),
    y=alt.Y('latent_2:Q', bin=alt.Bin(maxbins=50), scale=alt.Scale(domain=y_range), title="Latent y"),
    color=alt.Color('label:N', scale=alt.Scale(scheme='tableau20'), title='Density'),
    tooltip=['label:N', 'timestamp:T', 'hour:O']
).properties(
    width=400,
    height=300,
    title="Latent Space (highlight by hour)"
).add_params(
    latent_selection,
    hour_selection
).transform_filter(
    brush
).transform_filter(
    hour_selection
).interactive()

# --- Final Layout ---
final_plot = latent | time_series_chart

# Create the output folder first; save() fails if it does not exist.
os.makedirs('results/imgs', exist_ok=True)
final_plot.save(f'results/imgs/Series_{results_id}.html')
final_plot

In [None]:
# --- Prepare data ---
# Long format: one row per (timestamp, feature), so each feature is one line.
melted_df = scaled_df.melt(id_vars='timestamp', var_name='feature', value_name='value')
melted_df['timestamp'] = pd.to_datetime(melted_df['timestamp'])
melted_df['month'] = melted_df['timestamp'].dt.month

# Filter to a specific epoch. If the requested epoch is absent, the filter
# returns an empty frame and int(nan) below raises, so fall back to the last
# available epoch and say so.
df_combined['month'] = df_combined['timestamp'].dt.month
target_epoch = 7
if target_epoch not in df_combined['epoch'].values:
    fallback_epoch = df_combined['epoch'].max()
    warnings.warn(f'epoch {target_epoch} not found in df_combined; using epoch {fallback_epoch} instead')
    target_epoch = fallback_epoch
df = df_combined[df_combined['epoch'] == target_epoch].copy()
# df['month'] = df['hour'].apply(lambda x: x - 12 if x >= 12 else x)

# Offset values per feature (2 units apart) so the lines are stacked
unique_features = melted_df['feature'].unique()
offset_dict = {feature: i * 2 for i, feature in enumerate(unique_features)}
melted_df['offset_value'] = melted_df.apply(
    lambda row: row['value'] + offset_dict[row['feature']], axis=1
)

# --- Selections ---
start_ts = melted_df['timestamp'].min()
end_ts = start_ts + pd.Timedelta(days=batch_temporal)
date_range = (start_ts.to_pydatetime(), end_ts.to_pydatetime())

brush = alt.selection_interval(encodings=['x'], value={'x': date_range})
latent_selection = alt.selection_point(fields=['timestamp'], value=melted_df['timestamp'].min())

# The hour_* names are kept from the hour-based version of this cell; the
# dropdown itself selects a month.
hour_options = [None] + sorted(df['month'].unique().tolist())
hour_selection = alt.selection_point(
    name="Select Month", 
    fields=['month'], 
    bind=alt.binding_select(options=hour_options, name='Month: '),
    value=None
)

# --- Time Series Plot Setup ---
base = alt.Chart(melted_df).mark_line().encode(
    x=alt.X('timestamp:T', title='Time'),
    y=alt.Y('offset_value:Q', title='Offset Scaled Value'),
    color=alt.Color('feature:N', title='Feature')
).properties(width=500)

# Black dots on the samples that match the month dropdown
highlighted_points = alt.Chart(melted_df).mark_circle(color='black', size=5).encode(
    x='timestamp:T',
    y='offset_value:Q',
    tooltip=['feature:N', 'timestamp:T']
).transform_filter(
    hour_selection
)

# Upper panel: x-domain follows the brush drawn on the lower overview panel
upper = (base + highlighted_points).encode(
    x=alt.X('timestamp:T', scale=alt.Scale(domain=brush))
).properties(
    height=200
)

lower = base.properties(height=60).add_params(brush)

time_series_chart = upper & lower

# --- Latent Space Plot with Month Filtering ---
# Fixed axis domains (data range padded by 3) so the view does not rescale
# when the filters change.
x_range = [int(df['latent_1'].min()) - 3, int(df['latent_1'].max()) + 3]
y_range = [int(df['latent_2'].min()) - 3, int(df['latent_2'].max()) + 3]

latent = alt.Chart(df).mark_rect().encode(
    x=alt.X('latent_1:Q', bin=alt.Bin(maxbins=50), scale=alt.Scale(domain=x_range), title="Latent x"),
    y=alt.Y('latent_2:Q', bin=alt.Bin(maxbins=50), scale=alt.Scale(domain=y_range), title="Latent y"),
    color=alt.Color('label:N', scale=alt.Scale(scheme='tableau20'), title='Density'),
    tooltip=['label:N', 'timestamp:T', 'month:O']
).properties(
    width=400,
    height=300,
    title="Latent Space (highlight by month)"
).add_params(
    latent_selection,
    hour_selection
).transform_filter(
    brush
).transform_filter(
    hour_selection
).interactive()

# --- Final Layout ---
final_plot = latent | time_series_chart

# Create the output folder first; save() fails if it does not exist.
os.makedirs('results/imgs', exist_ok=True)
final_plot.save(f'results/imgs/Series_{results_id}_proto_month.html')
# final_plot

In [None]:
melted_df = scaled_df.melt(id_vars='timestamp', var_name='feature', value_name='value')
# Convert before using the .dt accessors (this used to happen after them).
melted_df['timestamp'] = pd.to_datetime(melted_df['timestamp'])
melted_df['hour'] = melted_df['timestamp'].dt.hour
melted_df['day'] = melted_df['timestamp'].dt.day
# Calendar date, used to cut the highlighted segments. Day-of-month repeats
# every month, which merged rows of different months into one drawn line.
melted_df['date'] = melted_df['timestamp'].dt.normalize()

melted_df['hour_bin'] = (melted_df['hour'] // agg_interval) * agg_interval

# Assign an offset per feature
unique_features = melted_df['feature'].unique()
offset_dict = {feature: i * 2 for i, feature in enumerate(unique_features)}  # 2 is the vertical spacing

# Apply the offset
melted_df['offset_value'] = melted_df.apply(
    lambda row: row['value'] + offset_dict[row['feature']], axis=1
)

# Bins and colors do not depend on the feature, so define them once. They are
# then also defined for the title when there is no feature to draw.
bins_to_plot = list(range(0, 12, agg_interval))
colors = ['red', 'blue', 'green', 'orange', 'purple', 'pink']  # zip() below stops at 6 bins
window_hours = 4  # length of each highlighted block, in hours
max_rows_per_feature = 450  # only the first rows of each feature are drawn

# Set up plot
plt.figure(figsize=(15, 6))

for feat in unique_features:
    plot_df = (
        melted_df[melted_df['feature'] == feat]
        .iloc[:max_rows_per_feature]
        .set_index('timestamp')
    )

    # Plot full series in gray
    plt.plot(plot_df.index, plot_df['offset_value'], color='lightgray', label='Full Series')

    dates = plot_df['date'].unique()

    for bin_value, color in zip(bins_to_plot, colors):
        # Each bin highlights two blocks in the same color: one starting at
        # bin_value and one starting 12 hours later. The second block is not
        # wrapped at 24, so hours past 23 match no rows.
        first_block = [(bin_value + i) % 24 for i in range(window_hours)]
        second_block = [bin_value + 12 + i for i in range(window_hours)]

        for date in dates:
            # Label only the first date so a legend would list each bin once
            segment_label = f'Bin {bin_value}' if date == dates[0] else ""
            for hour_range in (first_block, second_block):
                mask = (plot_df['date'] == date) & (plot_df['hour'].isin(hour_range))
                segment = plot_df[mask]
                plt.plot(segment.index, segment['offset_value'], color=color, label=segment_label)

# Formatting
plt.title(f'Series Highlighted for Hour Bins: {bins_to_plot} (4-hour blocks)')
plt.xlabel('Time')
plt.ylabel('Offset Value')
# plt.legend(loc='upper left', fontsize=8)
plt.grid(True)
plt.tight_layout()
plt.show()

In [None]:
import altair as alt
import pandas as pd

# --- Prepare data ---
# Long format: one row per (timestamp, feature) pair.
melted_df = scaled_df.melt(id_vars='timestamp', var_name='feature', value_name='value')
melted_df['timestamp'] = pd.to_datetime(melted_df['timestamp'])

# Latent points of a single epoch feed the heatmap.
# NOTE(review): `df_combined` is assigned in the cell that follows this one in
# the notebook, so that cell has to be executed first — confirm the intended order.
LATENT_EPOCH = 14
df = df_combined[df_combined['epoch'] == LATENT_EPOCH]
if df.empty:
    # Without this guard the axis-range computation further down fails with an
    # opaque "cannot convert float NaN to integer".
    raise ValueError(f"df_combined contains no rows for epoch {LATENT_EPOCH}")

# Offset by feature: shift every feature by a multiple of 2 so the lines are
# stacked instead of overlapping. Vectorized lookup instead of a row-wise apply.
unique_features = melted_df['feature'].unique()
offset_dict = {feature: i * 2 for i, feature in enumerate(unique_features)}
melted_df['offset_value'] = melted_df['value'] + melted_df['feature'].map(offset_dict)

# --- Selection setup ---
# The brush initially spans `batch_temporal` days from the first timestamp.
start_ts = melted_df['timestamp'].min()
end_ts = start_ts + pd.Timedelta(days=batch_temporal)
date_range = (start_ts.to_pydatetime(), end_ts.to_pydatetime())

brush = alt.selection_interval(encodings=['x'], value={'x': date_range})
# Point selection keyed on timestamp, initialised to the first timestamp.
latent_selection = alt.selection_point(fields=['timestamp'], value=start_ts)

# --- Base line chart ---
base = alt.Chart(melted_df).mark_line().encode(
    x=alt.X('timestamp:T', title='Time'),
    y=alt.Y('offset_value:Q', title='Offset Scaled Value'),
    color=alt.Color('feature:N', title='Feature')
).properties(width=500)

# --- Points from latent selection (overlaid) ---
# Only rows matching the timestamp picked in the latent chart are drawn.
highlighted_points = alt.Chart(melted_df).mark_circle(color='black', size=50).encode(
    x='timestamp:T',
    y='offset_value:Q',
    tooltip=['feature:N', 'timestamp:T']
).transform_filter(
    latent_selection
)

# --- Upper (zoomed with brush, highlights added) ---
# The x domain follows the brush drawn on the overview chart below.
upper = (base + highlighted_points).encode(
    x=alt.X('timestamp:T', scale=alt.Scale(domain=brush))
).properties(height=200)

# --- Lower (overview with brush) ---
lower = base.properties(height=60).add_params(brush)

# --- Combine upper and lower ---
time_series_chart = upper & lower

# --- Latent heatmap with selection ---
# Axis domains padded by 3 units around the integer-truncated data extent.
x_range = [int(df['latent_1'].min()) - 3, int(df['latent_1'].max()) + 3]
y_range = [int(df['latent_2'].min()) - 3, int(df['latent_2'].max()) + 3]

latent = alt.Chart(df).mark_rect().encode(
    x=alt.X('latent_1:Q', bin=alt.Bin(maxbins=150), scale=alt.Scale(domain=x_range), title="Latent x"),
    y=alt.Y('latent_2:Q', bin=alt.Bin(maxbins=150), scale=alt.Scale(domain=y_range), title="Latent y"),
    # The colour channel encodes the nominal `label` column, so the legend is
    # titled accordingly (it used to read 'Density').
    color=alt.Color('label:N', scale=alt.Scale(scheme='spectral'), title='Label'),
    tooltip=['label:N', 'timestamp:T', 'hour']
).properties(
    width=400,
    height=300,
    title="Latent Space (select region to highlight time)"
).add_params(
    latent_selection
).transform_filter(
    # Restrict the latent points with the brush selection of the overview chart.
    brush
).interactive()

# --- Final layout ---
latent | time_series_chart

In [None]:
# Step 1: stack every (epoch, projection method) frame into one long DataFrame
df_all = []

for epoch, df_epoch in latent_dfs['Baseline'].items():
    for method in ['pca_scl', 'umap_scl']:
        # `assign` returns a new frame: timestamps parsed, epoch and method tagged,
        # while the frame stored in `latent_dfs` is left untouched.
        df = df_epoch[method].assign(
            timestamp=pd.to_datetime(df_epoch[method]['timestamp']),
            epoch=epoch,
            method=method,
        )
        df_all.append(df)

df_combined = pd.concat(df_all, ignore_index=True)

# Fold the afternoon hours onto the morning ones (hours >= 12 are shifted down by 12).
hours = df_combined['hour']
df_combined['hour_filter'] = hours.mask(hours >= 12, hours - 12)

# Step 2: interactive controls
# Slider spanning the available epochs.
epoch_slider = alt.binding_range(
    min=df_combined['epoch'].min(),
    max=df_combined['epoch'].max(),
    step=1,
    name='Epoch: ',
)
epoch_select = alt.selection_point(fields=['epoch'], bind=epoch_slider, value=0)

# Dropdown over the folded hours; the leading None entry is the "no selection" choice.
hour_options = [None] + sorted(df_combined['hour_filter'].unique().tolist())
hour_dropdown = alt.binding_select(options=hour_options, name='Hour: ')
hour_selection = alt.selection_point(
    name="Select Hour",
    fields=['hour_filter'],
    bind=hour_dropdown,
    value=None,
)

# Dropdown over the projection methods, starting on the first one.
method_options = df_combined['method'].unique().tolist()
method_dropdown = alt.binding_select(options=method_options, name='Method: ')
method_selection = alt.selection_point(
    name="Select method",
    fields=['method'],
    bind=method_dropdown,
    value=method_options[0],
)

# Step 3: binned latent-space heatmap filtered by the three controls
heatmap = alt.Chart(df_combined).mark_rect().encode(
    x=alt.X('latent_1:Q', bin=alt.Bin(maxbins=100), title="Latent x"),
    y=alt.Y('latent_2:Q', bin=alt.Bin(maxbins=100), title="Latent y"),
    color=alt.Color('label:N', scale=alt.Scale(scheme='spectral'), title='Label'),
    tooltip=['label:N', 'epoch:Q', 'hour'],
).add_params(
    epoch_select, hour_selection, method_selection
).transform_filter(
    epoch_select, hour_selection, method_selection
).properties(
    width=350,
    height=350,
    title="Interactive Latent Space Heatmap by Epoch",
).interactive()

heatmap

In [None]:
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from sklearn.preprocessing import MinMaxScaler

# NOTE: pickle.load can execute arbitrary code — only open result files that
# were produced by this project.
with open(f'results/exp_latente_results_graeme_{agg_interval}_{window_size}_proto.pkl', 'rb') as file:
    results = pickle.load(file)

# Transformed (aggregated) series taken from the first 'dl' entry of the Baseline run.
# NOTE(review): presumably a 2-D array of shape (time steps, features) — confirm.
transformed = results['Baseline']['dl'][0]['X'][0]

# Determine aggregation window
# NOTE(review): hard-coded although the file name is parameterized by
# `agg_interval`; presumably the two should agree — confirm.
agg_window = 3
lenght_raw = transformed.shape[0] * agg_window

# The target range has to be passed to the constructor. The second positional
# argument of `fit_transform` is `y`, which MinMaxScaler ignores, so passing
# (-1, 1) there silently left the data scaled to the default [0, 1].
scaler = MinMaxScaler(feature_range=(-1, 1))
raw_scaled = scaler.fit_transform(client_A.iloc[:lenght_raw, 1:])

# Time axes: raw samples are consecutive steps; every transformed sample is
# placed at the centre of the raw window it aggregates.
time_raw = np.arange(raw_scaled.shape[0])
time_transformed = np.arange(transformed.shape[0]) * agg_window + agg_window // 2

# Plot
plt.figure(figsize=(10, 4))
offset = 2  # vertical offset to separate each feature
n_features = raw_scaled.shape[1]

for i in range(n_features):
    # Raw series in gray (labelled once), transformed series on top of it.
    plt.plot(time_raw, raw_scaled[:, i] + i * offset, color='lightgray', label=f'Raw Feature {i+1}' if i == 0 else "")
    plt.plot(time_transformed, transformed[:, i] + i * offset, label=f'Transformed Feature {i+1}')

plt.title('Raw vs Transformed (Aggregated) Multivariate Time Series')
plt.xlabel('Time Step')
# One tick per feature, placed at that feature's vertical offset.
plt.yticks([i * offset for i in range(n_features)],
           [f'Feature {i+1}' for i in range(n_features)])
plt.legend(bbox_to_anchor=(1, 1))
plt.grid(True)
plt.tight_layout()
plt.show()
