In [None]:
import pandas as pd
import pickle
import numpy as np
import matplotlib.pyplot as plt

In [None]:
# Experiment configuration: both values are part of the results file name.
agg_interval, window_size = 2, 84

# Length of one window; used further down as a number of days
# (presumably agg_interval is in hours, hence the division by 24 -- confirm).
batch_temporal = agg_interval * window_size / 24

# Read the pickled latent-space results for this configuration.
# NOTE: pickle.load executes arbitrary code; only open files you trust.
results_path = f'results/exp_latente_df_graeme_{agg_interval}_{window_size}.pkl'
with open(results_path, 'rb') as file:
    latent_dfs = pickle.load(file)

In [None]:
# Prepend the first four columns of each epoch's 'latent_space' frame to every
# PCA/UMAP projection stored alongside it, and give the projection columns the
# canonical names 'latent_1' / 'latent_2'. `latent_dfs` is updated in place.
for case in latent_dfs.values():
    for epoch in case.values():
        date_cols = epoch['latent_space'].iloc[:, :4].reset_index(drop=True)
        date_names = date_cols.columns.tolist()

        # Iterate over a snapshot so re-assigning entries cannot disturb iteration.
        for space, projection in list(epoch.items()):
            if ('pca' not in space) and ('umap' not in space):
                continue

            # Idempotency guard: if the date columns are already present the
            # cell has been run before; concatenating again would yield ten
            # columns and the six-name assignment below would raise.
            if all(name in projection.columns for name in date_names):
                continue

            # Align rows by position: date_cols has a fresh RangeIndex, so the
            # projection must have one too or concat(axis=1) would align on
            # mismatching labels and introduce NaN rows.
            merged = pd.concat(
                [date_cols, projection.reset_index(drop=True)], axis=1
            )
            merged.columns = date_names + ['latent_1', 'latent_2']
            epoch[space] = merged

In [None]:
import pandas as pd
import altair as alt

# Route chart data through the VegaFusion data transformer.
alt.data_transformers.enable("vegafusion")


# Step 1: stack the 'umap_raw' frame of every Baseline epoch into a single
# table, parsing timestamps and tagging each row with its epoch key.
df_all = [
    df_epoch['umap_raw'].assign(
        timestamp=lambda frame: pd.to_datetime(frame['timestamp']),
        epoch=epoch,
    )
    for epoch, df_epoch in latent_dfs['Baseline'].items()
]
df_combined = pd.concat(df_all, ignore_index=True)

# Step 2: slider bound to the epoch column, spanning the observed epoch range.
epoch_values = df_combined['epoch']
epoch_slider = alt.binding_range(
    name='Epoch: ',
    min=epoch_values.min(),
    max=epoch_values.max(),
    step=1,
)
epoch_select = alt.selection_point(value=0, bind=epoch_slider, fields=['epoch'])

# Step 3: binned 2-D view of the latent coordinates, coloured by label and
# filtered down to the epoch currently chosen on the slider.
latent_x = alt.X('latent_1:Q', bin=alt.Bin(maxbins=100), title="Latent x")
latent_y = alt.Y('latent_2:Q', bin=alt.Bin(maxbins=100), title="Latent y")
label_color = alt.Color('label:N', scale=alt.Scale(scheme='spectral'), title='Label')

heatmap = (
    alt.Chart(df_combined)
    .mark_rect()
    .encode(
        x=latent_x,
        y=latent_y,
        color=label_color,
        tooltip=['label:N', 'epoch:Q'],
    )
    .add_params(epoch_select)
    .transform_filter(epoch_select)
    .properties(
        width=350,
        height=350,
        title="Interactive Latent Space Heatmap by Epoch",
    )
)

heatmap

In [None]:
import pandas as pd
from sklearn.preprocessing import MinMaxScaler

# Read the Client A baseline recording.
client_A = pd.read_csv('datasets/leaks/Graeme/PIPELINE_PYTHON/ClientA_Baseline.csv')

# Parse the 'timestamp' column, interpreting its values as Unix seconds.
timestamps = pd.to_datetime(client_A['timestamp'], unit='s')

# Min-max scale every column after the first into the range [-1, 1].
# (Assumes the timestamp is the first column of the CSV -- confirm.)
scaler = MinMaxScaler(feature_range=(-1, 1))
feature_names = client_A.columns[1:]
scaled_values = scaler.fit_transform(client_A.iloc[:, 1:])

# Rebuild a frame: parsed timestamp first, then the scaled features under
# their original column names.
scaled_df = pd.DataFrame(scaled_values, columns=feature_names)
scaled_df.insert(0, 'timestamp', timestamps)

# Quick sanity check of the result.
print(scaled_df.shape)
scaled_df.head(6)

In [None]:
import altair as alt
import pandas as pd

# Epoch of the combined Baseline projection shown in the latent panel.
# Hoisted out of the filter expression so it is easy to find and change.
selected_epoch = 8

# --- Prepare data ---
# Long format: one row per (timestamp, feature) pair.
melted_df = scaled_df.melt(id_vars='timestamp', var_name='feature', value_name='value')
melted_df['timestamp'] = pd.to_datetime(melted_df['timestamp'])

df = df_combined[df_combined['epoch'] == selected_epoch]

# Offset by feature: each feature is shifted up by 2 * its position so the
# scaled series stack vertically instead of overlapping.
unique_features = melted_df['feature'].unique()
offset_dict = {feature: i * 2 for i, feature in enumerate(unique_features)}
# Vectorised lookup + add; same values as a row-wise apply, without the
# per-row Python call.
melted_df['offset_value'] = melted_df['value'] + melted_df['feature'].map(offset_dict)

# --- Selection setup ---
# The brush initially covers one window of `batch_temporal` days from the
# first timestamp.
start_ts = melted_df['timestamp'].min()
end_ts = start_ts + pd.Timedelta(days=batch_temporal)
date_range = (start_ts.to_pydatetime(), end_ts.to_pydatetime())

brush = alt.selection_interval(encodings=['x'], value={'x': date_range})
latent_selection = alt.selection_point(fields=['timestamp'], value = melted_df['timestamp'].min())

# --- Base line chart ---
base = alt.Chart(melted_df).mark_line().encode(
    x=alt.X('timestamp:T', title='Time'),
    y=alt.Y('offset_value:Q', title='Offset Scaled Value'),
    color=alt.Color('feature:N', title='Feature')
).properties(width=500)

# --- Points from latent selection (overlaid) ---
# Only rows whose timestamp matches the point picked in the latent panel.
highlighted_points = alt.Chart(melted_df).mark_circle(color='black', size=50).encode(
    x='timestamp:T',
    y='offset_value:Q',
    tooltip=['feature:N', 'timestamp:T']
).transform_filter(
    latent_selection
)

# --- Upper (zoomed with brush, highlights added) ---
# The x domain follows the brush drawn on the overview chart below.
upper = (base + highlighted_points).encode(
    x=alt.X('timestamp:T', scale=alt.Scale(domain=brush))
).properties(height=200)

# --- Lower (overview with brush) ---
lower = base.properties(height=60).add_params(brush)

# --- Combine upper and lower ---
time_series_chart = upper & lower

# --- Latent heatmap with selection ---
# Pad the axis domains by 3 units around the integer-truncated data extent.
x_range = [int(df['latent_1'].min()) - 3, int(df['latent_1'].max()) + 3]
y_range = [int(df['latent_2'].min()) - 3, int(df['latent_2'].max()) + 3]

latent = alt.Chart(df).mark_rect().encode(
    x=alt.X('latent_1:Q', bin=alt.Bin(maxbins=150), scale=alt.Scale(domain=x_range), title="Latent x"),
    y=alt.Y('latent_2:Q', bin=alt.Bin(maxbins=150), scale=alt.Scale(domain=y_range), title="Latent y"),
    # The colour channel encodes the nominal `label` field, so the legend is
    # titled 'Label' (it previously read 'Density', which did not match).
    color=alt.Color('label:N', scale=alt.Scale(scheme='spectral'), title='Label'),
    tooltip=['label:N', 'timestamp:T', 'hour']
).properties(
    width=400,
    height=300,
    title="Latent Space (select region to highlight time)"
).add_params(
    latent_selection
).transform_filter(
    # Keep only latent points whose timestamp lies inside the brushed interval.
    brush
).interactive()

# --- Final layout ---
latent | time_series_chart

In [None]:
# Long format: one row per (timestamp, feature) pair.
melted_df = scaled_df.melt(id_vars='timestamp', var_name='feature', value_name='value')

# Convert to datetime *before* using the .dt accessor below, so the cell does
# not rely on the column already having a datetime dtype.
melted_df['timestamp'] = pd.to_datetime(melted_df['timestamp'])
melted_df['hour'] = melted_df['timestamp'].dt.hour
melted_df['day'] = melted_df['timestamp'].dt.day  # day of month

melted_df['hour_bin'] = (melted_df['hour'] // agg_interval) * agg_interval

# Assign an offset per feature
unique_features = melted_df['feature'].unique()
offset_dict = {feature: i * 2 for i, feature in enumerate(unique_features)}  # 2 is the vertical spacing

# Apply the offset (vectorised lookup + add; same values as a row-wise apply)
melted_df['offset_value'] = melted_df['value'] + melted_df['feature'].map(offset_dict)

# Plot settings that do not depend on the feature being drawn. Defining them
# here also guarantees `bins_to_plot` exists for the title below.
bins_to_plot = list(range(0, 12, agg_interval))
colors = ['red', 'blue', 'green', 'orange', 'purple', 'pink']
# NOTE(review): zip() below stops at the shorter sequence, so with more than
# six bins (e.g. agg_interval = 1) only the first six are drawn -- confirm.
window_hours = 4   # length of each highlighted block, in hours
max_points = 450   # number of leading rows of each feature that are plotted

# Set up plot
plt.figure(figsize=(15, 6))

for feat in unique_features:

    # First `max_points` rows of this feature, indexed by timestamp.
    plot_df = (
        melted_df[melted_df['feature'] == feat]
        .iloc[:max_points]
        .set_index('timestamp')
    )

    # Plot full series in gray
    plt.plot(plot_df.index, plot_df['offset_value'], color='lightgray', label='Full Series')

    days = plot_df['day'].unique()

    # Loop over each bin
    for bin_value, color in zip(bins_to_plot, colors):
        for day in days:
            # Only the first day carries a legend label, to avoid duplicates.
            label = f'Bin {bin_value}' if day == days[0] else ""

            # Each bin is highlighted twice per day: a block starting at
            # bin_value and a second one 12 hours later. Hours >= 24 never
            # match `dt.hour`, so a block does not wrap past midnight.
            for start_hour in (bin_value, bin_value + 12):
                hour_range = list(range(start_hour, start_hour + window_hours))

                # Select the rows within that window for the current day
                mask = (plot_df['day'] == day) & (plot_df['hour'].isin(hour_range))
                segment = plot_df[mask]

                # Plot the segment
                plt.plot(segment.index, segment['offset_value'], color=color, label=label)

# Formatting
plt.title(f'Series Highlighted for Hour Bins: {bins_to_plot} (4-hour blocks)')
plt.xlabel('Time')
plt.ylabel('Offset Value')
# plt.legend(loc='upper left', fontsize=8)
plt.grid(True)
plt.tight_layout()
plt.show()

In [None]:
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from sklearn.preprocessing import MinMaxScaler

# Raw and transformed data
# NOTE(review): `results` is not defined in any cell visible in this notebook;
# it must be loaded before this cell for a clean Restart & Run All.
# The original author annotated the expected shape as (200, 5) -- TODO confirm.
transformed = results['Baseline']['dl'][0]['X'][0]

# Determine aggregation window
# NOTE(review): this is 3 while `agg_interval` at the top is 2 -- confirm which
# aggregation produced `results`.
agg_window = 3
lenght_raw = transformed.shape[0] * agg_window

# Scale the matching stretch of raw data with a *fresh* scaler that shares the
# configured feature range. The previous call passed (-1, 1) as the second
# positional argument of fit_transform, which is `y` and is ignored by
# MinMaxScaler, and it refitted the shared `scaler` on this row subset.
raw_scaler = MinMaxScaler(feature_range=scaler.feature_range)
raw_scaled = raw_scaler.fit_transform(client_A.iloc[:lenght_raw, 1:])

# Time axes
time_raw = np.arange(raw_scaled.shape[0])
time_transformed = np.arange(transformed.shape[0]) * agg_window + agg_window // 2  # center of each window

# Plot
plt.figure(figsize=(10, 4))
offset = 2  # vertical offset to separate each feature

for i in range(raw_scaled.shape[1]):
    # Offset for clarity; only the first raw series gets a legend entry.
    plt.plot(time_raw, raw_scaled[:, i] + i * offset, color='lightgray', label=f'Raw Feature {i+1}' if i == 0 else "")
    plt.plot(time_transformed, transformed[:, i] + i * offset, label=f'Transformed Feature {i+1}')

plt.title('Raw vs Transformed (Aggregated) Multivariate Time Series')
plt.xlabel('Time Step')
# One tick per feature, placed at that feature's vertical offset.
plt.yticks([i * offset for i in range(raw_scaled.shape[1])],
           [f'Feature {i+1}' for i in range(raw_scaled.shape[1])])
plt.legend(bbox_to_anchor = (1,1))
plt.grid(True)
plt.tight_layout()
plt.show()
