In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import matplotlib.dates as mdates
from matplotlib.colors import LinearSegmentedColormap
from datetime import datetime, timedelta
import pickle


# Helper function to load pickled data from a file (raises FileNotFoundError if the file is missing)
def load_data(file_path="../test_data_1yr.pkl"):
    """Read and return the object pickled at *file_path*.

    The file is opened in binary mode and closed again before returning.
    No existence check is made: a missing file surfaces as the exception
    raised by ``open``.

    NOTE: ``pickle.load`` can execute arbitrary code while unpickling, so
    only point this at files you trust.
    """
    with open(file_path, "rb") as handle:
        payload = pickle.load(handle)
    return payload


# Function to create heatmap of data availability
def visualize_data_availability(time_series_dict, figsize=(20, 10)):
    """Create a heatmap showing data availability across sensors over time.

    Each sensor becomes one heatmap row and each timestamp (or hourly bin,
    when there are more than 1000 distinct timestamps) one column.  A cell
    holds the fraction of timestamps in that column for which the sensor has
    a non-NaN reading: 0 is drawn white, 1 dark blue.

    Args:
        time_series_dict: Mapping of sensor id -> pandas Series with a
            datetime index.
        figsize: Size of the matplotlib figure.

    Returns:
        The matplotlib Figure containing the heatmap.

    Raises:
        ValueError: If the mapping holds no timestamps at all.
    """
    # Union of every timestamp seen by any sensor -> shared time axis.
    all_timestamps = set()
    for series in time_series_dict.values():
        all_timestamps.update(series.index)

    if not all_timestamps:
        raise ValueError("time_series_dict contains no timestamps to plot")

    time_axis = pd.DatetimeIndex(sorted(all_timestamps))

    # Encode availability as 1.0 / 0.0 (not 1 / NaN) so that averaging over a
    # bin yields a real coverage fraction.  NaN readings count as missing.
    # Columns are collected first and assembled in one go to avoid growing
    # the DataFrame one column at a time.
    availability = {}
    for sensor_id, series in time_series_dict.items():
        observed = series.index[series.notna().to_numpy()]
        availability[sensor_id] = time_axis.isin(observed).astype(float)
    data_matrix = pd.DataFrame(availability, index=time_axis)

    # Resample to a lower resolution for better visualization if too many
    # datapoints.  Hours without any timestamp have no data -> coverage 0.
    if len(time_axis) > 1000:
        data_matrix = data_matrix.resample(pd.Timedelta(hours=1)).mean().fillna(0.0)

    # Custom colormap: white for "no data", blue gradient for coverage.
    colors = ["white", "#deebf7", "#9ecae1", "#3182bd"]
    cmap = LinearSegmentedColormap.from_list("custom_cmap", colors, N=256)

    fig = plt.figure(figsize=figsize)
    # vmin/vmax are pinned: with inferred limits a matrix of all ones would
    # have vmin == vmax and every cell would be mapped to the first color.
    ax = sns.heatmap(
        data_matrix.T,
        cmap=cmap,
        cbar=False,
        vmin=0.0,
        vmax=1.0,
        xticklabels=False,
    )

    # Heatmap columns sit at integer positions (cell centers at i + 0.5), not
    # at date coordinates, so date locators/formatters cannot be used here.
    # Label at most ~20 evenly spaced columns with their own timestamp.
    n_columns = len(data_matrix.index)
    tick_step = max(1, -(-n_columns // 20))  # ceil(n_columns / 20)
    tick_positions = np.arange(0, n_columns, tick_step)
    ax.set_xticks(tick_positions + 0.5)
    ax.set_xticklabels(
        list(data_matrix.index[tick_positions].strftime("%m-%d")), rotation=45
    )

    plt.title("Data Availability Across Sensors", fontsize=16)
    plt.ylabel("Sensor ID", fontsize=12)
    plt.xlabel("Date", fontsize=12)

    plt.tight_layout()
    return fig


# Function to visualize time windows for a subset of sensors
def _find_continuous_segments(series, min_length, gap_threshold):
    """Return ``(start, end)`` position pairs of gap-free runs in *series*.

    A run is broken between two consecutive samples when their timestamps
    are more than *gap_threshold* apart or when either sample is NaN.  Only
    runs of at least *min_length* samples are returned; ``end`` is exclusive.
    """
    index = series.index
    missing = series.isna().to_numpy()

    # breaks[k] is True when a cut is needed between positions k and k + 1.
    gap_too_large = np.asarray((index[1:] - index[:-1]) > gap_threshold)
    breaks = gap_too_large | missing[1:] | missing[:-1]

    cut_points = np.flatnonzero(breaks) + 1
    starts = np.concatenate(([0], cut_points))
    ends = np.concatenate((cut_points, [len(series)]))
    return [
        (int(start), int(end))
        for start, end in zip(starts, ends)
        if end - start >= min_length
    ]


def visualize_time_windows(
    time_series_dict, window_size=24, stride=1, n_sensors=6, figsize=(20, 15)
):
    """Visualize time windows for the sensors with the most data points.

    One panel is drawn per selected sensor: the raw series in gray, every
    continuous segment (no gap above 15 minutes, no NaN) of at least
    ``window_size`` samples shaded green, and every 20th sliding window
    inside a segment shaded blue.

    Args:
        time_series_dict: Mapping of sensor id -> pandas Series with a
            datetime index.
        window_size: Number of samples per window.
        stride: Step, in samples, between consecutive window starts.
        n_sensors: Maximum number of sensors (panels) to show.
        figsize: Size of the matplotlib figure.

    Returns:
        The matplotlib Figure holding the panels.
    """
    # Identify sensors with most data points
    sensor_data_counts = {
        sensor_id: len(series) for sensor_id, series in time_series_dict.items()
    }
    top_sensors = sorted(sensor_data_counts, key=sensor_data_counts.get, reverse=True)[
        :n_sensors
    ]

    # One row per sensor actually available (at least one so the figure is
    # valid); squeeze=False keeps `axes` indexable even for a single row.
    n_rows = max(len(top_sensors), 1)
    fig, axes = plt.subplots(n_rows, 1, figsize=figsize, squeeze=False)
    axes = axes.ravel()

    gap_threshold = pd.Timedelta(minutes=15)

    for ax, sensor_id in zip(axes, top_sensors):
        series = time_series_dict[sensor_id]

        # Plot the raw time series
        ax.plot(series.index, series.values, color="gray", alpha=0.7, label="Raw data")

        # Find segments without large gaps
        # Simulating the TimeSeriesPreprocessor logic
        segments = _find_continuous_segments(series, window_size, gap_threshold)

        for start_seg, end_seg in segments:
            segment_indices = series.index[start_seg:end_seg]

            # Draw segment boundaries
            ax.axvspan(
                segment_indices[0], segment_indices[-1], color="lightgreen", alpha=0.2
            )

            # Only show every 20th window to avoid overcrowding.  Slicing the
            # range of window starts keeps this true for any stride.
            window_starts = range(0, len(segment_indices) - window_size + 1, stride)
            for j in window_starts[::20]:
                ax.axvspan(
                    segment_indices[j],
                    segment_indices[j + window_size - 1],
                    color="blue",
                    alpha=0.1,
                )

        ax.set_title(f"Sensor ID: {sensor_id}", fontsize=12)
        ax.set_ylabel("Traffic Count", fontsize=10)

        # Format x-axis
        ax.xaxis.set_major_formatter(mdates.DateFormatter("%m-%d"))
        ax.xaxis.set_major_locator(mdates.WeekdayLocator(interval=2))

    axes[-1].set_xlabel("Date", fontsize=12)
    plt.tight_layout()
    return fig


# Function to create a comparison of window distributions
def _count_available_windows(series, window_size, gap_threshold):
    """Count the sliding windows of *window_size* samples available in *series*.

    The series is cut between two consecutive samples whenever their
    timestamps are more than *gap_threshold* apart or either sample is NaN.
    A resulting run of ``n >= window_size`` samples contributes
    ``n - window_size + 1`` windows; shorter runs contribute none.
    """
    index = series.index
    missing = series.isna().to_numpy()

    # breaks[k] is True when a cut is needed between positions k and k + 1.
    gap_too_large = np.asarray((index[1:] - index[:-1]) > gap_threshold)
    breaks = gap_too_large | missing[1:] | missing[:-1]

    cut_points = np.flatnonzero(breaks) + 1
    boundaries = np.concatenate(([0], cut_points, [len(series)]))
    segment_lengths = np.diff(boundaries)
    usable = segment_lengths[segment_lengths >= window_size]
    return int((usable - window_size + 1).sum())


def visualize_window_distributions(
    time_series_dict, window_size=24, n_sensors=15, figsize=(16, 10)
):
    """Create a bar chart of the number of available windows per sensor.

    Windows are counted inside continuous segments (like
    TimeSeriesPreprocessor): runs with no timestamp gap above 15 minutes and
    no NaN values.  Only the ``n_sensors`` sensors with the most windows are
    shown, ordered by descending window count.

    Args:
        time_series_dict: Mapping of sensor id -> pandas Series with a
            datetime index.
        window_size: Number of samples per window.
        n_sensors: Maximum number of sensors (bars) to show.
        figsize: Size of the matplotlib figure.

    Returns:
        The matplotlib Figure containing the bar chart.
    """
    gap_threshold = pd.Timedelta(minutes=15)

    # Count windows per sensor
    window_counts = {
        sensor_id: _count_available_windows(series, window_size, gap_threshold)
        for sensor_id, series in time_series_dict.items()
    }

    # Sort by window count
    top_sensors = sorted(window_counts.items(), key=lambda x: x[1], reverse=True)[
        :n_sensors
    ]

    # String labels keep the bars in the order given here; numeric ids would
    # be re-sorted by value on seaborn's categorical axis.
    sensor_labels = [str(sensor_id) for sensor_id, _ in top_sensors]
    counts = [count for _, count in top_sensors]

    # Create a bar chart
    fig = plt.figure(figsize=figsize)
    sns.barplot(x=sensor_labels, y=counts, palette="viridis")

    plt.title(
        f"Number of Available Windows (size={window_size}) by Sensor", fontsize=16
    )
    plt.ylabel("Number of Windows", fontsize=12)
    plt.xlabel("Sensor ID", fontsize=12)
    plt.xticks(rotation=45)

    plt.tight_layout()
    return fig


# Function to create a histogram of window values
def visualize_window_value_distributions(
    time_series_dict, window_size=24, n_sensors=5, figsize=(18, 12)
):
    """Create value histograms and daily-pattern plots for the top sensors.

    For each of the ``n_sensors`` sensors with the most data points, the left
    panel shows a histogram (with KDE) of all values and the right panel a
    scatter of value against hour of day with a 4th-order polynomial trend.
    The right panel is left empty for sensors with 96 samples or fewer.

    Args:
        time_series_dict: Mapping of sensor id -> pandas Series with a
            datetime index.
        window_size: Accepted for signature parity with the other
            visualizations; not used by this function.
        n_sensors: Maximum number of sensors (rows) to show.
        figsize: Size of the matplotlib figure.

    Returns:
        The matplotlib Figure holding the grid of panels.
    """
    import warnings

    # Count data points per sensor to identify top sensors
    sensor_data_counts = {
        sensor_id: len(series) for sensor_id, series in time_series_dict.items()
    }
    top_sensors = sorted(sensor_data_counts, key=sensor_data_counts.get, reverse=True)[
        :n_sensors
    ]

    # One row per sensor actually available (at least one so the figure is
    # valid); squeeze=False keeps `axes` two-dimensional even for one row.
    n_rows = max(len(top_sensors), 1)
    fig, axes = plt.subplots(n_rows, 2, figsize=figsize, squeeze=False)

    for i, sensor_id in enumerate(top_sensors):
        series = time_series_dict[sensor_id]
        hist_ax, pattern_ax = axes[i, 0], axes[i, 1]

        # Histogram of all values
        sns.histplot(series.values, kde=True, ax=hist_ax)
        hist_ax.set_title(f"Sensor {sensor_id} - All Values", fontsize=12)
        hist_ax.set_xlabel("Traffic Count", fontsize=10)

        # Daily pattern needs more than 1 day of data (assuming 15-min intervals)
        if len(series) <= 24 * 4:
            continue

        # Convert to (fractional) hour of day
        hour_of_day = series.index.hour + series.index.minute / 60

        # Create a scatter plot of hour vs value
        sns.scatterplot(x=hour_of_day, y=series.values, alpha=0.5, ax=pattern_ax)

        # Add a smoothed trend line.  The fit is best-effort: a failure is
        # reported and skipped, but KeyboardInterrupt/SystemExit propagate.
        try:
            sns.regplot(
                x=hour_of_day,
                y=series.values,
                scatter=False,
                order=4,
                ax=pattern_ax,
                color="red",
            )
        except Exception as exc:
            warnings.warn(f"Skipping trend line for sensor {sensor_id}: {exc}")

        pattern_ax.set_title(f"Sensor {sensor_id} - Daily Pattern", fontsize=12)
        pattern_ax.set_xlabel("Hour of Day", fontsize=10)
        pattern_ax.set_ylabel("Traffic Count", fontsize=10)
        pattern_ax.set_xlim(0, 24)

    plt.tight_layout()
    return fig


# Function to visualize a calendar heatmap for a single sensor
def visualize_calendar_heatmap(time_series_dict, sensor_id=None, figsize=(20, 6)):
    """Create a calendar heatmap (date x hour of day) for a single sensor.

    The sensor's series is averaged into hourly bins and laid out with one
    row per calendar date and one column per hour of day.

    Args:
        time_series_dict: Mapping of sensor id -> pandas Series with a
            datetime index.
        sensor_id: Sensor to plot.  When None, the sensor with the most data
            points is used.
        figsize: Size of the matplotlib figure.

    Returns:
        The matplotlib Figure, or None when the mapping is empty, the sensor
        is unknown, or its series has no samples.
    """
    # Choose the sensor with the most data points if not specified
    if sensor_id is None:
        if not time_series_dict:
            return None
        sensor_id = max(time_series_dict, key=lambda key: len(time_series_dict[key]))

    series = time_series_dict.get(sensor_id)
    if series is None or len(series) == 0:
        return None

    # Resample to hourly data
    hourly_data = series.resample(pd.Timedelta(hours=1)).mean()

    # Build the long-form table with explicit column names rather than via
    # reset_index(), whose column names depend on the series/index names.
    pivot_data = pd.DataFrame(
        {
            "date": hourly_data.index.date,
            "hour": hourly_data.index.hour,
            "value": hourly_data.to_numpy(),
        }
    )

    # Pivot the table: dates as rows, hours of day as columns
    calendar_data = pivot_data.pivot(index="date", columns="hour", values="value")

    # Plot the heatmap
    fig = plt.figure(figsize=figsize)
    sns.heatmap(calendar_data, cmap="viridis", robust=True)

    plt.title(f"Calendar Heatmap for Sensor {sensor_id}", fontsize=16)
    plt.xlabel("Hour of Day", fontsize=12)
    plt.ylabel("Date", fontsize=12)

    plt.tight_layout()
    return fig


# Main function to create all visualizations
def create_time_window_visualizations(data_file="../test_data_1yr.pkl", window_size=24):
    """Build every time-window visualization for the data in *data_file*.

    The pickled sensor data is loaded once and passed to each plotting
    function in turn.  The result is a list of ``(label, figure)`` pairs in
    this order: data availability, time windows, window distribution, value
    distribution, calendar heatmap.
    """
    sensor_series = load_data(data_file)

    # (label, plotting function, extra keyword arguments), in output order.
    windowed = {"window_size": window_size}
    plan = (
        ("data_availability", visualize_data_availability, {}),
        ("time_windows", visualize_time_windows, windowed),
        ("window_distribution", visualize_window_distributions, windowed),
        ("value_distribution", visualize_window_value_distributions, windowed),
        ("calendar_heatmap", visualize_calendar_heatmap, {}),
    )

    return [
        (label, build(sensor_series, **options)) for label, build, options in plan
    ]

In [None]:
visualizations = create_time_window_visualizations()

# Save each figure as "<name>.png" in the working directory, then release it.
for name, fig in visualizations:
    # A visualization may be None when there was nothing to draw
    # (visualize_calendar_heatmap returns None in that case); skip it
    # instead of failing on `fig.number`.
    if fig is None:
        continue
    fig.savefig(f"{name}.png", dpi=300, bbox_inches="tight")
    plt.close(fig)

In [None]:
import numpy as np
import pandas as pd
import pickle
from datetime import datetime, timedelta
import plotly.graph_objects as go
import plotly.express as px
from plotly.subplots import make_subplots
import matplotlib.pyplot as plt
from sklearn.decomposition import PCA
from sklearn.preprocessing import StandardScaler

# # Helper function to load data from file
# def load_data(file_path='test_data_1yr.pkl'):
#     with open(file_path, 'rb') as f:
#         return pickle.load(f)

# # Find continuous segments in time series
# def find_continuous_segments(time_index, values, gap_threshold=pd.Timedelta(minutes=15)):
#     segments = []
#     start_idx = 0

#     for i in range(1, len(time_index)):
#         time_diff = time_index[i] - time_index[i-1]

#         # Check for gaps in time or values
#         if (time_diff > gap_threshold) or (np.isnan(values[i-1]) or np.isnan(values[i])):
#             if i - start_idx >= 24:  # Assuming minimum window size of 24
#                 segments.append((start_idx, i))
#             start_idx = i

#     # Add the last segment if it's long enough
#     if len(time_index) - start_idx >= 24:
#         segments.append((start_idx, len(time_index)))

#     return segments

# # Create an interactive data availability heatmap
# def interactive_data_availability(time_series_dict):
#     """Create an interactive heatmap showing data availability across sensors over time"""
#     # Get all unique timestamps from all sensors
#     all_timestamps = set()
#     for sensor_id, series in time_series_dict.items():
#         all_timestamps.update(series.index)

#     all_timestamps = sorted(all_timestamps)

#     # Create a DataFrame with all timestamps and fill with NaN
#     data_matrix = pd.DataFrame(index=all_timestamps)

#     # For each sensor, add a column to the DataFrame
#     for sensor_id, series in time_series_dict.items():
#         data_matrix[sensor_id] = np.nan
#         # Only fill in data that exists
#         data_matrix.loc[series.index, sensor_id] = 1

#     # Resample to a lower resolution for better visualization if too many datapoints
#     if len(all_timestamps) > 1000:
#         data_matrix = data_matrix.resample('1H').mean()

#     # Convert to long format for plotly
#     data_long = data_matrix.reset_index().melt(
#         id_vars='index',
#         var_name='sensor_id',
#         value_name='has_data'
#     )

#     # Create the heatmap with plotly
#     fig = px.density_heatmap(
#         data_long,
#         x='index',
#         y='sensor_id',
#         z='has_data',
#         color_continuous_scale=[
#             [0, 'rgba(255,255,255,0)'],  # Transparent for NaN
#             [0.5, 'rgba(222,235,247,1)'],  # Light blue
#             [1, 'rgba(49,130,189,1)']      # Dark blue
#         ],
#         title='Data Availability Across Sensors (Interactive)',
#         labels={'index': 'Date', 'sensor_id': 'Sensor ID', 'has_data': 'Data Available'}
#     )

#     # Update layout
#     fig.update_layout(
#         height=800,
#         xaxis_title='Date',
#         yaxis_title='Sensor ID',
#         title_x=0.5,
#         coloraxis_showscale=False
#     )

#     return fig

# # Create interactive window visualization for a given sensor
# def interactive_sensor_windows(time_series_dict, sensor_id, window_size=24, stride=1):
#     """Create an interactive visualization of windows for a specific sensor"""
#     series = time_series_dict.get(sensor_id)
#     if series is None or len(series) == 0:
#         return None

#     # Find continuous segments
#     segments = find_continuous_segments(series.index, series.values)

#     # Create a figure
#     fig = go.Figure()

#     # Add the raw time series
#     fig.add_trace(go.Scatter(
#         x=series.index,
#         y=series.values,
#         mode='lines',
#         name='Raw Data',
#         line=dict(color='darkgray')
#     ))

#     # Add segments and windows
#     for start_seg, end_seg in segments:
#         segment_indices = series.index[start_seg:end_seg]

#         # Add segment highlight
#         fig.add_trace(go.Scatter(
#             x=[segment_indices[0], segment_indices[0], segment_indices[-1], segment_indices[-1]],
#             y=[series.values.min(), series.values.max(), series.values.max(), series.values.min()],
#             fill="toself",
#             mode='none',
#             name=f'Segment: {segment_indices[0].date()} to {segment_indices[-1].date()}',
#             fillcolor='rgba(144,238,144,0.2)',
#             showlegend=True
#         ))

#         # Add a few example windows
#         n_windows = len(segment_indices) - window_size + 1

#         # Only show a few windows to avoid overcrowding
#         window_step = max(1, n_windows // 5)

#         for i in range(0, n_windows, window_step):
#             window_start = segment_indices[i]
#             window_end = segment_indices[i + window_size - 1]

#             fig.add_trace(go.Scatter(
#                 x=[window_start, window_start, window_end, window_end],
#                 y=[series.values.min(), series.values.max(), series.values.max(), series.values.min()],
#                 fill="toself",
#                 mode='none',
#                 name=f'Window: {window_start}',
#                 fillcolor='rgba(0,0,255,0.1)',
#                 showlegend=False
#             ))

#     # Update layout
#     fig.update_layout(
#         title=f'Time Windows for Sensor {sensor_id}',
#         xaxis_title='Date',
#         yaxis_title='Traffic Count',
#         height=600,
#         legend=dict(
#             orientation="h",
#             yanchor="bottom",
#             y=1.02,
#             xanchor="right",
#             x=1
#         )
#     )

#     return fig

# # Create a dashboard with multiple sensor window visualizations
# def interactive_window_dashboard(time_series_dict, window_size=24, n_sensors=4):
#     """Create a dashboard with window visualizations for multiple sensors"""
#     # Identify sensors with most data points
#     sensor_data_counts = {sensor_id: len(series) for sensor_id, series in time_series_dict.items()}
#     top_sensors = sorted(sensor_data_counts, key=sensor_data_counts.get, reverse=True)[:n_sensors]

#     # Create subplots
#     fig = make_subplots(
#         rows=n_sensors,
#         cols=1,
#         subplot_titles=[f'Sensor {sensor_id}' for sensor_id in top_sensors],
#         vertical_spacing=0.1
#     )

#     # Add data for each sensor
#     for i, sensor_id in enumerate(top_sensors):
#         series = time_series_dict[sensor_id]

#         # Add the raw time series
#         fig.add_trace(
#             go.Scatter(
#                 x=series.index,
#                 y=series.values,
#                 mode='lines',
#                 name=f'Sensor {sensor_id}',
#                 line=dict(color='darkgray')
#             ),
#             row=i+1,
#             col=1
#         )

#         # Find continuous segments
#         segments = find_continuous_segments(series.index, series.values)

#         # Add segment highlights for one example segment
#         for j, (start_seg, end_seg) in enumerate(segments):
#             if j > 2:  # Limit to first 3 segments to avoid overcrowding
#                 break

#             segment_indices = series.index[start_seg:end_seg]

#             # Add segment highlight
#             fig.add_trace(
#                 go.Scatter(
#                     x=[segment_indices[0], segment_indices[0], segment_indices[-1], segment_indices[-1], segment_indices[0]],
#                     y=[series.min(), series.max(), series.max(), series.min(), series.min()],
#                     fill="toself",
#                     mode='none',
#                     name=f'S{sensor_id} Segment {j+1}',
#                     fillcolor=f'rgba(144,238,144,0.2)',
#                     showlegend=True
#                 ),
#                 row=i+1,
#                 col=1
#             )

#     # Update layout
#     fig.update_layout(
#         height=300*n_sensors,
#         title_text="Time Windows Across Multiple Sensors",
#         showlegend=True,
#         legend=dict(orientation="h", y=-0.1)
#     )

#     return fig

# # Create a PCA visualization of sensor windows
# def visualize_window_pca(time_series_dict, window_size=24, n_sensors=10):
#     """Create a PCA visualization of sensor windows to see patterns"""
#     # Collect window data
#     windows_data = []
#     sensor_ids = []

#     # Process top sensors
#     sensor_data_counts = {sensor_id: len(series) for sensor_id, series in time_series_dict.items()}
#     top_sensors = sorted(sensor_data_counts, key=sensor_data_counts.get, reverse=True)[:n_sensors]

#     for sensor_id in top_sensors:
#         series = time_series_dict[sensor_id]

#         # Find continuous segments
#         segments = find_continuous_segments(series.index, series.values)

#         # Extract windows
#         for start_seg, end_seg in segments:
#             segment_values = series.values[start_seg:end_seg]

#             # Create windows
#             for i in range(0, len(segment_values) - window_size + 1, window_size//2):  # 50% overlap
#                 window = segment_values[i:i+window_size]

#                 if not np.isnan(window).any():  # Skip windows with NaN values
#                     windows_data.append(window)
#                     sensor_ids.append(sensor_id)

#     if not windows_data:
#         return None

#     # Convert to numpy array
#     X = np.array(windows_data)

#     # Normalize the data
#     scaler = StandardScaler()
#     X_scaled = scaler.fit_transform(X)

#     # Apply PCA
#     pca = PCA(n_components=2)
#     X_pca = pca.fit_transform(X_scaled)

#     # Create a DataFrame for plotting
#     pca_df = pd.DataFrame({
#         'PC1': X_pca[:, 0],
#         'PC2': X_pca[:, 1],
#         'sensor_id': sensor_ids
#     })

#     # Create a scatter plot
#     fig = px.scatter(
#         pca_df,
#         x='PC1',
#         y='PC2',
#         color='sensor_id',
#         title='PCA of Sensor Windows',
#         labels={'PC1': f'PC1 ({pca.explained_variance_ratio_[0]:.2%} variance)',
#                 'PC2': f'PC2 ({pca.explained_variance_ratio_[1]:.2%} variance)'},
#         hover_data=['sensor_id']
#     )

#     fig.update_layout(height=700, width=900)

#     return fig

# # Create a window count bar chart
# def interactive_window_counts(time_series_dict, window_size=24, n_sensors=20):
#     """Create an interactive bar chart of window counts by sensor"""
#     # Count windows per sensor
#     window_counts = {}

#     for sensor_id, series in time_series_dict.items():
#         # Find segments without large gaps
#         segments = find_continuous_segments(series.index, series.values)

#         # Count windows
#         total_windows = 0
#         for start_seg, end_seg in segments:
#             segment_len = end_seg - start_seg
#             total_windows += max(0, segment_len - window_size + 1)

#         window_counts[sensor_id] = total_windows

#     # Sort by window count
#     sorted_counts = sorted(window_counts.items(), key=lambda x: x[1], reverse=True)[:n_sensors]

#     # Create a DataFrame
#     count_df = pd.DataFrame(sorted_counts, columns=['sensor_id', 'window_count'])

#     # Create a bar chart
#     fig = px.bar(
#         count_df,
#         x='sensor_id',
#         y='window_count',
#         title=f'Number of Available Windows (size={window_size}) by Sensor',
#         labels={'sensor_id': 'Sensor ID', 'window_count': 'Number of Windows'},
#         color='window_count',
#         color_continuous_scale=px.colors.sequential.Viridis
#     )

#     fig.update_layout(height=600, xaxis_tickangle=-45)

#     return fig

# # Create a heatmap of daily patterns
# def visualize_daily_patterns(time_series_dict, n_sensors=6):
#     """Create a heatmap of daily patterns for top sensors"""
#     # Identify sensors with most data points
#     sensor_data_counts = {sensor_id: len(series) for sensor_id, series in time_series_dict.items()}
#     top_sensors = sorted(sensor_data_counts, key=sensor_data_counts.get, reverse=True)[:n_sensors]

#     # Create subplots
#     fig = make_subplots(
#         rows=n_sensors,
#         cols=1,
#         subplot_titles=[f'Sensor {sensor_id} - Daily Pattern' for sensor_id in top_sensors],
#         vertical_spacing=0.08
#     )

#     # Process each sensor
#     for i, sensor_id in enumerate(top_sensors):
#         series = time_series_dict[sensor_id]

#         # Create a DataFrame with hour and day of week
#         df = pd.DataFrame({
#             'value': series.values,
#             'hour': series.index.hour,
#             'day_of_week': series.index.dayofweek
#         })

#         # Group by hour and day of week
#         pivot_data = df.pivot_table(
#             values='value',
#             index='day_of_week',
#             columns='hour',
#             aggfunc='mean'
#         ).fillna(0)

#         # Create heatmap
#         heatmap = go.Heatmap(
#             z=pivot_data.values,
#             x=pivot_data.columns,
#             y=['Mon', 'Tue', 'Wed', 'Thu', 'Fri', 'Sat', 'Sun'],
#             colorscale='Viridis',
#             showscale=(i==0),  # Only show colorbar for first heatmap
#         )

#         fig.add_trace(heatmap, row=i+1, col=1)

#         # Update axes
#         fig.update_xaxes(title_text="Hour of Day" if i==n_sensors-1 else "", row=i+1, col=1)
#         fig.update_yaxes(title_text="Day of Week", row=i+1, col=1)

#     # Update layout
#     fig.update_layout(
#         height=250*n_sensors,
#         title_text="Daily Traffic Patterns Across Sensors"
#     )

#     return fig

# # Function to create all interactive visualizations
# def create_interactive_visualizations(data_file='test_data_1yr.pkl', window_size=24):
#     """Create and return a list of interactive visualizations for time window analysis"""
#     # Load the data
#     time_series_dict = load_data(data_file)

#     # Create visualizations
#     visualizations = []

#     # 1. Data availability heatmap
#     viz1 = interactive_data_availability(time_series_dict)
#     visualizations.append(('data_availability', viz1))

#     # 2. Window dashboard for top sensors
#     viz2 = interactive_window_dashboard(time_series_dict, window_size=window_size)
#     visualizations.append(('window_dashboard', viz2))

#     # 3. Window counts
#     viz3 = interactive_window_counts(time_series_dict, window_size=window_size)
#     visualizations.append(('window_counts', viz3))

#     # 4. PCA of windows
#     viz4 = visualize_window_pca(time_series_dict, window_size=window_size)
#     visualizations.append(('window_pca', viz4))

#     # 5. Daily patterns heatmap
#     viz5 = visualize_daily_patterns(time_series_dict)
#     visualizations.append(('daily_patterns', viz5))

#     return visualizations

# # Create a comprehensive dashboard combining multiple visualizations
# def create_sensor_window_dashboard(data_file='test_data_1yr.pkl', window_size=24):
#     """Create a comprehensive dashboard for analyzing sensor time windows"""
#     # Load the data
#     time_series_dict = load_data(data_file)

#     # Identify sensors with most data points for individual analysis
#     sensor_data_counts = {sensor_id: len(series) for sensor_id, series in time_series_dict.items()}
#     top_sensor = max(sensor_data_counts, key=sensor_data_counts.get)

#     # Create a multi-page dashboard using HTML and Plotly
#     from plotly.io import to_html
#     import plotly.io as pio

#     # Set theme
#     pio.templates.default = "plotly_white"

#     # Create individual visualizations
#     data_avail_fig = interactive_data_availability(time_series_dict)
#     top_sensor_fig = interactive_sensor_windows(time_series_dict, top_sensor, window_size)
#     window_counts_fig = interactive_window_counts(time_series_dict, window_size)
#     daily_patterns_fig = visualize_daily_patterns(time_series_dict, n_sensors=4)

#     # Try to create PCA visualization if possible
#     try:
#         pca_fig = visualize_window_pca(time_series_dict, window_size)
#     except:
#         pca_fig = None

#     # Combine into HTML
#     html_content = f"""
#     <!DOCTYPE html>
#     <html>
#     <head>
#         <title>Sensor Window Analysis Dashboard</title>
#         <style>
#             body {{
#                 font-family: Arial, sans-serif;
#                 margin: 0;
#                 padding: 20px;
#                 background-color: #f5f5f5;
#             }}
#             .dashboard-container {{
#                 max-width: 1200px;
#                 margin: 0 auto;
#                 background-color: white;
#                 border-radius: 8px;
#                 overflow: hidden;
#                 box-shadow: 0 0 10px rgba(0,0,0,0.1);
#             }}
#             .dashboard-header {{
#                 background-color: #4C78A8;
#                 color: white;
#                 padding: 20px;
#                 text-align: center;
#             }}
#             .dashboard-section {{
#                 padding: 20px;
#                 margin-bottom: 20px;
#                 border-bottom: 1px solid #eee;
#             }}
#             h1 {{
#                 margin: 0;
#             }}
#             h2 {{
#                 color: #2C3E50;
#                 margin-top: 0;
#             }}
#             .viz-container {{
#                 margin-top: 20px;
#             }}
#         </style>
#     </head>
#     <body>
#         <div class="dashboard-container">
#             <div class="dashboard-header">
#                 <h1>Sensor Time Window Analysis Dashboard</h1>
#                 <p>Window Size: {window_size} time steps</p>
#             </div>

#             <div class="dashboard-section">
#                 <h2>Data Availability Overview</h2>
#                 <p>This heatmap shows when data is available across all sensors. Darker blue indicates data availability.</p>
#                 <div class="viz-container">
#                     {to_html(data_avail_fig, include_plotlyjs='cdn', full_html=False)}
#                 </div>
#             </div>

#             <div class="dashboard-section">
#                 <h2>Time Windows for Top Sensor (ID: {top_sensor})</h2>
#                 <p>This visualization shows the raw data for the sensor with the most data points, highlighting continuous segments and example windows.</p>
#                 <div class="viz-container">
#                     {to_html(top_sensor_fig, include_plotlyjs='cdn', full_html=False)}
#                 </div>
#             </div>

#             <div class="dashboard-section">
#                 <h2>Window Count Distribution</h2>
#                 <p>This bar chart shows the number of available windows for each sensor.</p>
#                 <div class="viz-container">
#                     {to_html(window_counts_fig, include_plotlyjs='cdn', full_html=False)}
#                 </div>
#             </div>

#             <div class="dashboard-section">
#                 <h2>Daily Traffic Patterns</h2>
#                 <p>These heatmaps show the average traffic patterns by hour of day and day of week for top sensors.</p>
#                 <div class="viz-container">
#                     {to_html(daily_patterns_fig, include_plotlyjs='cdn', full_html=False)}
#                 </div>
#             </div>
#     """

#     # Add PCA visualization if available
#     if pca_fig is not None:
#         html_content += f"""
#             <div class="dashboard-section">
#                 <h2>PCA of Sensor Windows</h2>
#                 <p>This scatter plot shows a 2D representation of the window patterns across sensors using Principal Component Analysis.</p>
#                 <div class="viz-container">
#                     {to_html(pca_fig, include_plotlyjs='cdn', full_html=False)}
#                 </div>
#             </div>
#         """

#     # Close HTML
#     html_content += """
#         </div>
#     </body>
#     </html>
#     """

#     # Return the HTML content
#     return html_content

In [None]:
# # Or create a complete dashboard
# dashboard_html = create_sensor_window_dashboard(data_file='../test_data_1yr.pkl', window_size=24)

In [None]:
# # Save the HTML to a file
# with open('sensor_dashboard.html', 'w') as f:
#     f.write(dashboard_html)

# print("Dashboard saved to sensor_dashboard.html")