In [2]:
import numpy as np
import pandas as pd
from pathlib import Path
from matplotlib import pyplot as plt
from plotly.offline import plot

import plotly.express as px
import plotly.graph_objects as go
from plotly.subplots import make_subplots

from utils import save_plotly_figure_as_html
from ex_01_read_data import get_welding_data



In [3]:
# Input CSV handed to get_welding_data() by the loading cells below.
data_path = Path("data/Welding/data.csv")
# Base directory from which the figure save paths below are built.
plot_path = Path("plots/ex_02")

## Exercise 2.1: Dataset Statistics Analysis

In this exercise, we'll calculate and present comprehensive descriptive statistics for the welding dataset. This statistical overview will help us understand the fundamental characteristics of our data before visualization or modeling.

1. Calculate the class distribution to understand data balance
2. Count samples per experiment to assess data volume across experimental conditions
3. Determine min/max values for voltage and current to understand the range of measurements
4. Calculate mean and standard deviation to assess central tendency and variability
5. Find median values to identify central points unaffected by outliers

In [4]:
# Load without an n_samples argument (presumably the full dataset - confirm in
# get_welding_data); the statistics in the next cell are computed on whatever is returned.
data, labels, exp_ids = get_welding_data(data_path)

In [None]:
# --- 1. Class distribution: how many samples carry each label ---
unique_labels, label_counts = np.unique(labels, return_counts=True)
class_distribution = {lbl: n for lbl, n in zip(unique_labels, label_counts)}
print("1. Class Distribution:")
for label, count in class_distribution.items():
    print(f"  Label {label}: {count} samples")

# --- 2. Samples per experiment ---
unique_exps, exp_counts = np.unique(exp_ids, return_counts=True)
exp_distribution = {exp: n for exp, n in zip(unique_exps, exp_counts)}
print("\n2. Samples per Experiment:")
for exp_id, count in exp_distribution.items():
    print(f"  Experiment {exp_id}: {count} samples")

# Channel 0 of the last axis is treated as current, channel 1 as voltage.
current = data[:, :, 0]
voltage = data[:, :, 1]

# Statistics are global: every sample and time step is pooled into one 1-D array.
current_flat = current.flatten()
voltage_flat = voltage.flatten()

# Both signals get the same report lines, so iterate over them instead of
# repeating each print statement twice.
signals = (("Current", current_flat), ("Voltage", voltage_flat))

# --- 3. Min/Max values ---
print("\n3. Min/Max Values:")
for signal_name, signal_values in signals:
    print(f"  {signal_name}: min = {signal_values.min():.4f}, max = {signal_values.max():.4f}")

# --- 4. Mean and standard deviation (ndarray.std default, i.e. ddof=0) ---
print("\n4. Mean and Standard Deviation:")
for signal_name, signal_values in signals:
    print(f"  {signal_name}: mean = {signal_values.mean():.4f}, std = {signal_values.std():.4f}")

# --- 5. Median values ---
print("\n5. Median Values:")
for signal_name, signal_values in signals:
    print(f"  {signal_name}: median = {np.median(signal_values):.4f}")

1. Class Distribution:
  Label 0: 90746 samples
  Label 1: 67219 samples

2. Samples per Experiment:
  Experiment 1: 35521 samples
  Experiment 2: 17913 samples
  Experiment 3: 46768 samples
  Experiment 4: 57763 samples

3. Min/Max Values:
  Current: min = -1.1808, max = 614.1541
  Voltage: min = 0.0741, max = 101.4852

4. Mean and Standard Deviation:
  Current: mean = 173.4557, std = 180.8409
  Voltage: mean = 22.0372, std = 6.0248

5. Median Values:
  Current: median = 68.5884
  Voltage: median = 20.8414


## Exercise 2.2: Current and Voltage Distribution Visualization

In this exercise, we'll create interactive boxplots to visualize and compare the distributions of voltage and current measurements in the welding data. Boxplots will help us identify central tendencies, spread, and potential outliers in our measurements.

1. Create side-by-side boxplots for voltage and current using Plotly
2. Display key statistics (median, quartiles, etc.) visually within the plot
3. Enable interactive exploration of the distributions
4. Save the visualization for future reference

In [6]:
# Reload with n_samples=10_000 - presumably a subsample that keeps the interactive
# boxplot light; confirm the exact semantics in get_welding_data.
data, labels, exp_ids = get_welding_data(data_path, n_samples=10_000)

# Extension-less target path; the plotting cell reports it with an ".html" suffix.
save_path =  plot_path / "voltage_current_distribution"

In [None]:
# Pool every sample and time step into one flat series per signal.
# Channel 0 is treated as current and channel 1 as voltage.
current = data[:, :, 0].flatten()
voltage = data[:, :, 1].flatten()

# Wide frame: one column per measurement type.
df = pd.DataFrame({
    "Current (A)": current,
    "Voltage (V)": voltage
})

# Long format (one row per reading) so Plotly Express can place one box per
# measurement type along the x-axis.
df_melted = df.melt(var_name="Measurement", value_name="Value")

# Side-by-side boxplots; only outlier points are drawn individually.
fig = px.box(
    df_melted,
    x="Measurement",
    y="Value",
    points="outliers",  # show individual outliers
    color="Measurement",
    title="Current and Voltage Distribution (Boxplot)",
    boxmode="group"
)

fig.update_layout(
    yaxis_title="Value",
    xaxis_title="Measurement Type",
    showlegend=False  # the x-axis categories already name each box
)

# Render inline via the notebook renderer. plotly.offline.plot() would instead
# write a stray "temp-plot.html" into the working directory and open a browser
# tab, and the other Plotly cell in this notebook already uses fig.show().
fig.show()

save_plotly_figure_as_html(fig, save_path)
print(f"Plot saved to: {save_path.with_suffix('.html')}")

Plot saved to: plots/ex_02/voltage_current_distribution.html


## Exercise 2.3: Time-Series Sequence Visualization

In this exercise, we'll implement functions to visualize the time-series patterns of welding cycles. These visualizations will help us understand the temporal relationships between voltage and current during the welding process and identify patterns associated with quality outcomes.

1. Create dual-axis plots showing voltage and current over time (10 welding cycles -> sequence_length=10) using matplotlib
2. Implement clear legends and labels for data interpretation
3. Enable saving of visualizations for reporting and comparison

In [None]:

# Request 100 samples as sequences with sequence_length=10 (the "10 welding cycles"
# from the exercise text). The resulting array shapes are set by get_welding_data.
data, labels, exp_ids = get_welding_data(data_path, n_samples=100, return_sequences=True, sequence_length=10)
# PNG target for the single-sequence matplotlib figure.
save_path = plot_path / "welding_sample.png"    

In [None]:
def plot_welding_sequence(sequence: np.ndarray, label: int, save_path: Path | None = None):
    """
    Plot a single welding sequence with dual y-axes for voltage and current.

    Current is drawn against the left y-axis and voltage against the right
    y-axis; both share one time-step x-axis.

    Args:
        sequence (np.ndarray): Array of shape (sequence_length, 2); column 0 is
            plotted as current and column 1 as voltage.
        label (int): Class label for the sequence, shown in the title.
        save_path (Path | None): If provided, the figure is written to this path
            (missing parent directories are created) instead of being shown.
            A plain string path is accepted as well.

    Returns:
        None. The figure is always closed before returning, even if saving fails.
    """
    current = sequence[:, 0]
    voltage = sequence[:, 1]
    time = np.arange(len(current))

    fig, ax1 = plt.subplots(figsize=(10, 5))
    try:
        color1 = 'tab:blue'
        ax1.set_xlabel("Time Step")
        ax1.set_ylabel("Current (A)", color=color1)
        ax1.plot(time, current, color=color1, label="Current")
        ax1.tick_params(axis='y', labelcolor=color1)

        ax2 = ax1.twinx()  # second axes sharing the same x-axis

        color2 = 'tab:red'
        ax2.set_ylabel("Voltage (V)", color=color2)
        ax2.plot(time, voltage, color=color2, label="Voltage")
        ax2.tick_params(axis='y', labelcolor=color2)

        # Address the axes explicitly rather than relying on pyplot's
        # "current axes", which is ax2 after twinx().
        ax1.set_title(f"Welding Cycle (Label: {label})")

        # One legend per axes, placed in opposite corners so they do not overlap.
        ax1.legend(loc="upper left")
        ax2.legend(loc="upper right")

        fig.tight_layout()

        # Compare against None: Path objects are always truthy, so a truthiness
        # test only obscures the intent.
        if save_path is not None:
            save_path = Path(save_path)
            save_path.parent.mkdir(parents=True, exist_ok=True)
            fig.savefig(save_path, dpi=300)
            print(f"Saved plot to {save_path}")
        else:
            plt.show()
    finally:
        # Close this specific figure even when saving raises; otherwise figures
        # pile up in memory when the function is called in a loop.
        plt.close(fig)

# Plot the first loaded sequence; labels[0][0] takes the first entry of that
# sequence's label row as the label shown in the title.
sequence = data[0]
label = labels[0][0]
plot_welding_sequence(sequence, label, save_path)

Saved plot to plots/ex_02/welding_sample.png


## Exercise 2.4: Interactive Time-Series Visualization with Plotly

In this exercise, we'll create enhanced interactive visualizations of welding samples using Plotly. These interactive plots will provide more advanced exploration capabilities for analyzing the time-series patterns.

1. Create interactive plots with dual y-axes for voltage and current
2. Implement time-based range sliders for detailed exploration
3. Add unified tooltips for precise data reading
4. Display quality metrics in the plot title
5. Save interactive visualizations as HTML for sharing

In [13]:
def create_plotly_plot(data: np.ndarray, labels: np.ndarray, exp_ids: np.ndarray | None = None) -> go.Figure:
    """
    Create an interactive Plotly visualization of a random welding sample.

    One sample is drawn uniformly at random. Its current (column 0) is plotted
    on the primary y-axis and its voltage (column 1) on the secondary y-axis,
    with a range slider and unified hover tooltips.

    Args:
        data (np.ndarray): Array indexed by sample first; each sample is a 2-D
            array whose column 0 is current and column 1 is voltage.
        labels (np.ndarray): Class labels, either one per sample or one row per
            sample (the first entry of the row is used).
        exp_ids (np.ndarray, optional): Experiment IDs, laid out like ``labels``.
            Defaults to None, in which case the title shows "N/A".

    Returns:
        plotly.graph_objects.Figure: Interactive Plotly figure object

    Raises:
        ValueError: If ``data`` contains no samples.
    """
    if len(data) == 0:
        raise ValueError("data is empty; there is no welding sample to plot")

    # Select a random sequence
    idx = np.random.randint(len(data))
    sequence = data[idx]
    # np.ravel makes the lookup shape-agnostic: a scalar (1-D labels) and a
    # per-timestep row (2-D labels) both become a 1-D array whose first entry
    # is the label. Plain labels[idx][0] fails on the 1-D layout.
    label = np.ravel(labels[idx])[0]
    exp_id = np.ravel(exp_ids[idx])[0] if exp_ids is not None else "N/A"

    current = sequence[:, 0]
    voltage = sequence[:, 1]
    time = np.arange(len(current))

    # Single subplot with a secondary y-axis so both signals keep their own scale.
    fig = make_subplots(specs=[[{"secondary_y": True}]])

    # Add current (primary y-axis)
    fig.add_trace(
        go.Scatter(x=time, y=current, name="Current (A)", mode="lines", line=dict(color="blue")),
        secondary_y=False
    )

    # Add voltage (secondary y-axis)
    fig.add_trace(
        go.Scatter(x=time, y=voltage, name="Voltage (V)", mode="lines", line=dict(color="red")),
        secondary_y=True
    )

    # Update layout
    fig.update_layout(
        title_text=f"Welding Sample | Label: {label} | Experiment ID: {exp_id}",
        xaxis_title="Time Step",
        yaxis_title="Current (A)",
        yaxis2_title="Voltage (V)",
        legend=dict(x=0.01, y=0.99),  # top-left corner, inside the plot area
        hovermode="x unified",  # one tooltip listing both signals per time step
        xaxis=dict(
            rangeslider=dict(visible=True),
            type="linear"
        ),
        template="plotly_white"
    )

    return fig


# Build the figure for one randomly chosen sample, hand it to
# save_plotly_figure_as_html under plot_path, then render it inline.
fig = create_plotly_plot(data, labels, exp_ids)
save_plotly_figure_as_html(fig, plot_path / "welding_samples")
fig.show()

## Exercise 2.5: Multiple Sample Comparison

In this exercise, we'll generate and compare visualizations from multiple random welding samples. This comparison will help us identify common patterns and variations across different welding cycles.
 

1. Using matplotlib, generate visualizations of several randomly chosen single welding cycles
2. Create dual-axis plots showing voltage and current over time
3. Implement clear legends and labels for data interpretation
4. Save each visualization for comparison

In [None]:
# Reload with n_samples=1_000 and without return_sequences; the random-sample
# plots in this exercise draw from this data.
data, labels, exp_ids = get_welding_data(data_path, n_samples=1_000)

In [15]:
# NOTE(review): plot_random_sample is defined in a cell that appears *below* this
# one (and has a lower execution count), so Restart Kernel -> Run All raises
# NameError here. Move the definition cell above this loop.
# Each iteration plots one randomly drawn sample and saves it under an indexed file name.
for i in range(5):
    plot_random_sample(data, labels, save_path=plot_path / f"welding_sample_{i}.png")
    

Saved plot to plots/ex_02/welding_sample_0.png
Saved plot to plots/ex_02/welding_sample_1.png
Saved plot to plots/ex_02/welding_sample_2.png
Saved plot to plots/ex_02/welding_sample_3.png
Saved plot to plots/ex_02/welding_sample_4.png


In [14]:
def plot_random_sample(data: np.ndarray, labels: np.ndarray, save_path: Path | None = None):
    """
    Plot a random welding sample with dual y-axes (current and voltage) using matplotlib.

    One sample is drawn uniformly at random and handed to plot_welding_sequence
    together with its label.

    Args:
        data (np.ndarray): Array of shape (n_samples, timesteps, 2), with current and voltage.
        labels (np.ndarray): Array of class labels, shape (n_samples,) or (n_samples, timesteps).
            For per-timestep labels the first entry of the sample's row is used.
        save_path (Path | None): If provided, save the plot to this path.

    Returns:
        Whatever plot_welding_sequence returns for the chosen sample.

    Raises:
        ValueError: If ``data`` contains no samples.
    """
    if len(data) == 0:
        raise ValueError("data is empty; there is no welding sample to plot")

    idx = np.random.randint(len(data))
    sample = data[idx]
    # Both documented label layouts must work: np.ravel turns a scalar
    # (shape (n_samples,)) or a per-timestep row (shape (n_samples, timesteps))
    # into a 1-D array, so taking element 0 is valid in either case. Plain
    # labels[idx][0] fails on the 1-D layout.
    label = np.ravel(labels[idx])[0]

    return plot_welding_sequence(sample, label, save_path)