In [1]:
import os
import numpy as np
import pandas as pd
import torch  # GPU support

def load_ieee_phm_vibration_data(directory, use_gpu=True):
    """
    Loads vibration signal data from the IEEE PHM 2012 Bearing Dataset with correct separator.

    Parameters:
        directory (str): Path to the dataset directory in Kaggle.
        use_gpu (bool): Whether to load data onto GPU.

    Returns:
        data_dict (dict): Dictionary where keys are file paths and values are vibration signal tensors.
    """
    data_dict = {}

    # Check if GPU is available
    device = torch.device("cuda" if use_gpu and torch.cuda.is_available() else "cpu")

    # Walk through all subdirectories (e.g., Bearing1_1, Bearing1_2, etc.)
    for bearing_folder in sorted(os.listdir(directory)):
        bearing_path = os.path.join(directory, bearing_folder)
        if os.path.isdir(bearing_path):  # Ensure it's a directory
            print(f"\n📂 Processing Bearing Folder: {bearing_folder}")
            data_dict[bearing_folder] = []
            
            # Read all CSV files inside each bearing folder
            for file_name in sorted(os.listdir(bearing_path)):
                if file_name.startswith("acc") and file_name.endswith(".csv"):  # Load only vibration files
                    file_path = os.path.join(bearing_path, file_name)
                    
                    try:
                        # print(f"🔄 Loading vibration file: {file_name}")

                        # Load CSV file using COMMA separator
                        df = pd.read_csv(file_path, sep=",", header=None, dtype=str, engine="python")

                        # print(f"\n🔹 Raw CSV Data from {file_name} (First 5 rows):\n{df.head()}\n")

                        # Ensure we select only numeric acceleration columns
                        # print(f"🔍 CSV Shape Before Processing: {df.shape}")

                        # Drop timestamp columns (Assuming first 4 columns are time-related)
                        df = df.iloc[:, -2:]  # Keep only the last 2 columns (Horizontal & Vertical acceleration)

                        # Convert to numeric values
                        df = df.apply(pd.to_numeric, errors="coerce").dropna()

                        # print(f"🔹 Processed CSV Data (First 5 rows after numeric conversion):\n{df.head()}\n")

                        if df.shape[0] == 0 or df.shape[1] != 2:
                            print(f"⚠️ Skipping {file_name} - No valid numeric data found.")
                            continue

                        # Convert to numpy array and flatten
                        signal_data = df.values.flatten()
                        # print(f"✅ Loaded {file_name}: Shape {signal_data.shape}")

                        # Convert to PyTorch Tensor and move to GPU if available
                        signal_tensor = torch.tensor(signal_data, dtype=torch.float32).to(device)

                        # Store in dictionary (append to list)
                        data_dict[bearing_folder].append(signal_tensor)

                    except Exception as e:
                        print(f"❌ Error loading {file_path}: {e}")
                        continue

    return data_dict, device

# Set the dataset directory (Update this path based on your Kaggle dataset location)
dataset_directory = "/kaggle/input/ieee-phm-2012-data-challenge/ieee-phm-2012-data-challenge-dataset-master/Learning_set"

# Load the dataset (GPU enabled) with corrected separator
bearing_data, device = load_ieee_phm_vibration_data(dataset_directory, use_gpu=True)

# Display available bearings and sample data shape
print(f"\n🚀 Using device: {device}")
print("✅ Loaded Bearings:", list(bearing_data.keys())[:5])

# Check if any valid data exists
for bearing, signals in bearing_data.items():
    if len(signals) > 0:
        print(f"📊 {bearing} First CSV Shape: {signals[0].shape}")
    else:
        print(f"⚠️ {bearing} contains NO valid data!")



📂 Processing Bearing Folder: Bearing1_1

📂 Processing Bearing Folder: Bearing1_2

📂 Processing Bearing Folder: Bearing2_1

📂 Processing Bearing Folder: Bearing2_2

📂 Processing Bearing Folder: Bearing3_1

📂 Processing Bearing Folder: Bearing3_2

🚀 Using device: cuda
✅ Loaded Bearings: ['Bearing1_1', 'Bearing1_2', 'Bearing2_1', 'Bearing2_2', 'Bearing3_1']
📊 Bearing1_1 First CSV Shape: torch.Size([5120])
📊 Bearing1_2 First CSV Shape: torch.Size([5120])
📊 Bearing2_1 First CSV Shape: torch.Size([5120])
📊 Bearing2_2 First CSV Shape: torch.Size([5120])
📊 Bearing3_1 First CSV Shape: torch.Size([5120])
📊 Bearing3_2 First CSV Shape: torch.Size([5120])


In [2]:
import pywt
import torch
import numpy as np

def apply_wavelet_transform_paper(data_dict, wavelet="db8", level=1, trunc_length=1280):
    """
    Applies Discrete Wavelet Transform (DWT) using 'db8' at level 1 and ensures correct truncation as per the paper.

    Parameters:
        data_dict (dict): Dictionary containing vibration signal tensors.
        wavelet (str): Type of wavelet to use (default: "db8").
        level (int): Decomposition level (fixed at 1 based on the paper).
        trunc_length (int): Fixed length to truncate the coefficients to (default: 1280).

    Returns:
        wavelet_dict (dict): Dictionary with transformed and truncated wavelet coefficients.
    """
    wavelet_dict = {}

    for bearing, signals in data_dict.items():
        print(f"\n🔄 Processing Bearing: {bearing}")
        wavelet_dict[bearing] = []

        for signal_tensor in signals:
            # Convert PyTorch tensor to NumPy for processing
            signal_np = signal_tensor.cpu().numpy()

            # **Step 1: Apply Discrete Wavelet Transform (DWT) at level 1**
            coeffs = pywt.wavedec(signal_np, wavelet, level=level)

            # **Step 2: Extract Approximate & Detail Coefficients**
            approx_coeffs = coeffs[0]  # Approximate Coefficients (Low-Frequency)
            detail_coeffs = coeffs[1]  # First-Level Detail Coefficients (High-Frequency)

            # **Step 3: Truncate half of each spectrum**
            trunc_approx = approx_coeffs[:trunc_length]
            trunc_detail = detail_coeffs[:trunc_length]

            # **Step 4: Stack into (2, 1280) format for CNN input**
            wavelet_processed = torch.tensor([trunc_approx, trunc_detail], dtype=torch.float32, device=signal_tensor.device)

            # Store processed wavelet data
            wavelet_dict[bearing].append(wavelet_processed)

            # print(f"✅ {bearing}: Level {level}, Original Shape {signal_np.shape} → Truncated Shape {wavelet_processed.shape}")

    return wavelet_dict

# Apply Wavelet Transform as per the paper with level=1
wavelet_data_paper = apply_wavelet_transform_paper(bearing_data, wavelet="db8", level=1, trunc_length=1280)

# 🔹 Print final shape for verification
print("\n🚀 Final Verification of Shapes:")
for bearing, signals in wavelet_data_paper.items():
    if len(signals) > 0:
        print(f"📊 {bearing} Processed Shape: {signals[0].shape}")
    else:
        print(f"⚠️ {bearing} contains NO processed wavelet data!")



🔄 Processing Bearing: Bearing1_1


  wavelet_processed = torch.tensor([trunc_approx, trunc_detail], dtype=torch.float32, device=signal_tensor.device)



🔄 Processing Bearing: Bearing1_2

🔄 Processing Bearing: Bearing2_1

🔄 Processing Bearing: Bearing2_2

🔄 Processing Bearing: Bearing3_1

🔄 Processing Bearing: Bearing3_2

🚀 Final Verification of Shapes:
📊 Bearing1_1 Processed Shape: torch.Size([2, 1280])
📊 Bearing1_2 Processed Shape: torch.Size([2, 1280])
📊 Bearing2_1 Processed Shape: torch.Size([2, 1280])
📊 Bearing2_2 Processed Shape: torch.Size([2, 1280])
📊 Bearing3_1 Processed Shape: torch.Size([2, 1280])
📊 Bearing3_2 Processed Shape: torch.Size([2, 1280])


In [3]:
import torch
import random
import numpy as np

def sliding_window_segmentation_fixed(data_dict, window_size=12, step_size=1):
    """
    Implements sliding window segmentation with separate positive and negative sample lists.
    
    Parameters:
        data_dict (dict): Dictionary containing wavelet-transformed vibration signal tensors.
        window_size (int): The size of each historical segment (default: 12).
        step_size (int): The step size for moving the window (default: 1).

    Returns:
        positive_samples (dict): Dictionary containing positive-order segments.
        negative_samples (dict): Dictionary containing reversed-order segments.
    """
    positive_samples = {}
    negative_samples = {}

    for bearing, signals in data_dict.items():
        print(f"\n🔄 Processing Sliding Window Segmentation for: {bearing}")
        positive_samples[bearing] = []
        negative_samples[bearing] = []

        for signal_tensor in signals:
            # Convert to NumPy and print original shape
            sequence = signal_tensor.cpu().numpy()  # Expecting (2, 1280)
            # print(f"📏 Original Shape: {sequence.shape}")  # Confirm it is (2, 1280)

            seq_length = sequence.shape[1]  # Time axis = 1280

            # **Step 1: Randomly Cut the First Two-Thirds of the Trajectory**
            cut_length = int(2/3 * seq_length)  # First two-thirds
            random_start = random.randint(0, cut_length - int(0.5 * cut_length))  # Random cut within first 2/3
            truncated_sequence = sequence[:, random_start:random_start + int(0.5 * cut_length)]  # Shape (2, N)

            # print(f"✂️ Truncated Sequence Shape: {truncated_sequence.shape}")  # Expecting (2, ~640)

            # **Step 2: Apply Sliding Window Segmentation Over TIME Dimension**
            num_segments = (truncated_sequence.shape[1] - window_size) // step_size + 1

            for i in range(num_segments):
                start_idx = i * step_size
                end_idx = start_idx + window_size

                # Extract the segment correctly over the TIME axis
                segment = truncated_sequence[:, start_idx:end_idx]  # Expecting (2, 12)
                # print(f"🪟 Segment {i} Shape: {segment.shape}")  # Should be (2, 12)

                # **Save Positive Sample**
                positive_samples[bearing].append(torch.tensor(segment, dtype=torch.float32, device=signal_tensor.device))

                # **Create Negative Sample (Reversed Order in Time Axis, with Copy Fix)**
                segment_reversed = np.flip(segment, axis=1).copy()  # Reverse time axis & make a copy
                negative_samples[bearing].append(torch.tensor(segment_reversed, dtype=torch.float32, device=signal_tensor.device))

            # print(f"✅ {bearing}: Generated {num_segments} Segments.")

    return positive_samples, negative_samples

# Apply Fixed Sliding Window Segmentation
positive_samples, negative_samples = sliding_window_segmentation_fixed(wavelet_data_paper, window_size=12, step_size=1)

# 🔹 Print final verification
print("\n🚀 Final Verification of Segmented Data:")
for bearing in positive_samples.keys():
    if len(positive_samples[bearing]) > 0:
        print(f"📊 {bearing} First Positive Sample Shape: {positive_samples[bearing][0].shape}")
        print(f"📊 {bearing} First Negative Sample Shape: {negative_samples[bearing][0].shape}")
    else:
        print(f"⚠️ {bearing} contains NO segmented data!")



🔄 Processing Sliding Window Segmentation for: Bearing1_1

🔄 Processing Sliding Window Segmentation for: Bearing1_2

🔄 Processing Sliding Window Segmentation for: Bearing2_1

🔄 Processing Sliding Window Segmentation for: Bearing2_2

🔄 Processing Sliding Window Segmentation for: Bearing3_1

🔄 Processing Sliding Window Segmentation for: Bearing3_2

🚀 Final Verification of Segmented Data:
📊 Bearing1_1 First Positive Sample Shape: torch.Size([2, 12])
📊 Bearing1_1 First Negative Sample Shape: torch.Size([2, 12])
📊 Bearing1_2 First Positive Sample Shape: torch.Size([2, 12])
📊 Bearing1_2 First Negative Sample Shape: torch.Size([2, 12])
📊 Bearing2_1 First Positive Sample Shape: torch.Size([2, 12])
📊 Bearing2_1 First Negative Sample Shape: torch.Size([2, 12])
📊 Bearing2_2 First Positive Sample Shape: torch.Size([2, 12])
📊 Bearing2_2 First Negative Sample Shape: torch.Size([2, 12])
📊 Bearing3_1 First Positive Sample Shape: torch.Size([2, 12])
📊 Bearing3_1 First Negative Sample Shape: torch.Size(

In [5]:
import torch
import torch.nn as nn
import torch.nn.functional as F

class SiameseCNN(nn.Module):
    def __init__(self):
        super(SiameseCNN, self).__init__()

        # 1D CNN to extract features from vibration signals
        self.conv1 = nn.Conv1d(in_channels=2, out_channels=64, kernel_size=3, padding=1)  # Input: (batch, 2, 12)
        self.conv2 = nn.Conv1d(in_channels=64, out_channels=128, kernel_size=3, padding=1)
        self.conv3 = nn.Conv1d(in_channels=128, out_channels=256, kernel_size=3, padding=1)
        
        # Global average pooling to reduce spatial dimensions
        self.global_pool = nn.AdaptiveAvgPool1d(1)
        
        # Fully connected layers
        self.fc1 = nn.Linear(256, 128)
        self.fc2 = nn.Linear(128, 64)

    def forward(self, x):
        x = F.relu(self.conv1(x))
        x = F.relu(self.conv2(x))
        x = F.relu(self.conv3(x))
        x = self.global_pool(x).squeeze(-1)  # Shape: (batch, 256)
        x = F.relu(self.fc1(x))
        x = self.fc2(x)  # Output: (batch, 64)
        return x


In [6]:
class ContrastiveLoss(nn.Module):
    def __init__(self, margin=1.0):
        super(ContrastiveLoss, self).__init__()
        self.margin = margin

    def forward(self, positive_features, negative_features):
        # Compute Euclidean distance between feature vectors
        distance = F.pairwise_distance(positive_features, negative_features, keepdim=True)
        
        # Loss encourages positive pairs to be close and negative pairs to be far apart
        loss = torch.mean((1) * torch.pow(distance, 2) + (1 - 1) * torch.pow(torch.clamp(self.margin - distance, min=0.0), 2))
        return loss


In [12]:
import random
import torch

# Define total number of pretext samples needed
TARGET_PRETEXT_SAMPLES = 251520

def sample_pretext_data(positive_samples, negative_samples, target_size):
    """
    Randomly selects 'target_size' samples from positive and negative datasets.

    Parameters:
        positive_samples (dict): Dictionary containing positive samples per bearing.
        negative_samples (dict): Dictionary containing negative samples per bearing.
        target_size (int): Total number of samples to randomly select.

    Returns:
        selected_positive (list): List of randomly chosen positive samples.
        selected_negative (list): List of randomly chosen negative samples.
    """
    # Flatten all samples into lists
    all_positive = []
    all_negative = []

    for bearing in positive_samples.keys():
        all_positive.extend(positive_samples[bearing])
        all_negative.extend(negative_samples[bearing])

    # Ensure equal selection from positive and negative datasets
    selected_positive = random.sample(all_positive, target_size // 2)
    selected_negative = random.sample(all_negative, target_size // 2)

    return selected_positive, selected_negative

# 🔄 Select a limited number of pretext samples
selected_positive, selected_negative = sample_pretext_data(positive_samples, negative_samples, TARGET_PRETEXT_SAMPLES)

# Move selected data to GPU for faster training
selected_positive = [sample.to(device) for sample in selected_positive]
selected_negative = [sample.to(device) for sample in selected_negative]

# Print dataset summary
print("\n📊 Updated Pretext Training Dataset:")
print(f"✅ Selected Positive Samples: {len(selected_positive)}")
print(f"✅ Selected Negative Samples: {len(selected_negative)}")



📊 Updated Pretext Training Dataset:
✅ Selected Positive Samples: 125760
✅ Selected Negative Samples: 125760


In [13]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim

# Detect and use GPU
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(f"🚀 Using device: {device}")

class SiameseCNN(nn.Module):
    def __init__(self):
        super(SiameseCNN, self).__init__()

        # Move CNN layers to GPU
        self.conv1 = nn.Conv1d(in_channels=2, out_channels=64, kernel_size=3, padding=1).to(device)
        self.conv2 = nn.Conv1d(in_channels=64, out_channels=128, kernel_size=3, padding=1).to(device)
        self.conv3 = nn.Conv1d(in_channels=128, out_channels=256, kernel_size=3, padding=1).to(device)
        
        self.global_pool = nn.AdaptiveAvgPool1d(1).to(device)
        
        self.fc1 = nn.Linear(256, 128).to(device)
        self.fc2 = nn.Linear(128, 64).to(device)

    def forward(self, x):
        x = x.to(device)  # Ensure input is on GPU
        x = F.relu(self.conv1(x))
        x = F.relu(self.conv2(x))
        x = F.relu(self.conv3(x))
        x = self.global_pool(x).squeeze(-1)  # Shape: (batch, 256)
        x = F.relu(self.fc1(x))
        x = self.fc2(x)  # Output: (batch, 64)
        return x

class ContrastiveLoss(nn.Module):
    def __init__(self, margin=1.0):
        super(ContrastiveLoss, self).__init__()
        self.margin = margin

    def forward(self, positive_features, negative_features):
        distance = F.pairwise_distance(positive_features, negative_features)
        loss = torch.mean(torch.pow(distance, 2))
        return loss

import torch.optim as optim
from torch.utils.data import DataLoader, TensorDataset

# Define batch size
batch_size = 64

# Convert selected data into TensorDataset and DataLoader
positive_dataset = TensorDataset(torch.stack(selected_positive))
negative_dataset = TensorDataset(torch.stack(selected_negative))

positive_loader = DataLoader(positive_dataset, batch_size=batch_size, shuffle=True)
negative_loader = DataLoader(negative_dataset, batch_size=batch_size, shuffle=True)

# Initialize model and optimizer
cnn_encoder = SiameseCNN().to(device)
contrastive_loss_fn = ContrastiveLoss().to(device)
optimizer = optim.Adam(cnn_encoder.parameters(), lr=0.001)

# Training loop
num_epochs = 10

for epoch in range(num_epochs):
    total_loss = 0
    batch_count = 0

    for (pos_batch, neg_batch) in zip(positive_loader, negative_loader):
        pos_batch = pos_batch[0].to(device)  # Extract tensor from dataset and move to GPU
        neg_batch = neg_batch[0].to(device)

        # Forward pass
        pos_features = cnn_encoder(pos_batch)
        neg_features = cnn_encoder(neg_batch)

        # Compute contrastive loss
        loss = contrastive_loss_fn(pos_features, neg_features)
        total_loss += loss.item()

        # Backpropagation
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        batch_count += 1

    print(f"Epoch [{epoch+1}/{num_epochs}] | Loss: {total_loss:.10f} | Processed {batch_count} batches.")


🚀 Using device: cuda
Epoch [1/10] | Loss: 0.0053258782 | Processed 1965 batches.
Epoch [2/10] | Loss: 0.0000157883 | Processed 1965 batches.
Epoch [3/10] | Loss: 0.0000032113 | Processed 1965 batches.
Epoch [4/10] | Loss: 0.0000007437 | Processed 1965 batches.
Epoch [5/10] | Loss: 0.0000002920 | Processed 1965 batches.
Epoch [6/10] | Loss: 0.0000002346 | Processed 1965 batches.
Epoch [7/10] | Loss: 0.0000001529 | Processed 1965 batches.
Epoch [8/10] | Loss: 0.0000001328 | Processed 1965 batches.
Epoch [9/10] | Loss: 0.0000001263 | Processed 1965 batches.
Epoch [10/10] | Loss: 0.0000001189 | Processed 1965 batches.
