In [1]:
import pandas as pd
import numpy as np   

In [None]:
! pip install dtw-python
! pip install fastdtw

In [90]:
# function for renaming columns for consistency in the processing
def rename_columns(df):
    column_mapping = {
        # Time Column
        "Time (s)": "time",

        # Right Sensor Data
        "HS/1 Switchback - High Speed Data/Row 1 Right (Counts)": "row1_right",
        "HS/1 Switchback - High Speed Data/Row 2 Right (Counts)": "row2_right",
        "HS/1 Switchback - High Speed Data/Row 3 Right (Counts)": "row3_right",
        "HS/1 Switchback - High Speed Data/Row 4 Right (Counts)": "row4_right",
        "HS/1 Switchback - High Speed Data/Row 5 Right (Counts)": "row5_right",
        "HS/1 Switchback - High Speed Data/Row 6 Right (Counts)": "row6_right",
        "HS/1 Switchback - High Speed Data/Row 7 Right (Counts)": "row7_right",
        "HS/1 Switchback - High Speed Data/Row 8 Right (Counts)": "row8_right",
        "HS/1 Switchback - High Speed Data/Row 9 Right (Counts)": "row9_right",
        "HS/1 Switchback - High Speed Data/Row 10 Right (Counts)": "row10_right",
        "HS/1 Switchback - High Speed Data/Row 11 Right (Counts)": "row11_right",
        "HS/1 Switchback - High Speed Data/Row 12 Right (Counts)": "row12_right",

        # Left Sensor Data
        "HS/1 Switchback - High Speed Data/Row 1 Left (Counts)": "row1_left",
        "HS/1 Switchback - High Speed Data/Row 2 Left (Counts)": "row2_left",
        "HS/1 Switchback - High Speed Data/Row 3 Left (Counts)": "row3_left",
        "HS/1 Switchback - High Speed Data/Row 4 Left (Counts)": "row4_left",
        "HS/1 Switchback - High Speed Data/Row 5 Left (Counts)": "row5_left",
        "HS/1 Switchback - High Speed Data/Row 6 Left (Counts)": "row6_left",
        "HS/1 Switchback - High Speed Data/Row 7 Left (Counts)": "row7_left",
        "HS/1 Switchback - High Speed Data/Row 8 Left (Counts)": "row8_left",
        "HS/1 Switchback - High Speed Data/Row 9 Left (Counts)": "row9_left",
        "HS/1 Switchback - High Speed Data/Row 10 Left (Counts)": "row10_left",
        "HS/1 Switchback - High Speed Data/Row 11 Left (Counts)": "row11_left",
        "HS/1 Switchback - High Speed Data/Row 12 Left (Counts)": "row12_left",

        # Row Sums (Alternate Naming Formats)
        "1 SB Row 1 Sum/1 SB Row 1 Sum (Counts)": "row1_sum",
        "1 SB Row 2 Sum/1 SB Row 2 Sum (Counts)": "row2_sum",
        "1 SB Row 3 Sum/1 SB Row 3 Sum (Counts)": "row3_sum",
        "1 SB Row 4 Sum/1 SB Row 4 Sum (Counts)": "row4_sum",
        "1 SB Row 5 Sum/1 SB Row 5 Sum (Counts)": "row5_sum",
        "1 SB Row 6 Sum/1 SB Row 6 Sum (Counts)": "row6_sum",
        "1 SB Row 7 Sum/1 SB Row 7 Sum (Counts)": "row7_sum",
        "1 SB Row 8 Sum/1 SB Row 8 Sum (Counts)": "row8_sum",
        "1 SB Row 9 Sum/1 SB Row 9 Sum (Counts)": "row9_sum",
        "1 SB Row 10 Sum/1 SB Row 10 Sum (Counts)": "row10_sum",
        "1 SB Row 11 Sum/1 SB Row 11 Sum (Counts)": "row11_sum",
        "1 SB Row 12 Sum/1 SB Row 12 Sum (Counts)": "row12_sum",

        # Alternate Naming
        "Row 1 Sum/Row 1 Sum (Counts)": "row1_sum",
        "Row 2 Sum/Row 2 Sum (Counts)": "row2_sum",
        "Row 3 Sum/Row 3 Sum (Counts)": "row3_sum",
        "Row 4 Sum/Row 4 Sum (Counts)": "row4_sum",
        "Row 5 Sum/Row 5 Sum (Counts)": "row5_sum",
        "Row 6 Sum/Row 6 Sum (Counts)": "row6_sum",
        "Row 7 Sum/Row 7 Sum (Counts)": "row7_sum",
        "Row 8 Sum/Row 8 Sum (Counts)": "row8_sum",
        "Row 9 Sum/Row 9 Sum (Counts)": "row9_sum",
        "Row 10 Sum/Row 10 Sum (Counts)": "row10_sum",
        "Row 11 Sum/Row 11 Sum (Counts)": "row11_sum",
        "Row 12 Sum/Row 12 Sum (Counts)": "row12_sum",
    }

    return df.rename(columns=column_mapping)



import re

def rename_columns(df):
    column_mapping = {
        "Time (s)": "time",
    }

    df = df.rename(columns=column_mapping)

    new_columns = {}
    right_col = ''
    left_col = ''
    sum_col = ''

    for col in df.columns:
        # Right Sensor
        match_right = re.search(r"Row (\d+) Right", col, re.IGNORECASE)
        if match_right:
            row_num = match_right.group(1)
            new_name = f"row{row_num}_right"
            new_columns[col] = new_name
            right_col = new_name
            continue

        # Left Sensor
        match_left = re.search(r"Row (\d+) Left", col, re.IGNORECASE)
        if match_left:
            row_num = match_left.group(1)
            new_name = f"row{row_num}_left"
            new_columns[col] = new_name
            left_col = new_name
            continue

        # Row Sum
        match_sum = re.search(r"Row (\d+) Sum", col, re.IGNORECASE)
        if match_sum:
            row_num = match_sum.group(1)
            new_name = f"row{row_num}_sum"
            new_columns[col] = new_name
            sum_col = new_name
            continue

    renamed_df = df.rename(columns=new_columns)
    return renamed_df, right_col, left_col, sum_col



# Processing the 1 second window for peak detection
import numpy as np
import pandas as pd
from dtw import dtw
import pywt
from scipy.signal import find_peaks, find_peaks_cwt

# Function for Wavelet Smoothing
def wavelet_smooth(signal, wavelet="db4", level=4):
    coeffs = pywt.wavedec(signal, wavelet, mode="per")
    sigma = np.median(np.abs(coeffs[-level])) / 0.6745  # Estimate noise level
    uthresh = sigma * np.sqrt(2 * np.log(len(signal)))  # Universal threshold
    coeffs[1:] = (pywt.threshold(c, value=uthresh, mode='soft') for c in coeffs[1:])
    return pywt.waverec(coeffs, wavelet, mode="per")


In [91]:
import os
import glob

train_csv_dir = "field_data/extracted data all/train_raw_csv"
train_csv_files = glob.glob(os.path.join(train_csv_dir, "*.csv"))

val_csv_dir = "field_data/extracted data all/validation_raw_csv"
val_csv_files = glob.glob(os.path.join(val_csv_dir, "*.csv"))


In [None]:
#Feature Extraction
import os
import glob
import numpy as np
import pandas as pd
from sklearn.preprocessing import MinMaxScaler
from scipy.signal import find_peaks, peak_prominences, peak_widths

# Create output directory
train_output_dir = "field_data/model_data/train"
val_output_dir = "field_data/model_data/validation"

# Loop through and process
for idx, path in enumerate(train_csv_files, start=1):
    try:
        df1 = pd.read_csv(path)
        df, row_right, row_left, row_sum = rename_columns(df1)

        df = df.fillna(method='ffill').reset_index(drop=True)
        window_df = df

        # Normalize and smooth
        scaler = MinMaxScaler()
        raw_signal = scaler.fit_transform(window_df[[row_sum]]).flatten()
        smoothed_signal = wavelet_smooth(raw_signal)
        df['smoothed'] = smoothed_signal[:len(df)]

        # Peak detection
        peaks, _ = find_peaks(smoothed_signal, height=0.15, distance=10)
        df['peak_id'] = np.nan
        for i, peak_index in enumerate(peaks):
            df.at[peak_index, 'peak_id'] = i + 1

        # Peak features
        amplitudes = smoothed_signal[peaks]
        prominences = peak_prominences(smoothed_signal, peaks)[0]
        widths_results = peak_widths(smoothed_signal, peaks, rel_height=0.5)
        widths = widths_results[0]
        durations = widths_results[2] - widths_results[1]
        distances_prev = np.diff(peaks, prepend=peaks[0])
        distances_next = np.diff(peaks, append=peaks[-1])

        peak_times = window_df['time'].iloc[peaks].values
        row_sum_values = window_df[row_sum].values[peaks]
        row_right_values = window_df[row_right].values[peaks]
        row_left_values = window_df[row_left].values[peaks]

        with np.errstate(divide='ignore', invalid='ignore'):
            rl_ratios = np.where(row_left_values != 0, row_right_values / row_left_values, np.nan)

        peak_features = pd.DataFrame({
            'peak_id': range(1, len(peaks) + 1),
            'time': peak_times,
            'amplitude': amplitudes,
            'prominence': prominences,
            'width': widths,
            'duration': durations,
            'distance_from_prev_peak': distances_prev,
            'distance_from_next_peak': distances_next,
            'row_sum_values': row_sum_values,
            'row_right_value': row_right_values,
            'row_left_value': row_left_values,
            'right_to_left_ratio': rl_ratios
        })

        # Save feature file
        out_path = os.path.join(train_output_dir, f"feature_{idx}.csv")
        peak_features.to_csv(out_path, index=False)
        print(f"Saved: {out_path}")

    except Exception as e:
        print(f"Error processing {path}: {e}")

Saved: field_data/model_data/train\feature_1.csv
Saved: field_data/model_data/train\feature_2.csv
Saved: field_data/model_data/train\feature_3.csv
Saved: field_data/model_data/train\feature_4.csv
Saved: field_data/model_data/train\feature_5.csv
Saved: field_data/model_data/train\feature_6.csv
Saved: field_data/model_data/train\feature_7.csv
Saved: field_data/model_data/train\feature_8.csv
Saved: field_data/model_data/train\feature_9.csv
Saved: field_data/model_data/train\feature_10.csv
Saved: field_data/model_data/train\feature_11.csv
Saved: field_data/model_data/train\feature_12.csv
Saved: field_data/model_data/train\feature_13.csv


# Feature Classification

In [114]:
import os
import glob

train_feature_dir = "field_data/model_data/train"
train_csv_files = glob.glob(os.path.join(train_feature_dir, "*.csv"))


val_feature_dir = "field_data/model_data/validation"
val_csv_files = glob.glob(os.path.join(val_feature_dir, "*.csv"))

import pandas as pd

def concatenate_csv_files(file_paths):
    """
    Concatenates a list of CSV file paths into a single DataFrame.

    Parameters:
        file_paths (list): List of CSV file paths (str)

    Returns:
        pd.DataFrame: Combined DataFrame containing all CSV data
    """
    df_list = []
    for path in file_paths:
        try:
            df = pd.read_csv(path)
            df_list.append(df)
        except Exception as e:
            print(f"Error reading {path}: {e}")

    combined_df = pd.concat(df_list, ignore_index=True)
    return combined_df


train_data = concatenate_csv_files(train_csv_files)
# val_data = concatenate_csv_files(val_csv_files)
val_data = pd.read_csv('field_data/model_data/validation/feature_3.csv')

In [115]:
from sklearn.preprocessing import MinMaxScaler

def standardize_columns(df, columns):
    """
    Standardizes the given columns in the DataFrame using Z-score normalization.

    Parameters:
    df (pd.DataFrame): Input DataFrame.
    columns (list): List of column names to be standardized.

    Returns:
    pd.DataFrame: DataFrame with standardized columns.
    dict: Dictionary containing fitted scaler (can be reused on test data).
    """
    scaler = MinMaxScaler()
    df_scaled = df.copy()
    df_scaled[columns] = scaler.fit_transform(df[columns])
    return df_scaled


features = [
            'amplitude', 'prominence', 'width',
            'distance_from_prev_peak', 'distance_from_next_peak',
            'right_to_left_ratio'
        ]

train_data_scaled = standardize_columns(train_data, features)
val_data_scaled = standardize_columns(val_data, features)

In [None]:
# Classofying the peaks based on rule based labelling
def classify_peaks_by_quantile(df, features=None, thresholds=None, quantile_cutoff=0.4):
    if features is None:
        features = [
            'amplitude', 'prominence', 'width',
            'distance_from_prev_peak', 'distance_from_next_peak',
            'right_to_left_ratio'
        ]

    # Compute thresholds if not provided
    if thresholds is None:
        thresholds = {feature: df[feature].quantile(quantile_cutoff) for feature in features}

    # Scoring function
    def classify_peak(row):
        score = sum(row[feature] > thresholds[feature] for feature in features)
        if score >= 4:
            return 'strong'
        elif score == 3:
            return 'good'
        elif score == 2:
            return 'maybe'
        else:
            return 'bad'

    df['peak_class'] = df.apply(classify_peak, axis=1)
    return df


train_df = classify_peaks_by_quantile(train_data_scaled)
val_df = classify_peaks_by_quantile(val_data_scaled)

#printing shapes
print(f"train shape: {train_df.shape}")
print(f"val shape: {val_df.shape}")

# Model Train

## XGBoost

In [118]:
from xgboost import XGBClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import precision_score, recall_score, classification_report

model_df = train_df.copy()

# labelling the class
label_map = {'bad': 0, 'maybe': 1, 'good': 2, 'strong': 3}
# label_map = {'bad': 0, 'maybe': 1, 'good': 2}
model_df['class_label'] = model_df['peak_class'].map(label_map)


# Selected features
features = [
    'amplitude', 'prominence', 'width',
    'distance_from_prev_peak', 'distance_from_next_peak',
    'right_to_left_ratio',
]

X = model_df[features]
y = model_df['class_label']

# Train-test split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, stratify=y)

# from imblearn.over_sampling import SMOTE

# smote = SMOTE(random_state=42)
# X_resampled, y_resampled = smote.fit_resample(X, y)
# # Train-test split
# X_train, X_test, y_train, y_test = train_test_split(X_resampled, y_resampled, test_size=0.2, stratify=y_resampled)

# XGBoost Model Train
model_xgb = XGBClassifier(
    n_estimators=100,
    max_depth=4,
    learning_rate=0.1,
    objective='multi:softmax',  # for class labels directly
    num_class=3,
    use_label_encoder=False,
    eval_metric='mlogloss',
    random_state=42
)
model_xgb.fit(X_train, y_train)

# Evaluate
preds = model_xgb.predict(X_test)
print(f"Precision (macro): {precision_score(y_test, preds, average='macro'):.2f}")
print(f"Recall (macro): {recall_score(y_test, preds, average='macro'):.2f}")

# Full class-wise report
print("\nClassification Report:")
print(classification_report(y_test, preds, target_names=label_map.keys()))

Parameters: { "use_label_encoder" } are not used.



Precision (macro): 0.93
Recall (macro): 0.93

Classification Report:
              precision    recall  f1-score   support

         bad       1.00      0.97      0.99        70
       maybe       0.93      0.83      0.87        75
        good       0.82      0.92      0.87        78
      strong       0.99      0.99      0.99       255

    accuracy                           0.95       478
   macro avg       0.93      0.93      0.93       478
weighted avg       0.95      0.95      0.95       478



In [119]:
val_model_df = val_df.copy()

# labelling the class
label_map = {'bad': 0, 'maybe': 1, 'good': 2, 'strong': 3}
# label_map = {'bad': 0, 'maybe': 1, 'good': 2}
val_model_df['class_label'] = val_model_df['peak_class'].map(label_map)


# Selected features
features = [
    'amplitude', 'prominence', 'width',
    'distance_from_prev_peak', 'distance_from_next_peak',
    'right_to_left_ratio',
]

X = val_model_df[features]
y = val_model_df['class_label']

# Evaluate
preds = model_xgb.predict(X)
print(f"Precision (macro): {precision_score(y, preds, average='macro'):.2f}")
print(f"Recall (macro): {recall_score(y, preds, average='macro'):.2f}")

# Full class-wise report
print("\nClassification Report:")
print(classification_report(y, preds, target_names=label_map.keys()))

Precision (macro): 0.50
Recall (macro): 0.30

Classification Report:
              precision    recall  f1-score   support

         bad       1.00      0.03      0.06        31
       maybe       0.12      0.05      0.07        20
        good       0.23      0.12      0.16        24
      strong       0.64      1.00      0.78        94

    accuracy                           0.59       169
   macro avg       0.50      0.30      0.27       169
weighted avg       0.59      0.59      0.48       169



In [120]:
stalk_counts = pd.Series(preds).value_counts()
print(stalk_counts)

3    147
2     13
1      8
0      1
Name: count, dtype: int64


## RF Model

In [121]:
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import precision_score, recall_score, classification_report, accuracy_score

model_df = train_df.copy()

# labelling the class
label_map = {'bad': 0, 'maybe': 1, 'good': 2, 'strong': 3}
# label_map = {'bad': 0, 'maybe': 1, 'good': 2}

model_df['class_label'] = model_df['peak_class'].map(label_map)


# Selected features
features = [
    'amplitude', 'prominence', 'width',
    'distance_from_prev_peak', 'distance_from_next_peak',
    'right_to_left_ratio',
]

X = model_df[features]
y = model_df['class_label']

# Train-test split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, stratify=y)

# from imblearn.over_sampling import SMOTE
# smote = SMOTE(random_state=42)
# X_resampled, y_resampled = smote.fit_resample(X, y)
# # Train-test split
# X_train, X_test, y_train, y_test = train_test_split(X_resampled, y_resampled, test_size=0.2, stratify=y_resampled)

# Train model
model_rf = RandomForestClassifier(
    n_estimators=150,
    max_depth=10,
    class_weight='balanced',
    random_state=42
)
model_rf.fit(X_train, y_train)

# Evaluate
preds = model_rf.predict(X_test)
print(f"Precision (macro): {precision_score(y_test, preds, average='macro'):.2f}")
print(f"Recall (macro): {recall_score(y_test, preds, average='macro'):.2f}")

# Full class-wise report
print("\nClassification Report:")
print(classification_report(y_test, preds, target_names=label_map.keys()))

Precision (macro): 0.92
Recall (macro): 0.93

Classification Report:
              precision    recall  f1-score   support

         bad       1.00      0.97      0.99        70
       maybe       0.87      0.92      0.90        75
        good       0.84      0.83      0.84        78
      strong       0.98      0.98      0.98       255

    accuracy                           0.94       478
   macro avg       0.92      0.93      0.92       478
weighted avg       0.94      0.94      0.94       478



In [122]:
val_model_df = val_df.copy()

# labelling the class
label_map = {'bad': 0, 'maybe': 1, 'good': 2, 'strong': 3}
# label_map = {'bad': 0, 'maybe': 1, 'good': 2}

val_model_df['class_label'] = val_model_df['peak_class'].map(label_map)


# Selected features
features = [
    'amplitude', 'prominence', 'width',
    'distance_from_prev_peak', 'distance_from_next_peak',
    'right_to_left_ratio',
]

X = val_model_df[features]
y = val_model_df['class_label']

# Evaluate
preds = model_rf.predict(X)
print(f"Precision (macro): {precision_score(y, preds, average='macro'):.2f}")
print(f"Recall (macro): {recall_score(y, preds, average='macro'):.2f}")

# Full class-wise report
print("\nClassification Report:")
print(classification_report(y, preds, target_names=label_map.keys()))


Precision (macro): 0.23
Recall (macro): 0.28

Classification Report:
              precision    recall  f1-score   support

         bad       0.00      0.00      0.00        31
       maybe       0.12      0.05      0.07        20
        good       0.13      0.08      0.10        24
      strong       0.64      1.00      0.78        94

    accuracy                           0.57       169
   macro avg       0.23      0.28      0.24       169
weighted avg       0.39      0.57      0.46       169



  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


In [123]:
stalk_counts = pd.Series(preds).value_counts()
print(stalk_counts)

3    146
2     15
1      8
Name: count, dtype: int64


## Deep Model

In [124]:
from sklearn.model_selection import train_test_split
from sklearn.metrics import precision_score, recall_score, classification_report
from sklearn.preprocessing import StandardScaler

model_df = train_df.copy()

# labelling the class
label_map = {'bad': 0, 'maybe': 1, 'good': 2, 'strong': 3}
# label_map = {'bad': 0, 'maybe': 1, 'good': 2}

model_df['class_label'] = model_df['peak_class'].map(label_map)


# Selected features
features = [
    'amplitude', 'prominence', 'width',
    'distance_from_prev_peak', 'distance_from_next_peak',
    'right_to_left_ratio',
]

scaler = StandardScaler()
X = model_df[features]
X_scaled = scaler.fit_transform(X)

y = model_df['class_label']

X_train, X_val, y_train, y_val = train_test_split(X_scaled, y, test_size=0.2, random_state=42)


In [125]:
import torch
from torch.utils.data import TensorDataset, DataLoader

# Convert to PyTorch tensors
X_train_tensor = torch.tensor(X_train, dtype=torch.float32)
y_train_tensor = torch.tensor(y_train.values, dtype=torch.long)

X_val_tensor = torch.tensor(X_val, dtype=torch.float32)
y_val_tensor = torch.tensor(y_val.values, dtype=torch.long)


# Create DataLoaders
train_dataset = TensorDataset(X_train_tensor, y_train_tensor)
val_dataset = TensorDataset(X_val_tensor, y_val_tensor)

train_loader = DataLoader(train_dataset, batch_size=16, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=16, shuffle=False)

In [126]:
import torch.nn as nn

# class PeakClassifier(nn.Module):
#     def __init__(self, input_dim, num_classes):
#         super(PeakClassifier, self).__init__()
#         self.network = nn.Sequential(
#             nn.Linear(input_dim, 64),
#             nn.ReLU(),
#             nn.Dropout(0.3),
#             nn.Linear(64, 32),
#             nn.ReLU(),
#             nn.Dropout(0.2),
#             nn.Linear(32, num_classes)
#         )

#     def forward(self, x):
#         return self.network(x)


class PeakClassifier(nn.Module):
    def __init__(self, input_dim, num_classes):
        super(PeakClassifier, self).__init__()
        self.network = nn.Sequential(
                nn.Linear(input_dim, 64),
                nn.ReLU(),
                nn.Dropout(0.3),
                nn.Linear(64, 32),
                nn.ReLU(),
                nn.Dropout(0.2),
                nn.Linear(32, num_classes)
                    )

    def forward(self, x):
        return self.network(x)


In [127]:
from sklearn.utils.class_weight import compute_class_weight
weights = compute_class_weight('balanced', classes=np.unique(y), y=y)


device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model = PeakClassifier(input_dim=X_train.shape[1], num_classes=len(np.unique(y))).to(device)

criterion = nn.CrossEntropyLoss(weight=torch.tensor(weights).float().to(device))
optimizer = torch.optim.Adam(model.parameters(), lr=0.001)

# Training loop
for epoch in range(100):
    model.train()
    total_loss = 0
    for xb, yb in train_loader:
        xb, yb = xb.to(device), yb.to(device)

        optimizer.zero_grad()
        out = model(xb)
        loss = criterion(out, yb)
        loss.backward()
        optimizer.step()
        total_loss += loss.item()

    print(f"Epoch {epoch+1}, Loss: {total_loss:.4f}")


Epoch 1, Loss: 146.7795
Epoch 2, Loss: 123.8216
Epoch 3, Loss: 111.9290
Epoch 4, Loss: 105.7356
Epoch 5, Loss: 103.0899
Epoch 6, Loss: 98.2925
Epoch 7, Loss: 97.4196
Epoch 8, Loss: 94.0955
Epoch 9, Loss: 92.3342
Epoch 10, Loss: 90.8656
Epoch 11, Loss: 89.3463
Epoch 12, Loss: 87.9436
Epoch 13, Loss: 87.4946
Epoch 14, Loss: 85.6191
Epoch 15, Loss: 86.9036
Epoch 16, Loss: 87.1451
Epoch 17, Loss: 83.8395
Epoch 18, Loss: 83.3630
Epoch 19, Loss: 85.2390
Epoch 20, Loss: 84.1197
Epoch 21, Loss: 82.8976
Epoch 22, Loss: 82.4661
Epoch 23, Loss: 81.1864
Epoch 24, Loss: 81.7012
Epoch 25, Loss: 79.5075
Epoch 26, Loss: 79.7924
Epoch 27, Loss: 80.3909
Epoch 28, Loss: 81.2959
Epoch 29, Loss: 78.4190
Epoch 30, Loss: 78.0454
Epoch 31, Loss: 77.4196
Epoch 32, Loss: 77.6737
Epoch 33, Loss: 77.3393
Epoch 34, Loss: 78.3523
Epoch 35, Loss: 77.6375
Epoch 36, Loss: 76.7255
Epoch 37, Loss: 77.6079
Epoch 38, Loss: 76.2675
Epoch 39, Loss: 75.5242
Epoch 40, Loss: 76.7313
Epoch 41, Loss: 73.8932
Epoch 42, Loss: 75.6

In [128]:
model.eval()
correct = 0
total = 0

with torch.no_grad():
    for xb, yb in val_loader:
        xb, yb = xb.to(device), yb.to(device)
        out = model(xb)
        preds = torch.argmax(out, dim=1)
        correct += (preds == yb).sum().item()
        total += yb.size(0)

print(f"Validation Accuracy: {correct / total:.2f}")


Validation Accuracy: 0.81


In [129]:
from torch.utils.data import DataLoader, TensorDataset
import torch
import pandas as pd

# labelling the class
label_map = {'bad': 0, 'maybe': 1, 'good': 2, 'strong': 3}
# label_map = {'bad': 0, 'maybe': 1, 'good': 2}

val_df['class_label'] = val_df['peak_class'].map(label_map)

# 1. Extract features and labels
X_val = val_df[features].values.astype('float32')  # features must match training features
y_val = val_df['class_label'].values.astype('int64')     # or adjust if your label column is named differently

# 2. Convert to tensors
X_tensor = torch.tensor(X_val)
y_tensor = torch.tensor(y_val)

# 3. Create DataLoader
val_dataset = TensorDataset(X_tensor, y_tensor)
val_loader = DataLoader(val_dataset, batch_size=64, shuffle=False)

# 4. Evaluation loop
model.eval()
correct = 0
total = 0

with torch.no_grad():
    for xb, yb in val_loader:
        xb, yb = xb.to(device), yb.to(device)
        out = model(xb)
        preds = torch.argmax(out, dim=1)
        correct += (preds == yb).sum().item()
        total += yb.size(0)

print(f"Validation Accuracy on unseen val_df: {correct / total:.2f}")


Validation Accuracy on unseen val_df: 0.56


In [130]:
from collections import Counter

model.eval()
all_preds = []
all_labels = []

with torch.no_grad():
    for xb, yb in val_loader:
        xb, yb = xb.to(device), yb.to(device)
        out = model(xb)
        preds = torch.argmax(out, dim=1)
        all_preds.extend(preds.cpu().numpy())
        all_labels.extend(yb.cpu().numpy())

# Convert to DataFrame for easy viewing
results_df = pd.DataFrame({
    'true_label': all_labels,
    'predicted_label': all_preds
})

# Optional: map numeric labels back to names
reverse_label_map = {v: k for k, v in label_map.items()}
results_df['true_label_name'] = results_df['true_label'].map(reverse_label_map)
results_df['predicted_label_name'] = results_df['predicted_label'].map(reverse_label_map)

# Print first few predictions
print(results_df.head())

# Class-wise prediction count
print("\nPredicted class distribution:")
print(results_df['predicted_label_name'].value_counts())


   true_label  predicted_label true_label_name predicted_label_name
0           3                3          strong               strong
1           3                3          strong               strong
2           3                3          strong               strong
3           3                3          strong               strong
4           3                3          strong               strong

Predicted class distribution:
predicted_label_name
strong    169
Name: count, dtype: int64


In [None]:
results_df.head()

Unnamed: 0,true_label,predicted_label,true_label_name,predicted_label_name
0,3,3,strong,strong
1,3,3,strong,strong
2,3,3,strong,strong
3,3,3,strong,strong
4,3,3,strong,strong
...,...,...,...,...
164,0,3,bad,strong
165,3,3,strong,strong
166,3,3,strong,strong
167,3,3,strong,strong
