In [1]:
import scipy.io
import torch
from torch.utils.data import Dataset, DataLoader
import torch.nn as nn
import torch.nn.functional as F
import torch
import torch.nn as nn
import torch.optim as optim
from tqdm import tqdm
import copy
import numpy as np

In [17]:

# Load data
train_data = scipy.io.loadmat('/content/drive/MyDrive/SSMT/train_speech_subtoken_punc_features.mat')
dev_data = scipy.io.loadmat('/content/drive/MyDrive/SSMT/dev_speech_subtoken_punc_features.mat')
test_data = scipy.io.loadmat('/content/drive/MyDrive/SSMT/test_speech_subtoken_punc_features.mat')

# Extract features and labels
filenames_train, X_train, y_train = train_data['filenames'], train_data['features'], train_data['labels']
filenames_dev, X_dev, y_dev = dev_data['filenames'], dev_data['features'], dev_data['labels']
filenames_test, X_test, y_test = test_data['filenames'], test_data['features'], test_data['labels']

In [18]:
print(X_train.T[0][0].shape)
print(y_train.shape)
print(X_dev.shape)
print(y_dev.shape)
print(filenames_train.shape)
print(filenames_dev.shape)
print(X_test.shape)
print(y_test.shape)
print(filenames_test.shape)

(35, 768)
(1, 1545)
(1, 255)
(1, 255)
(1545,)
(255,)
(1, 399)
(1, 399)
(399,)


In [19]:
print(X_train.T[0][0].shape)
print(y_train.shape)
print(X_dev.shape)
print(y_dev.shape)
print(filenames_train.shape)
print(filenames_dev.shape)
print(X_test.shape)
print(y_test.shape)
print(filenames_test.shape)

(35, 768)
(1, 1545)
(1, 255)
(1, 255)
(1545,)
(255,)
(1, 399)
(1, 399)
(399,)


In [20]:
# Shift all labels from 1–6 to 0–5
for i in range(y_train.shape[1]):
    y_train[0][i] = y_train[0][i] - 1

for i in range(y_dev.shape[1]):
    y_dev[0][i] = y_dev[0][i] - 1

for i in range(y_test.shape[1]):
    y_test[0][i] = y_test[0][i] - 1

In [21]:
print(y_train[0][0], y_dev[0][0], y_test[0][0])

[[5 0 0 0 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 1 1]] [[5 5 5 5 5 5 5 5 0 0 0 5 5 5 5 5 0 0 5 5 5 1 1 1 5 5 5 5 5 5 1 1 1 1]] [[0 0 5 5 5 5 5 5 5 5 5 5 5 5 5 5 4 4 4 4 5 1 1 1]]


In [22]:
print(y_dev.flatten().shape)

(255,)


In [23]:
l=[]
for i in y_dev.flatten():
  for j in i.flatten():
    if j not in l:
      l.append(j)
print(l)

[np.int64(5), np.int64(0), np.int64(1), np.int64(3), np.int64(4)]


In [24]:
# class PunctuationDataset(torch.utils.data.Dataset):
#     def __init__(self, X, Y):
#         self.X = [torch.tensor(X.T[i][0], dtype=torch.float32) for i in range(X.shape[1])]
#         self.Y = [torch.tensor(Y.T[i][0].flatten(), dtype=torch.long) for i in range(Y.shape[1])]

#         # Optional: sanity check
#         for x, y in zip(self.X, self.Y):
#             assert len(x) == len(y), f"Mismatched lengths: {x.shape[0]} vs {y.shape[0]}"

#     def __len__(self):
#         return len(self.X)

#     def __getitem__(self, idx):
#         return self.X[idx], self.Y[idx]
class PunctuationDataset(torch.utils.data.Dataset):
    def __init__(self, X, Y, filenames):
        self.X = [torch.tensor(X.T[i][0], dtype=torch.float32) for i in range(X.shape[1])]
        self.Y = [torch.tensor(Y.T[i][0].flatten(), dtype=torch.long) for i in range(Y.shape[1])]
        self.filenames = filenames

        assert len(self.X) == len(self.filenames), "Mismatch between data and filenames"

        for x, y in zip(self.X, self.Y):
            assert len(x) == len(y), f"Mismatched lengths: {x.shape[0]} vs {y.shape[0]}"

    def __len__(self):
        return len(self.X)

    def __getitem__(self, idx):
        return self.X[idx], self.Y[idx], self.filenames[idx]


In [25]:
# def collate_fn(batch):
#     xs, ys = zip(*batch)
#     lengths = [len(x) for x in xs]

#     xs_padded = nn.utils.rnn.pad_sequence(xs, batch_first=True)  # [batch, max_len, feat_dim]
#     ys_padded = nn.utils.rnn.pad_sequence(ys, batch_first=True, padding_value=-100)  # ignore_index

#     return xs_padded, ys_padded, lengths
def collate_fn(batch):
    xs, ys, filenames = zip(*batch)
    lengths = [len(x) for x in xs]

    xs_padded = nn.utils.rnn.pad_sequence(xs, batch_first=True)
    ys_padded = nn.utils.rnn.pad_sequence(ys, batch_first=True, padding_value=-100)

    return xs_padded, ys_padded, lengths, filenames


In [26]:
# Assuming you have a list of filenames (e.g., filenames_train for the train data)
train_dataset = PunctuationDataset(X_train, y_train, filenames_train)  # Pass filenames_train
val_dataset = PunctuationDataset(X_dev, y_dev, filenames_dev)  # Pass filenames_dev
test_dataset = PunctuationDataset(X_test, y_test, filenames_test)

# Create DataLoader
train_loader = DataLoader(train_dataset, batch_size=1, shuffle=True, collate_fn=collate_fn)
val_loader = DataLoader(val_dataset, batch_size=1, shuffle=False, collate_fn=collate_fn)
test_loader = DataLoader(test_dataset, batch_size=1, shuffle=False, collate_fn=collate_fn)

In [27]:
class ConvLSTMClassifier(nn.Module):
    def __init__(self, input_dim, lstm_hidden=512, num_classes=5):
        super(ConvLSTMClassifier, self).__init__()
        self.conv = nn.Conv1d(in_channels=input_dim, out_channels=128, kernel_size=5, padding=2)

        self.lstm = nn.LSTM(input_size=128, hidden_size=lstm_hidden, batch_first=True, bidirectional=True)

        self.linear_1024 = nn.Linear(2 * lstm_hidden, 1024)
        self.output_layer = nn.Linear(1024, num_classes)

    def forward(self, x_list, lengths):
        conv_outs = []
        for x in x_list:
            x = x.permute(1, 0).unsqueeze(0)
            x = self.conv(x)
            x = x.squeeze(0).permute(1, 0)
            conv_outs.append(x)

        padded = nn.utils.rnn.pad_sequence(conv_outs, batch_first=True)
        packed = nn.utils.rnn.pack_padded_sequence(padded, lengths, batch_first=True, enforce_sorted=False)

        lstm_out, _ = self.lstm(packed)
        lstm_out, _ = nn.utils.rnn.pad_packed_sequence(lstm_out, batch_first=True)

        linear_1024_out = self.linear_1024(lstm_out)
        logits = self.output_layer(linear_1024_out)

        # Return both the logits and the features from the last LSTM output
        return logits, lstm_out


In [28]:
def train_model(model, train_loader, val_loader, num_epochs=20, lr=1e-3, patience=5, device='cuda'):
    model = model.to(device)
    criterion = nn.CrossEntropyLoss(ignore_index=-100)  # ignore padding values
    optimizer = optim.Adam(model.parameters(), lr=lr)

    skipped_count = 0  # Counter for skipped NaN cases

    # Early stopping variables
    best_val_loss = float('inf')
    patience_counter = 0

    for epoch in range(num_epochs):
        model.train()
        train_loss = 0
        correct_train = 0
        total_train = 0
        train_bar = tqdm(train_loader, desc=f"Epoch {epoch+1}/{num_epochs} [Train]")

        for x_batch, y_batch, lengths, filenames_train in train_bar:
            x_batch = x_batch.to(device)
            y_batch = y_batch.to(device)

            optimizer.zero_grad()
            # Unpack the output tuple into `features` and `logits`
            logits, lstm_out = model(x_batch, lengths)  # logits are the output before the final layer

            # Check for NaN or Inf in logits (not features)
            if torch.isnan(logits).any() or torch.isinf(logits).any():
                print("NaN or Inf detected in model output, skipping this batch")
                skipped_count += 1
                continue  # Skip this batch

            logits = logits.view(-1, logits.shape[-1])  # Flatten for loss calculation
            y_batch = y_batch.view(-1)

            # Check for NaN or Inf in loss
            loss = criterion(logits, y_batch)
            if torch.isnan(loss) or torch.isinf(loss):
                print("NaN or Inf detected in loss, skipping this batch")
                skipped_count += 1
                continue  # Skip this batch

            loss.backward()
            optimizer.step()

            train_loss += loss.item()

            # Calculate training accuracy
            preds = logits.argmax(dim=-1)
            mask = y_batch != -100
            correct_train += (preds[mask] == y_batch[mask]).sum().item()
            total_train += mask.sum().item()

            train_bar.set_postfix(loss=loss.item())

        avg_train_loss = train_loss / len(train_loader)
        train_acc = correct_train / total_train if total_train > 0 else 0

        # --- Validation ---
        from sklearn.metrics import classification_report

        # In the validation loop:
        model.eval()
        val_loss = 0
        correct = 0
        total = 0
        predictions = []
        ground_truth = []
        val_bar = tqdm(val_loader, desc=f"Epoch {epoch+1}/{num_epochs} [Val]", leave=False)

        with torch.no_grad():
            for x_batch, y_batch, lengths, filenames_dev in val_bar:
                x_batch = x_batch.to(device)
                y_batch = y_batch.to(device)

                outputs, lstm_out = model(x_batch, lengths)
                outputs = outputs.view(-1, outputs.shape[-1])
                y_batch = y_batch.view(-1)

                # Compute the loss
                loss = criterion(outputs, y_batch)
                val_loss += loss.item()

                # Get the predictions
                preds = outputs.argmax(dim=-1)
                mask = y_batch != -100
                correct += (preds[mask] == y_batch[mask]).sum().item()
                total += mask.sum().item()

                # Store predictions and ground truth
                predictions.extend(preds[mask].cpu().numpy())
                ground_truth.extend(y_batch[mask].cpu().numpy())

        avg_val_loss = val_loss / len(val_loader)
        val_acc = correct / total if total > 0 else 0

        # Print classification report for per-class metrics
        print("\nClassification Report:")
        print(classification_report(ground_truth, predictions, zero_division=0))

        # Print summary of metrics
        print(f"Epoch {epoch+1}/{num_epochs} Summary: Train Loss: {avg_train_loss:.4f} | Val Loss: {avg_val_loss:.4f} | Val Acc: {val_acc:.4f}")

        print(f"Skipped {skipped_count} batches due to NaN/Inf")

        # Early stopping
        if avg_val_loss < best_val_loss:
            best_val_loss = avg_val_loss
            patience_counter = 0
            # Optionally, save the best model
            torch.save(model.state_dict(), 'best_model.pth')
        else:
            patience_counter += 1
            if patience_counter >= patience:
                print(f"Early stopping triggered after {patience} epochs without improvement.")
                break

        # Gradient Clipping to prevent explosion
        torch.nn.utils.clip_grad_norm_(model.parameters(), max_norm=1.0)


In [29]:
model = ConvLSTMClassifier(input_dim=768, lstm_hidden=512, num_classes=6)


In [None]:
train_model(model, train_loader, val_loader, num_epochs=50, patience=5)


Epoch 1/50 [Train]:  42%|████▏     | 659/1556 [00:11<00:07, 114.35it/s, loss=0.475]

NaN or Inf detected in model output, skipping this batch


Epoch 1/50 [Train]: 100%|██████████| 1556/1556 [00:20<00:00, 76.71it/s, loss=0.309]



Classification Report:
              precision    recall  f1-score   support

           0       0.00      0.00      0.00       423
           1       0.94      0.74      0.83       402
           3       0.00      0.00      0.00        12
           4       0.00      0.00      0.00         4
           5       0.91      1.00      0.95      5516

    accuracy                           0.91      6357
   macro avg       0.37      0.35      0.36      6357
weighted avg       0.85      0.91      0.88      6357

Epoch 1/50 Summary: Train Loss: 0.3282 | Val Loss: 0.2908 | Val Acc: 0.9114
Skipped 1 batches due to NaN/Inf


Epoch 2/50 [Train]:  24%|██▍       | 377/1556 [00:04<00:12, 92.56it/s, loss=0.973]

NaN or Inf detected in model output, skipping this batch


Epoch 2/50 [Train]: 100%|██████████| 1556/1556 [00:18<00:00, 86.28it/s, loss=0.642]



Classification Report:
              precision    recall  f1-score   support

           0       0.00      0.00      0.00       423
           1       0.94      0.73      0.82       402
           3       0.00      0.00      0.00        12
           4       0.00      0.00      0.00         4
           5       0.91      1.00      0.95      5516

    accuracy                           0.91      6357
   macro avg       0.37      0.35      0.35      6357
weighted avg       0.85      0.91      0.88      6357

Epoch 2/50 Summary: Train Loss: 0.3021 | Val Loss: 0.2886 | Val Acc: 0.9108
Skipped 2 batches due to NaN/Inf


Epoch 3/50 [Train]:  69%|██████▉   | 1073/1556 [00:12<00:05, 93.80it/s, loss=0.182]

NaN or Inf detected in model output, skipping this batch


Epoch 3/50 [Train]: 100%|██████████| 1556/1556 [00:17<00:00, 87.91it/s, loss=0.0407]



Classification Report:
              precision    recall  f1-score   support

           0       0.00      0.00      0.00       423
           1       0.89      0.82      0.85       402
           3       0.00      0.00      0.00        12
           4       0.00      0.00      0.00         4
           5       0.91      0.99      0.95      5516

    accuracy                           0.91      6357
   macro avg       0.36      0.36      0.36      6357
weighted avg       0.85      0.91      0.88      6357

Epoch 3/50 Summary: Train Loss: 0.2949 | Val Loss: 0.2854 | Val Acc: 0.9135
Skipped 3 batches due to NaN/Inf


Epoch 4/50 [Train]:  48%|████▊     | 751/1556 [00:08<00:08, 95.39it/s, loss=0.324]

NaN or Inf detected in model output, skipping this batch


Epoch 4/50 [Train]: 100%|██████████| 1556/1556 [00:17<00:00, 88.09it/s, loss=0.0183]



Classification Report:
              precision    recall  f1-score   support

           0       0.56      0.01      0.02       423
           1       0.97      0.72      0.83       402
           3       0.00      0.00      0.00        12
           4       0.00      0.00      0.00         4
           5       0.91      1.00      0.95      5516

    accuracy                           0.91      6357
   macro avg       0.49      0.35      0.36      6357
weighted avg       0.89      0.91      0.88      6357

Epoch 4/50 Summary: Train Loss: 0.2808 | Val Loss: 0.2774 | Val Acc: 0.9124
Skipped 4 batches due to NaN/Inf


Epoch 5/50 [Train]:  35%|███▌      | 547/1556 [00:06<00:11, 91.05it/s, loss=0.867]

NaN or Inf detected in model output, skipping this batch


Epoch 5/50 [Train]: 100%|██████████| 1556/1556 [00:17<00:00, 87.17it/s, loss=0.138]



Classification Report:
              precision    recall  f1-score   support

           0       0.50      0.00      0.01       423
           1       0.90      0.83      0.87       402
           3       0.00      0.00      0.00        12
           4       0.00      0.00      0.00         4
           5       0.92      0.99      0.95      5516

    accuracy                           0.91      6357
   macro avg       0.46      0.37      0.37      6357
weighted avg       0.88      0.91      0.88      6357

Epoch 5/50 Summary: Train Loss: 0.2692 | Val Loss: 0.3239 | Val Acc: 0.9147
Skipped 5 batches due to NaN/Inf


Epoch 6/50 [Train]:  76%|███████▌  | 1175/1556 [00:13<00:04, 94.57it/s, loss=0.275]

NaN or Inf detected in model output, skipping this batch


Epoch 6/50 [Train]: 100%|██████████| 1556/1556 [00:17<00:00, 87.13it/s, loss=0.52]



Classification Report:
              precision    recall  f1-score   support

           0       0.50      0.04      0.07       423
           1       0.90      0.84      0.87       402
           3       0.00      0.00      0.00        12
           4       0.00      0.00      0.00         4
           5       0.92      0.99      0.95      5516

    accuracy                           0.92      6357
   macro avg       0.46      0.37      0.38      6357
weighted avg       0.89      0.92      0.89      6357

Epoch 6/50 Summary: Train Loss: 0.2664 | Val Loss: 0.2630 | Val Acc: 0.9155
Skipped 6 batches due to NaN/Inf


Epoch 7/50 [Train]:  36%|███▌      | 563/1556 [00:06<00:11, 86.43it/s, loss=0.217]

NaN or Inf detected in model output, skipping this batch


Epoch 7/50 [Train]: 100%|██████████| 1556/1556 [00:17<00:00, 88.86it/s, loss=0.456]



Classification Report:
              precision    recall  f1-score   support

           0       0.59      0.06      0.11       423
           1       0.91      0.82      0.86       402
           3       0.00      0.00      0.00        12
           4       0.00      0.00      0.00         4
           5       0.92      0.99      0.95      5516

    accuracy                           0.92      6357
   macro avg       0.48      0.37      0.38      6357
weighted avg       0.89      0.92      0.89      6357

Epoch 7/50 Summary: Train Loss: 0.2593 | Val Loss: 0.2942 | Val Acc: 0.9157
Skipped 7 batches due to NaN/Inf


Epoch 8/50 [Train]:  23%|██▎       | 351/1556 [00:03<00:10, 111.10it/s, loss=0.109] 

NaN or Inf detected in model output, skipping this batch


Epoch 8/50 [Train]: 100%|██████████| 1556/1556 [00:14<00:00, 107.26it/s, loss=0.419]



Classification Report:
              precision    recall  f1-score   support

           0       0.48      0.12      0.20       423
           1       0.93      0.80      0.86       402
           3       0.00      0.00      0.00        12
           4       0.00      0.00      0.00         4
           5       0.92      0.99      0.95      5516

    accuracy                           0.92      6357
   macro avg       0.47      0.38      0.40      6357
weighted avg       0.89      0.92      0.90      6357

Epoch 8/50 Summary: Train Loss: 0.2479 | Val Loss: 0.2506 | Val Acc: 0.9154
Skipped 8 batches due to NaN/Inf


Epoch 9/50 [Train]:  15%|█▌        | 240/1556 [00:02<00:11, 113.46it/s, loss=0.0422]

NaN or Inf detected in model output, skipping this batch


Epoch 9/50 [Train]: 100%|██████████| 1556/1556 [00:14<00:00, 107.85it/s, loss=0.4]



Classification Report:
              precision    recall  f1-score   support

           0       0.36      0.20      0.25       423
           1       0.88      0.87      0.87       402
           3       0.00      0.00      0.00        12
           4       0.00      0.00      0.00         4
           5       0.93      0.97      0.95      5516

    accuracy                           0.91      6357
   macro avg       0.43      0.41      0.42      6357
weighted avg       0.89      0.91      0.90      6357

Epoch 9/50 Summary: Train Loss: 0.2418 | Val Loss: 0.2641 | Val Acc: 0.9072
Skipped 9 batches due to NaN/Inf


Epoch 10/50 [Train]:  30%|███       | 471/1556 [00:04<00:09, 111.12it/s, loss=0.0233]

NaN or Inf detected in model output, skipping this batch


Epoch 10/50 [Train]: 100%|██████████| 1556/1556 [00:14<00:00, 107.40it/s, loss=0.14]



Classification Report:
              precision    recall  f1-score   support

           0       0.55      0.09      0.15       423
           1       0.90      0.83      0.86       402
           3       0.00      0.00      0.00        12
           4       0.00      0.00      0.00         4
           5       0.92      0.99      0.95      5516

    accuracy                           0.92      6357
   macro avg       0.47      0.38      0.39      6357
weighted avg       0.89      0.92      0.89      6357

Epoch 10/50 Summary: Train Loss: 0.2419 | Val Loss: 0.2607 | Val Acc: 0.9166
Skipped 10 batches due to NaN/Inf


Epoch 11/50 [Train]:  72%|███████▏  | 1115/1556 [00:10<00:03, 115.42it/s, loss=0.0744]

NaN or Inf detected in model output, skipping this batch


Epoch 11/50 [Train]: 100%|██████████| 1556/1556 [00:14<00:00, 107.35it/s, loss=0.0805]



Classification Report:
              precision    recall  f1-score   support

           0       0.46      0.15      0.22       423
           1       0.90      0.85      0.87       402
           3       0.00      0.00      0.00        12
           4       0.00      0.00      0.00         4
           5       0.93      0.98      0.95      5516

    accuracy                           0.92      6357
   macro avg       0.46      0.40      0.41      6357
weighted avg       0.89      0.92      0.90      6357

Epoch 11/50 Summary: Train Loss: 0.2356 | Val Loss: 0.2545 | Val Acc: 0.9157
Skipped 11 batches due to NaN/Inf


Epoch 12/50 [Train]:  93%|█████████▎| 1448/1556 [00:16<00:01, 96.83it/s, loss=0.237]

NaN or Inf detected in model output, skipping this batch


Epoch 12/50 [Train]: 100%|██████████| 1556/1556 [00:17<00:00, 89.97it/s, loss=0.101]



Classification Report:
              precision    recall  f1-score   support

           0       0.36      0.23      0.28       423
           1       0.82      0.88      0.85       402
           3       0.00      0.00      0.00        12
           4       0.00      0.00      0.00         4
           5       0.94      0.96      0.95      5516

    accuracy                           0.90      6357
   macro avg       0.42      0.41      0.42      6357
weighted avg       0.89      0.90      0.89      6357

Epoch 12/50 Summary: Train Loss: 0.2289 | Val Loss: 0.2715 | Val Acc: 0.9036
Skipped 12 batches due to NaN/Inf


Epoch 13/50 [Train]:  96%|█████████▌| 1497/1556 [00:18<00:00, 91.49it/s, loss=0.116]

NaN or Inf detected in model output, skipping this batch


Epoch 13/50 [Train]: 100%|██████████| 1556/1556 [00:18<00:00, 83.09it/s, loss=0.193]
                                                                     


Classification Report:
              precision    recall  f1-score   support

           0       0.46      0.13      0.20       423
           1       0.83      0.87      0.85       402
           3       0.00      0.00      0.00        12
           4       0.00      0.00      0.00         4
           5       0.93      0.98      0.95      5516

    accuracy                           0.91      6357
   macro avg       0.44      0.39      0.40      6357
weighted avg       0.89      0.91      0.89      6357

Epoch 13/50 Summary: Train Loss: 0.2262 | Val Loss: 0.2650 | Val Acc: 0.9130
Skipped 13 batches due to NaN/Inf
Early stopping triggered after 5 epochs without improvement.




In [30]:
import torch
import scipy.io as sio
from sklearn.metrics import classification_report

def evaluate_model(model, data_loader, device='cpu', label_names=None, save_features=False, feature_layer='lstm', mat_file='features_by_file.mat'):
    model = model.to(device)
    model.eval()

    all_preds = []
    all_labels = []

    final_features = []
    final_filenames = []

    with torch.no_grad():
        for x_batch, y_batch, lengths, filenames in data_loader:
            x_batch = x_batch.to(device)
            y_batch = y_batch.to(device)

            outputs, lstm_out = model(x_batch, lengths)  # logits, lstm_out
            outputs = outputs.view(-1, outputs.shape[-1])
            y_batch = y_batch.view(-1)

            preds = outputs.argmax(dim=-1)
            mask = y_batch != -100

            all_preds.extend(preds[mask].cpu().numpy())
            all_labels.extend(y_batch[mask].cpu().numpy())

            if save_features:
                # Choose layer to extract features from
                selected_features = lstm_out  # or change to `linear_1024_out` if you modify model to return it
                selected_features = selected_features.cpu()

                for i, fname in enumerate(filenames):
                    seq_len = lengths[i]
                    features = selected_features[i][:seq_len]  # [seq_len, feat_dim]
                    # features_dict[fname] = features.numpy()
                    final_features.append(features.numpy())
                    final_filenames.append(fname)

    if save_features:
        features_dict = {'filenames': final_filenames, 'features': final_features}

    print("📊 Per-Class Metrics:\n")
    report = classification_report(
        all_labels, all_preds, target_names=label_names, digits=4, zero_division=0
    )
    print(report)

    if save_features:
        print(f"💾 Saving per-file features to: {mat_file}")
        sio.savemat(mat_file, features_dict)


In [None]:
# torch.save(model.state_dict(), "/content/drive/MyDrive/SSMT/speech_punc_extraction_model.pt")

In [31]:
# Assuming 6 classes and you know the labels
label_names = ["0", "1", "2", "3", "4", "5"]

# Load the best model weights (optional if not already loaded)
model.load_state_dict(torch.load('/content/drive/MyDrive/SSMT/speech_punc_extraction_model.pt'))

# Specify where you want to save the features (e.g., 'train_features.mat')
mat_file_path = '/content/drive/MyDrive/SSMT/final_train_punct_features_speech.mat'

# Evaluate on training set (or val_loader) and save features
evaluate_model(model, train_loader, device='cuda', label_names=label_names, save_features=True, mat_file=mat_file_path)

label_names = ["0", "1", "2", "3", "4"]

mat_file_path = '/content/drive/MyDrive/SSMT/final_dev_punct_features_speech.mat'

evaluate_model(model, val_loader, device='cuda', label_names=label_names, save_features=True, mat_file=mat_file_path)

label_names = ["0", "1", "2", "3", "4", "5"]

mat_file_path = '/content/drive/MyDrive/SSMT/final_test_punct_features_speech.mat'

evaluate_model(model, test_loader, device='cuda', label_names=label_names, save_features=True, mat_file=mat_file_path)


📊 Per-Class Metrics:

              precision    recall  f1-score   support

           0     0.6324    0.1663    0.2634      3776
           1     0.9420    0.7726    0.8490      4165
           2     0.0000    0.0000    0.0000        38
           3     0.0000    0.0000    0.0000       177
           4     0.0000    0.0000    0.0000        88
           5     0.8879    0.9884    0.9355     33891

    accuracy                         0.8863     42135
   macro avg     0.4104    0.3212    0.3413     42135
weighted avg     0.8640    0.8863    0.8600     42135

💾 Saving per-file features to: /content/drive/MyDrive/SSMT/final_train_punct_features_speech.mat
📊 Per-Class Metrics:

              precision    recall  f1-score   support

           0     0.5105    0.1052    0.1744       694
           1     0.9428    0.7583    0.8405       695
           2     0.0000    0.0000    0.0000        31
           3     0.0000    0.0000    0.0000         6
           4     0.8715    0.9859    0.9251  

In [32]:
import scipy.io

# Load the .mat file
mat_file_path = '/content/drive/MyDrive/SSMT/final_train_punct_features_speech.mat'  # Replace with your file path
mat_data = scipy.io.loadmat(mat_file_path)

# Display the keys in the .mat file
print("Keys in the .mat file:", mat_data.keys())

# If the features are stored in a specific variable, you can access it
# For example, if the features are stored under 'features' key
if 'features' in mat_data:
    features = mat_data['features']
    print("Shape of the features:", features.shape)
else:
    print("No 'features' key found in the .mat file.")

features = mat_data['features'].T[1][0]
print(features.shape)




# Load the .mat file
mat_file_path = '/content/drive/MyDrive/SSMT/final_dev_punct_features_speech.mat'  # Replace with your file path
mat_data = scipy.io.loadmat(mat_file_path)

# Display the keys in the .mat file
print("Keys in the .mat file:", mat_data.keys())

# If the features are stored in a specific variable, you can access it
# For example, if the features are stored under 'features' key
if 'features' in mat_data:
    features = mat_data['features']
    print("Shape of the features:", features.shape)
else:
    print("No 'features' key found in the .mat file.")

features = mat_data['features'].T[1][0]
print(features.shape)




# Load the .mat file
mat_file_path = '/content/drive/MyDrive/SSMT/final_test_punct_features_speech.mat'  # Replace with your file path
mat_data = scipy.io.loadmat(mat_file_path)

# Display the keys in the .mat file
print("Keys in the .mat file:", mat_data.keys())

# If the features are stored in a specific variable, you can access it
# For example, if the features are stored under 'features' key
if 'features' in mat_data:
    features = mat_data['features']
    print("Shape of the features:", features.shape)
else:
    print("No 'features' key found in the .mat file.")

features = mat_data['features'].T[1][0]
print(features.shape)

Keys in the .mat file: dict_keys(['__header__', '__version__', '__globals__', 'filenames', 'features'])
Shape of the features: (1, 1545)
(30, 1024)
Keys in the .mat file: dict_keys(['__header__', '__version__', '__globals__', 'filenames', 'features'])
Shape of the features: (1, 255)
(34, 1024)
Keys in the .mat file: dict_keys(['__header__', '__version__', '__globals__', 'filenames', 'features'])
Shape of the features: (1, 399)
(24, 1024)
