In [1]:
import os
import numpy as np
import librosa
import torch
import torch.nn as nn
from torch.utils.data import Dataset, DataLoader
from tqdm import tqdm

# ------------------ Feature Extraction ------------------
def pad_or_truncate(features, target_length):
    """Force a 2-D feature matrix to have exactly ``target_length`` rows.

    Args:
        features: 2-D NumPy array; rows are the axis being resized.
        target_length: Number of rows the result must have.

    Returns:
        The first ``target_length`` rows when the input is long enough,
        otherwise the input with zero rows appended at the end.
    """
    n_rows = features.shape[0]

    # Long enough already: keep only the leading rows.
    if n_rows >= target_length:
        return features[:target_length, :]

    # Too short: append zero rows at the bottom, leave columns untouched.
    missing_rows = target_length - n_rows
    return np.pad(features, ((0, missing_rows), (0, 0)), mode='constant')

def extract_lp_residual(audio, order=12):
    """Filter ``audio`` with its own LPC coefficients to get an LP residual.

    Args:
        audio: 1-D audio signal.
        order: Linear-prediction order passed to ``librosa.lpc``.

    Returns:
        1-D array with the same length as ``audio``; NaN and +/-inf entries
        are replaced by 0.0.
    """
    coeffs = librosa.lpc(audio, order=order)
    n_samples = len(audio)

    # NOTE(review): the coefficients are negated before filtering, so the
    # result has the opposite sign to the signal filtered with the
    # coefficients exactly as librosa.lpc returns them -- confirm intended.
    filtered = np.convolve(audio, -coeffs, mode='full')

    # 'full' convolution is longer than the input; keep the leading part only.
    residual = filtered[:n_samples]
    return np.nan_to_num(residual, nan=0.0, posinf=0.0, neginf=0.0)

def extract_mgdcc(audio, sr, alpha=0.4, gamma=0.9, num_coeffs=13):
    """Compute group-delay based cepstral coefficients from a signal.

    Args:
        audio: 1-D signal to analyse.
        sr: Accepted for interface compatibility; not used in the body.
        alpha: Accepted for interface compatibility; not used in the body.
        gamma: Exponent applied to the STFT magnitude used as a weight.
        num_coeffs: Value passed to ``librosa.feature.mfcc`` as ``n_mfcc``.

    Returns:
        The transposed output of ``librosa.feature.mfcc``
        (presumably (frames, num_coeffs) -- TODO confirm).
    """
    spectrum = librosa.stft(audio)
    mag = np.abs(spectrum)
    phi = np.angle(spectrum)

    # Negative first difference of the phase along axis 0 of the STFT,
    # weighted by magnitude**gamma; the last magnitude row is dropped so the
    # shapes match after the difference.
    # NOTE(review): the phase is not unwrapped before differencing.
    weighted_delay = -np.diff(phi, axis=0) * (mag[:-1, :] ** gamma)

    cepstra = librosa.feature.mfcc(S=weighted_delay, n_mfcc=num_coeffs)
    return cepstra.T

In [2]:
# ------------------ Dataset Loader ------------------
class DysarthriaDataset(Dataset):
    """Dataset of ``.wav`` utterances labelled by dysarthria severity.

    Every ``.wav`` file found directly inside ``data_path/<speaker>`` becomes
    one sample. The label is the position of the speaker's severity key in
    ``severity_mapping`` (first key -> 0, second key -> 1, ...).

    Args:
        data_path: Directory containing one sub-directory per speaker.
        severity_mapping: Dict mapping a severity name to a list of speaker
            directory names. Key order defines the integer labels.
        sr: Sampling rate passed to ``librosa.load``.
        target_length: Number of feature rows every sample is padded or
            truncated to.

    Raises:
        OSError: If a speaker directory listed in ``severity_mapping`` cannot
            be listed (propagated from ``os.listdir``).
    """

    def __init__(self, data_path, severity_mapping, sr=16000, target_length=100):
        self.data_path = data_path
        self.severity_mapping = severity_mapping
        self.sr = sr
        self.target_length = target_length
        self.data = []    # file paths, index-aligned with self.labels
        self.labels = []  # integer class index per file

        for label, (severity, speakers) in enumerate(severity_mapping.items()):
            for speaker in speakers:
                speaker_path = os.path.join(data_path, speaker)
                # os.listdir returns entries in arbitrary order; sorting keeps
                # the index -> file mapping stable across runs and machines,
                # which index-based splits (e.g. a seeded random_split) need
                # in order to be reproducible.
                for file_name in sorted(os.listdir(speaker_path)):
                    if file_name.endswith(".wav"):
                        self.data.append(os.path.join(speaker_path, file_name))
                        self.labels.append(label)

    def __len__(self):
        """Return the number of indexed ``.wav`` files."""
        return len(self.data)

    def __getitem__(self, idx):
        """Load one utterance and return ``(features, label)``.

        The audio is loaded at ``self.sr``, converted to an LP residual,
        turned into cepstral features and padded/truncated to
        ``self.target_length`` rows. Features are computed on every access;
        nothing is cached.

        Returns:
            features: float32 tensor with a leading channel dimension of
                size 1, i.e. (1, target_length, n_features).
            label: scalar int64 tensor with the class index.
        """
        file_path = self.data[idx]
        label = self.labels[idx]
        audio, _ = librosa.load(file_path, sr=self.sr)
        lp_residual = extract_lp_residual(audio)
        features = extract_mgdcc(lp_residual, self.sr)
        features = pad_or_truncate(features, self.target_length)
        features = torch.tensor(features, dtype=torch.float32).unsqueeze(0)  # Add channel dimension
        label = torch.tensor(label, dtype=torch.long)
        return features, label

In [3]:
class DysarthriaSeverityCNN(nn.Module):
    """Two-layer CNN followed by two linear layers; outputs raw class logits.

    Both convolutions use kernel 3, stride 1, padding 1, so they keep the
    spatial size of the input. The flattened size fed to ``fc1`` is therefore
    ``32 * height * width``.

    Args:
        num_classes: Number of output logits.
        input_shape: Optional ``(height, width)`` of the input feature map.
            When given, ``fc1`` is created in ``__init__`` so that it is part
            of ``parameters()`` and ``state_dict()`` immediately. When left as
            ``None`` (the default), ``fc1`` is created on the first call to
            ``forward`` from the observed input size.

    Note:
        With the lazy default, an optimizer built before the first forward
        pass does not see ``fc1``'s parameters, and ``load_state_dict`` on a
        fresh instance cannot restore ``fc1``. Either pass ``input_shape`` or
        run one dummy forward pass before creating the optimizer, wrapping the
        model, or loading weights.
    """

    def __init__(self, num_classes=4, input_shape=None):
        super().__init__()
        self.conv1 = nn.Conv2d(1, 16, kernel_size=3, stride=1, padding=1)
        self.conv2 = nn.Conv2d(16, 32, kernel_size=3, stride=1, padding=1)
        self.relu = nn.ReLU()
        self.dropout = nn.Dropout(0.5)
        self.fc2 = nn.Linear(128, num_classes)

        if input_shape is None:
            # Lazy mode: fc1 is built inside forward() on first use.
            self.fc1_input_size = None
            self.fc1 = None
        else:
            height, width = input_shape
            # 32 output channels from conv2, spatial size unchanged.
            self.fc1_input_size = 32 * height * width
            self.fc1 = nn.Linear(self.fc1_input_size, 128)

    def forward(self, x):
        """Return logits of shape (batch, num_classes) for input (batch, 1, H, W)."""
        x = self.relu(self.conv1(x))
        x = self.relu(self.conv2(x))

        # Flatten everything except the batch dimension.
        x = x.view(x.size(0), -1)

        # Lazy fallback: size fc1 from the first batch it sees and place it
        # on the same device as the activations.
        if self.fc1 is None:
            self.fc1_input_size = x.size(1)
            self.fc1 = nn.Linear(self.fc1_input_size, 128).to(x.device)

        x = self.relu(self.fc1(x))
        x = self.dropout(x)
        x = self.fc2(x)  # Raw logits
        return x

In [4]:
def train_model(model, dataloader, criterion, optimizer, device, num_epochs=10):
    """Train ``model`` for ``num_epochs`` epochs and print per-epoch metrics.

    Args:
        model: Module mapping a batch of inputs to class logits.
        dataloader: Iterable yielding ``(inputs, labels)`` batches.
        criterion: Loss called as ``criterion(outputs, labels)``.
        optimizer: Optimizer over the parameters to update.
        device: Device the batches are moved to before the forward pass.
        num_epochs: Number of passes over ``dataloader``.

    Side effects:
        Every 10th epoch the whole model object (not only its state_dict) is
        written with ``torch.save`` to ``model_<epoch>.pth`` in the current
        working directory. Progress and metrics are printed.

    Returns:
        None.
    """
    for epoch in range(num_epochs):
        # Re-assert training mode each epoch in case something switched the
        # model to eval mode in between.
        model.train()
        epoch_loss = 0.0
        correct = 0
        total = 0
        num_batches = 0

        for inputs, labels in tqdm(dataloader, desc=f"Epoch {epoch + 1}/{num_epochs}"):
            # Move data to device
            inputs = inputs.to(device)
            labels = labels.to(device)

            optimizer.zero_grad()
            outputs = model(inputs)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()

            epoch_loss += loss.item()
            _, predicted = torch.max(outputs, 1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()
            num_batches += 1

        if (epoch + 1) % 10 == 0:
            checkpoint_path = f"model_{epoch + 1}.pth"
            torch.save(model, checkpoint_path)
            # Report the epoch/file actually written; the old message added a
            # hard-coded offset of 25 and disagreed with the filename.
            print(f"Model saved at epoch {epoch + 1}: {checkpoint_path}")

        if total == 0:
            # Nothing was processed; avoid dividing by zero below.
            print(f"Epoch {epoch + 1}/{num_epochs}: dataloader yielded no samples, metrics skipped")
            continue

        accuracy = 100 * correct / total
        print(f"Epoch {epoch + 1}/{num_epochs}, Loss: {epoch_loss / num_batches:.4f}, Accuracy: {accuracy:.2f}%")

In [5]:
from torch.utils.data import random_split

# Speaker directory names grouped by severity. DysarthriaDataset assigns the
# integer label from the key order of this dict, so the order here matters:
# "HIGH" -> 0, "MEDIUM" -> 1, "LOW" -> 2, "VERY LOW" -> 3.
severity_mapping = {
    "HIGH": ["M01", "M04", "M12", "F03"],
    "MEDIUM": ["F02", "M07", "M16"],
    "LOW": ["F04", "M05", "M11"],
    "VERY LOW": ["F05", "M08", "M09", "M10", "M14"]
}

dataset_path = "/kaggle/input/dysarthria-data/noisereduced-uaspeech"
dataset = DysarthriaDataset(dataset_path, severity_mapping)

# Splitting into train and eval sets.
# The split is seeded so the same indices land in the same subset on every
# run. Without a fixed generator the partition changes each session, and a
# checkpoint trained in an earlier session would be evaluated on samples it
# may already have been trained on.
# NOTE(review): the split is per utterance, so every speaker can appear in
# both subsets -- consider a speaker-level split if the goal is to measure
# generalisation to unseen speakers.
SPLIT_SEED = 42
train_size = int(0.8 * len(dataset))  # 80% training
eval_size = len(dataset) - train_size  # 20% evaluation
train_dataset, eval_dataset = random_split(
    dataset,
    [train_size, eval_size],
    generator=torch.Generator().manual_seed(SPLIT_SEED),
)

# Creating DataLoaders
train_dataloader = DataLoader(train_dataset, batch_size=64, shuffle=True, num_workers=6, pin_memory=True)
eval_dataloader = DataLoader(eval_dataset, batch_size=64, shuffle=False, num_workers=6, pin_memory=True)



In [11]:
# Use GPU if available
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# The checkpoint holds a whole pickled model object, so the
# DysarthriaSeverityCNN class must already be defined in this session.
# - map_location lets a checkpoint saved on GPU load on a CPU-only machine.
# - weights_only=False is required to unpickle a full model object on PyTorch
#   versions where the default is True.
# SECURITY: unpickling executes arbitrary code; only load checkpoints from a
# source you trust.
data_parallel_model = torch.load(
    "/kaggle/input/models-dysar/model_70.pth",
    map_location=device,
    weights_only=False,
)

# Unwrap the underlying model if the checkpoint was saved from a DataParallel
# wrapper; otherwise use the loaded object as-is.
if isinstance(data_parallel_model, nn.DataParallel):
    model = data_parallel_model.module
else:
    model = data_parallel_model

# Move the model to the correct device
model = model.to(device)

# Wrap it again with DataParallel if using multiple GPUs
if torch.cuda.device_count() > 1:
    model = nn.DataParallel(model)

# Run a dummy input through the model so that any lazily created layer exists
# before the optimizer collects the parameters below.
dummy_input = torch.zeros(1, 1, 100, 13).to(device)  # Shape matches input feature maps
with torch.no_grad():
    model(dummy_input)

# Define loss function and optimizer
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=0.001)

  data_parallel_model = torch.load("/kaggle/input/models-dysar/model_70.pth")


In [45]:
import torch

# Report CUDA availability and the name of every visible GPU. Device queries
# are guarded so the cell also runs on CPU-only or single-GPU machines
# instead of raising on a hard-coded device index.
print(torch.cuda.is_available())  # Should return True
if torch.cuda.is_available():
    print(torch.cuda.current_device())  # Check current GPU device
    for gpu_index in range(torch.cuda.device_count()):
        print(torch.cuda.get_device_name(gpu_index))  # Get GPU name

True
0
Tesla T4
Tesla T4


In [7]:
# Move the model onto `device`; requires `model` and `device` to exist from an
# earlier cell (device = cuda when available, otherwise cpu).
model = model.to(device)  # where device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

In [8]:
# Display the selected torch.device as this cell's output.
device

device(type='cuda')

In [67]:
# Kick off training for 75 epochs on the training split, using the model,
# loss, optimizer and device defined in the cells above.
print("Starting training...")
train_model(model, train_dataloader, criterion, optimizer, device, num_epochs=75)

Starting training...


Epoch 1/75: 100%|██████████| 921/921 [04:12<00:00,  3.65it/s]


Epoch 1/75, Loss: 0.5173, Accuracy: 85.16%


Epoch 2/75: 100%|██████████| 921/921 [04:03<00:00,  3.78it/s]


Epoch 2/75, Loss: 0.3789, Accuracy: 86.52%


Epoch 3/75: 100%|██████████| 921/921 [04:00<00:00,  3.83it/s]


Epoch 3/75, Loss: 0.3137, Accuracy: 88.48%


Epoch 4/75: 100%|██████████| 921/921 [04:03<00:00,  3.78it/s]


Epoch 4/75, Loss: 0.2812, Accuracy: 89.60%


Epoch 5/75: 100%|██████████| 921/921 [04:06<00:00,  3.73it/s]


Epoch 5/75, Loss: 0.2599, Accuracy: 89.99%


Epoch 6/75: 100%|██████████| 921/921 [04:09<00:00,  3.69it/s]


Epoch 6/75, Loss: 0.2630, Accuracy: 90.03%


Epoch 7/75: 100%|██████████| 921/921 [04:10<00:00,  3.68it/s]


Epoch 7/75, Loss: 0.2292, Accuracy: 91.19%


Epoch 8/75: 100%|██████████| 921/921 [04:06<00:00,  3.73it/s]


Epoch 8/75, Loss: 0.2147, Accuracy: 91.69%


Epoch 9/75: 100%|██████████| 921/921 [04:07<00:00,  3.72it/s]


Epoch 9/75, Loss: 0.2074, Accuracy: 91.82%


Epoch 10/75: 100%|██████████| 921/921 [04:09<00:00,  3.69it/s]


Model saved at 35
Epoch 10/75, Loss: 0.2003, Accuracy: 92.15%


Epoch 11/75: 100%|██████████| 921/921 [04:08<00:00,  3.70it/s]


Epoch 11/75, Loss: 0.1914, Accuracy: 92.38%


Epoch 12/75: 100%|██████████| 921/921 [04:10<00:00,  3.67it/s]


Epoch 12/75, Loss: 0.1915, Accuracy: 92.41%


Epoch 13/75: 100%|██████████| 921/921 [04:07<00:00,  3.73it/s]


Epoch 13/75, Loss: 0.1825, Accuracy: 92.78%


Epoch 14/75: 100%|██████████| 921/921 [04:09<00:00,  3.70it/s]


Epoch 14/75, Loss: 0.1758, Accuracy: 93.07%


Epoch 15/75: 100%|██████████| 921/921 [04:06<00:00,  3.74it/s]


Epoch 15/75, Loss: 0.1758, Accuracy: 93.12%


Epoch 16/75: 100%|██████████| 921/921 [04:09<00:00,  3.68it/s]


Epoch 16/75, Loss: 0.1762, Accuracy: 93.11%


Epoch 17/75: 100%|██████████| 921/921 [04:12<00:00,  3.64it/s]


Epoch 17/75, Loss: 0.2010, Accuracy: 92.38%


Epoch 18/75: 100%|██████████| 921/921 [04:13<00:00,  3.63it/s]


Epoch 18/75, Loss: 0.1726, Accuracy: 93.18%


Epoch 19/75: 100%|██████████| 921/921 [04:11<00:00,  3.67it/s]


Epoch 19/75, Loss: 0.1619, Accuracy: 93.65%


Epoch 20/75: 100%|██████████| 921/921 [04:08<00:00,  3.70it/s]


Model saved at 45
Epoch 20/75, Loss: 0.1599, Accuracy: 93.67%


Epoch 21/75: 100%|██████████| 921/921 [04:07<00:00,  3.71it/s]


Epoch 21/75, Loss: 0.1575, Accuracy: 93.74%


Epoch 22/75: 100%|██████████| 921/921 [04:08<00:00,  3.70it/s]


Epoch 22/75, Loss: 0.1567, Accuracy: 93.88%


Epoch 23/75: 100%|██████████| 921/921 [04:14<00:00,  3.61it/s]


Epoch 23/75, Loss: 0.1554, Accuracy: 93.82%


Epoch 24/75: 100%|██████████| 921/921 [04:06<00:00,  3.74it/s]


Epoch 24/75, Loss: 0.1546, Accuracy: 93.92%


Epoch 25/75: 100%|██████████| 921/921 [04:06<00:00,  3.74it/s]


Epoch 25/75, Loss: 0.1549, Accuracy: 93.91%


Epoch 26/75: 100%|██████████| 921/921 [04:05<00:00,  3.75it/s]


Epoch 26/75, Loss: 0.1520, Accuracy: 93.95%


Epoch 27/75: 100%|██████████| 921/921 [04:06<00:00,  3.74it/s]


Epoch 27/75, Loss: 0.1507, Accuracy: 94.11%


Epoch 28/75: 100%|██████████| 921/921 [04:13<00:00,  3.63it/s]


Epoch 28/75, Loss: 0.1541, Accuracy: 94.09%


Epoch 29/75: 100%|██████████| 921/921 [04:17<00:00,  3.57it/s]


Epoch 29/75, Loss: 0.1465, Accuracy: 94.24%


Epoch 30/75: 100%|██████████| 921/921 [04:16<00:00,  3.59it/s]


Model saved at 55
Epoch 30/75, Loss: 0.1443, Accuracy: 94.38%


Epoch 31/75: 100%|██████████| 921/921 [04:13<00:00,  3.63it/s]


Epoch 31/75, Loss: 0.1431, Accuracy: 94.50%


Epoch 32/75: 100%|██████████| 921/921 [04:11<00:00,  3.67it/s]


Epoch 32/75, Loss: 0.1460, Accuracy: 94.28%


Epoch 33/75: 100%|██████████| 921/921 [04:34<00:00,  3.35it/s]


Epoch 33/75, Loss: 0.1424, Accuracy: 94.47%


Epoch 34/75: 100%|██████████| 921/921 [04:59<00:00,  3.07it/s]


Epoch 34/75, Loss: 0.1384, Accuracy: 94.56%


Epoch 35/75: 100%|██████████| 921/921 [04:05<00:00,  3.75it/s]


Epoch 35/75, Loss: 0.1387, Accuracy: 94.55%


Epoch 36/75: 100%|██████████| 921/921 [04:15<00:00,  3.60it/s]


Epoch 36/75, Loss: 0.1400, Accuracy: 94.62%


Epoch 37/75: 100%|██████████| 921/921 [04:17<00:00,  3.57it/s]


Epoch 37/75, Loss: 0.1398, Accuracy: 94.53%


Epoch 38/75: 100%|██████████| 921/921 [04:19<00:00,  3.55it/s]


Epoch 38/75, Loss: 0.1395, Accuracy: 94.52%


Epoch 39/75: 100%|██████████| 921/921 [04:18<00:00,  3.56it/s]


Epoch 39/75, Loss: 0.2031, Accuracy: 93.69%


Epoch 40/75: 100%|██████████| 921/921 [04:18<00:00,  3.56it/s]


Model saved at 65
Epoch 40/75, Loss: 0.1545, Accuracy: 94.04%


Epoch 41/75: 100%|██████████| 921/921 [04:21<00:00,  3.52it/s]


Epoch 41/75, Loss: 0.1397, Accuracy: 94.57%


Epoch 42/75: 100%|██████████| 921/921 [04:20<00:00,  3.53it/s]


Epoch 42/75, Loss: 0.1343, Accuracy: 94.75%


Epoch 43/75: 100%|██████████| 921/921 [04:19<00:00,  3.55it/s]


Epoch 43/75, Loss: 0.1306, Accuracy: 95.11%


Epoch 44/75: 100%|██████████| 921/921 [04:14<00:00,  3.62it/s]


Epoch 44/75, Loss: 0.1319, Accuracy: 94.94%


Epoch 45/75: 100%|██████████| 921/921 [04:15<00:00,  3.60it/s]


Epoch 45/75, Loss: 0.1271, Accuracy: 95.02%


Epoch 46/75: 100%|██████████| 921/921 [04:10<00:00,  3.68it/s]


Epoch 46/75, Loss: 0.1278, Accuracy: 95.19%


Epoch 47/75: 100%|██████████| 921/921 [04:07<00:00,  3.72it/s]


Epoch 47/75, Loss: 0.1250, Accuracy: 95.22%


Epoch 48/75: 100%|██████████| 921/921 [04:12<00:00,  3.65it/s]


Epoch 48/75, Loss: 0.1294, Accuracy: 95.02%


Epoch 49/75: 100%|██████████| 921/921 [04:07<00:00,  3.71it/s]


Epoch 49/75, Loss: 0.1252, Accuracy: 95.30%


Epoch 50/75: 100%|██████████| 921/921 [04:10<00:00,  3.68it/s]


Model saved at 75
Epoch 50/75, Loss: 0.1273, Accuracy: 95.19%


Epoch 51/75: 100%|██████████| 921/921 [04:17<00:00,  3.58it/s]


Epoch 51/75, Loss: 0.1204, Accuracy: 95.37%


Epoch 52/75: 100%|██████████| 921/921 [04:14<00:00,  3.63it/s]


Epoch 52/75, Loss: 0.1224, Accuracy: 95.36%


Epoch 53/75: 100%|██████████| 921/921 [04:12<00:00,  3.65it/s]


Epoch 53/75, Loss: 0.1197, Accuracy: 95.43%


Epoch 54/75: 100%|██████████| 921/921 [04:10<00:00,  3.68it/s]


Epoch 54/75, Loss: 0.1227, Accuracy: 95.25%


Epoch 55/75: 100%|██████████| 921/921 [04:13<00:00,  3.63it/s]


Epoch 55/75, Loss: 0.1180, Accuracy: 95.60%


Epoch 56/75: 100%|██████████| 921/921 [04:10<00:00,  3.67it/s]


Epoch 56/75, Loss: 0.1196, Accuracy: 95.52%


Epoch 57/75: 100%|██████████| 921/921 [04:14<00:00,  3.62it/s]


Epoch 57/75, Loss: 0.1168, Accuracy: 95.62%


Epoch 58/75: 100%|██████████| 921/921 [04:10<00:00,  3.68it/s]


Epoch 58/75, Loss: 0.1179, Accuracy: 95.57%


Epoch 59/75: 100%|██████████| 921/921 [04:13<00:00,  3.64it/s]


Epoch 59/75, Loss: 0.1171, Accuracy: 95.56%


Epoch 60/75: 100%|██████████| 921/921 [04:13<00:00,  3.63it/s]


Model saved at 85
Epoch 60/75, Loss: 0.1199, Accuracy: 95.43%


Epoch 61/75: 100%|██████████| 921/921 [04:11<00:00,  3.66it/s]


Epoch 61/75, Loss: 0.1160, Accuracy: 95.64%


Epoch 62/75: 100%|██████████| 921/921 [04:16<00:00,  3.59it/s]


Epoch 62/75, Loss: 0.1171, Accuracy: 95.59%


Epoch 63/75: 100%|██████████| 921/921 [04:20<00:00,  3.53it/s]


Epoch 63/75, Loss: 0.1147, Accuracy: 95.76%


Epoch 64/75: 100%|██████████| 921/921 [04:18<00:00,  3.56it/s]


Epoch 64/75, Loss: 0.1122, Accuracy: 95.77%


Epoch 65/75: 100%|██████████| 921/921 [04:04<00:00,  3.76it/s]


Epoch 65/75, Loss: 0.1157, Accuracy: 95.75%


Epoch 66/75: 100%|██████████| 921/921 [04:04<00:00,  3.76it/s]


Epoch 66/75, Loss: 0.1175, Accuracy: 95.53%


Epoch 67/75: 100%|██████████| 921/921 [04:04<00:00,  3.76it/s]


Epoch 67/75, Loss: 0.1119, Accuracy: 95.75%


Epoch 68/75: 100%|██████████| 921/921 [04:03<00:00,  3.79it/s]


Epoch 68/75, Loss: 0.1102, Accuracy: 95.84%


Epoch 69/75: 100%|██████████| 921/921 [04:02<00:00,  3.80it/s]


Epoch 69/75, Loss: 0.1137, Accuracy: 95.75%


Epoch 70/75: 100%|██████████| 921/921 [04:23<00:00,  3.50it/s]


Model saved at 95
Epoch 70/75, Loss: 0.1107, Accuracy: 95.90%


Epoch 71/75: 100%|██████████| 921/921 [04:22<00:00,  3.51it/s]


Epoch 71/75, Loss: 1.1805, Accuracy: 94.79%


Epoch 72/75: 100%|██████████| 921/921 [04:23<00:00,  3.49it/s]


Epoch 72/75, Loss: 0.1674, Accuracy: 93.94%


Epoch 73/75: 100%|██████████| 921/921 [04:22<00:00,  3.51it/s]


Epoch 73/75, Loss: 0.1397, Accuracy: 94.90%


Epoch 74/75: 100%|██████████| 921/921 [04:23<00:00,  3.49it/s]


Epoch 74/75, Loss: 0.1267, Accuracy: 95.34%


Epoch 75/75: 100%|██████████| 921/921 [04:24<00:00,  3.49it/s]

Epoch 75/75, Loss: 0.1234, Accuracy: 95.38%





In [29]:
# Save only the state dictionary.
# Unwrap DataParallel when present so the saved keys carry no "module."
# prefix; on a single GPU or CPU the model is not wrapped and has no
# `.module` attribute.
# NOTE(review): "model_50.pth" is also the name train_model uses for its
# epoch-50 whole-model checkpoint, so this overwrites that file if both are
# written to the same directory -- confirm the intended filename.
torch.save(
    (model.module if isinstance(model, nn.DataParallel) else model).state_dict(),
    "model_50.pth",
)


In [34]:
import shutil
from IPython.display import HTML

In [36]:
def create_download_link(filename, output_dir="/kaggle/working"):
    """Move ``filename`` into ``output_dir`` and return an HTML download link.

    Args:
        filename: Path of the file to expose, relative to the current
            working directory.
        output_dir: Directory the file is moved into. Defaults to the
            directory used by the original implementation.

    Returns:
        An ``IPython.display.HTML`` object containing an ``<a download>``
        anchor whose ``href`` and text are the HTML-escaped ``filename``.

    Raises:
        FileNotFoundError: If ``filename`` exists neither at its source
            location nor already inside ``output_dir``.
    """
    from html import escape  # local import: only this helper needs it

    destination = os.path.join(output_dir, filename)

    # Skip the move when the file is already where it should be: either the
    # source and destination are the same file, or a previous run of this
    # cell has already moved it. This makes the cell safe to re-run.
    already_in_place = os.path.exists(destination) and (
        not os.path.exists(filename) or os.path.samefile(filename, destination)
    )
    if not already_in_place:
        if not os.path.exists(filename):
            raise FileNotFoundError(
                f"Cannot create a download link: {filename!r} does not exist"
            )
        shutil.move(filename, destination)

    # Escape the name so characters such as quotes or '<' cannot break the markup.
    safe_name = escape(filename, quote=True)
    return HTML(f'<a href="{safe_name}" download>{safe_name}</a>')

# Generate the download link for the saved model.
# NOTE(review): no cell visible in this notebook writes a file named
# "model.pth" (the save cell writes "model_50.pth" and train_model writes
# "model_<epoch>.pth") -- confirm the filename before running.
create_download_link("model.pth")

In [12]:
# Evaluation loop
def evaluate_model(model, dataloader, criterion, device):
    """Score ``model`` on ``dataloader`` without updating any weights.

    The model is switched to eval mode (and left in it) and gradients are
    disabled for the whole pass.

    Args:
        model: Module mapping a batch of inputs to class logits.
        dataloader: Sized iterable yielding ``(inputs, labels)`` batches.
        criterion: Loss called as ``criterion(outputs, labels)``.
        device: Device the batches are moved to.

    Returns:
        ``(avg_loss, accuracy)`` where ``avg_loss`` is the mean of the
        per-batch losses and ``accuracy`` is a percentage in [0, 100].
    """
    model.eval()
    running_loss, hits, seen = 0.0, 0, 0

    with torch.no_grad():
        for batch_inputs, batch_labels in tqdm(dataloader, desc="Evaluating"):
            batch_inputs = batch_inputs.to(device)
            batch_labels = batch_labels.to(device)

            logits = model(batch_inputs)
            running_loss += criterion(logits, batch_labels).item()

            # Predicted class = index of the largest logit per sample.
            predictions = torch.max(logits, 1).indices
            seen += batch_labels.size(0)
            hits += (predictions == batch_labels).sum().item()

    accuracy = 100 * hits / seen
    # Averaged over batches, not samples.
    avg_loss = running_loss / len(dataloader)
    print(f"Validation Loss: {avg_loss:.4f}, Validation Accuracy: {accuracy:.2f}%")
    return avg_loss, accuracy

# Evaluate the model on validation set
# NOTE(review): eval_dataloader comes from a random_split drawn in this
# session, while the model weights were loaded from a checkpoint trained
# earlier. Unless the split is reproduced identically, evaluation samples may
# overlap data the checkpoint was trained on -- treat the score with care.
print("Evaluating model...")
evaluate_model(model, eval_dataloader, criterion, device)

Evaluating model...


Evaluating: 100%|██████████| 231/231 [01:05<00:00,  3.54it/s]

Validation Loss: 0.1598, Validation Accuracy: 97.31%





(0.1598398262358702, 97.3057346454021)