In [None]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader
import numpy as np

In [None]:
class AudioDataset(Dataset):
    def __init__(self, mfcc_features, labels):
        self.features = torch.FloatTensor(mfcc_features)
        self.labels = torch.LongTensor(labels)
        
    def __len__(self):
        return len(self.labels)
        
    def __getitem__(self, idx):
        return self.features[idx], self.labels[idx]

class Simple1DCNN(nn.Module):
    def __init__(self, input_features=40, input_length=150):
        super(Simple1DCNN, self).__init__()
        self.conv1 = nn.Conv1d(input_features, 64, kernel_size=3, stride=1, padding=1)
        self.pool1 = nn.MaxPool1d(kernel_size=2, stride=2)
        # Calculate size after first Conv and Pool
        self.size_after_conv1 = (input_length + 2*1 - 3) // 1 + 1
        self.size_after_pool1 = (self.size_after_conv1 - 2) // 2 + 1

        self.flatten = nn.Flatten()
        # Adjust the linear layer size
        self.fc = nn.Linear(64 * self.size_after_pool1, 2)

    def forward(self, x):
        x = self.pool1(nn.ReLU()(self.conv1(x)))
        x = self.flatten(x)
        x = self.fc(x)
        return x

def train_model(model, train_loader, val_loader, num_epochs=50, learning_rate=0.001):
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    model = model.to(device)
    criterion = nn.CrossEntropyLoss()
    optimizer = optim.Adam(model.parameters(), lr=learning_rate)

    train_acc, val_acc = [], []
    train_losses, val_losses = [], []

    for epoch in range(num_epochs):
        model.train()
        running_loss = 0.0
        correct, total = 0, 0

        for data in train_loader:
            inputs, labels = data[0].to(device), data[1].to(device)
            optimizer.zero_grad()
            outputs = model(inputs)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()
            running_loss += loss.item()
            _, predicted = torch.max(outputs.data, 1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()

        train_losses.append(running_loss / len(train_loader))
        train_acc.append(correct / total)

        model.eval()
        val_loss = 0.0
        correct, total = 0, 0
        with torch.no_grad():
            for data in val_loader:
                inputs, labels = data[0].to(device), data[1].to(device)
                outputs = model(inputs)
                loss = criterion(outputs, labels)
                val_loss += loss.item()
                _, predicted = torch.max(outputs.data, 1)
                total += labels.size(0)
                correct += (predicted == labels).sum().item()

        val_losses.append(val_loss / len(val_loader))
        val_acc.append(correct / total)

        print(f'Epoch {epoch+1}/{num_epochs} - Train Loss: {train_losses[-1]:.4f} - Train Acc: {train_acc[-1]*100:.2f}% - Val Loss: {val_losses[-1]:.4f} - Val Acc: {val_acc[-1]*100:.2f}%')

    return model, train_acc, val_acc, train_losses, val_losses


Epoch 1/50 - Train Loss: 1.6405 - Train Acc: 51.00% - Val Loss: 0.2497 - Val Acc: 86.00%
Epoch 2/50 - Train Loss: 0.4404 - Train Acc: 79.50% - Val Loss: 0.0526 - Val Acc: 98.50%
Epoch 3/50 - Train Loss: 0.0857 - Train Acc: 96.50% - Val Loss: 0.0144 - Val Acc: 100.00%
Epoch 4/50 - Train Loss: 0.0080 - Train Acc: 100.00% - Val Loss: 0.0039 - Val Acc: 100.00%
Epoch 5/50 - Train Loss: 0.0035 - Train Acc: 100.00% - Val Loss: 0.0024 - Val Acc: 100.00%
Epoch 6/50 - Train Loss: 0.0024 - Train Acc: 100.00% - Val Loss: 0.0020 - Val Acc: 100.00%
Epoch 7/50 - Train Loss: 0.0020 - Train Acc: 100.00% - Val Loss: 0.0017 - Val Acc: 100.00%
Epoch 8/50 - Train Loss: 0.0017 - Train Acc: 100.00% - Val Loss: 0.0015 - Val Acc: 100.00%
Epoch 9/50 - Train Loss: 0.0015 - Train Acc: 100.00% - Val Loss: 0.0013 - Val Acc: 100.00%
Epoch 10/50 - Train Loss: 0.0013 - Train Acc: 100.00% - Val Loss: 0.0012 - Val Acc: 100.00%
Epoch 11/50 - Train Loss: 0.0012 - Train Acc: 100.00% - Val Loss: 0.0010 - Val Acc: 100.00%
Ep

(Simple1DCNN(
   (conv1): Conv1d(40, 64, kernel_size=(3,), stride=(1,), padding=(1,))
   (pool1): MaxPool1d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
   (flatten): Flatten(start_dim=1, end_dim=-1)
   (fc): Linear(in_features=16000, out_features=2, bias=True)
 ),
 [0.51,
  0.795,
  0.965,
  1.0,
  1.0,
  1.0,
  1.0,
  1.0,
  1.0,
  1.0,
  1.0,
  1.0,
  1.0,
  1.0,
  1.0,
  1.0,
  1.0,
  1.0,
  1.0,
  1.0,
  1.0,
  1.0,
  1.0,
  1.0,
  1.0,
  1.0,
  1.0,
  1.0,
  1.0,
  1.0,
  1.0,
  1.0,
  1.0,
  1.0,
  1.0,
  1.0,
  1.0,
  1.0,
  1.0,
  1.0,
  1.0,
  1.0,
  1.0,
  1.0,
  1.0,
  1.0,
  1.0,
  1.0,
  1.0,
  1.0],
 [0.86,
  0.985,
  1.0,
  1.0,
  1.0,
  1.0,
  1.0,
  1.0,
  1.0,
  1.0,
  1.0,
  1.0,
  1.0,
  1.0,
  1.0,
  1.0,
  1.0,
  1.0,
  1.0,
  1.0,
  1.0,
  1.0,
  1.0,
  1.0,
  1.0,
  1.0,
  1.0,
  1.0,
  1.0,
  1.0,
  1.0,
  1.0,
  1.0,
  1.0,
  1.0,
  1.0,
  1.0,
  1.0,
  1.0,
  1.0,
  1.0,
  1.0,
  1.0,
  1.0,
  1.0,
  1.0,
  1.0,
  1.0,
  1.0,
  1.0],
 [1.

In [58]:
import pandas as pd

df = pd.read_csv('datasets/DAIC-WOZ/Patient_Classes.csv')

def segment_mfcc(mfcc, max_segment_length=200):
    # Segment the MFCC array into fixed lengths with possible overlap (if desired)
    # segment_length is the fixed length of each segment
    segments = []
    for start in range(0, mfcc.shape[1], max_segment_length):
        end = start + max_segment_length
        if end < mfcc.shape[1]:
            segments.append(mfcc[:, start:end])
        else:
            # Padding the last segment if it's shorter than the required segment length
            segments.append(np.pad(mfcc[:, start:], ((0,0), (0, max_segment_length - (mfcc.shape[1] - start))), 'constant'))
    return segments

def load_concatenated_mfcc(path):
    
    # Load the concatenated MFCC data
    data = np.load(path)
    return data['mfcc']

def create_datasets(df, max_segment_length=500):
    dataset = {'train': [], 'test': []}
    labels = {'train': [], 'test': []}

    for _, row in df.iterrows():
        patient_id = row['Participant_ID']
        label = row['PHQ8_Binary']
        print(patient_id, label)
        train_or_test = row['dataset']  # Could be 'train' or 'test'
        # if train_or_test is 'dev' change it to 'test'
        if train_or_test == 'dev':
            train_or_test = 'test'


        mfcc_path = f'datasets/DAIC-WOZ/ConcatenatedMFCC/concatenated_mfcc_{patient_id}.npz'

        try:
            mfcc = load_concatenated_mfcc(mfcc_path)  # Load the raw MFCC data
            segments = segment_mfcc(mfcc, max_segment_length=max_segment_length)
            
            # Append each segment to the corresponding dataset
            for segment in segments:
                dataset[train_or_test].append(segment)
                labels[train_or_test].append(label)
        except Exception as e:
            print(f"Failed to process patient {patient_id}: {e}")

    return dataset, labels


MAX_SEGMENT_LENGTH = 200
dataset, labels = create_datasets(df, max_segment_length=MAX_SEGMENT_LENGTH)

303 0
304 0
305 0
310 0
312 0
313 0
315 0
316 0
317 0
318 0
319 1
320 1
321 1
322 0
324 0
325 1
326 0
327 0
328 0
330 1
333 0
336 0
338 1
339 1
340 0
341 0
343 0
344 1
345 1
347 1
348 1
350 1
351 1
352 1
353 1
355 1
356 1
357 0
358 0
360 0
362 1
363 0
364 0
366 0
368 0
369 0
370 0
371 0
372 1
374 0
375 0
376 1
379 0
380 1
383 0
385 0
386 1
391 0
392 0
393 0
397 0
400 0
401 0
402 1
409 0
412 1
414 1
415 0
416 0
419 0
423 0
425 0
426 1
427 0
428 0
429 0
430 0
433 1
434 0
437 0
441 1
443 0
444 0
445 0
446 0
447 0
448 1
449 0
454 0
455 0
456 0
457 0
459 1
463 0
464 0
468 0
471 0
473 0
474 0
475 0
478 0
479 0
485 0
486 0
487 0
488 0
491 0
Failed to process patient 491: [Errno 2] No such file or directory: 'datasets/DAIC-WOZ/ConcatenatedMFCC/concatenated_mfcc_491.npz'
302 0
307 0
331 0
335 1
346 1
367 1
377 1
381 1
382 0
388 1
389 1
390 0
395 0
403 0
404 0
406 0
413 1
417 0
418 1
420 0
422 1
436 0
439 0
440 1
451 0
458 0
472 0
476 0
477 0
482 0
483 1
484 0
489 0
Failed to process patient 489

In [54]:
dataset = AudioDataset(dataset["train"], labels["test"])

  self.features = torch.FloatTensor(mfcc_features)


In [55]:
loader = DataLoader(dataset, batch_size=2, shuffle=True)

In [None]:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = Simple1DCNN(input_length=MAX_SEGMENT_LENGTH).to(device)
#criterion = nn.CrossEntropyLoss()
#optimizer = optim.Adam(model.parameters(), lr=0.001)
train_model(model, loader, loader, num_epochs=50, learning_rate=0.001)

Epoch 1/50 - Train Loss: 1.2197 - Train Acc: 61.88% - Val Loss: 0.5791 - Val Acc: 69.76%
Epoch 2/50 - Train Loss: 0.6191 - Train Acc: 66.94% - Val Loss: 0.5883 - Val Acc: 68.94%
Epoch 3/50 - Train Loss: 0.6023 - Train Acc: 67.78% - Val Loss: 0.5789 - Val Acc: 68.57%
Epoch 4/50 - Train Loss: 0.5902 - Train Acc: 68.81% - Val Loss: 0.5424 - Val Acc: 71.36%
Epoch 5/50 - Train Loss: 0.5852 - Train Acc: 70.54% - Val Loss: 0.6216 - Val Acc: 65.96%
Epoch 6/50 - Train Loss: 0.5721 - Train Acc: 69.39% - Val Loss: 0.5219 - Val Acc: 73.55%
Epoch 7/50 - Train Loss: 0.5551 - Train Acc: 71.55% - Val Loss: 0.5115 - Val Acc: 73.39%
Epoch 8/50 - Train Loss: 0.5530 - Train Acc: 71.62% - Val Loss: 0.5590 - Val Acc: 68.25%
Epoch 9/50 - Train Loss: 0.5468 - Train Acc: 73.12% - Val Loss: 0.5198 - Val Acc: 74.94%
Epoch 10/50 - Train Loss: 0.5310 - Train Acc: 73.07% - Val Loss: 0.5080 - Val Acc: 74.07%
Epoch 11/50 - Train Loss: 0.5089 - Train Acc: 74.28% - Val Loss: 0.5220 - Val Acc: 74.57%
Epoch 12/50 - Train

### Random dummy for development purposes

In [None]:
    # Dummy data (10 samples, 40 MFCC features, varying length of 150)
PARTICIPANTS = 200 # automatically has to be defined 
MFCC_LENGTH = 500 # I have to control the numbeer of MFCC features (so like cutting or whatever)

mfcc_features = np.random.randn(PARTICIPANTS, 40, MFCC_LENGTH) # 10 pcs of 40 MFCC features with length of 150
# (200, 40, 500) 
labels = np.random.randint(0, 2, size=(PARTICIPANTS,)) # so the labels 0 2 means labels between 0 and 1

dataset = AudioDataset(mfcc_features, labels)
loader = DataLoader(dataset, batch_size=2, shuffle=True)

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = Simple1DCNN(input_length=MFCC_LENGTH).to(device)
#criterion = nn.CrossEntropyLoss()
#optimizer = optim.Adam(model.parameters(), lr=0.001)
train_model(model, loader, loader, num_epochs=50, learning_rate=0.001)