In [1]:

import os
import glob



In [19]:
tensor_paths = glob.glob('./FINISHED_PRODUCT_thresh_0.5_step_0.1/*_combined.pt')
label_paths = glob.glob('./FINISHED_PRODUCT_thresh_0.5_step_0.1/*_label.pt')
# Print out the paths to ensure they are not empty
#print(f"Tensor paths: {tensor_paths}")
#print(f"Label paths: {label_paths}")


In [16]:
#previous dataloading technique
import torch
import torch.optim as optim
from torch.utils.data import DataLoader, Dataset
import glob

class CustomTensorDataset(Dataset):
    def __init__(self, tensor_paths, label_paths):
        self.tensor_paths = tensor_paths
        self.label_paths = label_paths

    def __len__(self):
        return len(self.tensor_paths)

    def __getitem__(self, idx):
        tensor = torch.load(self.tensor_paths[idx])
        label = torch.load(self.label_paths[idx])
        return tensor, label


# Create a dataset and DataLoader
dataset = CustomTensorDataset(tensor_paths, label_paths)
batch_size = 32
dataloader = DataLoader(dataset, batch_size=batch_size, shuffle=True)

# Example to iterate through the dataloader
for batch_tensors, batch_labels in dataloader:
    print(f"Batch Tensors Shape: {batch_tensors.shape}")
    print(f"Batch Labels Shape: {batch_labels.shape}")
    # Add your training or evaluation code here
    break  # Remove this line to iterate through the entire dataset


Batch Tensors Shape: torch.Size([32, 3, 128, 128])
Batch Labels Shape: torch.Size([32])


  tensor = torch.load(self.tensor_paths[idx])
  label = torch.load(self.label_paths[idx])


In [11]:
# trying to change the stuff such that it doesn't mess up the accuracy
# mainly addressing the issue of having spectrograms from the same audio file in validation, training and test
import os
import glob
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, Dataset, random_split
from collections import defaultdict
import time

class CustomTensorDataset(Dataset):
    def __init__(self, tensor_paths, label_paths):
        self.tensor_paths = tensor_paths
        self.label_paths = label_paths

    def __len__(self):
        return len(self.tensor_paths)

    def __getitem__(self, idx):
        tensor = torch.load(self.tensor_paths[idx])
        label = torch.load(self.label_paths[idx])
        return tensor, label

def group_paths_by_audio_file(tensor_paths, label_paths):
    grouped_tensor_paths = defaultdict(list)
    grouped_label_paths = defaultdict(list)

    for tensor_path, label_path in zip(tensor_paths, label_paths):
        base_name = '_'.join(os.path.basename(tensor_path).split('_')[:-2])
        grouped_tensor_paths[base_name].append(tensor_path)
        grouped_label_paths[base_name].append(label_path)

    return list(grouped_tensor_paths.values()), list(grouped_label_paths.values())


# Ensure the tensor and label paths are sorted so they match
tensor_paths.sort()
label_paths.sort()

# Group paths by audio file
grouped_tensor_paths, grouped_label_paths = group_paths_by_audio_file(tensor_paths, label_paths)

# Ensure there are tensors and labels
assert len(grouped_tensor_paths) > 0, "No tensor files found."
assert len(grouped_label_paths) > 0, "No label files found."

# Split groups into training, validation, and testing sets
num_files = len(grouped_tensor_paths)
train_size = int(0.7 * num_files)
val_size = int(0.15 * num_files)
test_size = num_files - train_size - val_size

train_tensor_groups, val_tensor_groups, test_tensor_groups = random_split(grouped_tensor_paths, [train_size, val_size, test_size])
train_label_groups, val_label_groups, test_label_groups = random_split(grouped_label_paths, [train_size, val_size, test_size])

# Flatten the grouped lists
train_tensors = [item for sublist in train_tensor_groups for item in sublist]
train_labels = [item for sublist in train_label_groups for item in sublist]
val_tensors = [item for sublist in val_tensor_groups for item in sublist]
val_labels = [item for sublist in val_label_groups for item in sublist]
test_tensors = [item for sublist in test_tensor_groups for item in sublist]
test_labels = [item for sublist in test_label_groups for item in sublist]

# Create datasets and data loaders
train_dataset = CustomTensorDataset(train_tensors, train_labels)
val_dataset = CustomTensorDataset(val_tensors, val_labels)
test_dataset = CustomTensorDataset(test_tensors, test_labels)

batch_size = 32
train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=batch_size, shuffle=False)
test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False)


# Model Definition

In [7]:
#Resnet implementation of it
import torch
import torch.nn as nn
import torch.nn.functional as F

class BasicBlock(nn.Module):
    expansion = 1

    def __init__(self, in_channels, out_channels, stride=1, downsample=None):
        super(BasicBlock, self).__init__()
        self.conv1 = nn.Conv2d(in_channels, out_channels, kernel_size=3, stride=stride, padding=1, bias=False)
        self.bn1 = nn.BatchNorm2d(out_channels)
        self.relu = nn.ReLU(inplace=True)
        self.conv2 = nn.Conv2d(out_channels, out_channels, kernel_size=3, stride=1, padding=1, bias=False)
        self.bn2 = nn.BatchNorm2d(out_channels)
        self.downsample = downsample

    def forward(self, x):
        identity = x

        out = self.conv1(x)
        out = self.bn1(out)
        out = self.relu(out)

        out = self.conv2(out)
        out = self.bn2(out)

        if self.downsample is not None:
            identity = self.downsample(x)

        out += identity
        out = self.relu(out)

        return out

class ResNet(nn.Module):
    def __init__(self, block, layers, num_classes=2):
        super(ResNet, self).__init__()
        self.in_channels = 64
        self.conv1 = nn.Conv2d(3, 64, kernel_size=7, stride=2, padding=3, bias=False)
        self.bn1 = nn.BatchNorm2d(64)
        self.relu = nn.ReLU(inplace=True)
        self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1)
        self.layer1 = self._make_layer(block, 64, layers[0])
        self.layer2 = self._make_layer(block, 128, layers[1], stride=2)
        self.layer3 = self._make_layer(block, 256, layers[2], stride=2)
        self.layer4 = self._make_layer(block, 512, layers[3], stride=2)
        self.avgpool = nn.AdaptiveAvgPool2d((1, 1))
        self.fc = nn.Linear(512 * block.expansion, num_classes)

    def _make_layer(self, block, out_channels, blocks, stride=1):
        downsample = None
        if stride != 1 or self.in_channels != out_channels * block.expansion:
            downsample = nn.Sequential(
                nn.Conv2d(self.in_channels, out_channels * block.expansion, kernel_size=1, stride=stride, bias=False),
                nn.BatchNorm2d(out_channels * block.expansion),
            )

        layers = []
        layers.append(block(self.in_channels, out_channels, stride, downsample))
        self.in_channels = out_channels * block.expansion
        for _ in range(1, blocks):
            layers.append(block(self.in_channels, out_channels))

        return nn.Sequential(*layers)

    def forward(self, x):
        x = self.conv1(x)
        x = self.bn1(x)
        x = self.relu(x)
        x = self.maxpool(x)

        x = self.layer1(x)
        x = self.layer2(x)
        x = self.layer3(x)
        x = self.layer4(x)

        x = self.avgpool(x)
        x = torch.flatten(x, 1)
        x = self.fc(x)

        return x

def resnet18(num_classes=2):
    return ResNet(BasicBlock, [2, 2, 2, 2], num_classes)

# Example usage:
model = resnet18(num_classes=2)
# model = model.to(device)


In [None]:
#GNet - which is just a modifies google-net for 3x128x128 input files
import torch
import torch.nn as nn
import torch.nn.functional as F

class InceptionModule(nn.Module):
    def __init__(self, in_channels, ch1x1, ch3x3reduce, ch3x3, ch5x5reduce, ch5x5, pool_proj):
        super(InceptionModule, self).__init__()
        self.branch1 = nn.Conv2d(in_channels, ch1x1, kernel_size=1)

        self.branch2 = nn.Sequential(
            nn.Conv2d(in_channels, ch3x3reduce, kernel_size=1),
            nn.Conv2d(ch3x3reduce, ch3x3, kernel_size=3, padding=1)
        )

        self.branch3 = nn.Sequential(
            nn.Conv2d(in_channels, ch5x5reduce, kernel_size=1),
            nn.Conv2d(ch5x5reduce, ch5x5, kernel_size=5, padding=2)
        )

        self.branch4 = nn.Sequential(
            nn.MaxPool2d(kernel_size=3, stride=1, padding=1),
            nn.Conv2d(in_channels, pool_proj, kernel_size=1)
        )

    def forward(self, x):
        branch1 = self.branch1(x)
        branch2 = self.branch2(x)
        branch3 = self.branch3(x)
        branch4 = self.branch4(x)
        outputs = [branch1, branch2, branch3, branch4]
        return torch.cat(outputs, 1)

class GNet(nn.Module):
    def __init__(self):
        super(GNet, self).__init__()

        #Upsample to get the input size from 64x128 to 128x128
        self.upsample = nn.Upsample(size=(128, 128), mode='bilinear', align_corners=True)

        # Initial layers
        self.conv1 = nn.Conv2d(3, 16, kernel_size=7, stride=1, padding=3)
        self.maxpool1 = nn.MaxPool2d(kernel_size=3, stride=2, padding=1)

        self.conv2 = nn.Conv2d(16, 48, kernel_size=3, stride=1, padding=1)
        self.maxpool2 = nn.MaxPool2d(kernel_size=3, stride=2, padding=1)

        # Inception modules
        self.inception3a = InceptionModule(48, 64, 48, 128, 16, 32, 32)
        self.inception3b = InceptionModule(256, 128, 128, 192, 32, 96, 64)
        self.maxpool3 = nn.MaxPool2d(kernel_size=3, stride=2, padding=1)

        self.inception4a = InceptionModule(480, 192, 96, 208, 16, 48, 64)
        self.inception4b = InceptionModule(512, 160, 112, 224, 24, 64, 64)
        self.inception4c = InceptionModule(512, 128, 128, 256, 24, 64, 64)
        self.inception4d = InceptionModule(512, 112, 144, 288, 32, 64, 64)
        self.inception4e = InceptionModule(528, 256, 160, 320, 32, 128, 128)
        self.maxpool4 = nn.MaxPool2d(kernel_size=3, stride=2, padding=1)

        self.inception5a = InceptionModule(832, 256, 160, 320, 32, 128, 128)
        self.inception5b = InceptionModule(832, 384, 192, 384, 48, 128, 128)

        self.avgpool = nn.AvgPool2d(kernel_size=8, stride=1)
        self.dropout = nn.Dropout(0.4)
        self.fc = nn.Linear(1024, 2)

    def forward(self, x):
        x = self.upsamle(x)
        x = F.relu(self.conv1(x))
        x = self.maxpool1(x)

        x = F.relu(self.conv2(x))
        x = self.maxpool2(x)

        x = self.inception3a(x)
        x = self.inception3b(x)
        x = self.maxpool3(x)

        x = self.inception4a(x)
        x = self.inception4b(x)
        x = self.inception4c(x)
        x = self.inception4d(x)
        x = self.inception4e(x)
        x = self.maxpool4(x)

        x = self.inception5a(x)
        x = self.inception5b(x)

        x = self.avgpool(x)
        x = torch.flatten(x, 1)
        x = self.dropout(x)
        x = self.fc(x)
        x = F.softmax(x, dim=1)

        return x


# Model training

In [8]:
import torch

# Set device to MPS if available, otherwise fallback to CPU
device = torch.device('mps' if torch.backends.mps.is_available() else 'cpu')
print(f"Using device: {device}")


Using device: mps


In [17]:
#previous way
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, random_split
import time
from torchvision.models import resnet18  # Assuming you're using resnet18



# Split the dataset into training, validation, and testing sets
train_size = int(0.7 * len(dataset))
val_size = int(0.15 * len(dataset))
test_size = len(dataset) - train_size - val_size

train_dataset, val_dataset, test_dataset = random_split(dataset, [train_size, val_size, test_size])

# Create data loaders for each subset
train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True, num_workers=4)
val_loader = DataLoader(val_dataset, batch_size=32, shuffle=False, num_workers=4)
test_loader = DataLoader(test_dataset, batch_size=32, shuffle=False, num_workers=4)

# Check if GPU is available and set device
device = torch.device('cuda' if torch.cuda.is_available() else 'mps' if torch.backends.mps.is_available() else 'cpu')
print(f"Using device: {device}")

# Initialize model
model = resnet18(num_classes=2)
#model = GNet()

# Parallelize the model if more than one GPU is available
device = torch.device('mps' if torch.backends.mps.is_available() else 'cpu')
print(f"Using device: {device}")
model.to(device)

# Calculate class weights
total_labels = torch.cat([labels for _, labels in DataLoader(train_dataset, batch_size=len(train_dataset))], dim=0)
total_0 = (total_labels == 0).sum().item()
total_1 = (total_labels == 1).sum().item()
class_counts = [total_0, total_1]
# Increase the weight for class 1 to penalize more for misclassification
class_weights = torch.tensor([1.0, 5.0], dtype=torch.float)  # Adjust the weight for class 1 as needed
criterion = nn.CrossEntropyLoss(weight=class_weights.to(device))

# Optimizer
optimizer = optim.SGD(model.parameters(), lr=0.01, momentum=0.9)

num_epochs = 10
start_time = time.time()

# Training and validation loop
for epoch in range(num_epochs):
    model.train()
    train_loss = 0.0

    for batch_idx, (data, labels) in enumerate(train_loader):
        data, labels = data.to(device), labels.to(device)
        optimizer.zero_grad()
        outputs = model(data)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()
        train_loss += loss.item()

    train_loss /= len(train_loader)

    model.eval()
    val_loss = 0.0
    correct_0 = 0
    total_0 = 0
    correct_1 = 0
    total_1 = 0

    with torch.no_grad():
        for batch_idx, (data, labels) in enumerate(val_loader):
            data, labels = data.to(device), labels.to(device)
            outputs = model(data)
            loss = criterion(outputs, labels)
            val_loss += loss.item()
            _, predicted = torch.max(outputs.data, 1)
            total_0 += (labels == 0).sum().item()
            correct_0 += ((predicted == labels) & (labels == 0)).sum().item()
            total_1 += (labels == 1).sum().item()
            correct_1 += ((predicted == labels) & (labels == 1)).sum().item()

    val_loss /= len(val_loader)
    val_accuracy_0 = correct_0 / total_0 if total_0 > 0 else 0
    val_accuracy_1 = correct_1 / total_1 if total_1 > 0 else 0

    print(f'Epoch [{epoch+1}/{num_epochs}], Train Loss: {train_loss:.4f}, Val Loss: {val_loss:.4f}, Val Accuracy (0s): {val_accuracy_0:.4f}, Val Accuracy (1s): {val_accuracy_1:.4f}')

end_time = time.time()

# Save the model
torch.save(model.state_dict(), '/content/gdrive/My Drive/resnet18_model_state.pth')

# Testing phase
model.eval()
test_loss = 0.0
correct_0 = 0
total_0 = 0
correct_1 = 0
total_1 = 0

with torch.no_grad():
    for batch_idx, (data, labels) in enumerate(test_loader):
        data, labels = data.to(device), labels.to(device)
        outputs = model(data)
        loss = criterion(outputs, labels)
        test_loss += loss.item()
        _, predicted = torch.max(outputs.data, 1)
        total_0 += (labels == 0).sum().item()
        correct_0 += ((predicted == labels) & (labels == 0)).sum().item()
        total_1 += (labels == 1).sum().item()
        correct_1 += ((predicted == labels) & (labels == 1)).sum().item()

test_loss /= len(test_loader)
test_accuracy_0 = correct_0 / total_0 if total_0 > 0 else 0
test_accuracy_1 = correct_1 / total_1 if total_1 > 0 else 0

print(f'Test Loss: {test_loss:.4f}, Test Accuracy (0s): {test_accuracy_0:.4f}, Test Accuracy (1s): {test_accuracy_1:.4f}')
print(f'Training completed in: {end_time - start_time:.2f} seconds')


Using device: mps
Using device: mps


  tensor = torch.load(self.tensor_paths[idx])
  label = torch.load(self.label_paths[idx])
Traceback (most recent call last):
  File "<string>", line 1, in <module>
  File "/Users/yannbaglinbunod/miniconda3/envs/pytorch_mps_env/lib/python3.10/multiprocessing/spawn.py", line 116, in spawn_main
    exitcode = _main(fd, parent_sentinel)
  File "/Users/yannbaglinbunod/miniconda3/envs/pytorch_mps_env/lib/python3.10/multiprocessing/spawn.py", line 126, in _main
    self = reduction.pickle.load(from_parent)
AttributeError: Can't get attribute 'CustomTensorDataset' on <module '__main__' (built-in)>


KeyboardInterrupt: 

In [13]:
assert len(tensor_paths) == len(label_paths), "Mismatch between tensor and label paths."


In [14]:
# Check if MPS is available and set device
device = torch.device('mps' if torch.backends.mps.is_available() else 'cpu')
print(f"Using device: {device}")

# Initialize model
model = resnet18(num_classes=2)
model.to(device)

# Calculate class weights
total_labels = torch.cat([labels for _, labels in DataLoader(train_dataset, batch_size=len(train_dataset))], dim=0)
total_0 = (total_labels == 0).sum().item()
total_1 = (total_labels == 1).sum().item()
class_weights = torch.tensor([1.0, 5.0], dtype=torch.float)  # Adjust the weight for class 1 as needed
criterion = nn.CrossEntropyLoss(weight=class_weights.to(device))

# Optimizer
optimizer = optim.SGD(model.parameters(), lr=0.01, momentum=0.9)

num_epochs = 10
start_time = time.time()

# Training and validation loop
for epoch in range(num_epochs):
    model.train()
    train_loss = 0.0

    for batch_idx, (data, labels) in enumerate(train_loader):
        data, labels = data.to(device), labels.to(device)
        optimizer.zero_grad()
        outputs = model(data)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()
        train_loss += loss.item()

    train_loss /= len(train_loader)

    model.eval()
    val_loss = 0.0
    correct_0 = 0
    total_0 = 0
    correct_1 = 0
    total_1 = 0

    with torch.no_grad():
        for batch_idx, (data, labels) in enumerate(val_loader):
            data, labels = data.to(device), labels.to(device)
            outputs = model(data)
            loss = criterion(outputs, labels)
            val_loss += loss.item()
            _, predicted = torch.max(outputs.data, 1)
            total_0 += (labels == 0).sum().item()
            correct_0 += ((predicted == labels) & (labels == 0)).sum().item()
            total_1 += (labels == 1).sum().item()
            correct_1 += ((predicted == labels) & (labels == 1)).sum().item()

    val_loss /= len(val_loader)
    val_accuracy_0 = correct_0 / total_0 if total_0 > 0 else 0
    val_accuracy_1 = correct_1 / total_1 if total_1 > 0 else 0

    print(f'Epoch [{epoch+1}/{num_epochs}], Train Loss: {train_loss:.4f}, Val Loss: {val_loss:.4f}, Val Accuracy (0s): {val_accuracy_0:.4f}, Val Accuracy (1s): {val_accuracy_1:.4f}')

end_time = time.time()

# Save the model
torch.save(model.state_dict(), '/content/gdrive/My Drive/resnet18_model_state.pth')

# Testing phase
model.eval()
test_loss = 0.0
correct_0 = 0
total_0 = 0
correct_1 = 0
total_1 = 0

with torch.no_grad():
    for batch_idx, (data, labels) in enumerate(test_loader):
        data, labels = data.to(device), labels.to(device)
        outputs = model(data)
        loss = criterion(outputs, labels)
        test_loss += loss.item()
        _, predicted = torch.max(outputs.data, 1)
        total_0 += (labels == 0).sum().item()
        correct_0 += ((predicted == labels) & (labels == 0)).sum().item()
        total_1 += (labels == 1).sum().item()
        correct_1 += ((predicted == labels) & (labels == 1)).sum().item()

test_loss /= len(test_loader)
test_accuracy_0 = correct_0 / total_0 if total_0 > 0 else 0
test_accuracy_1 = correct_1 / total_1 if total_1 > 0 else 0

print(f'Test Loss: {test_loss:.4f}, Test Accuracy (0s): {test_accuracy_0:.4f}, Test Accuracy (1s): {test_accuracy_1:.4f}')
print(f'Training completed in: {end_time - start_time:.2f} seconds')

Using device: mps


  tensor = torch.load(self.tensor_paths[idx])
  label = torch.load(self.label_paths[idx])


IndexError: list index out of range

ValueError: setting an array element with a sequence. The requested array has an inhomogeneous shape after 1 dimensions. The detected shape was (110,) + inhomogeneous part.

In [None]:
torch.save(model.state_dict(), '/content/gdrive/My Drive/resnet18_model_state.pth')


# Model Evaluation


In [None]:
#Try training many times and see how well the model learns every time

# Define hyperparameters to test
best_hyperparams = {'lr': 0.01, 'momentum': 0.9, 'weight_decay': 0.0}

# Run the test 100 times
results_0 = []
results_1 = []

for i in range(25):
    print(f"Run {i + 1}/100")
    val_accuracies_0, val_accuracies_1 = train_and_evaluate(**best_hyperparams)
    results_0.append(val_accuracies_0)
    results_1.append(val_accuracies_1)

def plot_accuracy_results(results_0, results_1):
    # Convert lists to numpy arrays for easier manipulation
    results_0 = np.array(results_0)
    results_1 = np.array(results_1)

    # Calculate mean and standard deviation
    mean_accuracies_0 = results_0.mean(axis=0)
    std_accuracies_0 = results_0.std(axis=0)
    mean_accuracies_1 = results_1.mean(axis=0)
    std_accuracies_1 = results_1.std(axis=0)

    epochs = np.arange(1, len(mean_accuracies_0) + 1)

    # Plot results for Class 0
    fig = go.Figure()
    fig.add_trace(go.Scatter(
        x=epochs, y=mean_accuracies_0,
        mode='lines+markers',
        name='Class 0 Accuracy',
        line=dict(color='blue')
    ))
    fig.add_trace(go.Scatter(
        x=epochs, y=mean_accuracies_0 + std_accuracies_0,
        mode='lines',
        name='Class 0 Accuracy + 1 Std',
        line=dict(color='blue', dash='dash')
    ))
    fig.add_trace(go.Scatter(
        x=epochs, y=mean_accuracies_0 - std_accuracies_0,
        mode='lines',
        name='Class 0 Accuracy - 1 Std',
        line=dict(color='blue', dash='dash'),
        fill='tonexty'
    ))
    fig.update_layout(title='Validation Accuracy for Class 0',
                      xaxis_title='Epoch',
                      yaxis_title='Accuracy',
                      width=600, height=400)
    fig.show()

    # Plot results for Class 1
    fig = go.Figure()
    fig.add_trace(go.Scatter(
        x=epochs, y=mean_accuracies_1,
        mode='lines+markers',
        name='Class 1 Accuracy',
        line=dict(color='red')
    ))
    fig.add_trace(go.Scatter(
        x=epochs, y=mean_accuracies_1 + std_accuracies_1,
        mode='lines',
        name='Class 1 Accuracy + 1 Std',
        line=dict(color='red', dash='dash')
    ))
    fig.add_trace(go.Scatter(
        x=epochs, y=mean_accuracies_1 - std_accuracies_1,
        mode='lines',
        name='Class 1 Accuracy - 1 Std',
        line=dict(color='red', dash='dash'),
        fill='tonexty'
    ))
    fig.update_layout(title='Validation Accuracy for Class 1',
                      xaxis_title='Epoch',
                      yaxis_title='Accuracy',
                      width=600, height=400)
    fig.show()

# Plot the results
plot_accuracy_results(results_0, results_1)


# FULL training and evaluation that works

In [None]:
import torch
print("CUDA Available:", torch.cuda.is_available())
print("CUDA Device Count:", torch.cuda.device_count())
print("CUDA Device Name:", torch.cuda.get_device_name(0))


CUDA Available: True
CUDA Device Count: 1
CUDA Device Name: Tesla T4


In [None]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, random_split, Dataset
import glob
import time
from torchvision.models import resnet18

class CustomTensorDataset(Dataset):
    def __init__(self, tensor_paths, label_paths):
        self.tensor_paths = tensor_paths
        self.label_paths = label_paths

    def __len__(self):
        return len(self.tensor_paths)

    def __getitem__(self, idx):
        tensor = torch.load(self.tensor_paths[idx]).float()
        label = torch.load(self.label_paths[idx]).long()
        return tensor, label

# Confirm GPU availability
print("CUDA Available:", torch.cuda.is_available())
print("CUDA Device Count:", torch.cuda.device_count())
print("CUDA Device Name:", torch.cuda.get_device_name(0))

# Load all paths to tensors and labels
print("Getting all tensor paths")
tensor_paths = glob.glob('/content/gdrive/My Drive/FINISHED_PRODUCT.4/*_combined.pt')
label_paths = glob.glob('/content/gdrive/My Drive/FINISHED_PRODUCT.4/*_label.pt')

# Ensure the tensor and label paths are sorted so they match
tensor_paths.sort()
label_paths.sort()

print("Creating dataset and dataloader")

# Create a dataset and DataLoader
dataset = CustomTensorDataset(tensor_paths, label_paths)

# Split the dataset into training, validation, and testing sets
train_size = int(0.7 * len(dataset))
val_size = int(0.15 * len(dataset))
test_size = len(dataset) - train_size - val_size

train_dataset, val_dataset, test_dataset = random_split(dataset, [train_size, val_size, test_size])

# Create data loaders for each subset
batch_size = 64  # Increase batch size if memory allows
num_workers = 2  # Adjusted based on the system's suggestion
train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True, num_workers=num_workers, pin_memory=True)
val_loader = DataLoader(val_dataset, batch_size=batch_size, shuffle=False, num_workers=num_workers, pin_memory=True)
test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False, num_workers=num_workers, pin_memory=True)

# Check if GPU is being used and set device
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print(f"Using device: {device}")

# Initialize model
model = resnet18(num_classes=2)

# Move model to device
model.to(device)

# Calculate class weights
class_weights = torch.tensor([1.0, 5.0], dtype=torch.float)  # Adjust the weight for class 1 as needed
criterion = nn.CrossEntropyLoss(weight=class_weights.to(device))
print('Model and criterion moved to device')

# Optimizer
optimizer = optim.SGD(model.parameters(), lr=0.01, momentum=0.9)

num_epochs = 10
start_time = time.time()
print('Starting training')
# Training and validation loop
for epoch in range(num_epochs):
    model.train()
    train_loss = 0.0

    for batch_idx, (data, labels) in enumerate(train_loader):
        data, labels = data.to(device, non_blocking=True), labels.to(device, non_blocking=True)
        if batch_idx == 0:
            print(f"First batch data device: {data.device}, labels device: {labels.device}")
        optimizer.zero_grad()
        outputs = model(data)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()
        train_loss += loss.item()

    train_loss /= len(train_loader)

    model.eval()
    val_loss = 0.0
    correct_0 = 0
    total_0 = 0
    correct_1 = 0
    total_1 = 0

    with torch.no_grad():
        for batch_idx, (data, labels) in enumerate(val_loader):
            data, labels = data.to(device, non_blocking=True), labels.to(device, non_blocking=True)
            if epoch == 0 and batch_idx == 0:
                print(f"First validation batch data device: {data.device}, labels device: {labels.device}")
            outputs = model(data)
            loss = criterion(outputs, labels)
            val_loss += loss.item()
            _, predicted = torch.max(outputs.data, 1)
            total_0 += (labels == 0).sum().item()
            correct_0 += ((predicted == labels) & (labels == 0)).sum().item()
            total_1 += (labels == 1).sum().item()
            correct_1 += ((predicted == labels) & (labels == 1)).sum().item()

    val_loss /= len(val_loader)
    val_accuracy_0 = correct_0 / total_0 if total_0 > 0 else 0
    val_accuracy_1 = correct_1 / total_1 if total_1 > 0 else 0

    print(f'Epoch [{epoch+1}/{num_epochs}], Train Loss: {train_loss:.4f}, Val Loss: {val_loss:.4f}, Val Accuracy (0s): {val_accuracy_0:.4f}, Val Accuracy (1s): {val_accuracy_1:.4f}')

end_time = time.time()

# Save the model
torch.save(model.state_dict(), 'resnet18_model.pth')

# Testing phase
model.eval()
test_loss = 0.0
correct_0 = 0
total_0 = 0
correct_1 = 0
total_1 = 0

with torch.no_grad():
    for batch_idx, (data, labels) in enumerate(test_loader):
        data, labels = data.to(device, non_blocking=True), labels.to(device, non_blocking=True)
        if batch_idx == 0:
            print(f"First test batch data device: {data.device}, labels device: {labels.device}")
        outputs = model(data)
        loss = criterion(outputs, labels)
        test_loss += loss.item()
        _, predicted = torch.max(outputs.data, 1)
        total_0 += (labels == 0).sum().item()
        correct_0 += ((predicted == labels) & (labels == 0)).sum().item()
        total_1 += (labels == 1).sum().item()
        correct_1 += ((predicted == labels) & (labels == 1)).sum().item()

test_loss /= len(test_loader)
test_accuracy_0 = correct_0 / total_0 if total_0 > 0 else 0
test_accuracy_1 = correct_1 / total_1 if total_1 > 0 else 0

print(f'Test Loss: {test_loss:.4f}, Test Accuracy (0s): {test_accuracy_0:.4f}, Test Accuracy (1s): {test_accuracy_1:.4f}')
print(f'Training completed in: {end_time - start_time:.2f} seconds')


CUDA Available: True
CUDA Device Count: 1
CUDA Device Name: Tesla T4
Getting all tensor paths
Creating dataset and dataloader
Using device: cuda
Model and criterion moved to device
Starting training


KeyboardInterrupt: 

In [20]:
import os
import glob
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, Dataset, random_split
from collections import defaultdict
import time
from torchvision.models import resnet18

class CustomTensorDataset(Dataset):
    def __init__(self, tensor_paths, label_paths):
        self.tensor_paths = tensor_paths
        self.label_paths = label_paths

        # Ensure paths have the same length
        assert len(self.tensor_paths) == len(self.label_paths), f"Mismatch between tensor and label paths: {len(self.tensor_paths)} tensors, {len(self.label_paths)} labels."

    def __len__(self):
        return len(self.tensor_paths)

    def __getitem__(self, idx):
        print(f"Accessing index {idx} - tensor: {self.tensor_paths[idx]}, label: {self.label_paths[idx]}")
        tensor = torch.load(self.tensor_paths[idx])
        label = torch.load(self.label_paths[idx])
        return tensor, label

def group_paths_by_audio_file(tensor_paths, label_paths):
    grouped_tensor_paths = defaultdict(list)
    grouped_label_paths = defaultdict(list)

    for tensor_path, label_path in zip(tensor_paths, label_paths):
        base_name = '_'.join(os.path.basename(tensor_path).split('_')[:-2])
        grouped_tensor_paths[base_name].append(tensor_path)
        grouped_label_paths[base_name].append(label_path)

    return list(grouped_tensor_paths.values()), list(grouped_label_paths.values())


# Ensure the tensor and label paths are sorted so they match
tensor_paths.sort()
label_paths.sort()

# Debug: Check lengths
print(f"Number of tensors: {len(tensor_paths)}")
print(f"Number of labels: {len(label_paths)}")

# Group paths by audio file
grouped_tensor_paths, grouped_label_paths = group_paths_by_audio_file(tensor_paths, label_paths)

# Ensure there are tensors and labels
assert len(grouped_tensor_paths) > 0, "No tensor files found."
assert len(grouped_label_paths) > 0, "No label files found."

# Split groups into training, validation, and testing sets
num_files = len(grouped_tensor_paths)
train_size = int(0.7 * num_files)
val_size = int(0.15 * num_files)
test_size = num_files - train_size - val_size

train_tensor_groups, val_tensor_groups, test_tensor_groups = random_split(grouped_tensor_paths, [train_size, val_size, test_size])
train_label_groups, val_label_groups, test_label_groups = random_split(grouped_label_paths, [train_size, val_size, test_size])

# Flatten the grouped lists
train_tensors = [item for sublist in train_tensor_groups for item in sublist]
train_labels = [item for sublist in train_label_groups for item in sublist]
val_tensors = [item for sublist in val_tensor_groups for item in sublist]
val_labels = [item for sublist in val_label_groups for item in sublist]
test_tensors = [item for sublist in test_tensor_groups for item in sublist]
test_labels = [item for sublist in test_label_groups for item in sublist]

# Debug: Print lengths after flattening
print(f"Train tensors: {len(train_tensors)}, Train labels: {len(train_labels)}")
print(f"Val tensors: {len(val_tensors)}, Val labels: {len(val_labels)}")
print(f"Test tensors: {len(test_tensors)}, Test labels: {len(test_labels)}")

# Create datasets and data loaders
train_dataset = CustomTensorDataset(train_tensors, train_labels)
val_dataset = CustomTensorDataset(val_tensors, val_labels)
test_dataset = CustomTensorDataset(test_tensors, test_labels)

batch_size = 32
train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=batch_size, shuffle=False)
test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False)

# Check if MPS is available and set device
device = torch.device('mps' if torch.backends.mps.is_available() else 'cpu')
print(f"Using device: {device}")

# Initialize model
model = resnet18(num_classes=2)
model.to(device)

# Calculate class weights
total_labels = torch.cat([labels for _, labels in DataLoader(train_dataset, batch_size=len(train_dataset))], dim=0)
total_0 = (total_labels == 0).sum().item()
total_1 = (total_labels == 1).sum().item()
class_weights = torch.tensor([1.0, 5.0], dtype=torch.float)  # Adjust the weight for class 1 as needed
criterion = nn.CrossEntropyLoss(weight=class_weights.to(device))

# Optimizer
optimizer = optim.SGD(model.parameters(), lr=0.01, momentum=0.9)

num_epochs = 10
start_time = time.time()

# Training and validation loop
for epoch in range(num_epochs):
    model.train()
    train_loss = 0.0

    for batch_idx, (data, labels) in enumerate(train_loader):
        data, labels = data.to(device), labels.to(device)
        optimizer.zero_grad()
        outputs = model(data)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()
        train_loss += loss.item()

    train_loss /= len(train_loader)

    model.eval()
    val_loss = 0.0
    correct_0 = 0
    total_0 = 0
    correct_1 = 0
    total_1 = 0

    with torch.no_grad():
        for batch_idx, (data, labels) in enumerate(val_loader):
            data, labels = data.to(device), labels.to(device)
            outputs = model(data)
            loss = criterion(outputs, labels)
            val_loss += loss.item()
            _, predicted = torch.max(outputs.data, 1)
            total_0 += (labels == 0).sum().item()
            correct_0 += ((predicted == labels) & (labels == 0)).sum().item()
            total_1 += (labels == 1).sum().item()
            correct_1 += ((predicted == labels) & (labels == 1)).sum().item()

    val_loss /= len(val_loader)
    val_accuracy_0 = correct_0 / total_0 if total_0 > 0 else 0
    val_accuracy_1 = correct_1 / total_1 if total_1 > 0 else 0

    print(f'Epoch [{epoch+1}/{num_epochs}], Train Loss: {train_loss:.4f}, Val Loss: {val_loss:.4f}, Val Accuracy (0s): {val_accuracy_0:.4f}, Val Accuracy (1s): {val_accuracy_1:.4f}')

end_time = time.time()

# Save the model
torch.save(model.state_dict(), '/content/gdrive/My Drive/resnet18_model_state.pth')

# Testing phase
model.eval()
test_loss = 0.0
correct_0 = 0
total_0 = 0
correct_1 = 0
total_1 = 0

with torch.no_grad():
    for batch_idx, (data, labels) in enumerate(test_loader):
        data, labels = data.to(device), labels.to(device)
        outputs = model(data)
        loss = criterion(outputs, labels)
        test_loss += loss.item()
        _, predicted = torch.max(outputs.data, 1)
        total_0 += (labels == 0).sum().item()
        correct_0 += ((predicted == labels) & (labels == 0)).sum().item()
        total_1 += (labels == 1).sum().item()
        correct_1 += ((predicted == labels) & (labels == 1)).sum().item()

test_loss /= len(test_loader)
test_accuracy_0 = correct_0 / total_0 if total_0 > 0 else 0
test_accuracy_1 = correct_1 / total_1 if total_1 > 0 else 0

print(f'Test Loss: {test_loss:.4f}, Test Accuracy (0s): {test_accuracy_0:.4f}, Test Accuracy (1s): {test_accuracy_1:.4f}')
print(f'Training completed in: {end_time - start_time:.2f} seconds')


Number of tensors: 8459
Number of labels: 8459
Train tensors: 5650, Train labels: 6130
Val tensors: 1538, Val labels: 1094
Test tensors: 1271, Test labels: 1235


AssertionError: Mismatch between tensor and label paths: 5650 tensors, 6130 labels.

In [21]:
extra_labels = set(label_paths) - set(tensor_paths)
missing_tensors = set(tensor_paths) - set(label_paths)

print(f"Extra labels: {extra_labels}")
print(f"Missing tensors: {missing_tensors}")


Extra labels: {'./FINISHED_PRODUCT_thresh_0.5_step_0.1/05_swallow_banana_segment_21_label.pt', './FINISHED_PRODUCT_thresh_0.5_step_0.1/15_swallow_banana_segment_16_label.pt', './FINISHED_PRODUCT_thresh_0.5_step_0.1/09_swallow_banana_segment_10_label.pt', './FINISHED_PRODUCT_thresh_0.5_step_0.1/19_swallow_banana_N2_segment_59_label.pt', './FINISHED_PRODUCT_thresh_0.5_step_0.1/19_swallow_banana_segment_78_label.pt', './FINISHED_PRODUCT_thresh_0.5_step_0.1/18_swallow_banana_segment_96_label.pt', './FINISHED_PRODUCT_thresh_0.5_step_0.1/10_swallow_banana_N3_segment_38_label.pt', './FINISHED_PRODUCT_thresh_0.5_step_0.1/12_swallow_dry_segment_5_label.pt', './FINISHED_PRODUCT_thresh_0.5_step_0.1/27_swallow_banana_N2_segment_13_label.pt', './FINISHED_PRODUCT_thresh_0.5_step_0.1/15_swallow_banana_N2_segment_101_label.pt', './FINISHED_PRODUCT_thresh_0.5_step_0.1/16_swallow_banana_N2_segment_37_label.pt', './FINISHED_PRODUCT_thresh_0.5_step_0.1/01_swallow_water_segment_10_label.pt', './FINISHED_PR