In [2]:
# the cnn module provides classes for training/predicting with various types of CNNs
from opensoundscape import AudioFileDataset, SpectrogramPreprocessor
from opensoundscape.ml.utils import collate_audio_samples_to_tensors
from torch.utils.data import DataLoader

#other utilities and packages
import torch
import torch.nn as nn
import pandas as pd
import numpy as np
import sklearn
from tqdm.auto import tqdm
import wandb

#non-tutorial
import os

torch.manual_seed(0)
np.random.seed(0)

# Device configuration
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

  from tqdm.autonotebook import tqdm


In [3]:
#Create metadata CSV for OpenSoundScape module

data_dir = os.path.join(os.path.dirname(os.getcwd()), 'data')

filepath_presence_dict = {"filepath":[], "presence":[]}

weto_train_dir = os.path.join(data_dir, 'weto', 'train')
for set_key in ['positive', 'negative']:
    set_dir = os.path.join(weto_train_dir, set_key)
    filenames = os.listdir(set_dir)
    filepath_list = filepath_presence_dict["filepath"]
    presence_list = filepath_presence_dict["presence"]
    for name in filenames:
        filepath = os.path.join(set_dir, name)
        filepath_list.append(filepath)
    filepath_presence_dict.update({'filepath': filepath_list})
    if set_key == "positive":
        presence_list = list(np.repeat(1, len(filenames)))
        filepath_presence_dict.update({'presence': presence_list})
    if set_key == "negative":
        presence_list.extend(list(np.repeat(0, len(filenames))))
        filepath_presence_dict.update({'presence': presence_list})       

meta_weto = pd.DataFrame(filepath_presence_dict).set_index('filepath')

train_df, valid_df = sklearn.model_selection.train_test_split(meta_weto, test_size=0.15, random_state=0)
print(f"created train_df (len {len(train_df)}) and valid_df (len {len(valid_df)})")
print(f"There are {np.sum(train_df['presence'])} positive samples in train_df")
print(f"There are {np.sum(train_df['presence']==0)} negative samples in train_df")

created train_df (len 1143) and valid_df (len 202)
There are 500 positive samples in train_df
There are 643 negative samples in train_df


In [4]:
train_df = train_df.sample(n=20, axis = 0, random_state=3)
valid_df = train_df.sample(n=10, axis = 0, random_state=3)

print(f"created train_df (len {len(train_df)}) and valid_df (len {len(valid_df)})")
print(f"There are {np.sum(train_df['presence'])} positive samples in train_df")
print(f"There are {np.sum(train_df['presence']==0)} negative samples in train_df")

created train_df (len 20) and valid_df (len 10)
There are 6 positive samples in train_df
There are 14 negative samples in train_df


In [5]:
from torchvision import models

# List available models
all_models = models.list_models()
classification_models = models.list_models(module=models)
list(classification_models)

['alexnet',
 'convnext_base',
 'convnext_large',
 'convnext_small',
 'convnext_tiny',
 'densenet121',
 'densenet161',
 'densenet169',
 'densenet201',
 'efficientnet_b0',
 'efficientnet_b1',
 'efficientnet_b2',
 'efficientnet_b3',
 'efficientnet_b4',
 'efficientnet_b5',
 'efficientnet_b6',
 'efficientnet_b7',
 'efficientnet_v2_l',
 'efficientnet_v2_m',
 'efficientnet_v2_s',
 'googlenet',
 'inception_v3',
 'maxvit_t',
 'mnasnet0_5',
 'mnasnet0_75',
 'mnasnet1_0',
 'mnasnet1_3',
 'mobilenet_v2',
 'mobilenet_v3_large',
 'mobilenet_v3_small',
 'regnet_x_16gf',
 'regnet_x_1_6gf',
 'regnet_x_32gf',
 'regnet_x_3_2gf',
 'regnet_x_400mf',
 'regnet_x_800mf',
 'regnet_x_8gf',
 'regnet_y_128gf',
 'regnet_y_16gf',
 'regnet_y_1_6gf',
 'regnet_y_32gf',
 'regnet_y_3_2gf',
 'regnet_y_400mf',
 'regnet_y_800mf',
 'regnet_y_8gf',
 'resnet101',
 'resnet152',
 'resnet18',
 'resnet34',
 'resnet50',
 'resnext101_32x8d',
 'resnext101_64x4d',
 'resnext50_32x4d',
 'shufflenet_v2_x0_5',
 'shufflenet_v2_x1_0',
 'sh

In [6]:
# Define sweep config
sweep_configuration = {
    "name": "sweepdemo",
    "method": "bayes",
    "metric": {"goal": "minimize", "name": "validation_loss"},
    "parameters": {
        "learning_rate": {"min": 0.0001, "max": 0.1},
        "batch_size": {"values": [64]},
        "epochs": {"values": [2, 5]},
        "optimizer": {"values": ["adam", "sgd"]},
        "weights": {"values": [None]},
        "architecture": {"values": ["resnet50"]},
        "window_samples": {"values": [200]},
        "prob_cutoff":{"values":[0.5]}
    },
}

# Initialize sweep by passing in config.
# (Optional) Provide a name of the project.
sweep_id = wandb.sweep(sweep=sweep_configuration, project="my-first-sweep")

Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


Create sweep with ID: bcov2mt1
Sweep URL: https://wandb.ai/pca_bioacoustics/my-first-sweep/sweeps/bcov2mt1


#### Define Pipeline
1. Make the model,
2. Train the model

In [7]:
import wandb

def model_pipeline():

  # tell wandb to get started
  run = wandb.init(project="my-first-sweep", 
                  #config=hyperparameters, 
                  entity='pca_bioacoustics',
                  name='sweeper')


  # access all HPs through wandb.config, so logging matches execution!
  config = wandb.config

  # make the model, data, and optimization problem
  model, train_loader, test_loader, criterion, optimizer = make(config)

  # and use them to train the model
  train(model, train_loader, test_loader, criterion, optimizer, config, log_images = False)

  # and test its final performance
  #test(model, test_loader)

  return model

In [8]:
def make(config):
    # Make the data
    train_data, valid_data = get_data(train=True), get_data(train=False)
    train_loader = make_loader(train_data, batch_size=config.batch_size)
    test_loader = make_loader(valid_data, batch_size=config.batch_size)

    # Make the model
    model = get_model(config.batch_size, config.architecture)

    # Make the loss and optimizer
    #criterion = nn.BCEWithLogitsLoss() #Sigmoid built-in
    criterion = nn.BCELoss()
    optimizer = torch.optim.SGD(model.parameters(), lr=config.learning_rate)
    
    return model, train_loader, test_loader, criterion, optimizer


#### Define the data loading

In [9]:
#Define data preprocessors and use dataloader collate function to return PyTorch DataLoader format

def get_data(train=True):
    preprocessor = SpectrogramPreprocessor(sample_duration=3,height=224,width=224)
    preprocessor.pipeline.to_spec.params.window_samples = 200
    preprocessor.pipeline.bandpass.bypass=True # Doesn't work with <22k hz samples
    if train == True:
        dataset = AudioFileDataset(train_df,preprocessor)
    if train == False:
        dataset = AudioFileDataset(valid_df,preprocessor)
        dataset.bypass_augmentations = True # Remove augmentations
    
    return dataset


def make_loader(dataset, batch_size):
    loader = torch.utils.data.DataLoader(dataset=dataset,
                                         batch_size=batch_size, 
                                         shuffle=True,
                                         collate_fn = collate_audio_samples_to_tensors)
    return loader

#### Define the model

In [10]:
def get_model(batch_size, architecture):
    # Define the model
    # No weights - random initialization
    model = getattr(models, architecture)(weights=None)

    # Replace the last layer (number of classes; sigmoid)
    num_features = model.fc.in_features
    #model.fc = nn.Linear(num_features, 1) #No need for sigmoid - located in loss func
                                          #Binary classifier, therefore output is of length 1
    model.fc = nn.Sequential(nn.Linear(in_features=num_features,
                                       out_features=1),
                             nn.Sigmoid()
                             )
    # Replace the first and second layers (number of channels and batch size)
    model.conv1 = nn.Conv2d(1, batch_size, kernel_size=7, stride=2, padding=3,bias=False)
    model.bn1 = nn.BatchNorm2d(batch_size, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    model.to(device)

    return(model)

#### Define training logic

In [16]:
def train(model, train_loader, valid_loader, criterion, optimizer, config, log_images = True):
    # tell wandb to watch what the model gets up to: gradients, weights, and more!
    wandb.watch(model, criterion, log="all", log_freq=10)

    #  creating log
    log_dict = {
        'training_loss_per_batch': [],
        'validation_loss_per_batch': [],
        'training_accuracy_per_epoch': [],
        'validation_accuracy_per_epoch': []
    } 

    model.train()

    for epoch in tqdm(range(config.epochs)):
        print(f'Epoch {epoch+1}/{config.epochs}')

        #  training
        print('training...')
        train_losses = []
        model.train()
        for _, (inputs, labels) in enumerate(train_loader):
            # Move the data to the device
            inputs = inputs.to(device)
            labels = labels.to(device)

            # Zero the parameter gradients
            optimizer.zero_grad()

            # Forward + backward + optimize
            outputs = model(inputs)
            train_loss = criterion(outputs.float(), labels.float())
            log_dict['training_loss_per_batch'].append(train_loss.item())
            train_losses.append(train_loss.item())
            train_loss.backward()
            optimizer.step()
        with torch.no_grad():
            # Update the training metrics
            print('deriving training accuracy...')
            train_acc = accuracy(model, train_loader)
            log_dict['training_accuracy_per_epoch'].append(train_acc)
            print(f'training accuracy: {train_acc}')



        #  validation
        print('validating...')
        val_losses = []

        #Setting network to eval mode
        model.eval()
        with torch.no_grad():
            for _, (inputs, labels) in enumerate(valid_loader):
                # Move the data to the device
                inputs = inputs.to(device)
                labels = labels.to(device)

                # Predict
                outputs = model(inputs)
                val_loss = criterion(outputs.float(), labels.float())
                log_dict['validation_loss_per_batch'].append(val_loss.item())
                val_losses.append(val_loss.item())

            #  computing accuracy
            print('deriving validation accuracy...')
            val_acc = accuracy(model, valid_loader)
            log_dict['validation_accuracy_per_epoch'].append(val_acc)

        wandb.log({"epoch": epoch,
                   "train_loss": train_loss,
                   "train_acc": train_acc,
                   "val_loss": val_loss,
                   "val_acc": val_acc})

In [15]:
#  defining accuracy function
def accuracy(network, dataloader):
  network.eval()
  total_correct = 0
  total_instances = 0
  for images, labels in tqdm(dataloader):
    images, labels = images.to(device), labels.to(device)
    predictions = torch.argmax(network(images), dim=1)
    correct_predictions = sum(predictions==labels).item()
    total_correct+=correct_predictions
    total_instances+=len(images)
  return round(total_correct/total_instances, 3)

Try this: https://pytorch.org/torcheval/main/metric_example.html

In [13]:
dconfig = dict(
    epochs=3,
    classes=1,
    kernels=[16, 32],
    batch_size=64,
    learning_rate=0.005,
    weights = None,
    architecture="resnet50",
    window_samples=200)

In [18]:
# Build, train and analyze the model with the pipeline
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
#model = model_pipeline()
wandb.agent(sweep_id, function=model_pipeline) #, count=4)

wandb: Agent Starting Run: p3znoeck with config:
wandb: 	architecture: resnet50
wandb: 	batch_size: 64
wandb: 	epochs: 2
wandb: 	learning_rate: 0.02127632018351355
wandb: 	optimizer: sgd
wandb: 	prob_cutoff: 0.5
wandb: 	weights: None
wandb: 	window_samples: 200
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.
  self._sweep_config = config_util.dict_from_config_file(


  0%|          | 0/2 [00:00<?, ?it/s]

Epoch 1/2
training...
deriving training accuracy...


  0%|          | 0/1 [00:00<?, ?it/s]

  from IPython.core.display import HTML, display  # type: ignore


  from IPython.core.display import display


VBox(children=(Label(value='0.001 MB of 0.005 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=0.226661…

Run p3znoeck errored: RuntimeError('a Tensor with 20 elements cannot be converted to Scalar')
wandb: ERROR Run p3znoeck errored: RuntimeError('a Tensor with 20 elements cannot be converted to Scalar')
wandb: Agent Starting Run: p01h1s1d with config:
wandb: 	architecture: resnet50
wandb: 	batch_size: 64
wandb: 	epochs: 2
wandb: 	learning_rate: 0.056124006750139586
wandb: 	optimizer: adam
wandb: 	prob_cutoff: 0.5
wandb: 	weights: None
wandb: 	window_samples: 200
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.
  self._sweep_config = config_util.dict_from_config_file(


  0%|          | 0/2 [00:00<?, ?it/s]

Epoch 1/2
training...
deriving training accuracy...


  0%|          | 0/1 [00:00<?, ?it/s]

  super().__setattr__('_state_dict_pre_hooks', OrderedDict())
  super().__setattr__('_state_dict_pre_hooks', OrderedDict())
  super().__setattr__('_state_dict_pre_hooks', OrderedDict())
  super().__setattr__('_state_dict_pre_hooks', OrderedDict())
  super().__setattr__('_state_dict_pre_hooks', OrderedDict())
  super().__setattr__('_state_dict_pre_hooks', OrderedDict())
  from IPython.core.display import HTML, display  # type: ignore


Run p01h1s1d errored: RuntimeError('a Tensor with 20 elements cannot be converted to Scalar')
wandb: ERROR Run p01h1s1d errored: RuntimeError('a Tensor with 20 elements cannot be converted to Scalar')
wandb: Agent Starting Run: a51b5x7d with config:
wandb: 	architecture: resnet50
wandb: 	batch_size: 64
wandb: 	epochs: 5
wandb: 	learning_rate: 0.01105753168240567
wandb: 	optimizer: adam
wandb: 	prob_cutoff: 0.5
wandb: 	weights: None
wandb: 	window_samples: 200
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.
  self._sweep_config = config_util.dict_from_config_file(


  0%|          | 0/5 [00:00<?, ?it/s]

Epoch 1/5
training...


In [17]:
wandb.finish()