In [1]:
%load_ext autoreload
%autoreload 2
from PyHa.statistics import *
from PyHa.IsoAutio import *
from PyHa.visualizations import *
from PyHa.annotation_post_processing import *
import pandas as pd

In [2]:
# Folder the audio clips are loaded from. It is passed to kaledoscope_dataset
# as `filepath` and joined to each "IN FILE" name by plain string
# concatenation, so the trailing slash is required.
path = "./mixed_bird/025_Mixed_Bird_Outputs_Train/"

In [3]:
from PyHa.tweetynet_package.tweetynet.Load_data_functions import compute_features, predictions_to_kaleidoscope
from PyHa.microfaune_package.microfaune import audio
import torch.optim as optim
import torch.nn as nn

# PREP

In [4]:
from torch.utils.data import Dataset, DataLoader
from torchvision import transforms, utils
class kaledoscope_dataset(Dataset):
    """Dataset that yields one audio file, with its manual annotations, per index.

    The annotation CSV holds one row per labelled event, so a single audio
    file usually appears on several rows. Items are indexed by *unique audio
    file*, not by annotation row.
    """

    def __init__(self, csv_file, filepath, transform=None, normalized_sample_rate=44100):
        """
        Args:
            csv_file (string): Path to the csv file with annotations. It must
                contain an "IN FILE" column naming the audio file of each row.
            filepath (string): Folder holding the audio files. It is joined to
                the file name by string concatenation, so it must end with a
                path separator.
            transform (callable, optional): Stored on the instance; not applied
                anywhere in this class.
            normalized_sample_rate (int, optional): Sample rate every clip is
                resampled to before feature extraction.
        """
        self.manual_df = pd.read_csv(csv_file)
        self.manual_df["FOLDER"] = filepath
        self.audio_files = self.manual_df[["FOLDER", "IN FILE"]].drop_duplicates().reset_index(drop=True)
        self.transform = transform
        self.normalized_sample_rate = normalized_sample_rate

    def __len__(self):
        """Return the number of distinct audio files (the valid index range)."""
        # __getitem__ indexes `audio_files`, so the length has to match that
        # table rather than the (longer) per-annotation table.
        return len(self.audio_files)

    def __getitem__(self, idx):
        """Load, resample and featurise the audio file at position `idx`.

        Returns:
            On success the 6-tuple
            (tweetynet_features, label_df, folder, file_name, signal, sample_rate),
            where label_df holds the annotation rows of that file.
            On any load/processing failure the 2-tuple (-1, -1); callers
            detect it by the failed 6-way unpacking.

        Raises:
            IndexError: if `idx` is out of range. This is what terminates a
                plain `for sample in dataset` loop.
        """
        # Kept outside every try block on purpose so IndexError propagates.
        audio_df = self.audio_files.iloc[idx]

        audio_dif = audio_df["FOLDER"]
        audio_file = audio_df["IN FILE"]
        try:
            SAMPLE_RATE, SIGNAL = audio.load_wav(audio_dif + audio_file)
        except Exception as e:
            # Exception (not BaseException) so Ctrl-C still interrupts the run.
            print("Failed to load " + audio_dif + audio_file, e)
            return (-1, -1)

        try:
            if SAMPLE_RATE != self.normalized_sample_rate:
                # Imported here so the resampler does not depend on a name
                # leaking in through a star-import elsewhere in the notebook.
                from scipy import signal as scipy_signal

                rate_ratio = self.normalized_sample_rate / SAMPLE_RATE
                SIGNAL = scipy_signal.resample(
                    SIGNAL, int(len(SIGNAL) * rate_ratio))
                SAMPLE_RATE = self.normalized_sample_rate
        except Exception:
            print("Failed to Downsample" + audio_file)
            return (-1, -1)

        # Mix multi-channel audio down to mono by averaging the channels
        # (identical to the former sum / 2 for stereo input).
        if len(SIGNAL.shape) == 2:
            SIGNAL = SIGNAL.sum(axis=1) / SIGNAL.shape[1]

        # detection
        try:
            tweetynet_features = compute_features([SIGNAL])
        except Exception:
            print("Failed to compute features" + audio_file)
            return (-1, -1)

        same_folder = self.manual_df["FOLDER"] == audio_dif
        same_file = self.manual_df["IN FILE"] == audio_file
        label_df = self.manual_df[same_folder & same_file]
        return (tweetynet_features, label_df, audio_dif, audio_file, SIGNAL, SAMPLE_RATE)
    
# Despite the name this is the Dataset itself, not a torch DataLoader.
trainloader = kaledoscope_dataset(csv_file="mixed_bird_manual.csv", filepath=path)
trainloader

<__main__.kaledoscope_dataset at 0x1c8e704ac08>

In [5]:
# init detector
# NOTE(review): neither `torch` nor `TweetyNetModel` is imported by name in the
# visible cells; they presumably arrive through the `from PyHa... import *`
# lines in the first cell — confirm, or import them explicitly.
device = torch.device('cpu')
# NOTE(review): the positional arguments look like (number of classes,
# input shape, ..., device) — confirm against TweetyNetModel's signature
# before changing any of these values.
net_wrapper = TweetyNetModel(2, (1, 86, 86), 86, device)

# Training

In [6]:
# TODO: DEFINE TEST LOADER

In [7]:
def IOU_Loss(automated_df, manual_df):
    """Return the mean entry of the IoU matrix for two annotation tables.

    The matrix comes from `clip_IoU(automated_df, manual_df)` and is wrapped
    in a fresh tensor before averaging. Because the tensor is created from
    plain data, the result carries no autograd history.
    """
    iou_matrix = torch.tensor(clip_IoU(automated_df, manual_df))
    return iou_matrix.mean()

def convert_label_to_local_score(manual_df, size_of_local_sorce, start_time = 0):
    """Rasterise annotation intervals into a 0/1 vector of time bins.

    Bin `i` is sampled at `start_time + i * (clip_length / size_of_local_sorce)`
    seconds and set to 1 when that instant lies inside at least one annotation,
    i.e. OFFSET <= t <= OFFSET + DURATION (both ends inclusive).

    Args:
        manual_df: Annotation rows with "CLIP LENGTH", "OFFSET" and "DURATION"
            columns. The clip length is read from the first row only.
        size_of_local_sorce: Number of bins in the returned vector.
        start_time: Time in seconds of bin 0.

    Returns:
        A float32 tensor of shape (size_of_local_sorce,) holding 0.0 / 1.0.
        An empty `manual_df` (no annotations) yields all zeros.

    NOTE(review): the bin width is always clip_length / size_of_local_sorce,
    independent of `start_time`. When the vector is meant to describe a short
    window of a longer clip, the caller needs to confirm this spacing is the
    intended one.
    """
    # No annotations means nothing is labelled; previously this raised
    # IndexError when reading the clip length from row 0.
    if manual_df.empty:
        return torch.zeros(size_of_local_sorce)

    duration_of_clip = manual_df["CLIP LENGTH"].iloc[0]
    seconds_per_index = duration_of_clip / size_of_local_sorce

    # Time stamp of every bin, computed in one shot.
    sample_times = np.arange(size_of_local_sorce) * seconds_per_index + start_time

    starts = manual_df["OFFSET"].to_numpy()
    ends = (manual_df["OFFSET"] + manual_df["DURATION"]).to_numpy()

    # (bins x annotations) membership matrix; a bin is active if any
    # annotation covers its time stamp.
    covered = (
        (starts[None, :] <= sample_times[:, None])
        & (ends[None, :] >= sample_times[:, None])
    ).any(axis=1)

    return torch.from_numpy(covered.astype(np.float32))



# Sanity check of the label rasteriser on a single annotated recording.
manual_df = pd.read_csv("ScreamingPiha_Manual_Labels.csv")
convert_label_to_local_score(
    manual_df.loc[manual_df["IN FILE"] == "ScreamingPiha5.wav"],
    size_of_local_sorce=500,
)

tensor([0., 0., 0., 0., 0., 0., 1., 1., 1., 1., 1., 1., 1., 0., 0., 0., 0., 0.,
        0., 0., 0., 0., 0., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,
        1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 0., 0., 0., 0., 1., 1., 1., 1.,
        1., 1., 1., 0., 0., 0., 0., 0., 0., 0., 1., 1., 1., 1., 1., 1., 0., 0.,
        0., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,
        1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,
        1., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 1.,
        1., 1., 1., 1., 0., 0., 1., 1., 1., 1., 1., 1., 1., 0., 0., 0., 0., 0.,
        0., 1., 1., 1., 1., 1., 1., 1., 0., 0., 1., 1., 1., 1., 1., 1., 1., 1.,
        1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,
        1., 1., 1., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
        0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 1., 1., 1., 1.,
        1., 1., 1., 1., 1., 1., 1., 1., 

In [8]:
# Peek at the gradient of the first parameter tensor; before any backward
# pass has run it is None.
net = net_wrapper.model
print(next(net.parameters()).grad)

None


In [None]:
%reload_ext autoreload
torch.autograd.set_detect_anomaly(True)
net = net_wrapper.model
for param in net.parameters():
    param.requires_grad = True
optimizer = optim.Adam(net.parameters(), lr=0.01)
criterion = nn.CrossEntropyLoss()

for epoch in range(1):  # loop over the dataset multiple times
    running_loss = 0.0
    batch_loss = 0.0
    mini_batch_count = 0
    batch_count = 0
    for i, data in enumerate(trainloader, 0):
        # get the inputs; data is a list of [inputs, labels]
        try:
            inputs, labels, audio_dif, audio_file, SIGNAL, SAMPLE_RATE = data
        except:
            continue

        # zero the parameter gradients
        optimizer.zero_grad()

        # forward + backward + optimize
        device = torch.device('cpu')
        net = net_wrapper.model
        for param in net.parameters():
            param.requires_grad = True
        
        batch_size=1
        window_size=2
        test_data_loader = DataLoader(inputs, batch_size=batch_size)
        predictions = pd.DataFrame()
        net_wrapper.model.eval()
        local_score = torch.tensor([])
        tmp_local_score = []
        dataiter = iter(test_data_loader)
        _, label, uid = dataiter.next()
        time_bin = float(window_size)/label.shape[1]
        st_time = np.array([time_bin*n for n in range(label.shape[1])])
        count = []
        
        for i, data in enumerate(test_data_loader):
                
            #Run Model
            sub_inputs, sub_labels, uids = data
            sub_inputs = sub_inputs.clone().detach().requires_grad_(True)
            sub_inputs, sub_labels = sub_inputs.to(device), sub_labels.to(device)
            output = net_wrapper.model(sub_inputs, sub_inputs.shape[0], sub_inputs.shape[0])
            
            #Get labels for window
            bins = st_time + (int(uids[0].split("_")[0])*window_size)
            label_for_output = convert_label_to_local_score(manual_df, len([x for x in output[0, 1, :]]), bins[0])
            
            #Set up output for correct batch * class matrix
            output_for_loss = torch.transpose(output.clone()[0].clone(), 0, 1).clone().float()
            
            #Compute Loss
            output = output_for_loss
            label_array =label_for_output
            label_array = label_array.long()
            loss = criterion(output, label_array)
            
            #Backprop + Check for learning
            state_a = net_wrapper.model.state_dict().__str__()
            loss.backward()
            optimizer.step()
            state_b = net_wrapper.model.state_dict().__str__()
            if state_a == state_b:
                print("Network not updating.")


            # print statistics
            running_loss += loss.item()
            batch_loss += loss.item()
            mini_batch_count += 1
            batch_count += 1
            print("=======================")

        print("DONE ONE FILE")
        print(f'[{epoch + 1, mini_batch_count}] loss: {running_loss / mini_batch_count:.3f}')
    print(f'[{epoch + 1, batch_count}] loss: {batch_loss / batch_count:.3f}')
    batch_loss = 0.0
    batch_count = 0

print('Finished Training')
torch.save(net.state_dict(), "./test.h5")

DONE ONE FILE
tensor(0.1808, grad_fn=<NllLossBackward0>)
[(1, 2)] loss: 0.430 None
DONE ONE FILE
tensor(0.0003, grad_fn=<NllLossBackward0>)
[(1, 12)] loss: 0.087 None
DONE ONE FILE
tensor(0.0077, grad_fn=<NllLossBackward0>)
[(1, 16)] loss: 0.076 None
DONE ONE FILE
tensor(0.7278, grad_fn=<NllLossBackward0>)
[(1, 35)] loss: 0.188 None
DONE ONE FILE
tensor(0.0215, grad_fn=<NllLossBackward0>)
[(1, 38)] loss: 0.177 None
DONE ONE FILE
tensor(0.0954, grad_fn=<NllLossBackward0>)
[(1, 39)] loss: 0.175 None
DONE ONE FILE
tensor(2.3931, grad_fn=<NllLossBackward0>)
[(1, 60)] loss: 0.307 None
DONE ONE FILE
tensor(1.0450, grad_fn=<NllLossBackward0>)
[(1, 73)] loss: 0.283 None
DONE ONE FILE
tensor(0.2984, grad_fn=<NllLossBackward0>)
[(1, 90)] loss: 0.255 None
DONE ONE FILE
tensor(0.5601, grad_fn=<NllLossBackward0>)
[(1, 104)] loss: 0.243 None
DONE ONE FILE
tensor(0.0003, grad_fn=<NllLossBackward0>)
[(1, 114)] loss: 0.225 None
DONE ONE FILE
tensor(0.0006, grad_fn=<NllLossBackward0>)
[(1, 118)] loss: 0

DONE ONE FILE
tensor(1.1506, grad_fn=<NllLossBackward0>)
[(1, 307)] loss: 0.455 None
DONE ONE FILE
tensor(0.1357, grad_fn=<NllLossBackward0>)
[(1, 318)] loss: 0.449 None
DONE ONE FILE
tensor(3.2851, grad_fn=<NllLossBackward0>)
[(1, 348)] loss: 0.541 None
DONE ONE FILE
tensor(0.0037, grad_fn=<NllLossBackward0>)
[(1, 351)] loss: 0.537 None
DONE ONE FILE
tensor(0.2372, grad_fn=<NllLossBackward0>)
[(1, 362)] loss: 0.524 None
DONE ONE FILE
tensor(3.1865, grad_fn=<NllLossBackward0>)
[(1, 396)] loss: 0.584 None
DONE ONE FILE
tensor(1.2166, grad_fn=<NllLossBackward0>)
[(1, 421)] loss: 0.577 None
DONE ONE FILE
tensor(2.5673, grad_fn=<NllLossBackward0>)
[(1, 443)] loss: 0.583 None
DONE ONE FILE
tensor(5.9631e-06, grad_fn=<NllLossBackward0>)
[(1, 447)] loss: 0.579 None
DONE ONE FILE
tensor(0.1291, grad_fn=<NllLossBackward0>)
[(1, 455)] loss: 0.569 None
DONE ONE FILE
tensor(1.6168, grad_fn=<NllLossBackward0>)
[(1, 479)] loss: 0.573 None
DONE ONE FILE
tensor(0.0016, grad_fn=<NllLossBackward0>)
[(1,

DONE ONE FILE
tensor(0.1914, grad_fn=<NllLossBackward0>)
[(1, 564)] loss: 0.530 None
DONE ONE FILE
tensor(0.0016, grad_fn=<NllLossBackward0>)
[(1, 571)] loss: 0.524 None
DONE ONE FILE
tensor(0.4547, grad_fn=<NllLossBackward0>)
[(1, 585)] loss: 0.515 None
DONE ONE FILE
tensor(0.0001, grad_fn=<NllLossBackward0>)
[(1, 595)] loss: 0.507 None
DONE ONE FILE
tensor(0.7166, grad_fn=<NllLossBackward0>)
[(1, 608)] loss: 0.498 None
DONE ONE FILE
tensor(1.7371, grad_fn=<NllLossBackward0>)
[(1, 630)] loss: 0.498 None
DONE ONE FILE
tensor(0.9727, grad_fn=<NllLossBackward0>)
[(1, 651)] loss: 0.492 None
DONE ONE FILE
tensor(0.0723, grad_fn=<NllLossBackward0>)
[(1, 662)] loss: 0.484 None
DONE ONE FILE
tensor(0.0254, grad_fn=<NllLossBackward0>)
[(1, 665)] loss: 0.482 None
DONE ONE FILE
tensor(10.5328, grad_fn=<NllLossBackward0>)
[(1, 729)] loss: 0.653 None
DONE ONE FILE
tensor(0.0021, grad_fn=<NllLossBackward0>)
[(1, 739)] loss: 0.645 None


In [None]:
#%reload_ext autoreload
#correct = 0
#total = 0
# since we're not training, we don't need to calculate the gradients for our outputs


#isolation_parameters = {
#     "model" : "tweetynet",
#     "tweety_output": True,
#    "technique" : "steinberg",
#     "threshold_type" : "median",
#     "threshold_const" : 2.0,
#     "threshold_min" : 0.0,
#     "window_size" : 2.0,
#     "chunk_size" : 5.0
#}
#automated_df = generate_automated_labels(path,isolation_parameters, weight_path="./test.h5");
#display(automated_df)
#manual_df = pd.read_csv("mixed_bird_manual.csv")
#statistics_df = automated_labeling_statistics(automated_df,manual_df,stats_type = "general");
#statistics_df
#global_dataset_statistics(statistics_df)