# Deep Learning Classification of fDOM Anomaly Peaks with ResNet
Using k-fold cross-validation

In [1]:
# imports
import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
import sys
from sklearn import preprocessing
from resnet import ResNet1D
from tqdm import tqdm
from sklearn.model_selection import KFold

sys.path.insert(1, "../")

from datasets import fdomDataset


In [2]:
# util functions
def reset_weights(model: nn.Module) -> None:
    """Re-initialise every resettable layer of ``model`` in place.

    Walks the whole module tree (the model itself and all nested
    sub-modules) and calls ``reset_parameters()`` on each module that
    defines it, printing the module as it is reset.

    Args:
        model: The network whose trainable parameters should be reset.
    """
    # modules() recurses through containers and nested blocks; children()
    # only yields the direct sub-modules, so layers wrapped in containers
    # (which do not define reset_parameters themselves) would be skipped.
    for layer in model.modules():
        if hasattr(layer, 'reset_parameters'):
            print(f'reset trainable params of layer = {layer}')
            layer.reset_parameters()

In [3]:
# Hyperparameters shared by the cells below.

# Size of each data segment; forwarded to fdomDataset as `window_size` and
# used to derive the model's `in_channels` (WINDOW_SIZE * 2 + 1).
WINDOW_SIZE = 15
# Seed value intended for RNG initialisation.
SEED = 42
# Mini-batch size used by the train and test DataLoaders.
BATCH_SIZE = 32
# Number of training epochs run for each fold.
EPOCHS = 2
# Number of folds (`n_splits`) for KFold cross-validation.
SPLITS = 5

In [4]:
# Paths to data files.
# All paths are relative, so they resolve against the current working
# directory. The four paths are handed to fdomDataset when the dataset is
# created.
# NOTE(review): judging by the file names these are fDOM, stage and turbidity
# time series with Julian timestamps — confirm against the dataset loader.
fdom_raw_data = (
    "../Data/converted_data/julian_format/fDOM_raw_10.1.2011-9.4.2020.csv"
)
stage_raw_data = "../Data/converted_data/julian_format/stage_10.1.11-1.1.19.csv"
turb_raw_data = (
    "../Data/converted_data/julian_format/turbidity_raw_10.1.2011_9.4.2020.csv"
)

# Labeled fDOM data (presumably the ground-truth peak labels — verify).
fdom_labeled = "../Data/labeled_data/ground_truths/fDOM/fDOM_all_julian_0k-300k.csv"


In [5]:
# Choose the compute device: the GPU when CUDA is available, the CPU otherwise.
if torch.cuda.is_available():
    device = torch.device('cuda')
else:
    device = torch.device('cpu')
print(device)

cpu


In [6]:
# Create the dataset.
# Class names used as classification targets.
classes = ["NAP", "FSK", "FPT", "PLP", "PP", "SKP"]
le = preprocessing.LabelEncoder()

# Fit the encoder on the class names; `targets` holds the integer code of
# each entry in `classes`.
# NOTE(review): LabelEncoder assigns codes by sorted label order, not by the
# order of this list — keep that in mind when reading raw class indices.
targets = le.fit_transform(classes)

# The fitted encoder, the three raw-signal paths and the labeled-data path
# are passed positionally to fdomDataset, plus the segment size.
# NOTE(review): the dataset presumably uses `le` to encode its labels —
# confirm in datasets.fdomDataset.
dataset = fdomDataset(
    le,
    fdom_raw_data,
    stage_raw_data,
    turb_raw_data,
    fdom_labeled,
    window_size=WINDOW_SIZE
)

  peaks, props = find_peaks(
  peaks, props = find_peaks(


In [7]:
# K-fold cross-validation setup.
# Seed PyTorch's RNG from the shared SEED hyperparameter instead of a
# hard-coded literal so the seed is configured in exactly one place.
torch.manual_seed(SEED)

# Maps fold index -> accuracy (in percent) measured on that fold's test split.
results = {}

# shuffle=True without random_state draws a different split on every run;
# pinning random_state makes the fold assignment reproducible.
kfold = KFold(n_splits=SPLITS, shuffle=True, random_state=SEED)

# Loss function shared by every fold.
criterion = nn.CrossEntropyLoss().to(device)

In [8]:
# K-fold training and evaluation.
# For every fold: build a fresh model, train it for EPOCHS epochs on the
# fold's training indices, save its weights, then measure accuracy on the
# fold's held-out indices. A summary over all folds is printed at the end.
import os  # local import: only needed to create the checkpoint directory

for fold, (train_ids, test_ids) in enumerate(kfold.split(dataset)):
    print(f"FOLD {fold}")

    # Both loaders wrap the full dataset; the samplers restrict each loader
    # to the indices belonging to this fold's train / test split.
    train_subsampler = torch.utils.data.SubsetRandomSampler(train_ids)
    test_subsampler = torch.utils.data.SubsetRandomSampler(test_ids)

    trainloader = torch.utils.data.DataLoader(
        dataset, batch_size=BATCH_SIZE, sampler=train_subsampler
    )

    testloader = torch.utils.data.DataLoader(
        dataset, batch_size=BATCH_SIZE, sampler=test_subsampler
    )

    # A new model per fold, so no trained weights carry over between folds.
    model = ResNet1D(
        in_channels=WINDOW_SIZE * 2 + 1,
        base_filters=64,
        kernel_size=16,
        stride=2,
        n_block=48,
        groups=1,  # check this
        n_classes=len(classes),
        downsample_gap=6,
        increasefilter_gap=12,
        verbose=False,
    ).to(device)

    model = model.float()

    # A new optimizer per fold, bound to the new model's parameters.
    optimizer = optim.Adam(model.parameters(), lr=1e-3)

    model.train()
    for epoch in range(EPOCHS):
        print(f"Starting epoch {epoch + 1}")

        current_loss = 0.0

        for i, data in enumerate(trainloader):
            x = data[0].to(device)
            # reshape(-1) instead of squeeze(): squeeze() would collapse a
            # final batch of size 1 to a 0-d tensor and break the loss call.
            # NOTE(review): assumes data[1] holds one class index per sample
            # — confirm against fdomDataset.
            y = data[1].reshape(-1).to(device)

            optimizer.zero_grad()

            pred = model(x.float())
            loss = criterion(pred, y)

            loss.backward()
            optimizer.step()

            # Report the running mean loss every 500 mini-batches.
            current_loss += loss.item()
            if i % 500 == 499:
                print("Loss after mini-batch %5d: %.3f" % (i + 1, current_loss / 500))
                current_loss = 0.0

    # completed training, now test
    print(f"Training for fold {fold} has completed, now testing")

    save_path = f"./results/models/fdom/model-fold={fold}.pth"
    # torch.save does not create missing directories; without this a fresh
    # checkout would fail here only after the whole fold has been trained.
    os.makedirs(os.path.dirname(save_path), exist_ok=True)
    torch.save(model.state_dict(), save_path)

    # Per-class and overall counters for this fold.
    correct_pred = {classname: 0 for classname in classes}
    total_pred = {classname: 0 for classname in classes}
    total, correct = 0, 0

    # Switch to inference mode so that any layers that behave differently
    # during training (e.g. batch norm or dropout, if ResNet1D uses them)
    # do not distort the evaluation.
    model.eval()
    with torch.no_grad():
        for data in testloader:
            x = data[0].to(device)
            y = data[1].reshape(-1).to(device)

            outputs = model(x.float())
            preds = outputs.argmax(dim=1)

            # Decode the whole batch at once on the CPU: the label encoder
            # works on NumPy arrays, not on (possibly CUDA) tensor scalars.
            labels = le.inverse_transform(y.cpu().numpy())
            predictions = le.inverse_transform(preds.cpu().numpy())

            for label, prediction in zip(labels, predictions):
                if label == prediction:
                    correct_pred[label] += 1
                    correct += 1
                total_pred[label] += 1
                total += 1

    # Print accuracy
    print('Accuracy for fold %d: %d %%' % (fold, 100.0 * correct / total))
    print('--------------------------------')
    results[fold] = 100.0 * (correct / total)

# Print fold results
print(f'K-FOLD CROSS VALIDATION RESULTS FOR {SPLITS} FOLDS')
print('--------------------------------')
# Use a dedicated accumulator rather than rebinding the builtin `sum`.
accuracy_total = 0.0
for key, value in results.items():
    print(f'Fold {key}: {value} %')
    accuracy_total += value
print(f'Average: {accuracy_total / len(results)} %')


FOLD 0
Starting epoch 1
Starting epoch 2
