In [1]:
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data import DataLoader
import matplotlib.pyplot as plt
from torch.utils.data import TensorDataset, DataLoader

import time 
import pandas as pd
from sklearn.preprocessing import StandardScaler
from sklearn.preprocessing import RobustScaler
from sklearn.model_selection import train_test_split 
import scipy.io as sio
import copy
import pickle
import numpy as np
from scipy.signal import find_peaks
from os.path import dirname, join as pjoin
import datetime
import csv
import math
import sys
sys.path.insert(1,'../')
import Tools.data_processing as dp
import Tools.data_movement as dm 
from auxiliary_functions import get_candidates, detect_flat_plat, outdated_detect_stage_rises

fDOM_data = dm.read_in_preprocessed_timeseries('../Data/converted_data/julian_format/fDOM_raw_10.1.2011-9.4.2020.csv')
stage_data = dm.read_in_preprocessed_timeseries('../Data/converted_data/julian_format/stage_10.1.11-1.1.19.csv')
turb_data = dm.read_in_preprocessed_timeseries('../Data/converted_data/julian_format/turbidity_raw_10.1.2011_9.4.2020.csv')
stage_data = dp.align_stage_to_fDOM(fDOM_data, stage_data)


# same approach but with stage: give window before and after single stage point with label, only takes one timeseries... 
# so can be flattened input 

In [2]:
def create_turbPP_dataset(fDOM_data, stage_data, turb_data, window_size = 15):
    
    # Get turb candidate peaks - these were the values used to get the set of turb candidate peaks
    turb_cand_params = {'prom' : [6,None],
                    'width': [None, None],
                    'wlen' : 200,
                    'dist' : 1,
                    'rel_h': .6}
    
    turb_peaks, turb_props = get_candidates(turb_data, turb_cand_params)
    turb_peaks, turb_props = dp.delete_missing_data_peaks(turb_data, turb_peaks, turb_props, '/Users/zachfogg/Desktop/DB-SRRW/Data/misc/flat_plat_ranges.txt')

    # Import ground truths for these peaks
    gt_fname_t = '/Users/zachfogg/Desktop/DB-SRRW/Data/manual_annotating_data/processed_data/turb/julian_time/turb_pp_0k-300k-2_labeled'
    with open(gt_fname_t, 'r', newline = '') as f:
        reader = csv.reader(f, delimiter = ',')
        # gt entries in form: ['timestamp_of_peak', 'value_of_peak','label_of_peak','idx_of_peak']
        next(reader)
        ground_truth = [0 if row[2] == 'NPP' else 1 for row in reader] 
        f.close()  

    # Reshape data 
    fDOM_data = fDOM_data[:,1].reshape(-1,1)
    stage_data = stage_data[:,1].reshape(-1,1)
    turb_data = turb_data[:,1].reshape(-1,1)
                                         
        
    # Use Robust scaler to scale data
    fDOM_scaler = RobustScaler().fit(fDOM_data)
    fDOM_data_scaled = fDOM_scaler.transform(fDOM_data)
    
    turb_scaler = RobustScaler().fit(turb_data)
    turb_data_scaled = turb_scaler.transform(turb_data)
    
    stage_scaler = RobustScaler().fit(stage_data)
    stage_data_scaled = stage_scaler.transform(stage_data)
    
    # Created "sequenced" data, where middle point is the peak --> sequence is (window_size * 2) + 1
    X = []
    y = []
    
    for i,peak in enumerate(turb_peaks):
        if peak - window_size > 0 and peak + window_size < len(fDOM_data): 
            sample = np.vstack((fDOM_data_scaled[peak - window_size:peak + window_size + 1].T,
                                stage_data_scaled[peak - window_size:peak + window_size + 1].T,
                                turb_data_scaled[peak - window_size:peak + window_size + 1].T)).T
            y.append(ground_truth[i])
            X.append(sample)
    
    return X, y
    # Create dataset where X is shape (num_candidates, window_size+1, 3) and Y is shape (num_candidates, 1)
    # Each X samples is a window around a candidate peak for each turb, fDOM, stage, where the center point is the peak -> (window_size+1, 3)
    # Each Y sample is 0 if peak is not PP and 1 if peak is PP 
batch_size = 8 

X,y = create_turbPP_dataset(fDOM_data, stage_data, turb_data, window_size = 30)
X = torch.Tensor(X)
y = torch.Tensor(y)
y = y.type(torch.LongTensor)
X_train, X_test, y_train, y_test = train_test_split(X,y, test_size = 0.2, random_state = 1)

# train_dataloader = DataLoader(TensorDataset(torch.Tensor(X_train),torch.Tensor(y_train)), batch_size = batch_size)
# test_dataloader = DataLoader(TensorDataset(torch.Tensor(X_test),torch.Tensor(y_test)), batch_size = batch_size)
train_dataloader = DataLoader(TensorDataset(X_train,y_train), batch_size = batch_size)
test_dataloader = DataLoader(TensorDataset(X_test,y_test), batch_size = batch_size)

for X, y in train_dataloader: 
    print(X.shape)
    break

torch.Size([8, 61, 3])


In [6]:
x = 50
y = 75
# print(stage_data[x:y,1])
stage_data1 = stage_data[:,1].reshape(-1,1)
stage_scaler = RobustScaler().fit(stage_data1)
stage_data_scaled = stage_scaler.transform(stage_data1)
# print(stage_data_scaled[x:y])
print(max(stage_data[:,1]))
print(max(stage_data_scaled))

turb_data1 = turb_data[:,1].reshape(-1,1)
turb_scaler = RobustScaler().fit(turb_data1)
turb_data_scaled = turb_scaler.transform(turb_data1)
# print(stage_data_scaled[x:y])
print(max(turb_data[:,1]))
print(max(turb_data_scaled))


1.566
[8.79020267]
10428.23
[8839.98657214]


In [9]:
device = "cuda" if torch.cuda.is_available() else "cpu"

class FCN(nn.Module):
    """
    Convolutional Neural Network that is comprised of three convolutional layers
    No dimensionality reduction until the pooling layer
    Softmax ouput for binary output 
    """
    def __init__(self, num_classes, features):
        super(FCN, self).__init__()
        in_channels = 30 * 2 + 1
        self.conv1 = nn.Conv1d(in_channels = in_channels, out_channels = 128, kernel_size = 8, padding = "same")
        self.batchNorm1 = nn.BatchNorm1d(128)
        self.relu1 = nn.ReLU()
        
        self.conv2 = nn.Conv1d(in_channels = 128 , out_channels = 256 , kernel_size = 5, padding = "same")
        self.batchNorm2 = nn.BatchNorm1d(256)
        self.relu2 = nn.ReLU()
        
        self.conv3 = nn.Conv1d(in_channels = 256, out_channels = 128, kernel_size = 3, padding = "same")
        self.batchNorm3 = nn.BatchNorm1d(128)
        self.relu3 = nn.ReLU()
        
        self.pool = nn.AvgPool1d(kernel_size = 3)
        
        self.fc = nn.Linear(in_features = 128, out_features = num_classes)
        self.softmax = nn.Softmax(dim = 1)
        
        self.flatten = nn.Flatten()
        self.fc2 = nn.Linear(in_features = 128, out_features = 128)

    def forward(self, x):
        x = self.conv1(x)
#         print("C1 ", x.shape)
        x = self.batchNorm1(x)
        x = self.relu1(x)
        
        x = self.conv2(x)
#         print("C2 ", x.shape)        
        x = self.batchNorm2(x)
        x = self.relu2(x)
        
        x = self.conv3(x)
#         print("C3 ", x.shape)  
        x = self.batchNorm3(x)
        x = self.relu3(x)

        x = self.pool(x)
#         print("P ", x.shape)
        x = self.flatten(x)
#         print("F ", x.shape)
        x = self.fc2(x)
        x = self.fc(x)
#         print(x.shape)
#         print(x)
        logits = self.softmax(x)
#         print(logits.shape)
#         print(logits)
        return logits
    
# class FCN(nn.Module):
#     def __init__(self, window_size, features):
#         super(FCN, self).__init__()
# #         self.conv1 = nn.Conv1d(in_channels = )
#         self.flatten = nn.Flatten()
#         self.linear_relu_stack = nn.Sequential(
#             nn.Linear(window_size * features, 256),
#             nn.ReLU(),
#             nn.Linear(256, 128),
#             nn.ReLU(),
#             nn.Linear(128,1)
#         )
#     def forward(self, x):
#         x = self.flatten(x)
#         logits = self.linear_relu_stack(x)
#         return logits

def train(dataLoader, model, loss_fn, optimizer):
    size = len(dataLoader.dataset)
    model.train()
    for batch, (X,y) in enumerate(dataLoader):
        X, y = X.to(device), y.to(device)
        
        # Compute prediction error 
        pred = model(X)
#         loss = loss_fn(pred,y.unsqueeze(1))

        loss = loss_fn(pred,y)
        # Backpropagation
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        
#         if batch % 10 == 0:
#             loss, current = loss.item(), batch * len(X)
#             print(f"loss: {loss:>7f}  [{current:>5d}/{size:>5d}]")
            
def test(dataLoader, model, loss_fn):
    size = len(dataLoader.dataset)
    num_batches = len(dataLoader)
    model.eval()
    test_loss, correct = 0,0
    
    with torch.no_grad():
        for X,y in dataLoader:
            X, y = X.to(device), y.to(device)
            pred = model(X)
            test_loss += loss_fn(pred,y).item()
            correct += (pred.argmax(1) == y).type(torch.float).sum().item()
            
        test_loss /= num_batches 
        correct /= size 
        print(f"Test Error: \n Accuracy: {(100*correct):>0.1f}%, Avg loss: {test_loss:>8f} \n")

In [12]:
num_classes = 2
num_channels = 3
model = FCN(num_classes,num_channels).to(device)
loss_fn = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=1e-3)

epochs = 50
for t in range(epochs):
    print(f"Epoch {t+1}\n-------------------------------")
    train(train_dataloader, model, loss_fn, optimizer)
    test(test_dataloader, model, loss_fn)
print("Done")

Epoch 1
-------------------------------
Test Error: 
 Accuracy: 64.8%, Avg loss: 0.623195 

Epoch 2
-------------------------------
Test Error: 
 Accuracy: 67.6%, Avg loss: 0.595769 

Epoch 3
-------------------------------
Test Error: 
 Accuracy: 63.3%, Avg loss: 0.659419 

Epoch 4
-------------------------------
Test Error: 
 Accuracy: 61.0%, Avg loss: 0.676276 

Epoch 5
-------------------------------
Test Error: 
 Accuracy: 63.8%, Avg loss: 0.636171 

Epoch 6
-------------------------------
Test Error: 
 Accuracy: 62.4%, Avg loss: 0.659903 

Epoch 7
-------------------------------
Test Error: 
 Accuracy: 71.9%, Avg loss: 0.569327 

Epoch 8
-------------------------------
Test Error: 
 Accuracy: 67.1%, Avg loss: 0.614442 

Epoch 9
-------------------------------
Test Error: 
 Accuracy: 68.6%, Avg loss: 0.602227 

Epoch 10
-------------------------------
Test Error: 
 Accuracy: 63.3%, Avg loss: 0.660541 

Epoch 11
-------------------------------
Test Error: 
 Accuracy: 66.2%, Avg los