# Problem statement and data generation

For each class, there is one pattern occurring in the data. Solving just the classification task will be scored no higher than 70%.

Please provide either a PDF report or add some notes in the notebook. Demonstrate some examples of how your approach works.

A short reminder of your task:
 - Correct classification of provided time series
 - Explanation of the prediction

In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

In [None]:
# Seed NumPy's global RNG so that every later np.random draw in this notebook
# (and therefore the generated dataset) is reproducible between runs.
np.random.seed(42)

In [None]:
# Number of synthetic time series to generate.
n = 50_000

In [None]:
def createRow(n, classes):
    """Generate one noisy 3-channel sinusoidal series with optional patterns.

    Each channel is a sine evaluated on ``n`` evenly spaced points between a
    random start phase in [0, 1) and a random end phase offset by 10, 15 and 7
    for channels 0, 1 and 2 respectively. For every positive entry of
    ``classes`` one pattern is injected at a random position:

    * ``classes[0]``: single-sample spike of +2 on channel 0
    * ``classes[1]``: single-sample dip of -2 on channel 1
    * ``classes[2]``: 4 consecutive samples of channel 2 set to 0
    * ``classes[3]``: 8 consecutive samples of channel 1 raised by 1.5
    * ``classes[4]``: 6 consecutive samples with channel 0 raised by 1.5 and
      channel 2 lowered by 1.5

    Uniform noise in [0, 0.2) is added to every sample afterwards.

    Args:
        n: Number of time steps in the series.
        classes: Indexable of 5 flags; a value ``> 0`` enables that pattern.

    Returns:
        ``numpy.ndarray`` of shape ``(n, 3)``.
    """
    # Draw order (start phases, then end phases) matters for reproducibility
    # under a fixed seed.
    phase_start = np.random.rand(3)
    phase_end = np.random.rand(3) + np.array([10, 15, 7])
    signal = np.sin(np.linspace(phase_start, phase_end, n))

    if classes[0] > 0:
        spike_at = np.random.randint(0, n)
        signal[spike_at, 0] += 2

    if classes[1] > 0:
        dip_at = np.random.randint(0, n)
        signal[dip_at, 1] -= 2

    if classes[2] > 0:
        begin = np.random.randint(0, n - 5)
        signal[begin:begin + 4, 2] = 0

    if classes[3] > 0:
        begin = np.random.randint(0, n - 10)
        signal[begin:begin + 8, 1] += 1.5

    if classes[4] > 0:
        begin = np.random.randint(0, n - 7)
        window = slice(begin, begin + 6)
        signal[window, 0] += 1.5
        signal[window, 2] -= 1.5

    # Additive noise is applied last so it also covers the injected patterns.
    signal += np.random.rand(*signal.shape) * .2
    return signal

In [None]:
# Build the dataset: xl collects the generated series, yl the matching
# 5-element boolean label vectors (each class is switched on with p = 0.25).
xl, yl = [], []
for _ in range(n):
    cl = np.random.rand(5) < .25
    # The series length is drawn after the labels and before the series
    # itself; this order must be kept for a seeded run to stay reproducible.
    series_len = np.random.randint(40, 60)
    series = createRow(series_len, cl)
    xl.append(series)
    yl.append(cl)
    

In [None]:
#for i in range(3):
#    plt.plot(xl[i])
#    print(yl[i])
#    plt.show()

---

In [None]:
from typing import List

import torch
import torch.nn as nn

from src.data import *
from src.metrics import *
from src.model import *

In [None]:
# Pick the compute device: CUDA when PyTorch reports it as available, else CPU.
has_cuda = torch.cuda.is_available()
print(f"Is cuda available:\t{str(has_cuda).lower()}")
device = torch.device('cuda') if has_cuda else torch.device('cpu')
print(f"Selected device:\t{device}")

# Dataset and dataloaders

| Set   |   %   |
|---    |---    |
| Train |   60  |
| Eval  |   20  |
| Test  |   20  | 

In [None]:
# Wrap the generated series/labels and split them in two steps: first carve
# off the training part, then halve the remainder into eval and test.
# NOTE(review): assumes dataset_split returns (first part, remainder) with
# train_size as a fraction, i.e. 60/20/20 overall -- confirm in src.data.
dataset = Dataset(xl, yl)
train, holdout = dataset_split(dataset, train_size=0.6)
eval, test = dataset_split(holdout, train_size=0.5)

In [None]:
# One DataLoader per split, each yielding a single sample per batch.
train_dataloader, eval_dataloader, test_dataloader = [
    torch.utils.data.DataLoader(dataset=split, batch_size=1)
    for split in (train, eval, test)
]

# Model

In [None]:
# Instantiate the network and place it on the selected device.
# NOTE(review): assumes Encoder is a torch.nn.Module, whose .to() returns the
# module itself -- confirm in src.model.
model = Encoder().to(device)

In [None]:
# Training configuration: upper bound on epochs, binary cross-entropy as the
# loss, and AdamW over all model parameters.
epochs = 10
criterion = nn.BCELoss()
optimizer = torch.optim.AdamW(
    params=model.parameters(),
    lr=1e-3,
    weight_decay=1e-4,
)

In [None]:
# Train for at most `epochs` epochs. After every epoch the model is passed to
# serialize(), scored on the eval split, and training stops early once the
# eval F1 has stopped improving.
eval_f1 = []
for e in range(epochs):
    model.train()
    ls = []
    for x, y in train_dataloader:
        optimizer.zero_grad()
        x = x.to(device)
        y = y.to(device)
        y_hat = model(x)
        loss = criterion(y_hat, y)
        loss.backward()
        optimizer.step()
        # Keep a plain Python float: appending the loss tensor itself would
        # hold on to every batch's autograd history (and its device memory)
        # until the end of the epoch.
        ls.append(loss.item())
    print(f"Epoch: {e}")
    print(f"[train] Binary Cross Entropy: {sum(ls) / len(ls):.4f}")

    model.eval()
    # NOTE(review): serialize() presumably checkpoints the model for epoch e;
    # confirm against its definition.
    serialize(model, e)
    with torch.no_grad():
        y_true, y_pred = [], []
        for x, y in eval_dataloader:
            x = x.to(device)
            y_hat = model(x).cpu()
            y_true.append(y)
            y_pred.append(y_hat)

        # Score the whole eval split at once.
        y_true = torch.cat(y_true, dim=0)
        y_pred = torch.cat(y_pred, dim=0)
        (loss, acc, prec, rec, f1) = compute_metrics(y_true, y_pred)
        eval_f1.append(f1)

        print(f"[eval] Binary Cross Entropy: {loss:.4f}, Accuracy: {acc:.4f}, Precision: {prec:.4f}, Recall: {rec:.4f}")
        # Early stopping: from the fifth epoch on, stop when each of the three
        # preceding epochs had an eval F1 at least as high as the current one.
        if e >= 4 and all(np.array(eval_f1[-4:-1]) >= eval_f1[-1]):
            print("Early stopping")
            break

In [None]:
# Print the true labels next to the thresholded predictions (probability > 0.5)
# for the first 10 test batches.
# Switch to eval mode and disable gradient tracking explicitly so this cell
# gives inference-mode results without building autograd graphs, regardless
# of the state an earlier cell left the model in.
model.eval()
with torch.no_grad():
    for i, (x, y) in enumerate(test_dataloader):
        # Check the limit first so no forward pass is wasted on the 11th batch.
        if i == 10:
            break
        y_hat = model(x.to(device))
        print(y.cpu().numpy(), (y_hat > 0.5).cpu().int())