In [1]:
import numpy as np
np.random.seed(1)
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.preprocessing import MinMaxScaler
from sklearn.utils import class_weight as clw
from tqdm.notebook import tqdm
import matplotlib.pyplot as plt

%matplotlib inline

%load_ext autoreload
%autoreload 2

# Data Preprocessing

In [2]:
df=pd.read_pickle("data/train_data.pkl", compression='gzip')
example = df.loc[[0]][1396:1406]

In [3]:
df.head()

Unnamed: 0_level_0,Unnamed: 1_level_0,timestamp,raw_acc:magnitude_stats:mean,raw_acc:magnitude_stats:std,raw_acc:magnitude_stats:moment3,raw_acc:magnitude_stats:moment4,raw_acc:magnitude_stats:percentile25,raw_acc:magnitude_stats:percentile50,raw_acc:magnitude_stats:percentile75,raw_acc:magnitude_stats:value_entropy,raw_acc:magnitude_stats:time_entropy,...,discrete:time_of_day:between9and15,discrete:time_of_day:between12and18,discrete:time_of_day:between15and21,discrete:time_of_day:between18and24,discrete:time_of_day:between21and3,label:LYING_DOWN,label:SITTING,label:FIX_walking,label:TALKING,label:OR_standing
ID,Time,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1
0,1,1444756418,0.993676,0.003359,0.002244,0.006066,0.992219,0.993594,0.995141,1.831648,6.684606,...,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0
0,2,1444756478,0.993416,0.004462,-0.00426,0.009438,0.99173,0.993517,0.995347,1.475749,6.684602,...,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0
0,3,1444756538,0.993398,0.005047,0.006932,0.015842,0.991777,0.993346,0.99508,0.877374,6.684599,...,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0
0,4,1444756598,0.993372,0.005542,-0.006457,0.018952,0.991942,0.993357,0.994779,0.835945,6.684596,...,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0
0,5,1444756658,0.996412,0.065061,0.135599,0.235594,0.990989,0.993436,0.996219,0.894449,6.682735,...,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0


In [4]:
# create train, test, val splits with separated timestamps, sensor readings and action labels
df_train = df.loc[list(range(11))+list(range(12,21))]
ts_train, X_train, y_train = df_train.iloc[:,0], df_train.iloc[:,1:226], df_train.iloc[:,226:]
df_test = df.loc[[11]+list(range(21,23))]
ts_test, X_test, y_test = df_test.iloc[:,0], df_test.iloc[:,1:226], df_test.iloc[:,226:]
df_val = df.loc[list(range(23,26))]
ts_val, X_val, y_val = df_val.iloc[:,0], df_val.iloc[:,1:226], df_val.iloc[:,226:]

In [5]:
def stats(arr : np.ndarray):
    print("Shape: ", arr.shape)
    print("DType: ", arr.dtype)
    print("NaNcount: ", np.count_nonzero(np.isnan(arr)))
    
def to_numpy(ts, xx, yy, msk, scaler):
    t = []
    x = []
    y = []
    m = []
    
    for i,_ in ts.groupby(level=0):
        # timestamps
        t.append(ts.loc[i].values)
        # features
        x.append(scaler.transform(xx.loc[i].values))
        # labels
        sample_df = yy.loc[i].loc[:,'label:LYING_DOWN':'label:OR_standing']
        sample_df.interpolate(method='nearest', axis=0, limit_direction='both', inplace=True)
        sample_df.fillna(method='ffill', axis=0, inplace=True)
        sample_df.fillna(method='bfill', axis=0, inplace=True)
        sample_df.fillna(0., axis=0, inplace=True)
        y.append(sample_df.values)
        # masks
        m.append(msk.loc[i].values)
        
    return (t, x, y, m)

In [6]:
# calculate the missing mask
missing_mask_train = X_train.isna().astype(int)
missing_mask_test = X_test.isna().astype(int)
missing_mask_val = X_val.isna().astype(int)

# compute the means and variances of the training set and save for later
scaler = MinMaxScaler((-1,1))
scaler.fit(X_train.values)

# fill missing features with column means
X_train.fillna(X_train.mean(), inplace=True)
X_val.fillna(X_train.mean(), inplace=True)
X_test.fillna(X_train.mean(), inplace=True)

In [29]:
import pickle
with open('mean.pkl', 'wb') as f:
    pickle.dump(X_train.mean(), f)

In [20]:
from sklearn.externals import joblib 
joblib.dump(scaler, 'scaler.pkl') 



['scaler.pkl']

In [22]:
ts_tr, X_tr, y_tr, m_tr = to_numpy(ts_train, X_train, y_train, missing_mask_train, scaler)
ts_va, X_va, y_va, m_va = to_numpy(ts_val, X_val, y_val, missing_mask_val, scaler)
ts_te, X_te, y_te, m_te = to_numpy(ts_test, X_test, y_test, missing_mask_test, scaler)

In [28]:
X_te[0]

array([[-0.71599877, -0.95000505, -0.63030798, -0.89462349,  0.42149384,
        -0.48290535, -0.8231565 , -0.85087855,  0.99665714,  0.26852568,
        -0.99967272, -0.99891594, -0.99911275, -0.98235356, -0.99044966,
        -0.99057786, -0.23234863, -0.05095584, -0.03422228,  1.00332665,
        -0.9841299 , -0.98763938, -0.9343293 ,  0.25314969,  0.75340846,
        -0.22363464, -0.98066595, -0.96655997, -0.78196558, -0.94258484,
        -0.97516858, -0.98009119, -0.99260628, -0.89829374,  0.86795662,
        -0.03364608, -0.008911  , -0.05586188, -0.04890962, -0.08970991,
        -0.78410967, -0.76401906, -0.94072851,  0.07036987,  0.22953433,
         0.27742445, -0.95895354, -0.97432833, -0.9992111 , -0.57235793,
        -0.46893167, -0.04140756, -0.87584145, -0.99477729, -0.1234155 ,
        -0.99431807, -0.87694889, -0.87678137, -0.87376594,  0.24506248,
         0.99980892, -0.029258  , -0.99823022, -0.99816503, -0.99991095,
        -0.99946917, -0.80196392, -1.        , -1. 

In [8]:
def create_examples(timestamps, Xs, ys, masks, seq_len=30):
    X = []
    X_y = []
    Y = []
    Y_x = []
    for user in range(len(Xs)): #, desc='User: '):
        print("Processing User {}".format(user+1))
        ts_user = timestamps[user]
        X_user = Xs[user]
        y_user = ys[user]
        msk_user = masks[user]
        n_train = 0
        for i in range(X_user.shape[0]): #, desc='Batch: '):
            end_ix = i+np.random.choice(np.arange(1,seq_len+1))
#             end_ix = i+seq_len
            if end_ix > Xs[user].shape[0]-1:
                break
            xx = X_user[i:end_ix, :] # input features
            msk = msk_user[i:end_ix, :] # input masks
            msk_comp = np.where(msk==0, 1, -1)
            xy = y_user[i:end_ix] # input labels
            
            # computation for delta_t
            ts = ts_user[i:end_ix]
            S = np.ones_like(msk) * ts.reshape(-1,1)
            delta_t = np.zeros_like(msk) # input delta times
            for t in range(1,delta_t.shape[0]):
                delta_t[t,:] = S[t,:] - S[t-1,:] + msk[t-1,:]*delta_t[t-1,:]
            
            # find the last target within 1 hr from now
            y_ix = end_ix
            last_ts = ts_user[end_ix-1]
            choose_y_ix = []
            while (y_ix < Xs[user].shape[0]) and (ts_user[y_ix]-last_ts <= 3600):
                choose_y_ix.append(y_ix)
                y_ix += 1
                
            if (y_ix == end_ix): # no targets within 1 hr
                continue
#             y = y_user[end_ix:y_ix]
#             yx = np.ones(msk.shape[1])*ts_user[end_ix:y_ix][:,None] - S[-1,:] + msk[-1,:]*delta_t[-1,:]  
            y_ix = np.random.choice(choose_y_ix) # pick a random target within 1 hr
            y = y_user[y_ix] # target label
            yx = np.ones(msk.shape[1])*ts_user[y_ix] - S[-1,:] + msk[-1,:]*delta_t[-1,:] # target delta time
            Y.append(y.astype(np.int64))
            Y_x.append(yx/3600.)
            X.append(np.hstack((xx, msk_comp, delta_t/3600.)))
            X_y.append(xy.astype(np.int64))
                
            n_train += 1
            
        print("Added {} examples from User {}".format(n_train, user+1))

    return (X, X_y, np.array(Y), np.array(Y_x))
        

In [9]:
#Run the data generator
tr_X, tr_X_y, tr_Y, tr_Y_x = create_examples(ts_tr, X_tr, y_tr, m_tr)
# np.savez_compressed('data/train_multi_one.npz', tr_X=tr_X, tr_X_y=tr_X_y, tr_Y=tr_Y, tr_Y_x=tr_Y_x)
val_X, val_X_y, val_Y, val_Y_x = create_examples(ts_va, X_va, y_va, m_va)
# np.savez_compressed('data/val_multi_one.npz', val_X=val_X, val_X_y=val_X_y, val_Y=val_Y, val_Y_x=val_Y_x)
test_X, test_X_y, test_Y, test_Y_x = create_examples(ts_te, X_te, y_te, m_te)
# np.savez_compressed('data/test_multi_one.npz', test_X=test_X, test_X_y=test_X_y, test_Y=test_Y, test_Y_x=test_Y_x)

Processing User 1
Added 1858 examples from User 1
Processing User 2
Added 3917 examples from User 2
Processing User 3
Added 8236 examples from User 3
Processing User 4
Added 1907 examples from User 4
Processing User 5
Added 1909 examples from User 5
Processing User 6
Added 1946 examples from User 6
Processing User 7
Added 241 examples from User 7
Processing User 8
Added 6473 examples from User 8
Processing User 9
Added 1591 examples from User 9
Processing User 10
Added 3952 examples from User 10
Processing User 11
Added 5767 examples from User 11
Processing User 12
Added 4349 examples from User 12
Processing User 13
Added 6286 examples from User 13
Processing User 14
Added 704 examples from User 14
Processing User 15
Added 4994 examples from User 15
Processing User 16
Added 1772 examples from User 16
Processing User 17
Added 2763 examples from User 17
Processing User 18
Added 7479 examples from User 18
Processing User 19
Added 6755 examples from User 19
Processing User 20
Added 3053 ex

In [12]:
# show number of training examples in sequence length between 1 and 30
slens = []
for i,yy in enumerate(tr_X):
    slens.append(yy.shape[0])
slens = np.array(slens)
hist, bin_edges = np.histogram(slens, bins=30)
print(hist)

[2527 2614 2515 2616 2519 2582 2464 2544 2510 2514 2499 2492 2555 2551
 2508 2573 2583 2510 2502 2568 2582 2517 2537 2576 2490 2575 2456 2465
 2460 2582]


In [47]:
print(tr_Y.shape, val_Y.shape, test_Y.shape)

(75986, 5) (14587, 5) (6784, 5)


In [None]:
# To load the dataset from file
# train = np.load('data/train_multi_one.npz')
# val = np.load('data/val_multi_one.npz')
# test = np.load('data/test_multi_one.npz')
# tr_X, tr_X_y, tr_Y, tr_Y_x = train['tr_X'], train['tr_X_y'], train['tr_Y'], train['tr_Y_x']
# val_X, val_X_y, val_Y, val_Y_x = val['val_X'], val['val_X_y'], val['val_Y'], val['val_Y_x']
# test_X, test_X_y, test_Y, test_Y_x = test['test_X'], test['test_X_y'], test['test_Y'], test['test_Y_x']

In [30]:
# Persistence baseline
from sklearn.metrics import f1_score
preds = [xy[-1,:] for xy in val_X_y]
preds = np.array(preds)
f_scores = []
for i in range(5):
    f_scores.append(f1_score(val_Y[:,i], preds[:,i], average='micro'))
print(f_scores)

# fixed-one [0.9581417690249091, 0.8230288890413779, 0.9139504563233377, 0.9490153022713237, 0.9035888286557333]
# multi-one [0.9583876054020704, 0.8259409062864194, 0.9167066566120519, 0.9518064029615411, 0.9021046136971277]

[0.9583876054020704, 0.8259409062864194, 0.9167066566120519, 0.9518064029615411, 0.9021046136971277]


# Models

In [10]:
import sys
sys.path.append("code/")
# import warnings
# warnings.filterwarnings("ignore")
import torch
import time
from torch.utils.data import DataLoader, TensorDataset
import torch.optim as optim
import torch.nn.functional as F
import torch.nn as nn
from sklearn.metrics import f1_score
torch.manual_seed(1)
torch.set_default_dtype(torch.float64)
dtype = torch.float64
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")


### Utility functions for neural nets

In [30]:
def fit(loader_train, loader_val, loader_test, model, model_name, optimizer, epochs=5):

    for e in tqdm(range(epochs), desc='Epochs: '):
        for t, (x, x_y, y_x, y) in enumerate(tqdm(loader_train, desc='Iterations: ', leave=True)):
            model.train()
            
            x = x.to(device=device, dtype=dtype)
            x_y = x_y.to(device=device, dtype=torch.long)
            y_x = y_x.to(device=device, dtype=dtype)
            y = y.to(device=device, dtype=dtype)

            preds = model(x, x_y, y_x)
            loss = F.binary_cross_entropy(preds, y)
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
            
        val_fscore = f_score(loader_val, model)
        print('Epoch {0}, loss = {1:.4f}, avg_val_fscore = {2:.4f}'.format(e, loss.item(), val_fscore))
    final_test_fscore = f_score(loader_test, model)
    print('Final test fscore: {0:.4f}'.format(final_test_fscore))
    model = model.to(device=torch.device('cpu'))
    torch.save(model.state_dict(), 'code/best_model_{}.pth'.format(model_name))
    return final_test_fscore

def f_score(loader_val, model, average='micro'):

    model.eval()  # set model to evaluation mode
    true = []
    pred = []
    with torch.no_grad():
        for x, x_y, y_x, y in loader_val:
            x = x.to(device=device, dtype=dtype)
            x_y = x_y.to(device=device, dtype=torch.long)
            y_x = y_x.to(device=device, dtype=dtype)
            y = y.to(device=device, dtype=torch.long)
            preds = model(x, x_y, y_x)
            preds = preds > 0.5
            true.append(y.cpu().numpy())
            pred.append(preds.cpu().numpy())
    
    true = np.concatenate(true, axis=0)
    pred = np.concatenate(pred, axis=0)

    f_scores = []
    for i in range(5):
        f_scores.append(f1_score(true[:,i], pred[:,i], average=average))
    print('Per class f1_score: ', f_scores)
    return (np.mean(f_scores))

### Custom sampler and dataloader for variable length sequences

In [13]:
from torch.utils.data import Sampler, Dataset
from collections import OrderedDict
import random
random.seed(1)
# train_data = TensorDataset(torch.from_numpy(tr_X[:,:,:]).float(),
#                           torch.from_numpy(tr_X_y[:,:,:]).long(),
#                           torch.from_numpy(tr_Y_x).float(),
#                           torch.from_numpy(tr_Y).float())

# val_data = TensorDataset(torch.from_numpy(val_X[:,:,:]).float(),
#                           torch.from_numpy(val_X_y[:,:,:]).long(),
#                           torch.from_numpy(val_Y_x).float(),
#                           torch.from_numpy(val_Y).long())


# loader_train = DataLoader(train_data, batch_size=64, shuffle=True, num_workers=4)
# loader_val = DataLoader(val_data, batch_size=64, shuffle=False, num_workers=4)
class ActivityDataset(Dataset):
    """Custom dataset for variable length inputs"""
    def __init__(self, X, X_y, Y_x, Y):
        self.X = X # (75k, 1-30, 675)
        self.X_y = X_y # (75k, 1-30, 5)
        self.Y_x = Y_x # (75k, 675)
        self.Y = Y # (75k, 5)

    def __len__(self):
        return len(self.Y)

    def __getitem__(self, index):
        return (torch.from_numpy(self.X[index]).double(), 
                torch.from_numpy(self.X_y[index]).long(), 
                torch.from_numpy(self.Y_x[index]).double(), 
                torch.from_numpy(self.Y[index]).long())


class BucketBatchSampler(Sampler):
    """Custom sampler to batch equal length inputs"""
    def __init__(self, X_y, train=True, batch_size=64):
        self.batch_size = batch_size
        self.train = train
        ind_n_len = []
        for i, xy in enumerate(X_y):
            ind_n_len.append((i, xy.shape[0]))
        self.ind_n_len = ind_n_len
        self.batch_list = self._generate_batch_map()
        self.num_batches = len(self.batch_list)

    def _generate_batch_map(self):
        # shuffle all of the indices first so they are put into buckets differently
        if self.train:
            random.shuffle(self.ind_n_len)
        # Organize lengths, e.g., batch_map[10] = [30, 124, 203, ...] <= indices of sequences of length 10
        batch_map = OrderedDict()
        for idx, length in self.ind_n_len:
            if length not in batch_map:
                batch_map[length] = [idx]
            else:
                batch_map[length].append(idx)
        # Use batch_map to split indices into batches of equal size
        # e.g., for batch_size=3, batch_list = [[23,45,47], [49,50,62], [63,65,66], ...]
        batch_list = []
        for length, indices in batch_map.items():
            for group in [indices[i:(i + self.batch_size)] for i in range(0, len(indices), self.batch_size)]:
                batch_list.append(group)
        return batch_list

    def batch_count(self):
        return self.num_batches

    def __len__(self):
        return len(self.ind_n_len)

    def __iter__(self):
        self.batch_list = self._generate_batch_map()
        # shuffle all the batches so they arent ordered by bucket size
        if self.train:
            random.shuffle(self.batch_list)
        for ix in self.batch_list:
            yield ix
            

train_batch_sampler = BucketBatchSampler(tr_X_y, train=True, batch_size=64)
train_dataset = ActivityDataset(tr_X, tr_X_y, tr_Y_x, tr_Y)
loader_train = DataLoader(train_dataset, batch_size=1, batch_sampler=train_batch_sampler, shuffle=False, num_workers=4, drop_last=False)

val_batch_sampler = BucketBatchSampler(val_X_y, train=False, batch_size=64)
val_dataset = ActivityDataset(val_X, val_X_y, val_Y_x, val_Y)
loader_val = DataLoader(val_dataset, batch_size=1, batch_sampler=val_batch_sampler, shuffle=False, num_workers=4, drop_last=False)

test_batch_sampler = BucketBatchSampler(test_X_y, train=False, batch_size=64)
test_dataset = ActivityDataset(test_X, test_X_y, test_Y_x, test_Y)
loader_test = DataLoader(test_dataset, batch_size=1, batch_sampler=test_batch_sampler, shuffle=False, num_workers=4, drop_last=False)



## Logistic Regression

In [14]:
from sklearn.linear_model import LogisticRegression
# data_preprocess
xx_tr = []
xx_val = []
xx_test = []
for t, (x, x_y, y_x, y) in enumerate(loader_train):
    xx_tr.append(x.mean(dim=1).numpy())
for t, (x, x_y, y_x, y) in enumerate(loader_val):
    xx_val.append(x.mean(dim=1).numpy())
for t, (x, x_y, y_x, y) in enumerate(loader_test):
    xx_test.append(x.mean(dim=1).numpy())
xx_tr = np.concatenate((np.concatenate(xx_tr, axis=0), tr_Y_x), axis=1)
xx_val = np.concatenate((np.concatenate(xx_val, axis=0), val_Y_x), axis=1)
xx_test = np.concatenate((np.concatenate(xx_test, axis=0), test_Y_x), axis=1)

In [17]:
# lr1 = LogisticRegression('l1', random_state=1, verbose=1, solver='saga', n_jobs=8)
# lr1.fit(xx_tr, tr_Y[:,0])
lr2 = LogisticRegression('l1', random_state=1, verbose=1, solver='saga', n_jobs=8)
lr2.fit(xx_tr, tr_Y[:,1])
lr3 = LogisticRegression('l1', random_state=1, verbose=1, solver='saga', n_jobs=8)
lr3.fit(xx_tr, tr_Y[:,2])
lr4 = LogisticRegression('l1', random_state=1, verbose=1, solver='saga', n_jobs=8)
lr4.fit(xx_tr, tr_Y[:,3])
lr5 = LogisticRegression('l1', random_state=1, verbose=1, solver='saga', n_jobs=8)
lr5.fit(xx_tr, tr_Y[:,4])

[Parallel(n_jobs=8)]: Using backend ThreadingBackend with 8 concurrent workers.


max_iter reached after 141 seconds


[Parallel(n_jobs=8)]: Done   1 out of   1 | elapsed:  2.4min finished
[Parallel(n_jobs=8)]: Using backend ThreadingBackend with 8 concurrent workers.


max_iter reached after 139 seconds


[Parallel(n_jobs=8)]: Done   1 out of   1 | elapsed:  2.3min finished
[Parallel(n_jobs=8)]: Using backend ThreadingBackend with 8 concurrent workers.


max_iter reached after 141 seconds


[Parallel(n_jobs=8)]: Done   1 out of   1 | elapsed:  2.4min finished
[Parallel(n_jobs=8)]: Using backend ThreadingBackend with 8 concurrent workers.


max_iter reached after 138 seconds


[Parallel(n_jobs=8)]: Done   1 out of   1 | elapsed:  2.3min finished


LogisticRegression(C=1.0, class_weight=None, dual=False, fit_intercept=True,
                   intercept_scaling=1, l1_ratio=None, max_iter=100,
                   multi_class='warn', n_jobs=8, penalty='l1', random_state=1,
                   solver='saga', tol=0.0001, verbose=1, warm_start=False)

In [18]:
y_pred1 = lr1.predict(xx_val)
y_pred2 = lr2.predict(xx_val)
y_pred3 = lr3.predict(xx_val)
y_pred4 = lr4.predict(xx_val)
y_pred5 = lr5.predict(xx_val)
fscore = [f1_score(val_Y[:,0], y_pred1, average='micro'),
          f1_score(val_Y[:,1], y_pred2, average='micro'),
          f1_score(val_Y[:,2], y_pred3, average='micro'),
          f1_score(val_Y[:,3], y_pred4, average='micro'),
          f1_score(val_Y[:,4], y_pred5, average='micro')]
print(fscore)
print(np.mean(fscore))

[0.6992460589444826, 0.5190541466758053, 0.9315284441398218, 0.921932830705963, 0.8919808087731322]
0.792748457847841


(75986, 225)

## Deep Averaging Network

In [45]:
from DAN import DAN
model = DAN(d_emb=32, hidden_sizes=[512, 128], dropout=0.25)
if torch.cuda.device_count() > 1:
    model = nn.DataParallel(model)
model = model.to(device=device)
optimizer = optim.Adam(model.parameters(), lr=0.0001, weight_decay=0.0001)
test_fscore = fit(loader_train, loader_val, loader_test, model, 'dan', optimizer, epochs=5)

HBox(children=(FloatProgress(value=0.0, description='Epochs: ', max=5.0, style=ProgressStyle(description_width…

HBox(children=(FloatProgress(value=0.0, description='Iterations: ', max=75986.0, style=ProgressStyle(descripti…


Per class f1_score:  [0.9517378487694522, 0.8036607938575444, 0.9290464111880441, 0.9446767669843011, 0.9085487077534792]
Epoch 0, loss = 0.3036, avg_val_fscore = 0.9075


HBox(children=(FloatProgress(value=0.0, description='Iterations: ', max=75986.0, style=ProgressStyle(descripti…


Per class f1_score:  [0.9544800164530061, 0.7955713991910605, 0.9302803866456434, 0.945156646328923, 0.908617261945568]
Epoch 1, loss = 0.2218, avg_val_fscore = 0.9068


HBox(children=(FloatProgress(value=0.0, description='Iterations: ', max=75986.0, style=ProgressStyle(descripti…


Per class f1_score:  [0.9542057996846507, 0.8057174196202098, 0.9307602659902653, 0.9448138753684788, 0.9088229245218346]
Epoch 2, loss = 0.1617, avg_val_fscore = 0.9089


HBox(children=(FloatProgress(value=0.0, description='Iterations: ', max=75986.0, style=ProgressStyle(descripti…


Per class f1_score:  [0.9537944745321176, 0.8027010351683005, 0.9313772537190648, 0.9425515870295469, 0.9100568999794337]
Epoch 3, loss = 0.2298, avg_val_fscore = 0.9081


HBox(children=(FloatProgress(value=0.0, description='Iterations: ', max=75986.0, style=ProgressStyle(descripti…


Per class f1_score:  [0.9544114622609172, 0.8040035648179886, 0.9319256872557757, 0.9417974909165695, 0.9105367793240556]
Epoch 4, loss = 0.2213, avg_val_fscore = 0.9085

Per class f1_score:  [0.9460495283018868, 0.8394752358490566, 0.9122936320754718, 0.8621757075471698, 0.9246757075471698]
Final test fscore: 0.8969


## Gated Recurrent Unit

In [31]:
from GRU import APM

model = APM(d_emb=32, d_hid=128, n_layers=2, dropout=0.25)
model = model.to(device=device)
optimizer = optim.Adam(model.parameters(), lr=0.001, weight_decay=0.0001)
test_fscore = fit(loader_train, loader_val, loader_val, model, 'gru', optimizer, epochs=3)

# [0.9282234268853359, 0.7537226377547518, 0.927743086529884, 0.9264393055650861, 0.8911685994647636]
# multi-one [0.9583876054020704, 0.8259409062864194, 0.9167066566120519, 0.9518064029615411, 0.9021046136971277]

HBox(children=(FloatProgress(value=0.0, description='Epochs: ', max=3.0, style=ProgressStyle(description_width…

HBox(children=(FloatProgress(value=0.0, description='Iterations: ', max=75952.0, style=ProgressStyle(descripti…


Per class f1_score:  [0.9592186429061, 0.8267306374228925, 0.9383139136394791, 0.9481151473612063, 0.9134338588074024]
Epoch 0, loss = 0.3244, avg_val_fscore = 0.9172


HBox(children=(FloatProgress(value=0.0, description='Iterations: ', max=75952.0, style=ProgressStyle(descripti…


Per class f1_score:  [0.9552433173406443, 0.8267991775188486, 0.9332419465387252, 0.9462645647703907, 0.9110349554489376]
Epoch 1, loss = 0.1792, avg_val_fscore = 0.9145


HBox(children=(FloatProgress(value=0.0, description='Iterations: ', max=75952.0, style=ProgressStyle(descripti…


Per class f1_score:  [0.9538039753255655, 0.8228238519533927, 0.9368060315284441, 0.9465387251542152, 0.9123372172721042]
Epoch 2, loss = 0.2844, avg_val_fscore = 0.9145

Per class f1_score:  [0.9538039753255655, 0.8228238519533927, 0.9368060315284441, 0.9465387251542152, 0.9123372172721042]
Final test fscore: 0.9145
