# BIGRU model met embeddings
## met embedding pre-training

In [1]:
from torch import optim
from data.dataloader import DatasetLoader, PretrainLoader
from torch.utils.data import DataLoader
import torch
from models.BiGRU import BiGRU
from models.PreTrainEmbedding import PreTrainEmbedding
from torch import nn
from tqdm import tqdm
import pandas as pd

## 1. Dataset uitlezen

In [2]:
removeapps = ["Screen on (locked)",
              "Screen off (locked)",
              "Screen on (unlocked)",
              "Screen off",
              "Samsung Experience Service",
              "Package installer",
              "System UI",
              "Customisation Service",
              "Configuration update",
              "EmergencyManagerService",
              "DeviceKeystring",
              "Samsung Keyboard",
              "HwModuleTest",
              "Device shutdown",
              "Device boot"]
pretraindataset = PretrainLoader("eventlog/phone_usage_cleaned.csv",
                                 nr_samples=2000, d=900, T=1200,
                                 removeapps=removeapps,
                                 nr_generated=100000)
pretrain_dataloader = DataLoader(pretraindataset, batch_size=128)

dataset = DatasetLoader("eventlog/phone_usage_cleaned.csv",
                        seq_length=10,
                        max_apps=200)
dataset.clean(removeapps=removeapps)
train_dataloader = DataLoader(dataset)

## 2. Embedding pre-train

In [3]:
# TODO: heeft niet dezeflde accuracy als in tensorflow
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
pretrainembedding = PreTrainEmbedding(max_apps=pretraindataset.nr_apps,
                                      src_dim=4)
pretrainembedding.to(device=device)
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(pretrainembedding.parameters(), lr=0.0015)
epochs = 100
running_loss = 0.0
total = 0
correct = 0
for j in range(epochs):
    for prev_app_index, next_app_index in pretrain_dataloader:
        prev_app_index, next_app_index = prev_app_index.to(device=device), next_app_index.to(device=device)

        optimizer.zero_grad()
        outputs = pretrainembedding(prev_app_index)
        loss = criterion(outputs[0], next_app_index)
        loss.backward()
        optimizer.step()

        total += 128
        arg_maxs = torch.argmax(outputs, dim=2)  # collapse cols
        num_correct = torch.sum(next_app_index==arg_maxs).item()
        correct += num_correct

        running_loss += loss.item()
    if j%20==0:
        print('loss: %.3f' %
                  (running_loss / 2000))
        running_loss = 0.0
        print('Accuracy of the network is: ' + str(100 * correct / total))
        total = 0
        correct = 0

loss: 1.486
Accuracy of the network is: 7.696611253196931
loss: 22.544
Accuracy of the network is: 14.61616847826087
loss: 21.455
Accuracy of the network is: 15.40086516943734
loss: 21.249
Accuracy of the network is: 15.638986572890026
loss: 21.135
Accuracy of the network is: 15.769511269181585


## 3. bigru model aanmaken en trainen

In [7]:
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
bigru = BiGRU(max_apps=200,
              seq_length=10,
              n_gru=64)
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(bigru.parameters(), lr=0.0015)
bigru.to(device=device)
bigru.src_embedding.weight=torch.nn.Parameter(torch.cat((pretrainembedding.src_embedding.weight,
                                      bigru.src_embedding.weight[pretraindataset.nr_apps:]),
                                     dim=0))

In [8]:
running_loss = 0.0
running_accuracy_list = []
i = 0
total = 0
correct = 0
epochs = 4
for prev_apps_indices, curr_app_index in tqdm(train_dataloader):
    prev_apps_indices, curr_app_index = prev_apps_indices.to(device), curr_app_index.to(device)

    for j in range(epochs):
        optimizer.zero_grad()

        outputs = bigru(prev_apps_indices[0])

        loss = criterion(outputs, curr_app_index)
        loss.backward()
        optimizer.step()

        total += 1
        _, predicted = torch.topk(outputs.data, 5)
        correct += (curr_app_index in predicted)*1

        running_loss += loss.item()

    if i % 2000 == 1999:
        print('loss: %.3f' %
              (running_loss / 2000))
        running_loss = 0.0
        print('Accuracy of the network is:' + str(100 * correct / total))
        running_accuracy_list.append(100 * correct / total)
        total = 0
        correct = 0
    i += 1

100%|██████████| 33627/33627 [04:47<00:00, 117.03it/s]


loss: 10.741
Accuracy of the network is:71.4875
loss: 11.859
Accuracy of the network is:64.2625
loss: 11.772
Accuracy of the network is:64.3
loss: 11.783
Accuracy of the network is:63.525
loss: 10.302
Accuracy of the network is:74.3
loss: 11.338
Accuracy of the network is:71.7
loss: 11.188
Accuracy of the network is:68.8625
loss: 11.363
Accuracy of the network is:72.3375
loss: 10.610
Accuracy of the network is:76.375
loss: 11.949
Accuracy of the network is:72.425
loss: 11.640
Accuracy of the network is:73.075
loss: 12.703
Accuracy of the network is:68.7375
loss: 12.989
Accuracy of the network is:64.6125
loss: 11.088
Accuracy of the network is:71.9375
loss: 9.902
Accuracy of the network is:77.525
loss: 10.998
Accuracy of the network is:71.975


## 3. Running accuracy opslaan

In [9]:
AccuracyComparison = pd.read_csv("outputdata/accuracycomparison.csv", index_col=False)
AccuracyComparison["OnehotPretrain"] = running_accuracy_list
AccuracyComparison.to_csv("outputdata/accuracycomparison.csv", index=False)
