# Load packages

In [1]:
import sys
from pathlib import Path

# Directory two levels above the current working directory. It is appended to
# sys.path before the `lib.*` imports below run, and is also used as the base
# for data/model paths later in the notebook.
# NOTE(review): presumably the repository root; this depends on the directory
# the notebook kernel is started from — confirm.
PARENT_DIR = Path.cwd().parent.parent
sys.path.append(str(PARENT_DIR))

import torch
from torch.utils.data import DataLoader
from sklearn.metrics import accuracy_score, precision_recall_fscore_support, roc_auc_score

from lib.models import TrainConfig, NeuralNetwork, save_model
from lib.data_processing import load_data, split_data, encode_data, VaderSentimentEncoder

from tqdm import tqdm

# Report which compute device PyTorch can see: every visible CUDA GPU by name,
# or a single "cpu" line when CUDA is unavailable.
if not torch.cuda.is_available():
    print("Device: cpu")
else:
    gpu_count = torch.cuda.device_count()
    for gpu_index in range(gpu_count):
        print("Device: cuda")
        print(torch.cuda.get_device_name(gpu_index))

Device: cuda
NVIDIA GeForce RTX 3050 Ti Laptop GPU


# Load data

In [2]:
# Location of the training TSV, relative to PARENT_DIR.
train_tsv_path = f"{PARENT_DIR}/data/power/power-gb-train.tsv"

# Read the raw samples, then hold out 20% of them for testing. The fixed
# random_state keeps the split identical between runs.
data = load_data(file_path_list=[train_tsv_path])
train_raw, test_raw = split_data(
    data,
    random_state=0,
    test_size=0.2,
)

# Set training configurations

In [3]:
# Training settings passed to NeuralNetwork.fit in the experiment cell below.
train_config = TrainConfig(
    num_epochs=10,
    early_stop=False,
    # NOTE(review): presumably only consulted when early_stop is True — confirm
    # against lib.models.TrainConfig.
    violation_limit=5,
)

# Experiment

In [4]:
# Feature dimensionalities to evaluate; one model is trained, saved and scored
# per entry.
dims = [4]

# Seed for PyTorch's global RNG. DataLoader shuffling draws from it (and
# presumably the model's weight initialisation does too), so fixing it makes
# repeated runs of this cell comparable.
seed = 0

# Use the GPU when one is visible, otherwise fall back to the CPU instead of
# failing on a hard-coded "cuda" device.
device = "cuda" if torch.cuda.is_available() else "cpu"

# Output directory for trained models. mkdir(exist_ok=True) is already a no-op
# when the directory exists, so no separate exists() check is needed.
models_dir = PARENT_DIR / "models" / "feature_engineering"
models_dir.mkdir(parents=True, exist_ok=True)

for experiment_no, dim in enumerate(dims, start=1):
    print(f"Experiment {experiment_no}: VADER with {dim}-dimentional features")

    # Re-seed per experiment so each configuration is repeatable on its own,
    # independent of how many experiments ran before it.
    torch.manual_seed(seed)

    # Encode
    print("Prepare data...")
    vader_encoder = VaderSentimentEncoder(dimensions=dim)

    train_data_nn = encode_data(train_raw, vader_encoder)
    test_data_nn = encode_data(test_raw, vader_encoder)

    dataloader = DataLoader(train_data_nn, batch_size=128, shuffle=True)

    # Train
    print("Train model...")
    # The input width is read from the first encoded sample's feature tensor.
    input_size = train_data_nn[0][0].shape[0]
    model_nn = NeuralNetwork(input_size=input_size, hidden_size=128, device=device)
    model_nn.fit(dataloader, train_config, disable_progress_bar=False)

    save_model(model_nn, models_dir, f"model_vadersv_dim_{dim}.pt")

    # Test
    with torch.no_grad():
        # Only the features need to live on the model's device; the labels are
        # consumed by scikit-learn on the CPU.
        X_test = torch.stack([sample[0] for sample in test_data_nn]).to(model_nn.device)
        y_pred = model_nn.predict(X_test)
    y_test = torch.stack([sample[1] for sample in test_data_nn])

    # Evaluate
    y_test = y_test.cpu()
    y_pred = y_pred.cpu()
    print("Evaluation results")

    accuracy = accuracy_score(y_test, y_pred)
    print(f"Accuracy: {accuracy:.4f}")

    precision, recall, fscore, _ = precision_recall_fscore_support(y_test, y_pred, average='binary')
    print(f"Precision: {precision:.4f}")
    print(f"Recall: {recall:.4f}")
    print(f"F1: {fscore:.4f}")

    # NOTE(review): y_pred is also accepted by accuracy_score above, so it looks
    # like hard class labels rather than scores. AUC on hard labels collapses to
    # a single operating point; if NeuralNetwork exposes probabilities or
    # logits, those should be passed here instead — confirm against lib.models.
    auc = roc_auc_score(y_test, y_pred)
    print(f"AUC: {auc:.4f}")
    print()

Experiment 1: VADER with 4-dimentional features
Prepare data...
Train model...



Epoch 1: 100%|██████████| 209/209 [00:01<00:00, 160.67batch/s, batch_accuracy=0.714, loss=0.512]
Epoch 2: 100%|██████████| 209/209 [00:00<00:00, 236.89batch/s, batch_accuracy=0.857, loss=0.541]
Epoch 3: 100%|██████████| 209/209 [00:01<00:00, 161.96batch/s, batch_accuracy=0.857, loss=0.541]
Epoch 4: 100%|██████████| 209/209 [00:01<00:00, 178.70batch/s, batch_accuracy=0.714, loss=0.628]
Epoch 5: 100%|██████████| 209/209 [00:01<00:00, 183.90batch/s, batch_accuracy=0.429, loss=0.808]
Epoch 6: 100%|██████████| 209/209 [00:00<00:00, 220.43batch/s, batch_accuracy=0.857, loss=0.429]
Epoch 7: 100%|██████████| 209/209 [00:00<00:00, 216.83batch/s, batch_accuracy=1, loss=0.496]    
Epoch 8: 100%|██████████| 209/209 [00:01<00:00, 196.66batch/s, batch_accuracy=0.571, loss=0.713]
Epoch 9: 100%|██████████| 209/209 [00:01<00:00, 174.01batch/s, batch_accuracy=0.714, loss=0.704]
Epoch 10: 100%|██████████| 209/209 [00:01<00:00, 196.78batch/s, batch_accuracy=0.714, loss=0.557]


Evaluation results
Accuracy: 0.6238
Precision: 0.6385
Recall: 0.7525
F1: 0.6908
AUC: 0.6067

