In [None]:
"""
The DEAM dataset is based on the two-dimensional Arousal-Valence model of emotion.
The Valence/Arousal ratings were collected through the Amazon Mechanical Turk service.
Each crowd worker was asked to mark their own emotion for the current song on the 2D Arousal/Valence plane.
For more information please read: https://journals.plos.org/plosone/article?id=10.1371/journal.pone.0173392
"""

from pathlib import Path

import pandas
import torch
from tensorboardX import SummaryWriter
from torch.nn import MSELoss
from torch.optim import SGD
from torch.utils.data import DataLoader

from config import DATA_DIR, MODELS_DIR
from src.data_set import RatingsDataset
from src.model import MF
from src.runner import Runner
from src.utils import (
    create_dataset,
    DataConverter,
)

# Annotation variant to work with. It selects the CSV file (see DF_PATH) and is also
# used in the TensorBoard run directory and the saved-model path.
# Alternative values, kept for quick switching:
# model = "mean_centralised"
# model = "standardized"
model = "raw"

# Bias terms are enabled only for the raw ratings.
# NOTE(review): presumably the centred/standardised variants already have the
# mean removed, so no bias is needed there — confirm.
include_bias = model == "raw"

# Seed torch's global RNG once, up front, so that Restart & Run All reproduces the
# same random sample (torch.randperm) and the same DataLoader shuffling order.
SEED = 42
torch.manual_seed(SEED)

# Per-rater, song-level static annotations for the selected variant.
DF_PATH = (
    f"{DATA_DIR}"
    f"/DEAM/annotations/annotations per each rater/"
    f"song_level/static_annotations_songs_1_2000_{model}.csv"
)

In [None]:
def select_n_random(trainset: "RatingsDataset", n: int = 100):
    """
    Select up to ``n`` random, non-repeating data points from a dataset.

    The dataset is indexed once with a tensor holding ``n`` random positions, so the
    result is whatever ``trainset.__getitem__`` returns for a batch of indices (the
    callers in this notebook unpack it as ``users, items, ratings``).

    :param trainset: dataset supporting ``len()`` and indexing with a 1-D index tensor
    :param n: number of data points to draw; capped at ``len(trainset)``
    :return: ``trainset[indices]`` for ``min(n, len(trainset))`` random indices
    :raises ValueError: if ``n`` is negative
    """
    if n < 0:
        raise ValueError(f"n must be non-negative, got {n}")

    perm = torch.randperm(len(trainset))
    # Slice the permutation, not the indexed result: slicing the result would only
    # trim the container returned by __getitem__ (e.g. a 3-tuple of tensors), not the
    # samples inside it, and the whole dataset would be returned.
    return trainset[perm[:n]]

In [None]:
"""
This cell fits a matrix-factorization (MF) model to the Valence ratings and saves its weights,
as the basis for finding outliers along the valence axis.
"""

# Train the MF model on the Valence ratings and store its weights.

# Map the CSV headers to the generic names used for the ratings frame.
valence_column_names = {"workerID": "user_id", "SongId": "item_id", "Valence": "rating"}
columns = list(valence_column_names)
# read_csv returns columns in file order regardless of the order given in `usecols`,
# so select them explicitly and rename by name instead of assigning labels by position.
original_df = (
    pandas.read_csv(DF_PATH, skipinitialspace=True, usecols=columns)
    .loc[:, columns]
    .rename(columns=valence_column_names)
)

valence_data_converter = DataConverter(original_df=original_df)

valence_model = MF(
    n_users=valence_data_converter.n_users,
    n_items=valence_data_converter.n_item,
    include_bias=include_bias,
)

# torch.save does not create missing directories; create the target folder up front
# so a full training run cannot be lost to a missing folder at the very end.
valence_model_path = f"{MODELS_DIR}/DEAM/{model}/valence.pt"
Path(valence_model_path).parent.mkdir(parents=True, exist_ok=True)

epochs = 100

criterion = MSELoss()
# NOTE(review): lr=5 is unusually large for SGD — confirm it is intentional.
optimizer = SGD(valence_model.parameters(), lr=5, weight_decay=1e-3)
runner = Runner(
    model=valence_model,
    criterion=criterion,
    optimizer=optimizer,
    epochs=epochs,
)

train_set = create_dataset(data_frame=valence_data_converter.encoded_df)
train_load = DataLoader(train_set, batch_size=1000, shuffle=True)
# Random sample of inputs, used below only to trace the model graph for TensorBoard.
users, items, ratings = select_n_random(train_set)

with SummaryWriter(f"runs/DEAM/{model}/valence") as writer:
    writer.add_graph(valence_model, (users, items))

    for epoch in range(epochs):
        epoch_loss = runner.train(train_loader=train_load, epoch=epoch, writer=writer)
        print(f"epoch={epoch + 1}, loss={epoch_loss}")

torch.save(valence_model.state_dict(), valence_model_path)

In [None]:
"""
This cell fits a matrix-factorization (MF) model to the Arousal ratings and saves its weights,
as the basis for finding outliers along the arousal axis.
"""

# Train the MF model on the Arousal ratings and store its weights.

# Map the CSV headers to the generic names used for the ratings frame.
arousal_column_names = {"workerID": "user_id", "SongId": "item_id", "Arousal": "rating"}
columns = list(arousal_column_names)
# read_csv returns columns in file order regardless of the order given in `usecols`,
# so select them explicitly and rename by name instead of assigning labels by position.
original_df = (
    pandas.read_csv(DF_PATH, skipinitialspace=True, usecols=columns)
    .loc[:, columns]
    .rename(columns=arousal_column_names)
)

arousal_data_converter = DataConverter(original_df=original_df)

arousal_model = MF(
    n_users=arousal_data_converter.n_users,
    n_items=arousal_data_converter.n_item,
    include_bias=include_bias,
)

# torch.save does not create missing directories; create the target folder up front
# so a full training run cannot be lost to a missing folder at the very end.
arousal_model_path = f"{MODELS_DIR}/DEAM/{model}/arousal.pt"
Path(arousal_model_path).parent.mkdir(parents=True, exist_ok=True)

epochs = 100

criterion = MSELoss()
# NOTE(review): lr=5 is unusually large for SGD — confirm it is intentional.
optimizer = SGD(arousal_model.parameters(), lr=5, weight_decay=1e-3)
runner = Runner(
    model=arousal_model,
    criterion=criterion,
    optimizer=optimizer,
    epochs=epochs,
)

train_set = create_dataset(data_frame=arousal_data_converter.encoded_df)
train_load = DataLoader(train_set, batch_size=1000, shuffle=True)
# Random sample of inputs, used below only to trace the model graph for TensorBoard.
users, items, ratings = select_n_random(train_set)

with SummaryWriter(f"runs/DEAM/{model}/arousal") as writer:
    writer.add_graph(arousal_model, (users, items))

    for epoch in range(epochs):
        epoch_loss = runner.train(train_loader=train_load, epoch=epoch, writer=writer)
        print(f"epoch={epoch + 1}, loss={epoch_loss}")

torch.save(arousal_model.state_dict(), arousal_model_path)