In [1]:
import numpy as np
from sklearn.datasets import make_blobs
from sklearn.metrics import accuracy_score
from sklearn.model_selection import train_test_split
from torch import from_numpy, no_grad

from machine_learning.classes import OptimizerConfigs, TrainConfigs
from machine_learning.machines.factorization_machine import ModelConfigs, FactorizationMachine, load, save, train
from machine_learning.strings import sigmoid_str

In [2]:
# Synthetic classification data drawn from `centers` Gaussian blobs.
# Not a good dataset for a factorization machine, as there is no interaction between features.
n_samples, n_features = 10000, 10
centers = 2
random_state = 0  # reused as the seed for the train/val/test splits below
X, Y = make_blobs(
    n_samples=n_samples,
    n_features=n_features,
    centers=centers,
    random_state=random_state,
)

In [3]:
# Cast features to float32 and labels to int32 before they are turned into tensors
# (from_numpy keeps the NumPy dtype).
X = X.astype(np.float32)
Y = Y.astype(np.int32)

In [4]:
# Hold out 20% of the samples as the test set; the rest is split again into train/val.
X_temp, X_test, Y_temp, Y_test = train_test_split(
    X,
    Y,
    test_size=0.2,
    random_state=random_state,
)

In [5]:
# Carve the validation set out of the non-test data: 25% of the remaining 80%
# is 20% of all samples, giving a 60/20/20 train/val/test split.
X_train, X_val, Y_train, Y_val = train_test_split(
    X_temp,
    Y_temp,
    test_size=0.25,
    random_state=random_state,
)

In [6]:
# Wrap each NumPy split as a torch tensor (from_numpy shares memory with the array).
X_train_t, X_val_t, X_test_t = (
    from_numpy(features) for features in (X_train, X_val, X_test)
)

# Labels become column vectors of shape (n, 1).
Y_train_t, Y_val_t, Y_test_t = (
    from_numpy(labels).view(-1, 1) for labels in (Y_train, Y_val, Y_test)
)

# Only the training targets are cast to float; val/test labels keep their int32 dtype.
# NOTE(review): presumably the training loss needs float targets — confirm against train().
Y_train_t = Y_train_t.float()

In [7]:
# Model hyperparameters.
# NOTE(review): n_factors is presumably the size of the latent factor vectors — confirm in ModelConfigs.
n_factors = 2
activation_name = sigmoid_str

model_configs = ModelConfigs(
    n_features=n_features,
    n_factors=n_factors,
    activation_name=activation_name,
)

In [8]:
# Instantiate the model, unpacking the config's dict() as keyword arguments.
model = FactorizationMachine(
    **model_configs.dict(),
)

In [9]:
# Optimizer settings: only the learning rate is set here; any other
# OptimizerConfigs fields keep whatever defaults the class defines.
lr = 0.1
optimizer_configs = OptimizerConfigs(lr=lr)

In [10]:
# Training budget.
# NOTE(review): patience equals n_epochs, so any patience-based early stopping
# in train() presumably never triggers — confirm this is intended.
n_epochs = patience = 500

train_configs = TrainConfigs(
    optimizer_configs=optimizer_configs,
    n_epochs=n_epochs,
    patience=patience,
)

In [11]:
# Fit on the training split while scoring on the validation split; train() returns
# the best model together with its validation score and epoch.
best_model, best_score, best_epoch = train(
    train_configs,
    model,
    X_train_t,
    Y_train_t,
    X_val_t,
    Y_val_t,
    verbose=True,
    verbose_freq=10,
)

epoch 0, train loss 60.66188430786133, val score 0.4775
epoch 10, train loss 4.171582986600697e-06, val score 1.0
epoch 20, train loss 0.25596821308135986, val score 0.994
epoch 30, train loss 0.21405665576457977, val score 0.9975
epoch 40, train loss 0.035855308175086975, val score 1.0
epoch 50, train loss 0.017039833590388298, val score 1.0
epoch 60, train loss 0.016666699200868607, val score 1.0
epoch 70, train loss 0.01666666753590107, val score 1.0
epoch 80, train loss 0.01666666753590107, val score 1.0
epoch 90, train loss 0.01666666753590107, val score 1.0
epoch 100, train loss 0.01666666753590107, val score 1.0
epoch 110, train loss 0.01666666753590107, val score 1.0
epoch 120, train loss 0.01666666753590107, val score 1.0
epoch 130, train loss 0.01666666753590107, val score 1.0
epoch 140, train loss 0.01666666753590107, val score 1.0
epoch 150, train loss 0.01666666753590107, val score 1.0
epoch 160, train loss 0.01666666753590107, val score 1.0
epoch 170, train loss 0.0166666

In [12]:
# Best validation score and the epoch it was recorded at, as returned by train().
# NOTE(review): an earlier remark here suspected overfitting because val scores in the
# later epochs were low. The logged run for this notebook does not show that (the val
# score stays at 1.0 in the later logged epochs) — re-check after rerunning.
best_score, best_epoch

(1.0, 9)

In [13]:
# Persist the best model; the prediction cell below reloads it from this same path.
# The path is relative, so the file lands in the notebook's working directory.
filepath = "factorization_machine.pt"
save(best_model, filepath)

### predicting

In [14]:
# Reload the saved model and switch it to evaluation mode before predicting.
best_model = load(filepath)
best_model.eval()

# Inference only: disable gradient tracking so no autograd graph is built
# for the forward pass over the test set.
with no_grad():
    Y_test_pred = best_model(X_test_t)

In [15]:
# Turn the model outputs into hard 0/1 labels by rounding to the nearest integer
# (np.around rounds exact halves to the nearest even value, so 0.5 -> 0),
# then flatten to 1-D so they line up with the NumPy ground-truth labels.
Y_test_pred_labels = np.around(Y_test_pred.detach().numpy()).ravel()

# accuracy_score expects (y_true, y_pred); pass the NumPy labels rather than a torch tensor.
score = accuracy_score(Y_test, Y_test_pred_labels) * 100
print(f"accuracy score {score:0.2f}%")

accuracy score 100.00%
