In [1]:
import numpy as np
import tensorflow as tf
import time
from tqdm import tqdm

Init Plugin
Init Graph Optimizer
Init Kernel


# Load data

In [2]:
from sklearn.preprocessing import scale
from sklearn.model_selection import train_test_split
from sklearn.metrics import roc_auc_score, accuracy_score

mnist = tf.keras.datasets.mnist
(X_tr, y_tr), (X_te, y_te) = mnist.load_data()

X_tr = np.reshape(X_tr, [-1, 784])
X_te = np.reshape(X_te, [-1, 784])

X_all = np.vstack([
    X_tr[y_tr == 3, :],
    X_te[y_te == 3, :],
    X_tr[y_tr == 5, :],
    X_te[y_te == 5, :]    
]).astype(np.float32)

X_all = X_all * (np.random.uniform(0, 1, X_all.shape) > 0.8)

n_three, n_five = sum(y_tr == 3) + sum(y_te == 3), sum(y_tr == 5) + sum(y_te == 5)
y_all = np.array([1]*n_three + [0]*n_five)

print('Dataset shape: {}'.format(X_all.shape))
print('Non-zeros rate: {:.05f}'.format(np.mean(X_all != 0)))
print('Classes balance: {:.03f} / {:.03f}'.format(np.mean(y_all==0), np.mean(y_all==1)))

X_tr, X_te, y_tr, y_te = train_test_split(X_all, y_all, random_state=42, test_size=0.3)

Dataset shape: (13454, 784)
Non-zeros rate: 0.04038
Classes balance: 0.469 / 0.531


# Baselines

In [None]:
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier
for model in [
                LogisticRegression(), 
                RandomForestClassifier(n_jobs=-1, n_estimators=200)
            ]:
    model.fit(X_tr, y_tr)
    predictions = model.predict(X_te)
    acc = accuracy_score(y_te, predictions)
    print('model: {}'.format(model.__str__()))
    print('accuracy: {}'.format(acc))
    print()

STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


model: LogisticRegression()
accuracy: 0.8922467178597969



# Dense example

In [None]:
from tffm2 import TFFMClassifier

for order in [2, 3]:
    model = TFFMClassifier(
        order=order, 
        rank=10, 
        optimizer=tf.keras.optimizers.Adam(learning_rate=0.00001), 
        n_epochs=50, 
        batch_size=1024,
        init_std=0.001,
        reg=0.01,
        seed=42
    )
    model.fit(X_train=X_tr, y_train=y_tr, show_progress=True)
    predictions = list(model.predict(X=X_te, pred_batch_size=5000))
    print('[order={}] accuracy: {}'.format(order, accuracy_score(y_te, predictions[0]["pred"])))

# Regression example

In [None]:
from tffm2 import TFFMRegressor
from sklearn.metrics import mean_squared_error

order = 3

model = TFFMRegressor(
    order=order, 
    rank=10, 
    optimizer=tf.keras.optimizers.Adam(learning_rate=0.00001),  
    n_epochs=50, 
    batch_size=1024,
    init_std=0.001,
    reg=0.01,
)
# translate Y from {0,1} to {-10, 10}
model.fit(X_train=X_tr, y_train=y_tr*20-10, show_progress=True)
predictions = list(model.predict(X_te, pred_batch_size=5000))
print('[order={}] accuracy: {}'.format(order, accuracy_score(y_te, predictions[0]["pred_raw"] > 0)))
print('MSE: {}'.format(mean_squared_error(y_te*20-10, predictions[0]["pred_raw"])))

# Save model

In [None]:
model.save("/tmp/saved_model")