In [1]:
import numpy as np 
import pandas as pd 
import matplotlib.pyplot as plt 
from sklearn.utils import shuffle
from scipy.sparse import save_npz, load_npz

import keras.backend as K 
from keras.models import Model
from keras.layers import Input, Dropout, Dense
from keras.regularizers import l2 
from keras.optimizers import SGD

In [2]:
# config 
batch_size = 128 
epochs = 20 
reg = 0.0001 

In [3]:
A = load_npz('./data/Atrain.npz')
A_test = load_npz("./data/Atest.npz")
mask = (A > 0 ) * 1.0
mask_test = (A_test > 0) * 1.0

# make copies since we will shuffle 
A_copy = A.copy() 
mask_copy = mask.copy() 
A_test_copy = A_test.copy() 
mask_test_copy = mask_test.copy() 

In [4]:
N, M = A.shape 
N, M

(138492, 26744)

In [5]:
# center the data 
mu = A.sum() / mask.sum() 
mu

3.5255983354188305

In [6]:
# build the model - just a 1 hidden layer autoencoder 
i = Input(shape=(M,))

# bigger hidden layer size seems to help 
x = Dropout(0.7)(i) 
x = Dense(700, activation='tanh', kernel_regularizer=l2(reg))(x) 

x = Dense(M, kernel_regularizer=l2(reg))(x) 

2023-04-07 14:00:53.453072: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  SSE4.1 SSE4.2
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.


In [7]:
def custom_loss(y_true, y_pred): 
    mask = K.cast(K.not_equal(y_true, 0), dtype='float32')
    diff = y_pred - y_true
    sqdiff = diff * diff * mask 
    sse = K.sum(K.sum(sqdiff))
    n = K.sum(K.sum(mask))
    return sse / n

def generator(A, M): 
    while True: 
        A, M = shuffle(A, M)
        for i in range(A.shape[0] // batch_size + 1):
            upper = min((i+1)*batch_size, A.shape[0])
            a = A[i*batch_size : upper].toarray()
            m = M[i*batch_size : upper].toarray() 
            a = a - mu * m # must keep zeros at zero 
            # m2 = (np.random.random(a.shape) > 0.5) 
            # noisy = a * m2 
            noisy = a # no noise 
            yield noisy, a

def test_generator(A, M, A_test, M_test): 
    # assumes A and A_test are in corresponding order 
    # both of size N x M 
    while True: 
        for i in range(A.shape[0] // batch_size + 1):
            upper = min((i+1)*batch_size, A.shape[0])
            a = A[i*batch_size : upper].toarray() 
            m = M[i*batch_size : upper].toarray() 
            at = A_test[i*batch_size : upper].toarray() 
            mt = M_test[i*batch_size : upper].toarray() 
            a = a - mu * m 
            at = at - mu * mt 
            yield a, at 

model = Model(i, x) 
model.compile(
    loss= custom_loss,
    optimizer=SGD(lr=0.08, momentum=0.9), 
    # optimizer = 'adam'
    metrics=[custom_loss]
)

  super().__init__(name, **kwargs)


In [8]:
r = model.fit_generator(
    generator(A, mask),
    validation_data=test_generator(A_copy, mask_copy, A_test_copy, mask_test_copy), 
    epochs=epochs, 
    steps_per_epoch=A.shape[0] // batch_size + 3, 
    validation_steps=A_test.shape[0] // batch_size + 1
)
r.history.keys()

  r = model.fit_generator(


Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
 211/1084 [====>.........................] - ETA: 2:16 - loss: 0.6595 - custom_loss: 0.5816

KeyboardInterrupt: 

In [None]:
# plot losses 
plt.plot(r.history['loss'], label='train loss')
plt.plot(r.history['val_loss'], label='test loss')
plt.legend() 
plt.show()

In [None]:
# plot mse 
plt.plot(r.history['custom_loss'], label='train mse')
plt.plot(r.history['val_custom_loss'], label='test mse')
plt.legend() 
plt.show()