In [1]:
import pandas as pd
from sklearn.metrics import log_loss, roc_auc_score
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
import tensorflow as tf
from deepctr.feature_column import SparseFeat,get_feature_names
from deepctr.models import FLEN, DeepFM

In [4]:
if __name__ == "__main__":
    # End-to-end DeepFM run: load the CSV, label-encode the categorical
    # columns, train and evaluate a binary classifier, then export the test-set
    # predictions with userId/title mapped back to their original values.

    data = pd.read_csv('./movielens.csv')

    sparse_features = ['userId', 'title', 'genres', 'tag', 'rating']
    target = ['target']
    # NOTE(review): 'rating' is fed to the model as an input feature. If
    # 'target' is derived from 'rating' this leaks the label - confirm how
    # the 'target' column was built.

    # 1. Label-encode every sparse feature into contiguous integer ids.
    #    The encoding is written to a copy so `data` keeps the raw values,
    #    and each fitted encoder is kept so ids can be decoded later without
    #    re-reading the CSV or re-fitting.
    encoders = {}
    encoded = data.copy()
    for feat in sparse_features:
        lbe = LabelEncoder()
        encoded[feat] = lbe.fit_transform(data[feat])
        encoders[feat] = lbe

    # 2. Build one embedding column per sparse feature; the vocabulary size
    #    is the number of distinct classes seen by that feature's encoder.

    # Field grouping per feature. It is not used by the DeepFM setup below;
    # it was only referenced by a previous SparseFeat(..., group_name=field_info[name])
    # configuration.
    field_info = dict(userId = 'user', title = 'context', genres = 'context', tag ='context', rating = 'context')

    fixlen_feature_columns = [
        SparseFeat(feat, vocabulary_size=len(encoders[feat].classes_), embedding_dim=4)
        for feat in sparse_features
    ]
    dnn_feature_columns = fixlen_feature_columns
    linear_feature_columns = fixlen_feature_columns

    feature_names = get_feature_names(linear_feature_columns + dnn_feature_columns)

    # 3. Generate input data for the model: one column per feature name.
    train, test = train_test_split(encoded, test_size=0.2, random_state=2020)
    train_model_input = {name: train[name] for name in feature_names}
    test_model_input = {name: test[name] for name in feature_names}

    # 4. Define the model, train, predict and evaluate.
    model = DeepFM(linear_feature_columns, dnn_feature_columns, task='binary')
    model.compile("adam", "binary_crossentropy",
                  metrics=['binary_crossentropy'], )

    history = model.fit(train_model_input, train[target].values,
                        batch_size=256, epochs=10, verbose=2, validation_split=0.2, )
    pred_ans = model.predict(test_model_input, batch_size=256)
    print("test LogLoss", round(log_loss(test[target].values, pred_ans), 4))
    print("test AUC", round(roc_auc_score(test[target].values, pred_ans), 4))

    # 5. Decode the encoded ids back to the original userId/title values
    #    using the encoders fitted in step 1. (Adding 1 to the encoded userId
    #    is only correct when the raw ids are exactly 1..N with no gaps.)
    predictions = pd.DataFrame({
        'userId': encoders['userId'].inverse_transform(test['userId']),
        'title': encoders['title'].inverse_transform(test['title']),
        'prediction': pred_ans.flatten(),
    }, index=test.index)

    # 6. Save to a CSV file.
    predictions.to_csv('./decoded_predictions.csv', index=False)

Metal device set to: Apple M1

systemMemory: 16.00 GB
maxCacheSize: 5.33 GB



2023-12-07 10:39:40.267025: I tensorflow/core/common_runtime/pluggable_device/pluggable_device_factory.cc:305] Could not identify NUMA node of platform GPU ID 0, defaulting to 0. Your kernel may not have been built with NUMA support.
2023-12-07 10:39:40.267139: I tensorflow/core/common_runtime/pluggable_device/pluggable_device_factory.cc:271] Created TensorFlow device (/job:localhost/replica:0/task:0/device:GPU:0 with 0 MB memory) -> physical PluggableDevice (device: 0, name: METAL, pci bus id: <undefined>)
2023-12-07 10:39:40.764215: W tensorflow/core/platform/profile_utils/cpu_utils.cc:128] Failed to get CPU frequency: 0 Hz


Epoch 1/10


2023-12-07 10:39:41.160318: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:113] Plugin optimizer for device_type GPU is enabled.
loc("mps_select"("(mpsFileLoc): /AppleInternal/Library/BuildRoots/495c257e-668e-11ee-93ce-926038f30c31/Library/Caches/com.apple.xbs/Sources/MetalPerformanceShadersGraph/mpsgraph/MetalPerformanceShadersGraph/Core/Files/MPSGraphUtilities.mm":294:0)): error: 'anec.gain_offset_control' op result #0 must be 4D/5D memref of 16-bit float or 8-bit signed integer or 8-bit unsigned integer values, but got 'memref<1x256x1x1xi1>'
loc("mps_select"("(mpsFileLoc): /AppleInternal/Library/BuildRoots/495c257e-668e-11ee-93ce-926038f30c31/Library/Caches/com.apple.xbs/Sources/MetalPerformanceShadersGraph/mpsgraph/MetalPerformanceShadersGraph/Core/Files/MPSGraphUtilities.mm":294:0)): error: 'anec.gain_offset_control' op result #0 must be 4D/5D memref of 16-bit float or 8-bit signed integer or 8-bit unsigned integer values, but got 'memref<1x256x1x1xi1>'
2

250/250 - 6s - loss: 0.4864 - binary_crossentropy: 0.4864 - val_loss: 0.4459 - val_binary_crossentropy: 0.4458
Epoch 2/10
250/250 - 5s - loss: 0.4370 - binary_crossentropy: 0.4369 - val_loss: 0.4428 - val_binary_crossentropy: 0.4426
Epoch 3/10
250/250 - 5s - loss: 0.4297 - binary_crossentropy: 0.4296 - val_loss: 0.4358 - val_binary_crossentropy: 0.4356
Epoch 4/10
250/250 - 5s - loss: 0.4198 - binary_crossentropy: 0.4196 - val_loss: 0.4242 - val_binary_crossentropy: 0.4240
Epoch 5/10
250/250 - 5s - loss: 0.4017 - binary_crossentropy: 0.4015 - val_loss: 0.4021 - val_binary_crossentropy: 0.4019
Epoch 6/10
250/250 - 5s - loss: 0.3824 - binary_crossentropy: 0.3822 - val_loss: 0.3946 - val_binary_crossentropy: 0.3943
Epoch 7/10
250/250 - 5s - loss: 0.3704 - binary_crossentropy: 0.3701 - val_loss: 0.3875 - val_binary_crossentropy: 0.3872
Epoch 8/10
250/250 - 5s - loss: 0.3626 - binary_crossentropy: 0.3623 - val_loss: 0.3844 - val_binary_crossentropy: 0.3841
Epoch 9/10
250/250 - 5s - loss: 0.3

2023-12-07 10:40:29.766723: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:113] Plugin optimizer for device_type GPU is enabled.


test LogLoss 0.3848
test AUC 0.8701
