Reference: https://deepctr-torch.readthedocs.io/en/latest/Examples.html

Tuning: https://github.com/shenweichen/DeepCTR-Torch/issues/274

In [40]:
import pandas as pd
import numpy as np
import pickle

from sklearn.preprocessing import LabelEncoder
from deepctr_torch.inputs import SparseFeat, get_feature_names

# Models
import torch
from deepctr_torch.models import DeepFM

# Tuning 
from sklearn.metrics import mean_squared_error, mean_absolute_error, roc_auc_score
from sklearn.model_selection import train_test_split
from keras.wrappers.scikit_learn import KerasRegressor
from sklearn.model_selection import GridSearchCV, StratifiedKFold

import warnings
warnings.filterwarnings("ignore")

#### Importing Data

In [41]:
# data = pd.read_csv("../../Data Files/Raw Data/users_ratings.csv")
x_train = pd.read_csv('../../Data Files/Training Data/x_train.csv')
x_test = pd.read_csv('../../Data Files/Training Data/x_test.csv')
y_train = pd.read_csv('../../Data Files/Training Data/y_train.csv')
y_test = pd.read_csv('../../Data Files/Training Data/y_test.csv')

data = x_train.append(x_test)
# print(data)

#### Label Encoding for sparse features

In [42]:
sparse_data = data.iloc[:, 5:]
print(sparse_data.dtypes)

#remove columns that causes key errors
sparse_data = sparse_data.loc[:, sparse_data.columns!='children']
sparse_data = sparse_data.loc[:, sparse_data.columns!='clear']
sparse_data = sparse_data.loc[:, sparse_data.columns!='forward']
sparse_data = sparse_data.loc[:, sparse_data.columns!='half']
sparse_data = sparse_data.loc[:, sparse_data.columns!='pop']
sparse_data = sparse_data.loc[:, sparse_data.columns!='train']

sparse_features = list(sparse_data.columns)
# print(sparse_features)

target = ['User_Rating']

Ad/SciFi/Fantasy      int64
Animation             int64
Comedy                int64
Crime/Mystery         int64
Drama                 int64
                     ...   
zootopia            float64
zorba               float64
zorro               float64
zumbo               float64
½th                 float64
Length: 6886, dtype: object


In [43]:
for feat in sparse_features:
    
    lbe = LabelEncoder()
    data[feat] = lbe.fit_transform(data[feat])

# print(sparse_features)
    
fixlen_feature_columns = [SparseFeat(feat, data[feat].nunique()) for feat in sparse_features]
linear_feature_columns = fixlen_feature_columns
dnn_feature_columns = fixlen_feature_columns
feature_names = get_feature_names(linear_feature_columns + dnn_feature_columns)

In [44]:
lbe_y_train = LabelEncoder()
y_train['User_Rating'] = lbe_y_train.fit_transform(y_train[target])
print(y_train[target].values)
print(len(y_train[target].values))

[[8]
 [7]
 [7]
 ...
 [6]
 [8]
 [5]]
12476


In [45]:
#for feat in sparse_features:
    #lbe_x_test = LabelEncoder()
    #x_test[feat] = lbe_x_test.fit_transform(x_test[feat])

In [46]:
lbe_y_test = LabelEncoder()
y_test['User_Rating'] = lbe_y_test.fit_transform(y_test[target])
print(len(y_test[target].values))

3119


In [47]:
# print(feature_names)
x_train = {name: x_train[name] for name in feature_names}
#print(x_train)
x_test = {name: x_test[name] for name in feature_names}
# print(x_test)

# print(x_train)
# y_train = y_train.to_dict()
# print(y_train)

 #### Defining DeepFM Architecture

In [48]:
device = 'cpu'
use_cuda = True
if use_cuda and torch.cuda.is_available():
    print('cuda ready...')
    device = 'cuda:0'
    
model = DeepFM(linear_feature_columns,dnn_feature_columns,
               task='regression',device=device)

model.compile('adam', 'mse', metrics=['mse'])

print(model)

DeepFM(
  (embedding_dict): ModuleDict(
    (Ad/SciFi/Fantasy): Embedding(2, 4)
    (Animation): Embedding(2, 4)
    (Comedy): Embedding(2, 4)
    (Crime/Mystery): Embedding(2, 4)
    (Drama): Embedding(2, 4)
    (Family): Embedding(2, 4)
    (Film-Noir): Embedding(2, 4)
    (Horror/Thriller): Embedding(2, 4)
    (Music): Embedding(2, 4)
    (Reality): Embedding(2, 4)
    (Romance): Embedding(2, 4)
    (Short): Embedding(2, 4)
    (Sport/Action): Embedding(2, 4)
    (War/History): Embedding(2, 4)
    (Western): Embedding(2, 4)
    (aaron): Embedding(3, 4)
    (abassin): Embedding(2, 4)
    (abba): Embedding(2, 4)
    (abbey): Embedding(3, 4)
    (abduct): Embedding(2, 4)
    (abdul): Embedding(2, 4)
    (abe): Embedding(2, 4)
    (abid): Embedding(2, 4)
    (abiquiu): Embedding(2, 4)
    (abomin): Embedding(4, 4)
    (abq): Embedding(2, 4)
    (abraham): Embedding(2, 4)
    (abroad): Embedding(2, 4)
    (absenc): Embedding(2, 4)
    (absent): Embedding(2, 4)
    (absolut): Embedding(3,

In [49]:
print(len(y_train[target].values))
print(len(y_train))
print(len(x_train))

12476
12476
6880


In [50]:
#history = model.fit(x_train, y_train, batch_size=256, epochs=10, verbose=2, validation_split=0.2)

history = model.fit(x_train, y_train[target].values, batch_size=256, epochs=10, verbose=2, validation_split=0.2)

cpu
Train on 9980 samples, validate on 2496 samples, 39 steps per epoch
Epoch 1/10
113s - loss:  750.5400 - mse:  750.2764 - val_mse:  106.0240
Epoch 2/10
112s - loss:  17.0558 - mse:  17.0509 - val_mse:  3.8326
Epoch 3/10
112s - loss:  3.7222 - mse:  3.7222 - val_mse:  3.3551
Epoch 4/10
112s - loss:  3.4767 - mse:  3.4767 - val_mse:  3.3911
Epoch 5/10
112s - loss:  3.4333 - mse:  3.4332 - val_mse:  3.5044
Epoch 6/10
113s - loss:  3.5045 - mse:  3.5045 - val_mse:  3.3550
Epoch 7/10
113s - loss:  3.3909 - mse:  3.3908 - val_mse:  3.3393
Epoch 8/10
112s - loss:  3.4705 - mse:  3.4706 - val_mse:  3.3320
Epoch 9/10
112s - loss:  3.3585 - mse:  3.3585 - val_mse:  3.3200
Epoch 10/10
113s - loss:  3.3484 - mse:  3.3482 - val_mse:  3.4715


#### Training DeepFM

In [51]:
print(x_test)
pred_ans = model.predict(x_test, batch_size=256)

{'Ad/SciFi/Fantasy': 0       1
1       1
2       1
3       0
4       0
       ..
3114    0
3115    0
3116    0
3117    0
3118    0
Name: Ad/SciFi/Fantasy, Length: 3119, dtype: int64, 'Animation': 0       0
1       0
2       0
3       0
4       0
       ..
3114    0
3115    0
3116    0
3117    0
3118    0
Name: Animation, Length: 3119, dtype: int64, 'Comedy': 0       0
1       0
2       0
3       0
4       0
       ..
3114    0
3115    0
3116    0
3117    0
3118    0
Name: Comedy, Length: 3119, dtype: int64, 'Crime/Mystery': 0       0
1       0
2       0
3       1
4       1
       ..
3114    0
3115    1
3116    0
3117    0
3118    0
Name: Crime/Mystery, Length: 3119, dtype: int64, 'Drama': 0       1
1       0
2       1
3       1
4       1
       ..
3114    0
3115    1
3116    1
3117    0
3118    1
Name: Drama, Length: 3119, dtype: int64, 'Family': 0       0
1       0
2       1
3       0
4       0
       ..
3114    0
3115    0
3116    0
3117    0
3118    0
Name: Family, Length: 3119, dty

In [52]:
print("test MSE", round(mean_squared_error(y_test[target].values, pred_ans), 4))

mse = mean_squared_error(y_test[target].values, pred_ans)
print("mean_squared_error = {:.3}".format(mse))
 

test MSE 3.6064
mean_squared_error = 3.61


#### Tuning for Hyperparameters with Cross Validation

In [66]:
def build_dfm_model():
    device = 'cpu'
    use_cuda = True
    if use_cuda and torch.cuda.is_available():
        print('cuda ready...')
        device = 'cuda:0'
    
    model = DeepFM(linear_feature_columns,dnn_feature_columns,
               task='regression',device=device)

    model.compile('adam', 'mse', metrics=['mse'])
    return model

In [None]:
#param_grid = {
    #'dnn_hidden_units': [(128, 128), (256, 256)],
    #'dropout': [0, 0.1],
    #'nb_epoch': [10, 20, 30],
    #'batch_size': [16, 64],
#}

param_grid = {'activation_fn': ['tanh', 'softplus', 'relu'],
              #'optimizer_fn': ['adam', 'sgd'],
              'batch_size': [16],
              'epochs': [10]}

keras_reg = KerasRegressor(build_fn=build_dfm_model)

skf = StratifiedKFold(n_splits=3, shuffle=True, random_state=1)

grid_search = GridSearchCV(estimator=keras_reg, param_grid=param_grid, verbose=3, error_score='raise')
x_train_df = pd.DataFrame(x_train)
#y_train_df = y_train.iloc[:6880]
#print(y_train_df)
#print(grid_search)
grid_search.fit(x_train_df, y_train[target].values)

In [None]:
optimal_clf = grid_search.best_estimator_

y_pred = optimal_clf.predict(x_test)
mse_after_tuning = mean_squared_error(y_test[target].values, y_pred)
print("mean_absolute_error = {:.3}".format(mse_after_tuning)) 
print(grid_search.best_params_)

#### Generating Predictions

In [None]:
best_params = grid_search.best_params_
tuned_dfm = DeepFM(**best_params)
dfm_history = tuned_dfm.fit(x_train, y_train, batch_size=best_params['batch_size'], epochs=best_params['epochs'], verbose=1)
y_pred_dfm = tuned_dfm.predict(x_test)

#### Saving Model File and Predictions

In [None]:
save_path = '../../Data Files/'
pickle.dump(tuned_deepfm, open(save_path + 'Model Files/' + 'deepfm.pkl', 'wb'))
np.savetxt(save_path + 'Predictions/' + 'deepfm_output.csv', y_pred_dfm, delimiter=",")