In [1]:
import pandas as pd
import tensorflow as tf
from tensorflow import keras
from keras.losses import BinaryCrossentropy

# 0 - Data loading

In [2]:
# Read the three preprocessed training tables from CSV, in this order:
# the G table, the T table, and the balanced G-T table.
G_train_df, T_train_df, G_T_train_df = (
    pd.read_csv (csv_path)
    for csv_path in (
        'data/preprocessed_data/G_train.csv',
        'data/preprocessed_data/T_train.csv',
        'data/preprocessed_data/balanced_G_T_train.csv',
    )
)

In [3]:
# Sampling configuration. `random_state` is also the seed handed to
# TensorFlow further down, so keep the name.
random_state = 13
frac = 0.1

# The three tables are fed to the model in parallel (row i of each one belongs
# to the same training example), so they must have matching rows.
assert len(G_train_df) == len(T_train_df) == len(G_T_train_df), \
    "G, T and G_T training tables must have the same number of rows"

# Draw the random sample once and reuse its row labels for the other two
# tables. This makes the row alignment explicit instead of relying on three
# identically seeded `sample` calls happening to pick the same positions.
sampled_G_T_train_df    = G_T_train_df.sample(frac = frac, random_state=random_state)
sampled_G_train_df      = G_train_df.loc[sampled_G_T_train_df.index]
sampled_T_train_df      = T_train_df.loc[sampled_G_T_train_df.index]


In [4]:
# Report the (rows, columns) shape of each sampled table, one per line.
print (
    sampled_G_T_train_df.shape,
    sampled_G_train_df.shape,
    sampled_T_train_df.shape,
    sep = '\n',
)

(25757, 3)
(25757, 464)
(25757, 55)


In [5]:
# Preview the first five rows of the full (unsampled) G table.
G_train_df.head(n = 5)

Unnamed: 0,group ID,S0002,S0003,S0004,S0005,S0006,S0008,S0009,S0010,S0012,...,S1035,S1037,S1039,S1046,S1047,S1051,S1058,S1059,S1060,S1072
0,G0049,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,G0056,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,G0129,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,G0114,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,G0028,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


- Remove the ID columns and convert each table to a NumPy array

In [6]:
# Feature matrices: each sampled table minus its own ID column, as an array.
G_train = sampled_G_train_df.drop(columns = ['group ID']).values
T_train = sampled_T_train_df.drop(columns = ['technique ID']).values

# Targets: whatever is left of the G-T table once both ID columns are dropped.
y_train = sampled_G_T_train_df.drop(columns = ['group ID', 'technique ID']).values



In [7]:
# Report the shape of the target array and both feature matrices, one per line.
print (
    y_train.shape,
    G_train.shape,
    T_train.shape,
    sep = '\n',
)

(25757, 1)
(25757, 463)
(25757, 54)


In [8]:
# Sanity check: display the container type of the T feature matrix.
type(T_train)

numpy.ndarray

# 1 - Model architecture

In [9]:
# input shapes config
num_G_features = G_train.shape[1]  # remove Group ID during training
num_T_features = T_train.shape[1]   # remove Movie ID during training

# output
num_outputs = 32



In [11]:
tf.random.set_seed(random_state)


def _build_tower(name):
    """Return an unbuilt Dense(256) -> Dense(128) -> Dense(num_outputs) stack.

    The two hidden layers use ReLU; the final layer is linear, so the tower
    emits an unbounded vector of length `num_outputs`.
    """
    return tf.keras.models.Sequential(
        layers=[
            tf.keras.layers.Dense(256, activation='relu'),
            tf.keras.layers.Dense(128, activation='relu'),
            tf.keras.layers.Dense(num_outputs, activation='linear'),
        ],
        name=name)


# Group tower and its input vector.
Group_NN = _build_tower("Group_NN")
# `shape` must be a tuple: `(n)` is just the int `n`, which newer Keras
# releases reject, so spell it as the 1-tuple `(n,)`.
input_Group = tf.keras.layers.Input(shape=(num_G_features,), name="input_Group")
vg = Group_NN(input_Group)

# Technique tower and its input vector.
Technique_NN = _build_tower("Technique_NN")
input_Technique = tf.keras.layers.Input(shape=(num_T_features,), name="input_Technique")
vt = Technique_NN(input_Technique)

# The model's prediction is the dot product of the two tower outputs: one
# unbounded score (a logit) per example.
output = tf.keras.layers.Dot(axes=1)(inputs=[vg, vt])

model = tf.keras.Model(inputs=[input_Group, input_Technique],
                       outputs=output, name='recsysNN_model')

tf.random.set_seed(random_state)
# NOTE(review): 0.05 is 50x Adam's default learning rate (0.001) -- confirm
# this is deliberate.
opt = tf.keras.optimizers.Adam(learning_rate=0.05)
model.compile(
    optimizer=opt,
    # The output is a raw logit (no sigmoid), hence from_logits=True. Taken
    # from tf.keras like every other object here, rather than from the
    # standalone `keras` package.
    loss=tf.keras.losses.BinaryCrossentropy(from_logits=True),
    # The plain 'accuracy' string thresholds predictions at 0.5, which is only
    # right for probabilities. For logits the decision boundary is 0.0. The
    # metric keeps the name 'accuracy' so history keys are unchanged.
    metrics=[tf.keras.metrics.BinaryAccuracy(name='accuracy', threshold=0.0)])

## Define early stopping callback

In [58]:
# Halt training once the validation loss has gone 5 consecutive epochs without
# improving, then roll the model back to the weights of its best epoch.
early_stopping_callback = tf.keras.callbacks.EarlyStopping(
    restore_best_weights = True,
    patience = 5,
    monitor = 'val_loss',
)

Model: "recsysNN_model"
__________________________________________________________________________________________________
 Layer (type)                   Output Shape         Param #     Connected to                     
 input_Group (InputLayer)       [(None, 463)]        0           []                               
                                                                                                  
 input_Technique (InputLayer)   [(None, 54)]         0           []                               
                                                                                                  
 Group_NN (Sequential)          (None, 32)           155808      ['input_Group[0][0]']            
                                                                                                  
 Technique_NN (Sequential)      (None, 32)           51104       ['input_Technique[0][0]']        
                                                                                     

# 2 - Training the model

In [59]:
# Train the two-tower model on the aligned G/T feature rows.
# `early_stopping_callback` monitors `val_loss`, which only exists when fit
# is given validation data, and the callback only runs if it is passed in.
# So hold out 20% of the rows for validation and register the callback.
# Keras takes the validation split from the end of the arrays before
# shuffling; the rows were already randomly sampled, so that tail is random.
history = model.fit (
    x = [G_train, T_train],
    y =  y_train,
    batch_size = 32,
    epochs = 30,                    # upper bound; early stopping may end sooner
    validation_split = 0.2,
    callbacks = [early_stopping_callback],
)

Epoch 1/30
Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Epoch 7/30
Epoch 8/30
Epoch 9/30
Epoch 10/30
Epoch 11/30
Epoch 12/30
Epoch 13/30
Epoch 14/30
Epoch 15/30
Epoch 16/30
Epoch 17/30
Epoch 18/30
Epoch 19/30
Epoch 20/30
Epoch 21/30
Epoch 22/30
Epoch 23/30
Epoch 24/30
Epoch 25/30
Epoch 26/30
Epoch 27/30
Epoch 28/30
Epoch 29/30
Epoch 30/30


<keras.callbacks.History at 0x20025984820>