In [17]:
import pandas as pd
import tensorflow as tf
from tensorflow import keras
from keras.losses import BinaryCrossentropy

In [18]:
# Read the three preprocessed training tables from disk.
# Judging by the columns dropped later in the notebook, G_* carries a 'group ID'
# column, T_* a 'technique ID' column, and the balanced G_T table carries both.
G_train_df = pd.read_csv('data/preprocessed_data/G_train.csv')
T_train_df = pd.read_csv('data/preprocessed_data/T_train.csv')
G_T_train_df = pd.read_csv('data/preprocessed_data/balanced_G_T_train.csv')

In [19]:
# Reproducibility seed (also reused for tf.random.set_seed further down) and the
# fraction of the training rows kept for this run.
random_state = 13
frac = 0.2

# The three tables are sampled independently but are later passed to model.fit as
# parallel arrays, so row i of each sample has to describe the same training pair.
# Using the same frac and random_state only keeps the samples row-aligned when all
# three tables have the same number of rows, so fail loudly if that does not hold
# instead of silently training on mismatched rows.
assert len(G_train_df) == len(T_train_df) == len(G_T_train_df), (
    "G_train, T_train and balanced_G_T_train must have the same number of rows "
    "to stay row-aligned after sampling"
)

sampled_G_train_df = G_train_df.sample(frac=frac, random_state=random_state)
sampled_T_train_df = T_train_df.sample(frac=frac, random_state=random_state)
sampled_G_T_train_df = G_T_train_df.sample(frac=frac, random_state=random_state)


In [20]:
# Sanity check: show the (rows, columns) of each sampled table, one per line,
# in the order pair table, group table, technique table.
print(
    sampled_G_T_train_df.shape,
    sampled_G_train_df.shape,
    sampled_T_train_df.shape,
    sep='\n',
)

(25757, 3)
(25757, 464)
(25757, 55)


In [21]:
# Targets: whatever remains of the pair table once both ID columns are removed,
# as a NumPy array.
y_train = sampled_G_T_train_df.drop(columns=['group ID', 'technique ID']).values

# Inputs for the two towers: the ID columns are dropped so that only the
# remaining columns are fed to the networks.
G_train = sampled_G_train_df.drop(columns=['group ID']).values
T_train = sampled_T_train_df.drop(columns=['technique ID']).values



In [22]:
# --- Two-tower model: one network per side, scored by a dot product ---------------

# Input widths come straight from the feature matrices (ID columns already dropped).
num_G_features = G_train.shape[1]
num_T_features = T_train.shape[1]

# Width of the vector each tower emits; both towers must use the same width
# because their outputs are combined with a dot product below.
num_outputs = 32

tf.random.set_seed(random_state)

# Group tower: maps a group's feature vector to a num_outputs-dimensional vector.
Group_NN = tf.keras.models.Sequential(
    layers=[
        tf.keras.layers.Dense(256, activation='relu'),
        tf.keras.layers.Dense(128, activation='relu'),
        tf.keras.layers.Dense(num_outputs, activation='linear'),
    ],
    name="Group_NN")
# Input vector for Group_NN. The shape must be a tuple: `(n)` is just the int n,
# which Keras 3 rejects, so the trailing comma is required.
input_Group = tf.keras.layers.Input(shape=(num_G_features,), name="input_Group")
vg = Group_NN(input_Group)
# L2-normalising the group vector is currently disabled:
# vg = tf.linalg.l2_normalize(vg, axis=1)

# Technique tower: same architecture, applied to a technique's feature vector.
Technique_NN = tf.keras.models.Sequential(
    layers=[
        tf.keras.layers.Dense(256, activation='relu'),
        tf.keras.layers.Dense(128, activation='relu'),
        tf.keras.layers.Dense(num_outputs, activation='linear'),
    ],
    name="Technique_NN")
# Input vector for Technique_NN (1-tuple shape, see the note on input_Group).
input_Technique = tf.keras.layers.Input(shape=(num_T_features,), name="input_Technique")
vt = Technique_NN(input_Technique)
# L2-normalising the technique vector is currently disabled:
# vt = tf.linalg.l2_normalize(vt, axis=1)

# Score for a (group, technique) pair = dot product of the two tower outputs.
# No activation is applied, so this is a raw, unbounded score.
output = tf.keras.layers.Dot(axes=1)(inputs=[vg, vt])

model = tf.keras.Model(inputs=[input_Group, input_Technique],
                       outputs=output, name='recsysNN_model')

# Reset the global seed again before compiling and training.
tf.random.set_seed(random_state)
# NOTE(review): 0.05 is much larger than Adam's default learning rate (0.001);
# confirm this is intentional if training turns out to be unstable.
opt = keras.optimizers.Adam(learning_rate=0.05)
# from_logits=True because the model output above is an unsquashed score,
# not a probability.
model.compile(optimizer=opt, loss=BinaryCrossentropy(from_logits=True))

In [23]:
# Wrap each training array in a tf.data.Dataset. The fit call below does not use
# these; it is given the NumPy arrays directly.
G_train_dataset, T_train_dataset, y_train_dataset = (
    tf.data.Dataset.from_tensor_slices(array)
    for array in (G_train, T_train, y_train)
)

# Train on the two parallel input arrays (group features, technique features)
# against the labels. The returned History object is the cell's displayed value.
model.fit(
    [G_train, T_train],
    y_train,
    batch_size=32,
    epochs=30,
)

Epoch 1/30
Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Epoch 7/30
Epoch 8/30
Epoch 9/30
Epoch 10/30
Epoch 11/30
Epoch 12/30
Epoch 13/30
Epoch 14/30
Epoch 15/30
Epoch 16/30
Epoch 17/30
Epoch 18/30
Epoch 19/30
Epoch 20/30
Epoch 21/30
Epoch 22/30
Epoch 23/30
Epoch 24/30
Epoch 25/30
Epoch 26/30
Epoch 27/30
Epoch 28/30
Epoch 29/30
Epoch 30/30


<keras.callbacks.History at 0x20b925b7130>

In [24]:
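# NOTE(review): every line in this cell is commented out, so nothing here runs. It is an
# archived variant of the two-tower model that compiles with optimizer='adam',
# loss='mean_squared_error' and an MAE metric, and trains for 10 epochs.
# # Assuming you have your input datasets and target data as NumPy arrays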
# input_data_group = G_train     # Replace [...] with your actual input data for 'input_Group'
# input_data_technique = T_train # Replace [...] with your actual input data for 'input_Technique'
# target_output = y_train          # Replace [...] with your actual target output data

# # Create separate TensorFlow datasets for 'input_Group', 'input_Technique', and target output
# input_dataset_group = tf.data.Dataset.from_tensor_slices(input_data_group)
# input_dataset_technique = tf.data.Dataset.from_tensor_slices(input_data_technique)
# target_output_dataset = tf.data.Dataset.from_tensor_slices(target_output)

# # Combine the input datasets and target output dataset into a single dataset using zip
# # combined_dataset = tf.data.Dataset.zip((input_dataset_group, input_dataset_technique, target_output_dataset))

# # Optionally, you can shuffle, batch, and prefetch the combined dataset
# batch_size = 32
# # combined_dataset = combined_dataset.shuffle(buffer_size=len(input_data_group))
# # combined_dataset = combined_dataset.batch(batch_size)
# # combined_dataset = combined_dataset.prefetch(buffer_size=tf.data.AUTOTUNE)

# # Define your model architecture
# Group_NN = tf.keras.models.Sequential(
#     layers=[
#         tf.keras.layers.Dense(256, activation='relu'),
#         tf.keras.layers.Dense(128, activation='relu'),
#         tf.keras.layers.Dense(num_outputs, activation='linear'),
#     ],
#     name="Group_NN"
# )

# Technique_NN = tf.keras.models.Sequential(
#     layers=[
#         tf.keras.layers.Dense(256, activation='relu'),
#         tf.keras.layers.Dense(128, activation='relu'),
#         tf.keras.layers.Dense(num_outputs, activation='linear'),
#     ],
#     name="Technique_NN"
# )

# input_layer_group = tf.keras.layers.Input(shape=(num_G_features), name="input_Group")
# input_layer_technique = tf.keras.layers.Input(shape=(num_T_features), name="input_Technique")

# vg = Group_NN(input_layer_group)
# vt = Technique_NN(input_layer_technique)

# output = tf.keras.layers.Dot(axes=1)(inputs=[vg, vt])

# model = tf.keras.Model(inputs=[input_layer_group, input_layer_technique], outputs=output, name='recsysNN_model')

# # Compile the model with an appropriate optimizer, loss, and metrics
# model.compile(optimizer='adam', loss='mean_squared_error', metrics=['mae'])

# # Train the model using the combined dataset
# num_epochs = 10
# model.fit(
#     x = [input_data_group, input_data_technique],
#     y = target_output,
#     epochs=num_epochs)