In [127]:
import pandas as pd
import tensorflow as tf
from tensorflow import keras
from keras.losses import BinaryCrossentropy

# 0 - Dataload 

In [128]:
G_train_df      = pd.read_csv ('preprocessed_data/G_train_df.csv')
T_train_df      = pd.read_csv ('preprocessed_data/T_train_df.csv')
G_T_train_df    = pd.read_csv ('preprocessed_data/G_T_train_df.csv')

In [129]:
random_state = 13
frac = 0.2

sampled_G_train_df      = G_train_df.sample(frac = frac, random_state=random_state)
sampled_T_train_df      = T_train_df.sample(frac = frac, random_state=random_state)
sampled_G_T_train_df    = G_T_train_df.sample(frac = frac, random_state=random_state)


In [130]:
print (sampled_G_T_train_df.shape)
print (sampled_G_train_df.shape)
print (sampled_T_train_df.shape)

(32410, 3)
(32410, 466)
(32410, 57)


In [131]:
G_train_df.head()

Unnamed: 0,group ID,S0002,S0003,S0004,S0005,S0006,S0008,S0009,S0010,S0012,...,S1039,S1046,S1047,S1051,S1058,S1059,S1060,S1072,technique ID,target
0,G0099,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,T1548,0.0
1,G0099,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,T1548.002,0.0
2,G0099,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,T1548.004,0.0
3,G0099,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,T1548.001,0.0
4,G0099,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,T1548.003,0.0


- removing ID columns and converting to numpy array

In [132]:
ids = ['group ID', 'technique ID']

y_train = sampled_G_T_train_df.drop(columns = ids).values
y_train.dtype

# G_train = sampled_G_train_df.drop(columns = ids)
G_train = sampled_G_train_df.drop(columns = ids).values

# T_train = sampled_T_train_df.drop(columns = ids)
T_train = sampled_T_train_df.drop(columns = ids).values



In [133]:
print (y_train.shape)
print (G_train.shape)
print (T_train.shape)

(32410, 1)
(32410, 464)
(32410, 55)


In [134]:
type(T_train)

numpy.ndarray

# 1-Model architecture

In [135]:
# input shapes config
num_G_features = G_train.shape[1]  # remove Group ID during training
num_T_features = T_train.shape[1]   # remove Movie ID during training

# output
num_outputs = 32



In [136]:
tf.random.set_seed(random_state)

# Group NN
Group_NN = tf.keras.models.Sequential(
    layers=[
    tf.keras.layers.Dense (256, activation = 'relu'),
    tf.keras.layers.Dense (128, activation = 'relu'),
    tf.keras.layers.Dense (num_outputs, activation  = 'linear'),
    ], 
    name= "Group_NN")
# input vector for user_NN
input_Group = tf.keras.layers.Input(shape = (num_G_features), name = "input_Group")
vg = Group_NN(input_Group)
# vg = tf.linalg.l2_normalize(vg, axis=1)

# Technique NN
Technique_NN = tf.keras.models.Sequential(
    layers = [
    tf.keras.layers.Dense (256, activation = 'relu'),
    tf.keras.layers.Dense (128, activation = 'relu'),
    tf.keras.layers.Dense (num_outputs, activation  = 'linear'),  
    ],
    name = "Technique_NN")
# input vector for Technique_NN
input_Technique = tf.keras.layers.Input (shape= (num_T_features), name = "input_Technique")
vt = Technique_NN (input_Technique)
# vt = tf.linalg.l2_normalize (vt, axis = 1)

output = tf.keras.layers.Dot (axes=1)(inputs= [vg, vt])

model = tf.keras.Model (inputs = [input_Group, input_Technique],
                        outputs = output, name = 'recsysNN_model')



tf.random.set_seed(random_state)
opt = keras.optimizers.Adam (learning_rate= 0.05)
model.compile (optimizer = opt, loss = BinaryCrossentropy (from_logits= True))

In [137]:
model.summary()

Model: "recsysNN_model"
__________________________________________________________________________________________________
 Layer (type)                   Output Shape         Param #     Connected to                     
 input_Group (InputLayer)       [(None, 464)]        0           []                               
                                                                                                  
 input_Technique (InputLayer)   [(None, 55)]         0           []                               
                                                                                                  
 Group_NN (Sequential)          (None, 32)           156064      ['input_Group[0][0]']            
                                                                                                  
 Technique_NN (Sequential)      (None, 32)           51360       ['input_Technique[0][0]']        
                                                                                     

In [138]:
from sklearn.model_selection import train_test_split

T_train, T_test = train_test_split(T_train, train_size= 0.8, shuffle= True, random_state= random_state)
G_train, G_test = train_test_split(G_train, train_size= 0.8, shuffle= True, random_state= random_state)
y_train, y_test = train_test_split(y_train, train_size= 0.8, shuffle= True, random_state= random_state)

print(f"G_train.shape: {G_train.shape}")
print(f"T_train.shape: {T_train.shape}")
print(f"G_test.shape: {G_test.shape}")
print(f"T_test.shape: {T_test.shape}")


G_train.shape: (25928, 464)
T_train.shape: (25928, 55)
G_test.shape: (6482, 464)
T_test.shape: (6482, 55)


In [139]:
tf.random.set_seed(random_state)

model.fit( 
    x = [
    G_train,
    T_train
    ],
    y = y_train,
    epochs = 30)

Epoch 1/30
Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Epoch 7/30
Epoch 8/30
Epoch 9/30
Epoch 10/30
Epoch 11/30
Epoch 12/30
Epoch 13/30
Epoch 14/30
Epoch 15/30
Epoch 16/30
Epoch 17/30
Epoch 18/30
Epoch 19/30
Epoch 20/30
Epoch 21/30
Epoch 22/30
Epoch 23/30

KeyboardInterrupt: 