In [6]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from tensorflow.keras.layers import Input, Embedding, LSTM, Flatten, Concatenate, Dense
from tensorflow.keras.models import Model
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences

In [7]:
# Load your dataset
df = pd.read_csv('generated_dataset.csv')

In [8]:
# Encode categorical variables
df['UserGender'] = df['UserGender'].map({'M': 0, 'F': 1})

In [11]:
# Tokenize and pad liked topics
max_topics_length = 50
tokenizer = Tokenizer()
tokenizer.fit_on_texts(df['LikedTopic'])  
topics_seq = tokenizer.texts_to_sequences(df['LikedTopic'])  
topics_pad = pad_sequences(topics_seq, maxlen=max_topics_length)

In [21]:
# Split the data
X_age = df['UserAge'].values
X_gender = df['UserGender'].values
X_topics = topics_pad
y = np.ones(len(df))  # Binary label: 1 for 'liked', assuming all entries are liked

In [22]:
# Split the data into train and test sets
X_age_train, X_age_test, X_gender_train, X_gender_test, X_topics_train, X_topics_test, y_train, y_test = train_test_split(
    X_age, X_gender, X_topics, y, test_size=0.2, random_state=42)

In [23]:
# Define the model
embedding_dim = 32
num_age_groups = len(df['UserAge'].unique())
num_genders = len(df['UserGender'].unique())
num_topics = len(tokenizer.word_index) + 1

In [24]:
# Input layers
age_input = Input(shape=(1,), dtype='int32', name='age_input')
gender_input = Input(shape=(1,), dtype='int32', name='gender_input')
topics_input = Input(shape=(max_topics_length,), dtype='int32', name='topics_input')

In [25]:
# Embedding layers
age_embedding = Embedding(input_dim=num_age_groups, output_dim=embedding_dim)(age_input)
gender_embedding = Embedding(input_dim=num_genders, output_dim=embedding_dim)(gender_input)

In [26]:
# LSTM layer for processing liked topics
topics_embedding = Embedding(input_dim=num_topics, output_dim=embedding_dim)(topics_input)
lstm_layer = LSTM(32)(topics_embedding)

In [27]:
# Flatten embeddings
age_flatten = Flatten()(age_embedding)
gender_flatten = Flatten()(gender_embedding)

In [28]:
# Concatenate embeddings
concatenated = Concatenate()([age_flatten, gender_flatten, lstm_layer])

In [29]:
# Neural network layers for prediction
fc1 = Dense(64, activation='relu')(concatenated)
fc2 = Dense(32, activation='relu')(fc1)
output = Dense(1, activation='sigmoid')(fc2)

In [30]:
# Create the model
model = Model(inputs=[age_input, gender_input, topics_input], outputs=output)

In [31]:
# Compile the model
model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])

In [33]:
# Train the model
model.fit(
    {
        'UserAge': X_age_train,
        'UserGender': X_gender_train,
        'LikedTopics': X_topics_train
    },
    y_train,
    epochs=10,
    batch_size=64,
    validation_split=0.2
)

Epoch 1/10


ValueError: in user code:

    File "C:\Dev\Python\Lib\site-packages\keras\src\engine\training.py", line 1338, in train_function  *
        return step_function(self, iterator)
    File "C:\Dev\Python\Lib\site-packages\keras\src\engine\training.py", line 1322, in step_function  **
        outputs = model.distribute_strategy.run(run_step, args=(data,))
    File "C:\Dev\Python\Lib\site-packages\keras\src\engine\training.py", line 1303, in run_step  **
        outputs = model.train_step(data)
    File "C:\Dev\Python\Lib\site-packages\keras\src\engine\training.py", line 1080, in train_step
        y_pred = self(x, training=True)
    File "C:\Dev\Python\Lib\site-packages\keras\src\utils\traceback_utils.py", line 70, in error_handler
        raise e.with_traceback(filtered_tb) from None
    File "C:\Dev\Python\Lib\site-packages\keras\src\engine\input_spec.py", line 197, in assert_input_compatibility
        raise ValueError(

    ValueError: Missing data for input "age_input". You passed a data dictionary with keys ['UserAge', 'UserGender', 'LikedTopics']. Expected the following keys: ['age_input', 'gender_input', 'topics_input']


In [None]:
# Evaluate the model
loss, accuracy = model.evaluate(X_test, y_test)
print(f"Test Loss: {loss}, Test Accuracy: {accuracy}")

In [None]:
# Make recommendations for a new user
new_user_age = np.array([25])
new_user_gender = np.array([0])  # 0 for Male, 1 for Female
new_user_liked_topics = tokenizer.texts_to_sequences(["topic1 topic2 topic3"])
new_user_liked_topics = pad_sequences(new_user_liked_topics, maxlen=max_topics_length)
predicted_likelihood = model.predict([new_user_age, new_user_gender, new_user_liked_topics])
print(f"Predicted Likelihood of Liking: {predicted_likelihood[0][0]}")