In [None]:
import pandas as pd
import tensorflow as tf
from sklearn.preprocessing import MinMaxScaler
from sklearn.utils import shuffle

from .utils._logger import logger
from .utils._validation import config_args

In [None]:
# Data Loading
from pandas import DataFrame

# Read the ratings CSV from the configured path. Any failure is logged and then
# re-raised, so the notebook stops here instead of continuing without data.
try:
    ratings_df: DataFrame = pd.read_csv(config_args.data_path)
    logger.info("Data loaded successfully.")
except FileNotFoundError:
    # A missing file gets its own message that includes the offending path.
    logger.error(f"Data file not found at: {config_args.data_path}")
    raise
except Exception as load_error:
    logger.error(f"Error loading data: {load_error}")
    raise

In [None]:
# Feature Engineering
from pandas import Series

# Make the rating column floating point.
ratings_df["Book-Rating"] = ratings_df["Book-Rating"].astype(float)

# Replace the raw user identifiers with integer category codes.
# NOTE: pandas encodes a missing value as code -1; such rows would not be a
# valid embedding index later on.
ratings_df["User-ID"] = pd.Categorical(ratings_df["User-ID"])
# The annotation is quoted on purpose: module-level annotations are evaluated
# at runtime, and subscripting pandas.Series there raises TypeError.
user_ids: "Series[int]" = ratings_df["User-ID"].cat.codes
ratings_df["new_user"] = user_ids

# Same encoding for the book identifiers (ISBN).
ratings_df["ISBN"] = pd.Categorical(ratings_df["ISBN"])
book_ids: "Series[int]" = ratings_df["ISBN"].cat.codes
ratings_df["new_ISBN"] = book_ids

# Only the integer-coded columns are kept; the raw identifier columns are dropped.
ratings_df = ratings_df.drop(["User-ID", "ISBN"], axis=1)

In [None]:
# Data Scaling
# Rescale the rating column with a min-max scaler; `scaler` stays available
# at module level after this cell.
# NOTE(review): the scaler is fitted on every row, before the train/test split
# in the next cell — confirm this is intended.
scaler = MinMaxScaler()
ratings_df["Book-Rating"] = scaler.fit_transform(ratings_df.loc[:, ["Book-Rating"]])

# Pull the three model columns out of the frame as NumPy arrays.
user_ids, book_ids, ratings = (
    ratings_df[column].to_numpy()
    for column in ("new_user", "new_ISBN", "Book-Rating")
)

In [None]:
# Data Splitting
# Shuffle the three arrays together (fixed seed) so rows stay aligned.
try:
    user_ids, book_ids, ratings = shuffle(user_ids, book_ids, ratings, random_state=42)
except Exception as shuffle_error:
    logger.error(f"Error shuffling data: {shuffle_error}")
    raise

# The first 80% of the shuffled rows form the training split, the rest the test split.
ntrain = int(0.8 * len(ratings))
train_user, test_user = user_ids[:ntrain], user_ids[ntrain:]
train_book, test_book = book_ids[:ntrain], book_ids[ntrain:]
train_ratings, test_ratings = ratings[:ntrain], ratings[ntrain:]

In [None]:
# Model Building
def build_model(num_users: int, num_books: int, embedding_size: int):
    """Build the (uncompiled) two-input Keras rating model.

    Each input is a single integer ID that is looked up in its own embedding
    table. The two embeddings are flattened, concatenated and passed through
    a stack of ReLU dense layers (with one dropout layer) down to a single
    linear output unit.

    Args:
        num_users: Number of rows in the user embedding table.
        num_books: Number of rows in the book embedding table.
        embedding_size: Width of both embeddings; the hidden dense layers are
            sized from it as well.

    Returns:
        A ``tf.keras.Model`` taking ``[user_input, book_input]`` and producing
        one value per example.
    """
    layers = tf.keras.layers

    user_input = tf.keras.Input(shape=(1,), name="user_input")
    book_input = tf.keras.Input(shape=(1,), name="book_input")

    user_vector = layers.Embedding(num_users, embedding_size, name="user_embedding")(
        user_input
    )
    book_vector = layers.Embedding(num_books, embedding_size, name="book_embedding")(
        book_input
    )

    # Collapse the length-1 sequence axis so each branch is a flat vector.
    user_vector = layers.Flatten()(user_vector)
    book_vector = layers.Flatten()(book_vector)

    hidden = layers.Concatenate()([user_vector, book_vector])
    for width in (embedding_size * 2, embedding_size, embedding_size):
        hidden = layers.Dense(width, activation="relu")(hidden)
    hidden = layers.Dropout(0.2)(hidden)
    hidden = layers.Dense(embedding_size, activation="relu")(hidden)
    prediction = layers.Dense(1, activation="linear")(hidden)

    return tf.keras.Model(inputs=[user_input, book_input], outputs=prediction)


# An embedding table needs one row per possible index, i.e. (largest index + 1).
# Taking the array maximum is vectorised and stays correct even if some codes
# are absent; for densely coded IDs it equals the number of distinct IDs.
num_users = int(user_ids.max()) + 1
num_books = int(book_ids.max()) + 1

model = build_model(num_users, num_books, config_args.embedding_size)

# Regression setup: optimise mean squared error and report mean absolute error.
model.compile(
    optimizer=tf.keras.optimizers.Adam(learning_rate=config_args.learning_rate),
    loss="mean_squared_error",
    metrics=["mean_absolute_error"],
)
model.summary()

In [None]:
# Model Training
# Fit on the training split; the held-out split is passed as validation data so
# Keras reports `val_loss` after every epoch. Failures are logged and re-raised.
try:
    r = model.fit(
        [train_user, train_book],
        train_ratings,
        batch_size=config_args.batch_size,
        epochs=config_args.num_epochs,
        validation_data=([test_user, test_book], test_ratings),
    )
    logger.info(
        f"Loss history: {r.history['loss']}, "
        f"val_loss history: {r.history['val_loss']}"
    )
except Exception as training_error:
    logger.error(f"Error during model training: {training_error}")
    raise

In [None]:
# Model Saving
# Write the trained model to "model.keras" (relative to the working directory);
# a failure is logged and re-raised.
try:
    model.save("model.keras")
    logger.info("Model saved successfully.")
except Exception as save_error:
    logger.error(f"Error saving model: {save_error}")
    raise