In [None]:
import os

# Move to the project root so that the `src.*` imports and the relative
# `data/...` paths used further down resolve.
# The guard keeps the cell idempotent: without it, every re-run of this cell
# climbs two more directory levels.
# NOTE(review): assumes the project root is the directory that contains `src/`
# and that the notebook's own folder does not — confirm against the repo layout.
if not os.path.isdir("src"):
    os.chdir("../..")

In [None]:
import matplotlib.pyplot as plt
import pandas as pd
import tensorflow as tf
from tensorflow.keras.layers import Dense
from tensorflow.keras.models import clone_model
from typing import Tuple

from tf_agents.bandits.agents.lin_ucb_agent import LinearUCBAgent
from tf_agents.bandits.agents.neural_linucb_agent import NeuralLinUCBAgent
from tf_agents.drivers.dynamic_step_driver import DynamicStepDriver
from tf_agents.networks import network
from tf_agents.replay_buffers.tf_uniform_replay_buffer import TFUniformReplayBuffer
from tf_agents.specs import BoundedTensorSpec, TensorSpec


from src.practise.utils import predict
from src.practise.solution_metrics import RMSEMetric
from src.practise.solution_environment import SimpleEnvironment

## Data prep

In [None]:
BATCHSIZE = 1000
REPEATS = 2
DS_FOLDER = "data/4_dataset/"

# Both input files live in DS_FOLDER; build every path from it so that changing
# the folder in one place is enough.
context = pd.read_csv(f"{DS_FOLDER}user_context.csv")
ratings = pd.read_csv(f"{DS_FOLDER}reward_per_book.csv")
# Every ratings column except the first is treated as one action.
# NOTE(review): presumably the first column is `user_id` — confirm against the file.
actions = ratings.columns[1:]

# Left join: keep every row of `context`, attach the per-action columns.
train_df = context.merge(ratings, how="left", on="user_id")

# Dataset of (features, per-action values) pairs. Features are whatever is left
# after removing all columns that came from `ratings` (join key included).
train_ds = tf.data.Dataset.from_tensor_slices(
    (
        train_df.drop(columns=ratings.columns),
        train_df[actions].astype("int32"),
    )
)

# Number of BATCHSIZE-sized steps needed to see the data REPEATS times
# (truncated towards zero).
NUM_EPOCHS = int(train_df.shape[0] / BATCHSIZE * REPEATS)

In [None]:
# Scalar int32 action: an index into `actions`, i.e. 0 .. len(actions) - 1.
num_actions = len(actions)
action_spec = BoundedTensorSpec(
    shape=(),
    dtype=tf.int32,
    minimum=0,
    maximum=num_actions - 1,
    name="action",
)

# Weights handed to the environment below.
# NOTE(review): presumably one weight per action — confirm against SimpleEnvironment.
# Uniform alternative: tf.constant([1, 1, 1, 1, 1, 1], dtype=tf.float32)
weights = tf.constant([100, 100, 110, 100, 105, 1], dtype=tf.float32)

## Elements of TF-Agents

In [None]:
# The environment consumes the training dataset REPEATS times, BATCHSIZE rows per step.
repeated_train_ds = train_ds.repeat(REPEATS)
env = SimpleEnvironment(
    dataset=repeated_train_ds,
    batch_size=BATCHSIZE,
    action_spec=action_spec,
    weights=weights,
)

In [None]:
class EncodingNetwork(network.Network):
    """
    Wrapper that exposes a (stateless) keras.Model as a TF-Agents `Network`.
    """

    def __init__(self, model: tf.keras.Model, input_spec=None, name=None):
        """
        Args:
            model: tf.keras.Model
                can be Sequential or Functional, but !NOT! stateful (recurrent)
            input_spec: TensorSpec
                TensorSpec representing the input observations to the first layer
            name: str
                name of the network
        """
        super(EncodingNetwork, self).__init__(
            input_tensor_spec=input_spec, state_spec=(), name=name
        )
        self.model = model

    def copy(self, **kwargs) -> "EncodingNetwork":
        """Make a copy of a `Network` instance.

        The new instance never shares weights with the original. Unless a
        `model` is passed in `kwargs`, the wrapped Keras model is cloned; when
        both the original and the clone are already built, the clone is
        initialised with the original's current weight values. If either model
        is not built yet (no variables exist), the clone keeps its own freshly
        initialised weights.

        Args:
            **kwargs:
                args to override when recreating this network, commonly overridden args include 'name'.
                Passing 'model' skips the cloning step and uses the given model as-is.

        Returns:
            copy of this network

        NOTE(review): an earlier version of this docstring listed a RuntimeError
        when not executing eagerly; nothing in this method raises it explicitly,
        so verify whether `clone_model` imposes that requirement.
        """
        new_kwargs = dict(self._saved_kwargs, **kwargs)
        if "model" not in kwargs:
            new_model = clone_model(self.model)
            # clone_model instantiates new layers (and thus new weights), so the
            # values have to be copied over explicitly. This is only possible
            # once both models have created their variables.
            if self.model.built and new_model.built:
                new_model.set_weights(self.model.get_weights())
            new_kwargs["model"] = new_model
        return type(self)(**new_kwargs)

    def call(self, inputs, **kwargs) -> Tuple[tf.Tensor, Tuple]:
        """Forward pass through the network

        Args:
          inputs: tf.Tensor
              data to be passed through the network
          **kwargs:
              kwargs to pass to the model.__call__(); `step_type` and
              `network_state` are dropped before the model is called

        Returns:
          tuple(tf.Tensor, ())
            tuple of two elements:
              output of the network,
              empty tuple
        """
        # Only Networks are expected to know about step_type, network_state; not Keras models.
        network_only = ("step_type", "network_state")
        model_kwargs = {
            key: value for key, value in kwargs.items() if key not in network_only
        }
        return self.model(inputs, **model_kwargs), ()

In [None]:
# Encoder: three tanh Dense layers; the width of the last one is the encoding size.
model = tf.keras.models.Sequential(
    [Dense(width, "tanh") for width in (64, 32, 16)]
)

net = EncodingNetwork(model, env.time_step_spec().observation)

# Extra per-action predictions the policy should emit alongside the chosen action.
emitted_policy_info = (
    "predicted_rewards_mean",
    "predicted_rewards_optimistic",
)

agent = NeuralLinUCBAgent(
    time_step_spec=env.time_step_spec(),
    action_spec=env.action_spec(),
    encoding_network=net,
    encoding_network_num_train_steps=47,
    encoding_dim=net.model.layers[-1].units,
    optimizer=tf.keras.optimizers.Adam(),
    epsilon_greedy=0.95,
    emit_policy_info=emitted_policy_info,
)

In [None]:
# Metric tracked while the driver runs.
rmse = RMSEMetric(env.action_spec())

# Small buffer: it is cleared before every driver run in the training loop.
replay_buffer = TFUniformReplayBuffer(
    data_spec=agent.policy.trajectory_spec,
    batch_size=BATCHSIZE,
    max_length=5,
)

# Everything that should see each collected batch: the buffer and the metric.
replay_observer = [replay_buffer.add_batch, rmse]

driver = DynamicStepDriver(
    env=env, policy=agent.collect_policy, observers=replay_observer
)

## Training loop

In [None]:
rmse_values = []
for i in range(NUM_EPOCHS):
    print(f"\rEpoch: {i+1}/{NUM_EPOCHS}", end="")

    # Interaction phase: start from an empty buffer, let the collect policy
    # act on the environment, then record the current metric value.
    replay_buffer.clear()
    driver.run()
    rmse_values.append(rmse.result().numpy())

    # Training phase: read back what was just collected, once and in order,
    # and train the agent on it.
    experience = replay_buffer.as_dataset(
        sample_batch_size=BATCHSIZE,
        num_steps=1,
        single_deterministic_pass=True,
    )
    for sample in experience:
        # The first element of each dataset item is what gets trained on.
        agent.train(sample[0])

In [None]:
# One panel per action: RMSE over epochs, with its mean drawn as a red line.
fig, axes = plt.subplots(nrows=2, ncols=3, figsize=(30, 10))
fig.suptitle("RMSE value by action")
for panel_idx, panel in enumerate(axes.ravel()):
    per_epoch = [epoch_rmse[panel_idx] for epoch_rmse in rmse_values]
    mean_level = tf.math.reduce_mean(per_epoch)
    panel.plot(per_epoch)
    panel.hlines(mean_level, xmin=0, xmax=NUM_EPOCHS, color="red")
    panel.title.set_text(actions[panel_idx])

## Predict

In [None]:
# Score the training set in batches of 100; only the features are passed to `predict`.
scored_ds = train_ds.batch(100).map(
    lambda features, _rewards: predict(features, agent, env)
)
preds = list(scored_ds)

# Stitch the per-batch actions back into one flat array.
acts = tf.concat([step.action for step in preds], axis=0).numpy()

# Relabel the action columns 0..n-1 so that idxmax yields an action index
# comparable with the predicted action.
score_rat = train_df[actions.values].set_axis(range(len(actions)), axis=1)

scored_df = train_df.assign(
    pred=acts,
    rating=score_rat.idxmax(axis=1).values,
)

In [None]:
# accuracy: share of rows where the predicted action equals the top-rated one
scored_df["rating"].eq(scored_df["pred"]).mean()

In [None]:
# confusion matrix: rows = top-rated action, columns = predicted action,
# cells = number of user_id entries for that pair
pair_counts = scored_df.groupby(["rating", "pred"], as_index=False)["user_id"].count()
confmat = pair_counts.pivot_table(
    index="rating", columns="pred", values="user_id", fill_value=0
)
confmat

In [None]:
# close rates: for every predicted action, the share of its predictions that
# hit the top-rated action (diagonal cell / column total).
# The pivot only contains labels that actually occurred, so it is not guaranteed
# to be square; pad it to the union of row and column labels first so that the
# diagonal lines up with the column totals. Actions that were never predicted
# come out as NaN (0 / 0).
all_labels = sorted(set(confmat.index) | set(confmat.columns))
confmat_square = confmat.reindex(index=all_labels, columns=all_labels, fill_value=0)
confmat_square.values.diagonal() / confmat_square.sum()