In [2]:
import pandas as pd
import numpy as np

import sys
sys.path.append('..')  # Add the parent directory to the Python path

from src.users import (
    CheapSeekerUser,
    BrandLoverUser,
    RandomChooserUser,
    ValueOptimizerUser,
    FamiliaritySeekerUser
)

from src.recommenders import (
    RandomRecommender,
    PopularityRecommender
)

from src.env import ShopEnv
from src.utils import (
    load_catalog,
    action_to_indices,
    snake_case_to_camel_case
)
from src.data.encoders import encode_items
from src.config import Config

## DL

In [2]:
# Instantiate every simulated user declared under "user_params" in the project
# config, plus the two baseline recommenders, keyed by short name.
config = Config()
user_params = config.get("user_params")

# A config key such as "cheap_seeker" is turned into a class name via
# snake_case_to_camel_case(key) + 'User' and looked up among the names imported
# into this notebook; the key itself is passed as the first constructor argument.
username_to_user = dict(
    (username, globals()[snake_case_to_camel_case(username) + 'User'](username, **kwargs))
    for username, kwargs in user_params.items()
)

name_to_recommender = dict(
    random=RandomRecommender(),
    popularity=PopularityRecommender(),
)

In [3]:
# Load the product catalog (path is relative to this notebook's directory)
# and preview its first rows.
catalog = load_catalog('../src/data/catalog.csv')
catalog.head()

Unnamed: 0,product_id,name,category,subcategory,price,quality_score,brand,color,popularity,release_date,description,release_days
0,1,Chair TO##,Home,Chair,17.17,0.834,BrandG,White,76,2025-04-30,Send situation town sea media wonder party fee...,20208
1,2,Perfume MX##,Beauty,Perfume,64.88,0.985,BrandL,White,92,2025-04-21,Radio sense leave real knowledge four institut...,20199
2,3,Tennis Racket MS##,Sports,Tennis Racket,23.49,0.717,BrandL,Green,70,2024-06-14,World article describe answer economy enjoy do...,19888
3,4,Novel PB##,Books,Novel,81.27,0.112,BrandI,Black,27,2023-11-12,Dog reflect explain program check letter possi...,19673
4,5,Lamp BA##,Home,Lamp,28.42,0.029,BrandE,Black,17,2025-05-17,Stand guy argue interesting hot magazine any l...,20225


In [4]:
# Run the project's encode_items helper on the catalog, passing the feature names
# stored under config['catalog']['cat_features'].
# NOTE(review): judging by the recorded output, these columns end up one-hot
# encoded (category_*, brand_*, color_*) — confirm in src.data.encoders.
encoded_items = encode_items(catalog, config.get('catalog')['cat_features'])
encoded_items

Unnamed: 0,product_id,price,quality_score,popularity,release_days,category_Beauty,category_Books,category_Clothing,category_Electronics,category_Home,...,brand_BrandL,brand_BrandM,brand_BrandN,brand_BrandO,color_Black,color_Blue,color_Green,color_Red,color_White,color_Yellow
0,1,17.17,0.834,76,20,0,0,0,0,1,...,0,0,0,0,0,0,0,0,1,0
1,2,64.88,0.985,92,29,1,0,0,0,0,...,1,0,0,0,0,0,0,0,1,0
2,3,23.49,0.717,70,340,0,0,0,0,0,...,1,0,0,0,0,0,1,0,0,0
3,4,81.27,0.112,27,555,0,1,0,0,0,...,0,0,0,0,1,0,0,0,0,0
4,5,28.42,0.029,17,3,0,0,0,0,1,...,0,0,0,0,1,0,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
245,246,168.41,0.061,28,57,0,1,0,0,0,...,0,0,0,0,0,0,1,0,0,0
246,247,36.82,0.537,59,168,1,0,0,0,0,...,0,0,0,1,0,1,0,0,0,0
247,248,9.82,0.555,82,699,0,0,0,0,0,...,0,0,0,0,0,0,1,0,0,0
248,249,33.48,0.128,26,378,1,0,0,0,0,...,0,1,0,0,0,0,0,0,1,0


In [5]:
# Build a shop environment around the "cheap_seeker" simulated user and
# reset it to obtain the initial state and info dict.
env = ShopEnv(catalog, username_to_user['cheap_seeker'])
state, info = env.reset()

In [6]:
# Choose which baseline to try; swap the commented line to use the popularity baseline.
recommender = name_to_recommender['random']
# recommender = name_to_recommender['popularity']
# Ask for 10 recommendations. In the recorded output the action is a 0/1 vector
# with one entry per candidate and exactly ten entries set to 1.
action = recommender.recommend(state, num_recommendations=10)
action

array([0, 0, 1, 0, 0, 0, 0, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1,
       0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1,
       0, 0, 0, 1, 0, 0])

In [20]:
# Roll out one full episode with the currently selected recommender,
# printing the chosen item indices and the reward at every step.
state, info = env.reset()
done = truncated = False
# env.step() returns a separate `truncated` flag next to `done` (Gymnasium-style
# 5-tuple). Stop on either one, so an episode that ends only through truncation
# cannot keep this loop spinning forever.
while not (done or truncated):
    action = recommender.recommend(state, num_recommendations=10)
    action_indices = action_to_indices(action)
    print("Action Indices:", action_indices)

    # Take a step in the environment
    state, reward, done, truncated, info = env.step(action)
    # print_state_info(info)
    print("reward:", reward)
    print("done:", done)
    print()

Action Indices: [2, 13, 15, 20, 21, 23, 29, 32, 39, 48]
reward: 0.4
done: False

Action Indices: [1, 2, 4, 7, 13, 29, 32, 34, 36, 48]
reward: 0.2
done: False

Action Indices: [3, 13, 15, 18, 22, 23, 25, 32, 35, 41]
reward: 0.6000000000000001
done: False

Action Indices: [0, 1, 3, 4, 8, 19, 27, 35, 41, 47]
reward: 0.30000000000000004
done: False

Action Indices: [0, 9, 11, 12, 15, 20, 21, 24, 36, 38]
reward: 0.4
done: True



In [14]:
# Show the 'recommended_items' entry of the info dict returned by the latest environment call.
info['recommended_items']

Unnamed: 0,product_id,name,category,subcategory,price,quality_score,brand,color,popularity,release_date,description,release_days
0,3,Tennis Racket MS##,Sports,Tennis Racket,23.49,0.717,BrandL,Green,70,2024-06-14,World article describe answer economy enjoy do...,19888
1,56,Biography MP##,Books,Biography,36.15,0.138,BrandL,Yellow,26,2023-08-01,Wrong bad Democrat idea person current world add.,19570
2,105,Table QJ##,Home,Table,16.03,0.652,BrandD,Black,72,2023-08-10,Author traditional provide notice two perform ...,19579
3,109,Novel HP##,Books,Novel,19.2,0.8,BrandK,Green,82,2024-11-16,Note recognize other answer development term i...,20043
4,132,Biography HG##,Books,Biography,27.39,0.637,BrandN,Black,80,2024-08-25,Bill bring reason what southern police second ...,19960
5,190,Board Game RB##,Toys,Board Game,16.16,0.663,BrandB,Black,73,2025-02-04,Industry class poor near study network thought...,20123
6,214,Puzzle JI##,Toys,Puzzle,26.98,0.748,BrandH,Green,72,2023-08-11,Structure become research soldier again would ...,19580
7,219,Cream VU##,Beauty,Cream,16.09,0.347,BrandJ,Yellow,46,2024-02-16,Eight allow fight everyone during because very...,19769
8,220,Cream BM##,Beauty,Cream,37.73,0.265,BrandH,Black,41,2024-04-07,Thousand serve strong radio through send reduce.,19820
9,226,T-Shirt AB##,Clothing,T-Shirt,19.01,0.426,BrandJ,Red,64,2023-07-31,Room five type new administration reflect reac...,19569


In [13]:
# Show the click-through and buy-through rates reported in the same info dict.
info['click_through_rate'], info['buy_through_rate']

(np.float64(0.1), np.float64(0.0))

## RL Recommender

In [None]:
class RLRecommender:
    """Placeholder RL recommender that currently selects items uniformly at random.

    NOTE(review): a later cell imports ``RLRecommender`` from ``src.recommenders``,
    which rebinds this name once that cell has run.
    """

    def __init__(self, model_path=None):
        """Remember the optional model path; no model is loaded yet.

        Args:
            model_path: Optional location of a saved model. It is only stored,
                because the mock implementation below never reads it.
        """
        self.model_path = model_path
        # Ellipsis is used as a "not implemented yet" placeholder for the model.
        self.model = ...

    def recommend(self, state: dict, num_recommendations: int=10) -> np.ndarray:
        """Return a 0/1 selection vector over the candidate items.

        Args:
            state: Observation dict; only ``state['candidates_num_features']`` is
                read, and its first dimension is taken as the number of candidates.
            num_recommendations: How many candidates to select. If it exceeds the
                number of candidates, every candidate is selected instead.

        Returns:
            Integer array of length ``num_candidates`` with a 1 at each selected
            index and 0 elsewhere.
        """
        # Mocking random behavior for now
        num_candidates = state['candidates_num_features'].shape[0]
        action = np.zeros(num_candidates, dtype=int)
        # Sampling without replacement raises ValueError when asked for more items
        # than exist, so cap the request at the size of the candidate pool.
        num_selected = min(num_recommendations, num_candidates)
        indices = np.random.choice(num_candidates, size=num_selected, replace=False)
        action[indices] = 1
        return action

## Evaluation

### Imports

In [2]:
import pandas as pd
import numpy as np

import sys
sys.path.append('..')  # Add the parent directory to the Python path

from stable_baselines3 import PPO
from stable_baselines3.common.vec_env import DummyVecEnv
from stable_baselines3.common.monitor import Monitor

from src.env import ShopEnv
from src.users import (
    CheapSeekerUser,
    BrandLoverUser,
    RandomChooserUser,
    ValueOptimizerUser,
    FamiliaritySeekerUser
)
from src.config import Config
from src.utils import (
    snake_case_to_camel_case,
    load_catalog,
    action_to_indices
)
from src.policies import TopKMultiInputPolicy
from src.recommenders import (
    RandomRecommender,
    PopularityRecommender,
    RLRecommender
)

### Global Variables

In [15]:
# Shared setup for the evaluation section: catalog, simulated users,
# recommenders, and the parameters handed to the RL training environment.
catalog_path = "../src/data/catalog.csv"
catalog = load_catalog(catalog_path)
config = Config()
user_params = config.get("user_params")

# Create user mapping
# Each config key is mapped to a class name (snake_case_to_camel_case(key) + 'User')
# and resolved among the classes imported into this notebook.
username_to_user = {
    user: globals()[snake_case_to_camel_case(user) + 'User'](user, **params)
    for user, params in user_params.items()
}

name_to_recommender = {
    "random": RandomRecommender(),
    "popularity": PopularityRecommender(),
    "rl": RLRecommender()
}

# DummyVecEnv with all users
users_subset = [
    "cheap_seeker",
    "brand_lover",
    "random_chooser",
    "value_optimizer",
    "familiarity_seeker"
]

env_params = {
    "catalog": catalog,
    "username_to_user": username_to_user,
    # Reuse the list defined above instead of repeating it, so there is a single
    # place to edit; pass a copy so env_params keeps its own independent list.
    "users_subset": list(users_subset)
}


In [16]:
# Give each recommender its own name for the side-by-side comparison further down.
random_recommender, popularity_recommender, rl_recommender = (
    name_to_recommender[key] for key in ('random', 'popularity', 'rl')
)
# rl_recommender.load_model('../models/ppo_with_all_users.zip')

In [None]:
# Call the RL recommender's train() with the environment parameters defined above.
rl_recommender.train(
    env_params=env_params,
    # Same value as before (it was spelled `10_0000`); regrouped in thousands so it
    # reads unambiguously as one hundred thousand steps.
    total_timesteps=100_000,
    num_recommendations=10,
)

Output()

In [21]:
# NOTE(review): the output recorded for this cell is an AttributeError about a missing
# `vec_env` attribute. Presumably that attribute is only created by train(), so this
# call needs the training cell to have run in the same kernel session — confirm
# against src.recommenders.
rl_recommender.evaluate(100)

AttributeError: 'RLRecommender' object has no attribute 'vec_env'

In [10]:
# Write the current model to disk (path is relative to this notebook's directory).
rl_recommender.save_model('../models/ppo_latest.zip')

Saving model to ../models/ppo_latest.zip


In [17]:
# Load the checkpoint from the same path used by the save cell, passing the custom policy class.
rl_recommender.load_model('../models/ppo_latest.zip', policy=TopKMultiInputPolicy)

Loading model from ../models/ppo_latest.zip


In [27]:
# Replay one "cheap_seeker" episode and print what each recommender would pick at
# every step. Only the RL recommender's action is actually sent to the environment;
# the random and popularity picks are computed for display only.
env = ShopEnv(catalog, username_to_user['cheap_seeker'])
state, info = env.reset()
done = truncated = False
# env.step() returns a separate `truncated` flag next to `done`. Stop on either one,
# so an episode that ends only through truncation cannot loop forever.
while not (done or truncated):
    random_action = random_recommender.recommend(state, num_recommendations=10)
    random_action_indices = action_to_indices(random_action)
    print("Random Action Indices:", random_action_indices)
    popularity_action = popularity_recommender.recommend(state, num_recommendations=10)
    popularity_action_indices = action_to_indices(popularity_action)
    print("Popularity Action Indices:", popularity_action_indices)
    action = rl_recommender.recommend(state, num_recommendations=10)
    action_indices = action_to_indices(action)
    print("RL Action Indices:", action_indices)
    # Sanity check: number of items the RL recommender selected.
    print(len(action_indices))

    # Take a step in the environment
    state, reward, done, truncated, info = env.step(action)
    print("reward:", reward)
    print("done:", done)
    print()
    

Random Action Indices: [0, 3, 5, 10, 17, 24, 28, 34, 36, 45]
Popularity Action Indices: [1, 5, 7, 9, 10, 27, 29, 33, 34, 35]
RL Action Indices: [7, 9, 15, 17, 21, 30, 39, 41, 47, 49]
10
reward: 0.0
done: False

Random Action Indices: [0, 4, 13, 22, 26, 30, 32, 40, 44, 47]
Popularity Action Indices: [5, 7, 11, 19, 21, 24, 32, 36, 45, 46]
RL Action Indices: [8, 9, 11, 17, 18, 20, 22, 44, 45, 47]
10
reward: 0.0
done: False

Random Action Indices: [4, 9, 19, 24, 27, 31, 34, 43, 46, 48]
Popularity Action Indices: [3, 9, 11, 15, 23, 31, 33, 34, 35, 42]
RL Action Indices: [4, 7, 15, 17, 18, 21, 37, 39, 41, 44]
10
reward: 0.7
done: False

Random Action Indices: [10, 11, 15, 19, 34, 35, 36, 38, 41, 49]
Popularity Action Indices: [0, 1, 4, 6, 20, 21, 23, 26, 32, 46]
RL Action Indices: [0, 7, 9, 15, 17, 29, 41, 44, 47, 49]
10
reward: 0.4
done: False

Random Action Indices: [5, 8, 10, 11, 16, 19, 21, 32, 34, 47]
Popularity Action Indices: [0, 2, 9, 10, 16, 18, 21, 27, 44, 47]
RL Action Indices: [0

In [22]:
def evaluate_recommender(recommender, env, num_episodes=100, num_recommendations=10):
    """Run a recommender for several episodes and report its mean episode reward.

    Args:
        recommender: Object exposing ``recommend(state, num_recommendations=...)``.
        env: Environment whose ``reset()`` returns ``(state, info)`` and whose
            ``step(action)`` returns ``(state, reward, done, truncated, info)``.
        num_episodes: Number of episodes to roll out.
        num_recommendations: Number of items requested from the recommender at
            every step (defaults to the previously hard-coded value of 10).

    Returns:
        The mean over episodes of the summed per-step reward, as computed by
        ``np.mean``. The same value is also printed.
    """
    total_rewards = []
    for _ in range(num_episodes):
        state, info = env.reset()
        done = truncated = False
        total_reward = 0
        # End the episode on either flag; checking only `done` would never exit
        # when the environment stops an episode through truncation alone.
        while not (done or truncated):
            action = recommender.recommend(state, num_recommendations=num_recommendations)
            state, reward, done, truncated, info = env.step(action)
            total_reward += reward
        total_rewards.append(total_reward)
    average_reward = np.mean(total_rewards)
    print(f"Average Reward over {num_episodes} episodes: {average_reward}")
    return average_reward

In [21]:
# Leftover shape check. The recorded output `(5,)` shows `reward` was a length-5 array
# when this ran — presumably one entry per user from a vectorised env; confirm.
reward.shape

(5,)

## Result Analysis

In [5]:
# Read the recommender-comparison CSV (path relative to this notebook) and display it.
# pandas is already imported in the import cells above; this local import only lets
# the cell run on its own.
import pandas as pd
metrics = pd.read_csv('../src/metrics/recommender_comparison.csv')
metrics

Unnamed: 0,Recommender,User,Average Reward
0,random,cheap_seeker,1.861
1,popularity,cheap_seeker,1.843
2,rl,cheap_seeker,1.834
3,random,brand_lover,1.754
4,popularity,brand_lover,1.65
5,rl,brand_lover,1.767
6,random,value_optimizer,1.743
7,popularity,value_optimizer,1.974
8,rl,value_optimizer,1.667
9,random,familiarity_seeker,0.014


## Users

In [3]:
# Draw one random preference weight per brand and per colour.
# Raising a uniform [0, 1) sample to the 5th power pushes most weights towards 0,
# leaving only a few brands/colours with a large weight.
# No seed is set, so the weights differ on every run.
BRANDS = ["Brand" + chr(code) for code in range(ord("A"), ord("O") + 1)]  # A–O
COLORS = ["White", "Black", "Red", "Blue", "Green", "Yellow"]
power = 5
brand_weights = dict((brand, np.random.rand() ** power) for brand in BRANDS)
color_weights = dict((color, np.random.rand() ** power) for color in COLORS)
color_weights

{'White': 0.09470874002848481,
 'Black': 0.14567775022972448,
 'Red': 0.017433425579196523,
 'Blue': 0.37628387578227307,
 'Green': 0.6653733822187077,
 'Yellow': 0.006109355805172206}

In [5]:
# Display the randomly drawn per-brand weights.
brand_weights

{'BrandA': 0.5729550570477008,
 'BrandB': 0.004980885074884081,
 'BrandC': 1.516876189441327e-10,
 'BrandD': 0.260225277118719,
 'BrandE': 0.0738844694857421,
 'BrandF': 0.05370536195408048,
 'BrandG': 0.02256994679147764,
 'BrandH': 0.07540303062942973,
 'BrandI': 0.3251875774469237,
 'BrandJ': 0.001179427675238534,
 'BrandK': 0.0014212710996049664,
 'BrandL': 0.10840986357263528,
 'BrandM': 0.0013818673721953775,
 'BrandN': 2.098993484748978e-05,
 'BrandO': 0.2941492051729788}

In [3]:
# Hand-built user instances for the experiments below.
# NOTE(review): the two numeric arguments are passed positionally; their meaning is
# defined by the user classes in src.users — confirm before tuning them.
cheap_seeker = CheapSeekerUser("user_A", 0.92, 0.95)
# brand_lover = BrandLoverUser("user_B", 0.4, 0.6, brand_weights, color_weights)
# value_optimizer = ValueOptimizerUser("user_C", 0.2, 0.4)
# The next two are referenced by later cells (`familiarity_seeker.react(...)` and
# `random_chooser.react(...)`), so they must exist for a Restart & Run All to work.
familiarity_seeker = FamiliaritySeekerUser("user_D", 0.6, 0.8)
random_chooser = RandomChooserUser("user_E", 0.85, 0.97)

In [None]:
# Show the familiarity-seeking user a sequence of overlapping catalog windows:
# each window holds up to WINDOW_SIZE rows and the window start advances by a random
# stride of 1-9 rows until it reaches `end`.
# Only the reaction to the last window is kept in clicked_items / bought_items.
WINDOW_SIZE = 15
start = 0
end = 240
while start < end:
    # Draw the stride before calling react(), as the original cell did.
    step = np.random.randint(1, 10)
    clicked_items, bought_items = familiarity_seeker.react(catalog[start:start + WINDOW_SIZE])
    start += step


In [53]:
# Let one user react to (up to) the first 1000 catalog rows; uncomment a different
# line to test another user. `catalog_df` was never defined in this notebook — the
# catalog is loaded as `catalog`, so that name is used here.
# clicked_items, bought_items = cheap_seeker.react(catalog[:1000])
# clicked_items, bought_items = brand_lover.react(catalog[:1000])
# clicked_items, bought_items = value_optimizer.react(catalog[:1000])
# clicked_items, bought_items = familiarity_seeker.react(catalog[:1000])
clicked_items, bought_items = random_chooser.react(catalog[:1000])

In [54]:
# Click-through and buy-through rates: the sum of each reaction sequence divided by
# its length.
ctr, btr = (sum(flags) / len(flags) for flags in (clicked_items, bought_items))
print(f"CTR: {ctr:.2f}, BTR: {btr:.2f}")

CTR: 0.17, BTR: 0.10


In [None]:
# Preview the first ten catalog rows.
catalog.head(10)

Unnamed: 0,product_id,name,category,subcategory,price,quality_score,brand,color,popularity,release_date,description
0,1,Chair TO##,Home,Chair,17.17,0.834,BrandG,White,76,2025-04-30,Send situation town sea media wonder party fee...
1,2,Perfume MX##,Beauty,Perfume,64.88,0.985,BrandL,White,92,2025-04-21,Radio sense leave real knowledge four institut...
2,3,Tennis Racket MS##,Sports,Tennis Racket,23.49,0.717,BrandL,Green,70,2024-06-14,World article describe answer economy enjoy do...
3,4,Novel PB##,Books,Novel,81.27,0.112,BrandI,Black,27,2023-11-12,Dog reflect explain program check letter possi...
4,5,Lamp BA##,Home,Lamp,28.42,0.029,BrandE,Black,17,2025-05-17,Stand guy argue interesting hot magazine any l...
5,6,Cookbook KT##,Books,Cookbook,91.26,0.064,BrandG,Blue,28,2024-03-28,Whether thank six current watch say process ac...
6,7,T-Shirt SZ##,Clothing,T-Shirt,13.44,0.981,BrandC,Blue,93,2023-09-15,Beat front staff from available best during.
7,8,Table HK##,Home,Table,9.9,0.771,BrandM,White,83,2024-05-23,Evening present control major pay one bill ans...
8,9,T-Shirt VQ##,Clothing,T-Shirt,32.72,0.674,BrandB,Red,75,2024-09-01,Painting quality chance energy practice green ...
9,10,Table CE##,Home,Table,39.68,0.713,BrandC,White,83,2024-09-09,Suffer school wall protect guess detail number...
