In [2]:
import pandas as pd
import numpy as np

import sys
sys.path.append('..')  # Add the parent directory to the Python path

from src.users import (
    CheapSeekerUser,
    BrandLoverUser,
    RandomChooserUser,
    ValueOptimizerUser,
    FamiliaritySeekerUser
)

from src.env import ShopEnv

In [2]:
# Load the product catalog and parse `release_date` into datetimes; values
# that cannot be parsed become NaT because of errors='coerce'.
catalog_df = pd.read_csv('../src/data/catalog.csv').assign(
    release_date=lambda frame: pd.to_datetime(frame['release_date'], errors='coerce')
)
# Optional categorical casts, currently disabled:
# catalog_df['brand'] = catalog_df['brand'].astype('category')
# catalog_df['category'] = catalog_df['category'].astype('category')
# catalog_df['subcategory'] = catalog_df['subcategory'].astype('category')
# catalog_df['color'] = catalog_df['color'].astype('category')
catalog_df.head()

Unnamed: 0,product_id,name,category,subcategory,price,quality_score,brand,color,popularity,release_date,description
0,1,Chair TO##,Home,Chair,17.17,0.834,BrandG,White,76,2025-04-30,Send situation town sea media wonder party fee...
1,2,Perfume MX##,Beauty,Perfume,64.88,0.985,BrandL,White,92,2025-04-21,Radio sense leave real knowledge four institut...
2,3,Tennis Racket MS##,Sports,Tennis Racket,23.49,0.717,BrandL,Green,70,2024-06-14,World article describe answer economy enjoy do...
3,4,Novel PB##,Books,Novel,81.27,0.112,BrandI,Black,27,2023-11-12,Dog reflect explain program check letter possi...
4,5,Lamp BA##,Home,Lamp,28.42,0.029,BrandE,Black,17,2025-05-17,Stand guy argue interesting hot magazine any l...


## Users

In [3]:
# Brand names "BrandA" … "BrandO" (15 brands) and the colour palette.
BRANDS = [f"Brand{chr(code)}" for code in range(ord("A"), ord("O") + 1)]
COLORS = ["White", "Black", "Red", "Blue", "Green", "Yellow"]

# Each weight is a uniform [0, 1) draw raised to `power`; with power = 5 most
# weights land close to 0 and only a few stay large.
power = 5
brand_weights = {name: np.random.rand() ** power for name in BRANDS}
color_weights = {shade: np.random.rand() ** power for shade in COLORS}
color_weights

{'White': 0.09470874002848481,
 'Black': 0.14567775022972448,
 'Red': 0.017433425579196523,
 'Blue': 0.37628387578227307,
 'Green': 0.6653733822187077,
 'Yellow': 0.006109355805172206}

In [5]:
# Display the randomly drawn per-brand weights (one entry per name in BRANDS).
brand_weights

{'BrandA': 0.5729550570477008,
 'BrandB': 0.004980885074884081,
 'BrandC': 1.516876189441327e-10,
 'BrandD': 0.260225277118719,
 'BrandE': 0.0738844694857421,
 'BrandF': 0.05370536195408048,
 'BrandG': 0.02256994679147764,
 'BrandH': 0.07540303062942973,
 'BrandI': 0.3251875774469237,
 'BrandJ': 0.001179427675238534,
 'BrandK': 0.0014212710996049664,
 'BrandL': 0.10840986357263528,
 'BrandM': 0.0013818673721953775,
 'BrandN': 2.098993484748978e-05,
 'BrandO': 0.2941492051729788}

In [3]:
# Instantiate one simulated shopper per behaviour type. Later cells call
# `familiarity_seeker.react(...)` and `random_chooser.react(...)`, so every
# persona must exist after a fresh "Restart & Run All"; leaving them commented
# out makes those cells fail with NameError.
# NOTE(review): the two numeric arguments are passed positionally; their
# meaning is defined in src.users — confirm against the constructors.
cheap_seeker = CheapSeekerUser("user_A", 0.92, 0.95)
# BrandLoverUser additionally receives the per-brand and per-colour weights
# drawn in the cell above.
brand_lover = BrandLoverUser("user_B", 0.4, 0.6, brand_weights, color_weights)
value_optimizer = ValueOptimizerUser("user_C", 0.2, 0.4)
familiarity_seeker = FamiliaritySeekerUser("user_D", 0.6, 0.8)
random_chooser = RandomChooserUser("user_E", 0.85, 0.97)

In [45]:
# Replay overlapping catalog windows to one simulated user: each window holds
# up to 15 consecutive rows, and the window start advances by a random stride
# until it reaches row 240.
start = 0
end = 240
while start < end:
    # Stride for this iteration: 1-9 inclusive (randint's upper bound is
    # exclusive). It is drawn before react() is called.
    step = np.random.randint(1, 10)
    # Only the reaction to the last window survives the loop; earlier results
    # are overwritten on each iteration.
    clicked_items, bought_items = familiarity_seeker.react(catalog_df[start:start + 15])
    start += step


In [53]:
# Let one simulated user react to the first 1000 catalog rows.
# To inspect another persona, replace `random_chooser` with cheap_seeker,
# brand_lover, value_optimizer or familiarity_seeker.
clicked_items, bought_items = random_chooser.react(catalog_df.head(1000))

In [54]:
# Click-through and buy-through rates: the sum of each reaction vector divided
# by its own length.
n_clicked, n_bought = sum(clicked_items), sum(bought_items)
ctr = n_clicked / len(clicked_items)
btr = n_bought / len(bought_items)
print(f"CTR: {ctr:.2f}, BTR: {btr:.2f}")

CTR: 0.17, BTR: 0.10


In [6]:
# Preview the first ten catalog rows.
catalog_df.head(10)

Unnamed: 0,product_id,name,category,subcategory,price,quality_score,brand,color,popularity,release_date,description
0,1,Chair TO##,Home,Chair,17.17,0.834,BrandG,White,76,2025-04-30,Send situation town sea media wonder party fee...
1,2,Perfume MX##,Beauty,Perfume,64.88,0.985,BrandL,White,92,2025-04-21,Radio sense leave real knowledge four institut...
2,3,Tennis Racket MS##,Sports,Tennis Racket,23.49,0.717,BrandL,Green,70,2024-06-14,World article describe answer economy enjoy do...
3,4,Novel PB##,Books,Novel,81.27,0.112,BrandI,Black,27,2023-11-12,Dog reflect explain program check letter possi...
4,5,Lamp BA##,Home,Lamp,28.42,0.029,BrandE,Black,17,2025-05-17,Stand guy argue interesting hot magazine any l...
5,6,Cookbook KT##,Books,Cookbook,91.26,0.064,BrandG,Blue,28,2024-03-28,Whether thank six current watch say process ac...
6,7,T-Shirt SZ##,Clothing,T-Shirt,13.44,0.981,BrandC,Blue,93,2023-09-15,Beat front staff from available best during.
7,8,Table HK##,Home,Table,9.9,0.771,BrandM,White,83,2024-05-23,Evening present control major pay one bill ans...
8,9,T-Shirt VQ##,Clothing,T-Shirt,32.72,0.674,BrandB,Red,75,2024-09-01,Painting quality chance energy practice green ...
9,10,Table CE##,Home,Table,39.68,0.713,BrandC,White,83,2024-09-09,Suffer school wall protect guess detail number...


## ENV

In [13]:
# Build the shop environment around one simulated user and the catalog, then
# reset it to obtain the initial state.
# NOTE(review): the "Testing" section further down uses a different calling
# convention (`ShopEnv(items)` / `env.reset(user)`) — confirm which signature
# src.env currently exposes.
env = ShopEnv(cheap_seeker, catalog_df)
state = env.reset()

In [6]:
class Recommender:
    """Baseline recommender that selects a random page of candidates.

    Args:
        page_size: Maximum number of candidates to select per call
            (default 10).
    """

    def __init__(self, page_size=10):
        self.page_size = page_size

    def recommend(self, state):
        """Return a 0/1 action vector over ``state['candidates']``.

        Args:
            state: Mapping with a ``'candidates'`` entry supporting ``len()``.

        Returns:
            Integer numpy array with one slot per candidate, in candidate
            order. ``min(page_size, len(candidates))`` slots chosen uniformly
            at random without replacement are set to 1; the rest are 0.
        """
        candidates = state['candidates']
        n_candidates = len(candidates)
        action = np.zeros(n_candidates, dtype=int)

        # Never ask for more picks than there are candidates: sampling without
        # replacement would raise otherwise.
        n_picks = min(self.page_size, n_candidates)
        if n_picks > 0:
            # Sample *positions* (0..n-1), not index labels: `action` is a
            # positional vector, and the candidates' index labels need not
            # start at 0 or be contiguous.
            positions = np.random.choice(n_candidates, size=n_picks, replace=False)
            action[positions] = 1
        return action

In [14]:
# Instantiate the random recommender and inspect one action vector for the
# current state (one 0/1 slot per candidate).
recommender = Recommender()
action = recommender.recommend(state)
action

array([0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0,
       0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0,
       1, 0, 0, 0, 0, 0, 0, 0])

In [15]:
# Run one full episode: keep recommending until the environment reports done.
done = False
step = 0  # page counter, used only for the printed header
while not done:
    action = recommender.recommend(state)
    # env.step returns five values here; the fourth one is ignored.
    # NOTE(review): the "Testing" section unpacks four values from
    # env.step(action, user) — confirm which ShopEnv version this cell targets.
    state, reward, done, _, info = env.step(action)
    print(f"Step {step}:")
    print(f"Action indices: {np.where(action)[0]}, Reward: {reward}, Done: {done}")
    # Printed as a (click_through_rate, buy_through_rate) tuple.
    print(f"Info: {info['click_through_rate'], info['buy_through_rate']}")
    step += 1

Step 0:
Action indices: [  7  30  36  49  95 141 151 159 228 242], Reward: 0.1, Done: False
Info: (np.float64(0.2), np.float64(0.0))
Step 1:
Action indices: [ 16  63  82  94  96 180 212 228 233 248], Reward: 0.0, Done: False
Info: (np.float64(0.0), np.float64(0.0))
Step 2:
Action indices: [  9  11  19  61  85  86 103 151 160 185], Reward: 0.0, Done: False
Info: (np.float64(0.0), np.float64(0.0))
Step 3:
Action indices: [ 29  58  72  87 101 107 147 158 171 196], Reward: 0.2, Done: False
Info: (np.float64(0.2), np.float64(0.1))
Step 4:
Action indices: [ 33  69 102 107 145 167 171 185 228 235], Reward: 0.0, Done: False
Info: (np.float64(0.0), np.float64(0.0))
Step 5:
Action indices: [ 12  61  75  79  82 126 157 180 202 215], Reward: 0.0, Done: False
Info: (np.float64(0.0), np.float64(0.0))
Step 6:
Action indices: [ 24  35  72  81 109 153 174 178 185 246], Reward: 0.15000000000000002, Done: False
Info: (np.float64(0.1), np.float64(0.1))
Step 7:
Action indices: [  0  71  80  84  94 101 127 

In [16]:
# Inspect the interaction history carried by the most recent state.
state['history']

{'page_count': 10,
 'click_count': 4,
 'buy_count': 3,
 'last_click_item': np.int64(3),
 'consecutive_no_click_pages': 1}

## Testing

In [1]:
import numpy as np
import pandas as pd

import sys
sys.path.append('..')  # Add the parent directory to the Python path

from src.env import ShopEnv
from src.config import Config
from src.users import (
    CheapSeekerUser,
    BrandLoverUser,
    RandomChooserUser,
    ValueOptimizerUser,
    FamiliaritySeekerUser
)
from src.recommenders import (
    RandomRecommender,
    PopularityRecommender
)


def action_to_indices(action):
    """
    Convert the action vector to indices of selected items.

    Args:
        action: Array-like 0/1 vector (numpy array, list or tuple) with one
            entry per candidate.

    Returns:
        List of integer positions whose entry equals 1, in ascending order.
    """
    # Coerce first: comparing a plain list or tuple with `== 1` compares the
    # whole object to 1 instead of comparing element-wise.
    mask = np.asarray(action) == 1
    return np.where(mask)[0].tolist()

def snake_case_to_camel_case(snake_str: str) -> str:
    """Join the underscore-separated parts of ``snake_str``, capitalizing each.

    Every part goes through ``str.capitalize`` (first character upper-cased,
    the rest lower-cased), e.g. ``"cheap_seeker"`` -> ``"CheapSeeker"``.
    """
    words = snake_str.split('_')
    return ''.join(map(str.capitalize, words))

def print_state_info(state):
    """
    Print the user and the history counters of ``state``, one per line.

    Reads ``state['user']`` and the ``page_count``, ``click_count``,
    ``buy_count``, ``last_click_item`` and ``consecutive_no_click_pages``
    entries of ``state['history']``.
    """
    print("User:", state['user'])
    history = state['history']
    labelled_keys = (
        ("Page Count:", 'page_count'),
        ("Click Count:", 'click_count'),
        ("Buy Count:", 'buy_count'),
        ("Last Click Item:", 'last_click_item'),
        ("Consecutive No Click Pages:", 'consecutive_no_click_pages'),
    )
    for label, key in labelled_keys:
        print(label, history[key])
    

# Build the lookup tables used by the test cells below.
config = Config()
# Mapping of user key -> constructor keyword arguments, read from the config.
user_params = config.get("user_params")
# Each key is converted to CapWords and suffixed with "User" to find the class
# among this notebook's globals (e.g. "familiarity_seeker" ->
# FamiliaritySeekerUser), so the class must be imported in the cell above.
# The key itself is passed as the first positional argument.
username_to_user = {
    user: globals()[snake_case_to_camel_case(user) + 'User'](user, **params)
    for user, params in user_params.items()
}
# Recommender instances keyed by a short name.
name_to_recommender = {
    "random": RandomRecommender(),
    "popularity": PopularityRecommender()
}

# Product catalog handed to the environment.
items = pd.read_csv("../src/data/catalog.csv")

In [52]:
# Choose the simulated user and the recommender for this run; the commented
# lines are the alternatives to switch to.
user = username_to_user["familiarity_seeker"]
# user = username_to_user["cheap_seeker"]
recommender = name_to_recommender["random"]
# recommender = name_to_recommender["popularity"]
# The environment is built from the catalog only; the user is supplied to
# reset() and step() in the following cells.
env = ShopEnv(items)

In [53]:
# Start a new episode for the chosen user and clear the loop variables.
state = env.reset(user)  # Initial state
done = False
action = None

In [57]:
# Advance the episode and show what the user saw and how it was scored.
# The unconditional `break` at the end means each run of this cell performs
# exactly one environment step; re-run the cell to step again.
while not done:
    # Keep the candidates from *before* the step: the action's positions refer
    # to this frame, and `state` is replaced by env.step below.
    candidates = state['candidates']
    action = recommender.recommend(state)
    state, reward, done, info = env.step(action, user)
    # Action entries are positional, hence .iloc.
    recommended_items = candidates.iloc[action_to_indices(action)]
    utility = user.utility(recommended_items)
    # Put the user's utility next to the displayed item attributes.
    # NOTE(review): concat with axis=1 aligns on index labels — assumes
    # `utility` carries the same index as `recommended_items`; confirm.
    combined_df = pd.concat([recommended_items[['product_id', 'popularity', 'price']], utility], axis=1)
    # value = recommended_items['quality_score'] / recommended_items['price']
    # combined_df = pd.concat([recommended_items[['product_id', 'quality_score', 'price']], value, utility], axis=1)
    print(f"Action indices: {action_to_indices(action)}, Reward: {reward}, Done: {done}")
    print(f"Info: \nCTR, BTR:{info['click_through_rate'], info['buy_through_rate']}")
    print_state_info(state)  # Print the state information
    break

Action indices: [38, 51, 53, 89, 169, 174, 193, 194, 201, 202], Reward: 0.30000000000000004, Done: False
Info: 
CTR, BTR:(np.float64(0.1), np.float64(0.1))
User: familiarity_seeker
Page Count: 4
Click Count: 2
Buy Count: 1
Last Click Item: 222
Consecutive No Click Pages: 0


In [20]:
# Show the catalog rows that were recommended in the last executed step.
recommended_items

Unnamed: 0,product_id,name,category,subcategory,price,quality_score,brand,color,popularity,release_date,description
108,109,Novel HP##,Books,Novel,19.2,0.8,BrandK,Green,82,2024-11-16,Note recognize other answer development term i...
115,116,Board Game JS##,Toys,Board Game,41.02,0.878,BrandJ,Black,91,2025-03-02,Sell type bit activity style save address serv...
122,123,Yoga Mat TW##,Sports,Yoga Mat,6.67,0.021,BrandH,Black,27,2024-09-21,Economic money pattern wear check position edg...
155,156,Cream YG##,Beauty,Cream,124.49,0.246,BrandO,Black,40,2023-12-04,Special building country both rest ok together...
161,162,Cookbook ZY##,Books,Cookbook,20.98,0.886,BrandF,Yellow,74,2023-09-18,Big finish within particularly suddenly yard f...
163,164,Lipstick GZ##,Beauty,Lipstick,27.34,0.337,BrandB,Yellow,57,2024-01-31,Because true technology power plant recent lit...
173,174,Doll YN##,Toys,Doll,17.41,0.569,BrandE,Red,71,2023-10-10,Throughout positive if turn necessary success ...
179,180,Smartphone KV##,Electronics,Smartphone,10.67,0.627,BrandB,Red,65,2025-02-01,Focus charge picture watch trial only break ac...
194,195,Puzzle VQ##,Toys,Puzzle,11.92,0.114,BrandN,Black,24,2025-01-11,Reduce statement set imagine bag become east r...
221,222,Smartphone PL##,Electronics,Smartphone,196.45,0.992,BrandE,Black,98,2024-11-26,Letter within along former plant each poor TV ...


In [28]:
# Show the recommended items' attributes side by side with the user's utility.
combined_df

Unnamed: 0,product_id,popularity,price,score
26,27,107,28.01,0.083105
36,37,101,11.84,0.227359
111,112,102,61.01,0.398913
135,136,108,34.78,0.220124
153,154,107,98.41,0.0
170,171,102,48.01,0.263123
171,172,106,51.13,0.085424
192,193,103,64.72,0.059802
200,201,110,55.52,0.04329
205,206,107,128.88,0.396652


In [25]:
# Align the utility values to the index labels of `recommended_items`; labels
# that are absent from `utility` come back as NaN.
utility.reindex(recommended_items.index)

3      0.582627
29          NaN
50          NaN
82          NaN
102         NaN
120         NaN
131         NaN
149         NaN
160         NaN
205         NaN
dtype: float64