In [3]:
import pandas as pd
import numpy as np

events_path = '../../data/events.csv'
events = pd.read_csv(events_path)

item_props1 = pd.read_csv('../../data/item_properties_part1.csv')
item_props2 = pd.read_csv('../../data/item_properties_part2.csv')
item_properties = pd.concat([item_props1, item_props2], ignore_index=True)
category_tree = pd.read_csv('../../data/category_tree.csv')

In [None]:
events = events.sort_values(['visitorid', 'timestamp'])

# Assign rewards: view -> 0.1, addtocart -> 0.5, transaction -> 1.0
reward_map = {'view': 0.1, 'addtocart': 0.5, 'transaction': 1.0}
events['reward'] = events['event'].map(reward_map)

# Select top 50 items as arms
top_items = events['itemid'].value_counts().head(50).index.tolist()
events_filtered = events[events['itemid'].isin(top_items)].copy()

# Map itemid to arm index
arm_map = {item: idx for idx, item in enumerate(top_items)}
events_filtered['arm'] = events_filtered['itemid'].map(arm_map)
events_filtered = events_filtered.sort_values(['visitorid', 'timestamp'])

# Group by visitorid and compute user state (history of interactions)
grouped = events_filtered.groupby('visitorid')
data_list = []

for visitor, group in grouped:
    past_views = 0
    past_addtocart = 0
    past_transactions = 0
    for _, row in group.iterrows():
        # User state before this event: [past_views, past_addtocart, past_transactions]
        user_state = np.array([past_views, past_addtocart, past_transactions], dtype=float)
        arm = row['arm']
        reward = row['reward']
        # Context: same user state repeated for each arm (n_arms=50, n_dims=3)
        contexts = np.tile(user_state, (50, 1))
        row_data = np.concatenate([[arm, reward], contexts.flatten()])
        data_list.append(row_data)
        # Update past counts
        if row['event'] == 'view':
            past_views += 1
        elif row['event'] == 'addtocart':
            past_addtocart += 1
        elif row['event'] == 'transaction':
            past_transactions += 1

data_array = np.array(data_list)

# Save to dataset.txt
np.savetxt('../../data/dataset.txt', data_array)
print("Dataset saved to ../../data/dataset.txt")

FileNotFoundError: [Errno 2] No such file or directory: 'data/dataset.txt'