In [2]:
# # RL4RS Exploratory Data Analysis

# This notebook performs exploratory data analysis on the RL4RS datasets.

# ## Environment Setup
# Make sure the `rl4rs` conda environment is activated and selected as the kernel for this notebook.

In [1]:
# Import required libraries
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

# Plot defaults for the notebook: seaborn "whitegrid" theme and 12x6 inch figures
sns.set_style("whitegrid")
plt.rcParams.update({'figure.figsize': (12, 6)})

# Confirm the imports worked and record the library versions in the cell output
print(
    "✓ Libraries imported successfully!",
    f"Pandas version: {pd.__version__}",
    f"NumPy version: {np.__version__}",
    sep="\n",
)

✓ Libraries imported successfully!
Pandas version: 2.0.3
NumPy version: 1.24.3


In [5]:
# List the contents of the current working directory (IPython automagic, same as `%ls`).
ls

EDA.ipynb              [34massets[m[m/                [34mrl4rs[m[m/
ENVIRONMENT_SETUP.md   [34mdataset[m[m/               [34mscript[m[m/
LICENSE                environment.yml        test_environment.py
README.md              environment_macos.yml  tutorial.ipynb
RL4RS_appendix.pdf     index.html
activate_rl4rs.sh      [34mreproductions[m[m/


## Load Item Information Dataset

Let's start by loading the item information dataset, which contains metadata about the items.

In [6]:
# Show the kernel's current working directory (IPython automagic, same as `%pwd`).
pwd

'/Users/armandoordoricadelatorre/Documents/U of T/PhD/PhD Research/RL4RS/RL4RS'

In [8]:
# List the contents of the current working directory (IPython automagic, same as `%ls`).
ls

EDA.ipynb              [34massets[m[m/                [34mrl4rs[m[m/
ENVIRONMENT_SETUP.md   [34mdataset[m[m/               [34mscript[m[m/
LICENSE                environment.yml        test_environment.py
README.md              environment_macos.yml  tutorial.ipynb
RL4RS_appendix.pdf     index.html
activate_rl4rs.sh      [34mreproductions[m[m/


In [13]:
# Read the item metadata table.
# The file is space-delimited; each item_vec cell holds one comma-separated string.
item_info_path = "dataset/item_info.csv"
item_info = pd.read_csv(item_info_path, sep=' ')

# Size and schema summary, followed by the per-column dtypes
print(
    f"Dataset shape: {item_info.shape}",
    f"Number of items: {len(item_info)}",
    f"\nColumn names: {list(item_info.columns)}",
    f"\nData types:",
    sep="\n",
)
print(item_info.dtypes)

# Rich (HTML) preview of the leading rows
print(f"\nFirst few rows:")
display(item_info.head())

# Print one raw item_vec in full to show the string format (the preview truncates it)
print(f"\nSample item_vec (first item):")
print(item_info['item_vec'].iloc[0])

Dataset shape: (283, 5)
Number of items: 283

Column names: ['item_id', 'item_vec', 'price', 'location', 'special_item']

Data types:
item_id           int64
item_vec         object
price           float64
location          int64
special_item      int64
dtype: object

First few rows:


Unnamed: 0,item_id,item_vec,price,location,special_item
0,1,"-0.2137,-0.0489,-0.3633,-0.1349,1.8061,0.4482,...",7.0,1,0
1,2,"-0.2137,-0.7579,-0.3633,-0.1377,1.5852,-0.9129...",13.1,1,0
2,3,"-0.2137,2.0783,-0.3633,-0.142,1.372,-0.6294,-1...",14.6,1,0
3,4,"-0.2137,-0.7579,-0.3633,-0.1423,1.7014,-0.2719...",13.4,1,0
4,5,"-0.2137,-0.7579,-0.3633,-0.1352,1.3293,-0.7099...",12.8,1,0



Sample item_vec (first item):
-0.2137,-0.0489,-0.3633,-0.1349,1.8061,0.4482,-1.8973,-1.7259,0.4035,-1.5086,-1.5189,-1.4673,-1.2481,-1.2081,-0.2745,-0.2303,-1.622,0.3304,0.4923,-1.2716,-0.5764,1.4189,1.1158,0.4944,0.0253,-0.8038,1.5981,-1.0421,1.2316,0.7456,0.3724,-0.402,1.1648,-0.1391,1.1556,-0.6094,-0.8756,-0.0815,1.2836,1.2644
