In [1]:
import boto3
import pandas as pd
import xgboost as xgb
from sklearn.model_selection import train_test_split

In [2]:
# 1. Load Data from S3
s3 = boto3.client('s3')
obj = s3.get_object(Bucket='ltr-models-frp', Key='data/ltr_training_data.csv')
df = pd.read_csv(obj['Body'])

In [9]:
df.sample(n=10)

Unnamed: 0,query_group_id,product_id,category,retail_price,cost,label
3718,22763,4396,Jeans,120.0,63.48,0
3413,21139,21069,Jeans,73.989998,41.582379,0
1576,15120,14418,Maternity,28.0,12.908,1
9648,95356,19057,Sweaters,53.549999,28.64925,0
4196,30171,13461,Swim,56.0,21.56,0
3232,20553,12998,Swim,16.99,6.52416,1
3058,19894,5086,Jeans,87.949997,46.965298,0
8113,76167,27446,Swim,25.0,15.2,0
351,10835,14197,Accessories,5.55,2.1312,1
6127,5227,99,Tops & Tees,96.0,54.912,1


In [None]:
# list the unique values for category
print(df['category'].unique())

['Intimates' 'Swim' 'Blazers & Jackets' 'Socks & Hosiery' 'Accessories'
 'Pants & Capris' 'Skirts' 'Dresses' 'Plus' 'Maternity'
 'Outerwear & Coats' 'Shorts' 'Leggings' 'Sweaters' 'Jeans' 'Tops & Tees'
 'Sleep & Lounge' 'Active' 'Underwear' 'Socks' 'Suits & Sport Coats'
 'Suits' 'Fashion Hoodies & Sweatshirts' 'Clothing Sets'
 'Jumpsuits & Rompers' 'Pants']


In [6]:
# 2. Preprocessing for Ranker
# XGBoost Ranker requires data to be sorted by Group ID
df = df.sort_values(by='query_group_id')

# Feature Engineering (Simplistic for demo)
# In reality: You would use One-Hot Encoding for Category, etc.
X = df[['retail_price', 'cost']] 
y = df['label']

# 3. Create the Groups
# "groups" must be an array where each element is the size of a group.
# e.g., [5, 10, 2] means the first 5 rows are one group, next 10 are another, etc.
groups = df.groupby('query_group_id').size().to_numpy()

In [None]:
# 4. Initialize the Ranker
model = xgb.XGBRanker(  
    objective='rank:pairwise',
    learning_rate=0.1,
    gamma=1.0,
    min_child_weight=0.1,
    n_estimators=100,
    eval_metric='ndcg'  # 
)

In [8]:
# 5. Training
model.fit(
    X, 
    y, 
    group=groups,
    verbose=True
)

print("Success! Model trained with LambdaMART.")

Success! Model trained with LambdaMART.
