# Lavka Recommender System Example Usage

This notebook demonstrates how to use the refactored recommender system with the unified experiment interface.

In [None]:
import os
import polars as pl
import matplotlib.pyplot as plt

from lavka_recsys.config import Config
from lavka_recsys.experiment import Experiment, ExperimentType

## 1. Load Configuration

You can either load the configuration from a YAML file or create it programmatically.

In [None]:
# Prefer a YAML configuration file from the working directory when one exists;
# otherwise fall back to an equivalent configuration assembled in code.
config_path = 'default_config.yaml'

if not os.path.exists(config_path):
    # Experiment-level switches and evaluation options
    experiment_section = {
        "type": "standard",
        "use_feature_selection": False,
        "use_hyperparameter_tuning": False,
        "evaluation": {
            "perform_kaggle_simulation": True,
            "create_submission": True
        }
    }

    # Model choice plus its CatBoost hyperparameters
    model_section = {
        "type": "catboost",
        "config": {
            "catboost": {
                "iterations": 300,
                "learning_rate": 0.1,
                "depth": 6,
                "l2_leaf_reg": 3.0
            }
        }
    }

    # Names of the feature generators to enable
    feature_names = [
        "count_purchase_user_product",
        "count_purchase_user_store",
        "ctr_product",
        "recency_user_product",
        "user_stats",
        "product_stats",
        "store_stats"
    ]

    # Input locations; the sample size is kept small for faster execution
    data_section = {
        "train_path": "data/train.parquet",
        "test_path": "data/test.parquet",
        "sample_size": 50000
    }

    # Time-based splitting parameters used during training
    training_section = {
        "split_type": "sliding_window",
        "target_days": 7,
        "step_days": 7,
        "max_splits": 5,
        "validation_days": 7
    }

    config = Config({
        "experiment": experiment_section,
        "model": model_section,
        "features": feature_names,
        "target": "CartUpdate_Purchase_vs_View",
        "data": data_section,
        "training": training_section
    })
else:
    config = Config.from_file(config_path)

## 2. Create and Setup Experiment

Create an experiment and set it up by loading the data and initializing components.

In [None]:
# Name under which this example experiment is created
experiment_name = "example_experiment"

# Build the experiment from the configuration loaded above
experiment = Experiment(experiment_name, config)

# Prepare the experiment before running it
# (per this notebook's description: loads the data and initializes components)
experiment.setup()

## 3. Run Standard Experiment

Run a standard experiment with a single train/validation split.

In [None]:
# Switch the experiment to the STANDARD type and execute it
experiment.experiment_type = ExperimentType.STANDARD
results = experiment.run()

# Report every returned metric with four decimal places
print("Standard Experiment Metrics:")
for metric_name, metric_value in results['metrics'].items():
    print(f"  {metric_name}: {metric_value:.4f}")

# Rank features by importance (highest first) and keep the first five
print("\nTop 5 Important Features:")
ranked_features = sorted(
    results['feature_importance'].items(),
    key=lambda pair: pair[1],
    reverse=True,
)
top_features = ranked_features[:5]
for feature_name, feature_score in top_features:
    print(f"  {feature_name}: {feature_score:.6f}")

## 4. Full History Experiment

Run a full history experiment with time-aware training.

In [None]:
# Switch the same experiment object to the FULL_HISTORY type
experiment.experiment_type = ExperimentType.FULL_HISTORY

# Execute it (expected to take longer than the standard run); this overwrites `results`
results = experiment.run()

# Report every returned metric with four decimal places
print("Full History Experiment Metrics:")
for metric_name, metric_value in results['metrics'].items():
    print(f"  {metric_name}: {metric_value:.4f}")

# Rank features by importance (highest first) and keep the first five
print("\nTop 5 Important Features:")
ranked_features = sorted(
    results['feature_importance'].items(),
    key=lambda pair: pair[1],
    reverse=True,
)
top_features = ranked_features[:5]
for feature_name, feature_score in top_features:
    print(f"  {feature_name}: {feature_score:.6f}")

## 5. Kaggle Evaluation and Submission

Evaluate the model on a simulated Kaggle test set and create a submission.

In [None]:
# Run the evaluation step on the most recently run experiment.
# NOTE(review): submission creation is presumably handled inside evaluate(),
# driven by the evaluation settings in the config — confirm against Experiment.evaluate.
evaluation = experiment.evaluate()

# Simulation metrics are optional: report them only when the key is present
if 'kaggle_simulation' in evaluation:
    print("Kaggle Simulation Metrics:")
    kaggle_metrics = evaluation['kaggle_simulation']
    for metric_name, metric_value in kaggle_metrics.items():
        print(f"  {metric_name}: {metric_value:.4f}")

## 6. Hyperparameter Tuning Example

Run an experiment with hyperparameter tuning.

In [None]:
# Settings that turn the shared configuration into a tuning run, applied in order.
# NOTE: these are written onto the existing `config` object via config.set, so they
# appear to stay in effect for any later cell that reuses `config`.
tuning_overrides = {
    'experiment.type': 'tuning',
    'experiment.use_hyperparameter_tuning': True,
    'hyperparameter_tuning.n_trials': 5,  # Low number for demonstration
}
for config_key, config_value in tuning_overrides.items():
    config.set(config_key, config_value)

# Build and prepare a separate experiment for the tuning run
tuning_experiment = Experiment("tuning_example", config)
tuning_experiment.setup()

# Execute the tuning run (expected to take longer)
tuning_results = tuning_experiment.run()

# Report the metrics returned by the tuning run
print("Tuning Experiment Metrics:")
for metric_name, metric_value in tuning_results['metrics'].items():
    print(f"  {metric_name}: {metric_value:.4f}")

# Report the selected hyperparameters
print("\nBest Parameters:")
for param_name, param_value in tuning_results['best_params'].items():
    print(f"  {param_name}: {param_value}")

## 7. Time Splitting Strategies

Compare different time splitting strategies.

In [None]:
# Time splitting configurations to compare. Each key/value pair of an entry is
# written into the shared config under `training.<key>` before the run.
split_configs = {
    'sliding_window': {
        'split_type': 'sliding_window',
        'target_days': 7,
        'step_days': 7,
        'max_splits': 3
    },
    'fixed_window': {
        'split_type': 'fixed_window',
        'history_days': 30,
        'target_days': 7,
        'step_days': 7,
        'max_splits': 3
    },
    'expanding_window': {
        'split_type': 'expanding_window',
        'target_days': 7,
        'step_days': 7,
        'max_splits': 3
    }
}

# Every comparison run uses the full-history experiment type, so set it once.
# The config object is shared with the earlier hyperparameter tuning cell, which
# enabled `experiment.use_hyperparameter_tuning`; switch it back off explicitly so
# the comparison does not silently inherit that flag.
config.set('experiment.type', 'full_history')
config.set('experiment.use_hyperparameter_tuning', False)

# Metrics per strategy, keyed by strategy name (read by the plotting cell below)
metrics = {}

for split_name, split_config in split_configs.items():
    print(f"\nRunning experiment with {split_name} split")

    # Apply this strategy's training settings to the shared config.
    # NOTE(review): keys set by an earlier strategy are not removed, so
    # `training.history_days` from 'fixed_window' is still present for
    # 'expanding_window' — confirm that split type ignores it.
    for key, value in split_config.items():
        config.set(f'training.{key}', value)

    # Create and set up a fresh experiment for this strategy
    split_experiment = Experiment(f"{split_name}_split", config)
    split_experiment.setup()

    # Run experiment
    split_results = split_experiment.run()

    # Store metrics for the comparison chart
    metrics[split_name] = split_results['metrics']

    # Print metrics
    print(f"{split_name.capitalize()} Split Metrics:")
    for metric, value in split_results['metrics'].items():
        print(f"  {metric}: {value:.4f}")

## 8. Visualize Comparison

Compare AUC and nDCG@10 across the time splitting strategies run in the previous section.

In [None]:
# Grouped bar chart comparing AUC and nDCG@10 for each time splitting strategy
fig, ax = plt.subplots(figsize=(10, 6))

# One value per strategy, in the order the strategies were defined;
# a metric missing from a run is plotted as 0
strategy_names = list(split_configs)
auc_values = [metrics[name].get('auc', 0) for name in strategy_names]
ndcg_values = [metrics[name].get('ndcg@10', 0) for name in strategy_names]

bar_width = 0.35
auc_positions = range(len(split_configs))
ndcg_positions = [pos + bar_width for pos in auc_positions]  # shifted right by one bar width

ax.bar(auc_positions, auc_values, bar_width, label='AUC')
ax.bar(ndcg_positions, ndcg_values, bar_width, label='nDCG@10')

ax.set_xlabel('Time Splitting Strategy')
ax.set_ylabel('Metric Value')
ax.set_title('Performance Comparison of Time Splitting Strategies')

# Place each tick midway between the two bars of its group
ax.set_xticks([pos + bar_width / 2 for pos in auc_positions])
ax.set_xticklabels(strategy_names)

ax.legend()
fig.tight_layout()
plt.show()