In [1]:
!pip install git+https://github.com/SAP-samples/sap-rpt-1-oss

Collecting git+https://github.com/SAP-samples/sap-rpt-1-oss
  Cloning https://github.com/SAP-samples/sap-rpt-1-oss to /tmp/pip-req-build-zr0xoav0
  Running command git clone --filter=blob:none --quiet https://github.com/SAP-samples/sap-rpt-1-oss /tmp/pip-req-build-zr0xoav0
  Resolved https://github.com/SAP-samples/sap-rpt-1-oss to commit a323a0aff976fda4ac43c3196a92406de7689aaa
  Preparing metadata (setup.py) ... done
Collecting torcheval>=0.0.7 (from sap_rpt_oss==1.0.1)
  Downloading torcheval-0.0.7-py3-none-any.whl.metadata (8.6 kB)
Collecting pandas>=2.2.3 (from sap_rpt_oss==1.0.1)
  Downloading pandas-2.3.3-cp312-cp312-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl.metadata (91 kB)
     ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 91.2/91.2 kB 8.4 MB/s eta 0:00:00
Collecting pyarrow>=20.0.0 (from sap_rpt_oss==1.0.1)
  Downloading pyarrow-22.0.0-cp312-cp312-manylinux_2_28_x86_64.whl.metadata (3.2 kB)
Downloading pandas-2.3.3-cp3

In [2]:
from huggingface_hub import notebook_login

# Show the Hugging Face login widget inside the notebook.
# NOTE(review): presumably needed so the model files fetched on first use of the
# SAP RPT estimators can be downloaded — confirm whether the checkpoint is gated.
notebook_login()

VBox(children=(HTML(value='<center> <img\nsrc=https://huggingface.co/front/assets/huggingface_logo-noborder.sv…

In [3]:
from sap_rpt_oss import SAP_RPT_OSS_Classifier, SAP_RPT_OSS_Regressor
from sklearn.metrics import accuracy_score, mean_absolute_error, r2_score
import numpy as np
import pandas as pd

In [None]:
def generate_data(n=2040, chaos=False, seed=42):
    """Generate a synthetic shipment table whose targets follow simple rules.

    Feature columns:
        Region          one of 'EU', 'NA', 'APAC'
        Material_Group  one of 'Electronics', 'Steel', 'Chemicals'
        Weight_KG       integer in [10, 1000)
        Quantity        integer in [1, 500)
        Service_Level   'Standard' or 'Express'

    Target columns:
        Expected_Cost   Weight_KG * 1.5 + Quantity * 4, plus 300 for 'Express' rows.
        Carrier_Type    'Air_Freight' for APAC + Express rows, otherwise 'Heavy_Haul'
                        when Weight_KG > 800, otherwise 'Standard_Truck'.

    Args:
        n: Number of rows to generate.
        chaos: When True, perturb Expected_Cost by a uniform factor in [-15%, +15%)
            and overwrite roughly 10% of Carrier_Type values with a label drawn
            uniformly from the three carrier labels.
        seed: Value passed to ``np.random.seed`` so repeated calls return the same
            table. Note that this reseeds NumPy's global legacy RNG.

    Returns:
        A pandas DataFrame with ``n`` rows holding the five feature columns and
        the two target columns.
    """
    np.random.seed(seed)
    df = pd.DataFrame({
        'Region': np.random.choice(['EU', 'NA', 'APAC'], n),
        'Material_Group': np.random.choice(['Electronics', 'Steel', 'Chemicals'], n),
        'Weight_KG': np.random.randint(10, 1000, n),
        'Quantity': np.random.randint(1, 500, n),
        'Service_Level': np.random.choice(['Standard', 'Express'], n)
    })

    df['Expected_Cost'] = (df['Weight_KG'] * 1.5) + (df['Quantity'] * 4)
    df.loc[df['Service_Level'] == 'Express', 'Expected_Cost'] += 300

    # np.select requires exactly one choice per condition; the fallback label is
    # supplied through `default`. Earlier conditions take precedence, so an
    # APAC + Express row is 'Air_Freight' even when it is also heavy.
    conditions = [
        (df['Region'] == 'APAC') & (df['Service_Level'] == 'Express'),
        (df['Weight_KG'] > 800)
    ]
    rule_labels = ['Air_Freight', 'Heavy_Haul']
    default_label = 'Standard_Truck'
    df['Carrier_Type'] = np.select(conditions, rule_labels, default=default_label)

    if chaos:
        # Multiplicative noise on the regression target.
        noise_amount = df['Expected_Cost'] * np.random.uniform(-0.15, 0.15, n)
        df['Expected_Cost'] += noise_amount

        # Label noise: each row has a 10% chance of receiving a random carrier label
        # (which may coincide with its rule-based label).
        mask = np.random.choice([True, False], n, p=[0.1, 0.9])
        all_labels = rule_labels + [default_label]
        random_labels = np.random.choice(all_labels, mask.sum())
        df.loc[mask, 'Carrier_Type'] = random_labels

    return df

In [5]:
# Build the noisy dataset, then carve it into a context set followed by two
# consecutive hold-out batches (one per prediction task).
df = generate_data(n=2040, chaos=True)

context_end, cost_end, carrier_end = 2000, 2020, 2040

df_context = df.iloc[:context_end]
# Hold-out batches are copied so they are independent of `df`.
df_pred_cost = df.iloc[context_end:cost_end].copy()
df_pred_carrier = df.iloc[cost_end:carrier_end].copy()

print(f"Context Size: {len(df_context)} rows | Prediction Tasks: 2 (20 rows each)")

Context Size: 2000 rows | Prediction Tasks: 2 (20 rows each)


In [None]:
# Regression task: predict the continuous 'Expected_Cost' column.
feature_cols = ['Region', 'Material_Group', 'Weight_KG', 'Quantity', 'Service_Level']

# Context rows are what the regressor is fitted on; target rows are held out.
X_ctx, y_ctx = df_context[feature_cols], df_context['Expected_Cost']
X_target, y_true = df_pred_cost[feature_cols], df_pred_cost['Expected_Cost']

reg = SAP_RPT_OSS_Regressor(max_context_size=4096, bagging=5)
reg.fit(X_ctx, y_ctx)
cost_preds = reg.predict(X_target)

# Score the held-out predictions against the true costs.
mae = mean_absolute_error(y_true, cost_preds)
r2 = r2_score(y_true, cost_preds)
print(f"Mean Absolute Error: €{mae:.2f}")
print(f"R2 Score: {r2:.4f}")

# Side-by-side view of true vs. predicted cost for the first few held-out rows.
cost_comparison = df_pred_cost[['Expected_Cost']].assign(Predicted_Cost=cost_preds)
print(cost_comparison.head(3))

2025-11-04_sap-rpt-one-oss.pt:   0%|          | 0.00/64.6M [00:00<?, ?B/s]

config.json:   0%|          | 0.00/612 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/90.9M [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/350 [00:00<?, ?B/s]

vocab.txt: 0.00B [00:00, ?B/s]

tokenizer.json: 0.00B [00:00, ?B/s]

special_tokens_map.json:   0%|          | 0.00/112 [00:00<?, ?B/s]

Mean Absolute Error: €173.56
R2 Score: 0.9320
      Expected_Cost  Predicted_Cost
2000    2790.619194          2692.0
2001    2116.175550          2472.0
2002    1724.987725          1651.0


In [None]:
# Classification task: predict the categorical 'Carrier_Type' column.
print("\n>>> TASK 2: Predicting 'Carrier_Type' (Classification)...")

carrier_feature_cols = ['Region', 'Material_Group', 'Weight_KG', 'Quantity', 'Service_Level']

# The context features (X_ctx) are reused from the regression cell; only the
# target column and the held-out batch differ.
y_ctx_class = df_context['Carrier_Type']
X_target_class = df_pred_carrier[carrier_feature_cols]
y_true_class = df_pred_carrier['Carrier_Type']

clf = SAP_RPT_OSS_Classifier(max_context_size=4096, bagging=5)
clf.fit(X_ctx, y_ctx_class)
carrier_preds = clf.predict(X_target_class)

acc = accuracy_score(y_true_class, carrier_preds)
print(f"Results: Accuracy: {acc*100:.1f}%")

# Side-by-side view of true vs. predicted carrier for the first few held-out rows.
carrier_comparison = df_pred_carrier[['Carrier_Type']].assign(Predicted_Carrier=carrier_preds)
print(carrier_comparison.head(3))


>>> TASK 2: Predicting 'Carrier_Type' (Classification)...
Results: Accuracy: 90.0%
     Carrier_Type Predicted_Carrier
2020   Heavy_Haul        Heavy_Haul
2021   Heavy_Haul        Heavy_Haul
2022  Air_Freight       Air_Freight
