# Simple workflow with Dummy Data
## 1. Load Dummy Data

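If you have installed the `sawmil` package, drop the `src.` prefix from the imports in the cells below (for example, `from src.sawmil.kernels import get_kernel` becomes `from sawmil.kernels import get_kernel`).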

In [None]:
from dataset import make_complex_bags
import numpy as np
# NumPy random generator with a fixed seed of 0.
# NOTE(review): not referenced by any of the cells shown here.
rng = np.random.default_rng(0)

# Build the dummy bag dataset used by every cell below.
# NOTE(review): the meaning of each argument is defined by
# `make_complex_bags`; confirm against its documentation before tuning.
ds = make_complex_bags(
    n_pos=100,
    n_neg=100,
    inst_per_bag=(5, 15),
    d=2,
    pos_centers=((2, 1), (4, 3)),
    neg_centers=((-1.5, -1.0), (-3.0, 0.5)),
    pos_scales=((2.0, 0.6), (1.2, 0.8)),
    neg_scales=((1.5, 0.5), (2.5, 0.9)),
    pos_intra_rate=(0.25, 0.85),
    ensure_pos_in_every_pos_bag=True,
    neg_pos_noise_rate=(0.0, 0.05),
    pos_neg_noise_rate=(0.0, 0.2),
    outlier_rate=0.1,
    outlier_scale=8.0,
    random_state=42,
)

# Sanity check: fetch the positive and negative instances together with
# their bag indices, then print a short summary of the dataset.
X_pos, pos_idx = ds.positive_instances()
X_neg, neg_idx = ds.negative_instances()

n_bags = len(ds.bags)
print("n bags:", n_bags)

pos_shape, neg_shape = X_pos.shape, X_neg.shape
print("pos inst:", pos_shape, "neg inst:", neg_shape)

# Only the first ten distinct bag indices are printed for each group.
pos_bag_ids = np.unique(pos_idx)
print("unique bag indices in positives:", pos_bag_ids[:10], "…")
neg_bag_ids = np.unique(neg_idx)
print("unique bag indices in negatives:", neg_bag_ids[:10], "…")


## 2. Fit the model

In [None]:
from src.sawmil.kernels import get_kernel
from src.sawmil.bag_kernels import make_bag_kernel
# Base kernel that operates on single instances.
k = get_kernel("linear", normalizer="average")

# Convert the single-instance kernel into a kernel between bags.
bag_k = make_bag_kernel(
    k,
    use_intra_labels=False,
)

### 2(A) Fit NSK with the Linear Kernel

In [None]:
from src.sawmil.nsk import NSK

# Linear single-instance kernel, wrapped into a bag-level kernel.
base_k = get_kernel("linear", normalizer="average")
bag_k = make_bag_kernel(base_k, use_intra_labels=False)

# Construct and fit in one chained call; `clf` is whatever `fit` returns.
clf = NSK(
    C=1,
    bag_kernel=bag_k,
    scale_C=True,
    tol=1e-8,
    verbose=False,
    solver="osqp",
).fit(ds, None)

# Score on the training bags, using each bag's own `y` as the reference.
bag_labels = np.array([bag.y for bag in ds.bags])
print("Train acc:", clf.score(ds, bag_labels))

In [None]:

# clf.predict(ds), clf.decision_function(ds)

### 2(B) Fit NSK with the RBF Kernel

In [None]:
# RBF single-instance kernel; here the normalizer is given to the bag kernel.
base_k = get_kernel("rbf", gamma=0.8)
bag_k = make_bag_kernel(
    base_k,
    use_intra_labels=False,
    normalizer="average",
)

# Construct and fit in one chained call; `clf` is whatever `fit` returns.
clf = NSK(
    C=10,
    bag_kernel=bag_k,
    scale_C=True,
    tol=1e-8,
    verbose=False,
    solver="osqp",
).fit(ds, None)

# Score on the training bags, using each bag's own `y` as the reference.
bag_labels = np.array([bag.y for bag in ds.bags])
print("Train acc:", clf.score(ds, bag_labels))

### 2(C) Combine Kernels

In [None]:
from src.sawmil.kernels import Product, Polynomial, Linear, RBF, Sum, Scale

# Composite instance kernel: Linear + 0.5 * (Polynomial(degree=2) * RBF).
linear_part = Linear()
poly_times_rbf = Product(Polynomial(degree=2), RBF(gamma=1.0))
k = Sum(linear_part, Scale(0.5, poly_times_rbf))

# Bag kernel built on the composite kernel, with normalizer set to "none".
bag_k = make_bag_kernel(inst_kernel=k, normalizer="none")

# This cell uses the Gurobi solver, unlike the OSQP-based cells above.
clf = NSK(
    C=100,
    bag_kernel=bag_k,
    scale_C=True,
    tol=1e-8,
    verbose=False,
    solver="gurobi",
).fit(ds, None)

# Score on the training bags, using each bag's own `y` as the reference.
bag_labels = np.array([bag.y for bag in ds.bags])
print("Train acc:", clf.score(ds, bag_labels))

## 3. Use sMIL

In [None]:
from src.sawmil.smil import sMIL
from src.sawmil.kernels import Linear

In [None]:
# Bag kernel over a plain linear instance kernel, with normalizer "none".
bag_k = make_bag_kernel(
    Linear(),
    normalizer="none",
    use_intra_labels=False,
)

# Construct and fit in one chained call; `clf` is whatever `fit` returns.
clf = sMIL(
    C=10,
    bag_kernel=bag_k,
    scale_C=True,
    tol=1e-6,
    verbose=False,
    solver="osqp",
).fit(ds, None)

In [None]:
# Convert bag labels to +1 (when y > 0) or -1 (otherwise) before scoring.
signed_labels = np.array([1 if bag.y > 0 else -1 for bag in ds.bags])
print("Train acc:", clf.score(ds, signed_labels))

## 4. Use sAwMIL

In [None]:
from src.sawmil.sawmil import sAwMIL
from src.sawmil.kernels import get_kernel, Linear
from src.sawmil.bag_kernels import make_bag_kernel

In [None]:
# Linear single-instance kernel; it is passed to sAwMIL directly as
# `base_kernel`.
k = get_kernel("linear")

# eta is set high here, since all items in the bag are relevant.
clf = sAwMIL(
    C=0.1,
    base_kernel=k,
    solver="gurobi",
    eta=0.95,
)
clf.fit(ds)

In [None]:
# Convert bag labels to +1 (when y > 0) or -1 (otherwise) before scoring.
signed_labels = np.array([1 if bag.y > 0 else -1 for bag in ds.bags])
print("Train acc:", clf.score(ds, signed_labels))

In [None]:
# Run the fitted classifier on the training dataset. This is the last
# expression of the cell, so the notebook shows its value as the cell output.
clf.predict(ds)