# Week 8 Unified Practicum: Pricing (Loss) + CliffWalking (Returns)
- **Pricing**: Expected, VaR, CVaR, EVaR on light vs heavy tail losses.
- **RL Mini-Project (CliffWalking)**: REINFORCE vs EVaR-PG, metrics = mean return, VaR, CVaR, fall frequency.
- α-levels used: **0.95** and **0.99**.
- Outputs: pricing and RL metric tables displayed inline in the notebook (the cells shown here do not write CSV files).

> Notes: Matplotlib only, single-plot per figure, default colors.

## Utilities: Risk Measures

In [None]:
import numpy as np, pandas as pd, matplotlib.pyplot as plt
rng = np.random.default_rng(123)

# VaR & CVaR for arrays x. For returns (higher is better), the lower-tail (worse) is <= VaR.
def var_alpha(x, alpha=0.95):
    """Return the empirical lower-tail Value-at-Risk of a sample of returns.

    For returns (higher is better) the risky outcomes are the smallest
    values, so VaR at confidence ``alpha`` is the order statistic that leaves
    roughly a ``1 - alpha`` share of the sample at or below it. The previous
    implementation picked the ``alpha`` order statistic instead, which made
    "the returns <= VaR" cover ~95% of the data rather than the worst ~5%.

    Args:
        x: Array-like of returns.
        alpha: Confidence level in [0, 1].

    Returns:
        The selected order statistic as a Python float. With fewer than
        ``1 / (1 - alpha)`` observations this is the sample minimum.

    Raises:
        ValueError: If ``x`` is empty.
    """
    ordered = np.sort(np.asarray(x))
    n = len(ordered)
    if n == 0:
        raise ValueError("var_alpha() requires at least one observation")
    # round() absorbs float noise in (1 - alpha) * n (e.g. a product that
    # lands a hair below an integer) before taking the floor.
    tail_count = int(np.floor(round((1.0 - alpha) * n, 9)))
    # Convert the tail size to a 0-based index and clamp it into the sample.
    idx = max(0, min(tail_count - 1, n - 1))
    return float(ordered[idx])

def cvar_alpha_returns(returns, alpha=0.95):
    """Average the returns that lie at or below the ``var_alpha`` threshold.

    Args:
        returns: Array-like of returns.
        alpha: Confidence level forwarded to ``var_alpha``.

    Returns:
        Mean of the selected returns as a float; if nothing is selected the
        threshold itself is returned.
    """
    values = np.asarray(returns)
    threshold = var_alpha(returns, alpha)
    at_or_below = values[values <= threshold]
    return float(at_or_below.mean()) if len(at_or_below) else threshold

def cvar_alpha_losses(losses, alpha=0.95):
    """Average the losses that lie at or above the ``alpha`` quantile.

    Args:
        losses: Array-like of losses (larger is worse).
        alpha: Quantile level passed to ``np.quantile``.

    Returns:
        Mean of the losses >= the quantile as a float; if that selection is
        empty the quantile itself is returned.
    """
    values = np.asarray(losses)
    threshold = np.quantile(losses, alpha)
    exceedances = values[values >= threshold]
    if len(exceedances):
        return float(exceedances.mean())
    return float(threshold)

def evar_alpha_losses(losses, alpha=0.95, lambdas=None):
    """Approximate the entropic VaR of a loss sample by a grid search.

    For each candidate ``lam`` the objective
    ``(log mean(exp(lam * x)) - log(1 - alpha)) / lam`` is evaluated on the
    sample and the smallest value found is returned.

    Args:
        losses: Array-like of losses; converted to a float array.
        alpha: Confidence level entering through ``log(1 - alpha)``.
        lambdas: Iterable of candidate ``lam`` values. Defaults to 200
            log-spaced points between 1e-3 and 1e2.

    Returns:
        The minimum objective value as a float (``inf`` if no candidate
        produced a value below ``inf``).
    """
    sample = np.asarray(losses, dtype=float)
    grid = np.logspace(-3, 2, 200) if lambdas is None else lambdas

    best = np.inf
    for lam in grid:
        scaled = lam * sample
        # Subtract the maximum before exponentiating (log-sum-exp shift) so
        # exp() never sees a large positive argument.
        peak = np.max(scaled)
        log_mgf = peak + np.log(np.mean(np.exp(scaled - peak)))
        candidate = (log_mgf - np.log(1.0 - alpha)) / lam
        best = candidate if candidate < best else best
    return float(best)

## Part A — Claim Pricing (Loss Domain)

In [None]:
# Simulate light- vs heavy-tailed loss samples of equal size.
N = 50000

# Light tail: a single lognormal.
mu_lt, sigma_lt = 8.5, 0.5
light = rng.lognormal(mean=mu_lt, sigma=sigma_lt, size=N)

# Heavy tail: a lognormal body mixed with a Pareto component.
mu_ht, sigma_ht = 8.5, 0.8
ln = rng.lognormal(mean=mu_ht, sigma=sigma_ht, size=N)

xm, shape = 5e4, 1.5
u = rng.uniform(size=N)
# Inverse-CDF transform of the uniform draws into Pareto(xm, shape) draws.
pareto = xm * np.power(1 - u, -1.0 / shape)

# Where mask is True (probability 0.2 per sample) the Pareto draw is used,
# otherwise the lognormal draw.
mask = rng.uniform(size=N) < 0.2
heavy = np.where(mask, pareto, ln)

alphas = [0.95, 0.99]

def pricing_table_wide(losses, name, alphas=(0.95, 0.99)):
    """Build a one-row wide table of pricing metrics for one loss sample.

    Args:
        losses: Array-like of losses.
        name: Label stored in the ``Dataset`` column.
        alphas: Confidence levels; each adds ``VaR@a``, ``CVaR@a`` and
            ``EVaR@a`` columns.

    Returns:
        A single-row ``pandas.DataFrame`` with the sample mean (``Expected``)
        followed by the three risk measures per level.
    """
    row = {"Dataset": name, "Expected": float(np.mean(losses))}
    for level in alphas:
        row.update({
            f"VaR@{level}": float(np.quantile(losses, level)),
            f"CVaR@{level}": cvar_alpha_losses(losses, level),
            f"EVaR@{level}": evar_alpha_losses(losses, level),
        })
    return pd.DataFrame([row])

# Stack the one-row tables of both datasets into a single comparison table.
pricing_df = pd.concat(
    [
        pricing_table_wide(sample, label)
        for sample, label in ((light, "Light-tail"), (heavy, "Heavy-tail"))
    ],
    ignore_index=True,
)
pricing_df


In [None]:
# Visuals: one histogram per dataset, marked with VaR/CVaR/EVaR at alpha=0.95
a = 0.95
for losses, title in [(light, "Light-tail"), (heavy, "Heavy-tail")]:
    # Risk measures of this dataset at level a.
    v = np.quantile(losses, a)
    c = cvar_alpha_losses(losses, a)
    e = evar_alpha_losses(losses, a)

    plt.figure()
    plt.hist(losses, bins=100)

    # One vertical marker per risk measure, each with its own line style.
    markers = (
        (v, '--', f"VaR@{a}"),
        (c, ':', f"CVaR@{a}"),
        (e, '-.', f"EVaR@{a}"),
    )
    for position, line_style, marker_label in markers:
        plt.axvline(position, linestyle=line_style, linewidth=2, label=marker_label)

    # Crop the x-axis just past the largest marker so the far tail does not
    # squash the rest of the histogram.
    plt.xlim(0, 1.05 * max(v, c, e))

    plt.title(f"{title} — Histogram Loss with VaR/CVaR/EVaR")
    plt.xlabel("Loss")
    plt.ylabel("Frequency")
    plt.legend()
    plt.show()


In [None]:
# Render the pricing table inline (no CSV is written).
from IPython.display import display

# Every numeric column is shown with thousands separators and no decimals.
numeric_cols = pricing_df.select_dtypes(include=np.number).columns.tolist()
format_dict = dict.fromkeys(numeric_cols, "{:,.0f}")

# Sort a copy by dataset name (adjust the key if an 'alpha' column is added).
sorted_pricing = pricing_df.copy().sort_values(["Dataset"])
display(sorted_pricing.style.format(format_dict, na_rep="–"))

# NOTE: an earlier pivoted view built from `pricing_df_long` (Dataset x alpha,
# values VaR/CVaR/EVaR) was disabled here because that DataFrame is not
# defined; `.hide_index()` was likewise dropped as unsupported.

## Part B — RL Mini-Project: CliffWalking (Return Domain)

In [None]:
class CliffWalking:
    """Grid world with a cliff along the bottom row between start and goal.

    The agent starts in the bottom-left cell and the goal is the bottom-right
    cell. Stepping onto a cliff cell costs -100 and sends the agent back to
    the start without ending the episode; every other step costs -1.
    """

    def __init__(self, H=4, W=12):
        """Create an H x W grid and place the agent on the start cell."""
        self.H = H
        self.W = W
        bottom = H - 1
        self.start = (bottom, 0)
        self.goal = (bottom, W - 1)
        self.cliff = {(bottom, col) for col in range(1, W - 1)}
        self.reset()

    def reset(self):
        """Move the agent back to the start cell and return that state."""
        self.s = self.start
        return self.s

    def step(self, a):
        """Apply action ``a`` (0=up, 1=right, 2=down, 3=left).

        Moves are clipped at the grid border.

        Returns:
            ``(next_state, reward, done, info)`` where ``info["fell"]`` tells
            whether this step hit the cliff.
        """
        row, col = self.s
        d_row, d_col = ((-1, 0), (0, 1), (1, 0), (0, -1))[a]
        next_row = max(0, min(self.H - 1, row + d_row))
        next_col = max(0, min(self.W - 1, col + d_col))
        target = (next_row, next_col)

        if target in self.cliff:
            # Falling is penalised and the agent restarts, but the episode
            # keeps going.
            self.s = self.start
            return self.start, -100.0, False, {"fell": True}

        self.s = target
        return target, -1.0, target == self.goal, {"fell": False}


# Shared environment instance and the sizes of the tabular state/action spaces.
env = CliffWalking()
nS = env.H * env.W
nA = 4


def s2i(s):
    """Flatten a (row, col) grid state into a row-major integer index."""
    return env.W * s[0] + s[1]
def softmax(z):
    """Return the softmax of ``z``.

    The maximum is subtracted first so the largest exponent is exp(0).
    """
    shifted = z - np.max(z)
    weights = np.exp(shifted)
    return weights / np.sum(weights)

# Initial policy table: one row of action preferences per state, all zeros.
theta0 = np.zeros(shape=(nS, nA))

def sample_episode(theta, max_steps=500, rng=None):
    """Roll out one episode in the module-level ``env`` under a softmax policy.

    Args:
        theta: Table of action preferences, indexed as ``theta[s2i(state)]``.
        max_steps: Hard cap on the number of steps; the episode is cut off
            there if the goal has not been reached.
        rng: Optional ``numpy.random.Generator`` used to draw actions. When
            omitted, actions come from NumPy's global legacy RNG exactly as
            before; pass a seeded generator to make rollouts reproducible.

    Returns:
        List of ``(state, action, reward, fell)`` tuples, one per step taken,
        where ``state`` is the state the action was chosen in.
    """
    # Both callables accept (n, p=probabilities), so the loop is RNG-agnostic.
    choose = np.random.choice if rng is None else rng.choice

    s = env.reset()
    traj = []
    for _ in range(max_steps):
        p = softmax(theta[s2i(s)])
        a = choose(nA, p=p)
        ns, r, done, info = env.step(a)
        traj.append((s, a, r, info["fell"]))
        s = ns
        if done:
            break
    return traj

def returns(traj, gamma=1.0):
    """Compute the discounted return-to-go for every step of a trajectory.

    Args:
        traj: Sequence of ``(state, action, reward, fell)`` tuples.
        gamma: Discount factor.

    Returns:
        List with one entry per step: that step's reward plus the discounted
        sum of all later rewards.
    """
    backwards = []
    running = 0.0
    # Accumulate from the last step towards the first, then flip the order.
    for _state, _action, reward, _fell in reversed(traj):
        running = reward + gamma * running
        backwards.append(running)
    backwards.reverse()
    return backwards

In [None]:
# Training setup: REINFORCE vs EVaR-PG (fixed λ); evaluation at α=0.95 and α=0.99.
alpha_list = [0.95, 0.99]
# λ for the exponential weighting and the learning rate.
# NOTE(review): the visible train(...) calls pass neither value, so train's own
# defaults (lr=0.01, lam=0.01) apply there — confirm which setting is intended.
lam, lr = 0.05, 0.05

def train(method="risk_neutral", episodes=1500, lr=0.01, lam=0.01):
    """Train a tabular softmax policy with per-episode policy-gradient updates.

    Args:
        method: ``"risk_neutral"`` weights each step by its return-to-go minus
            a baseline; any other value uses the exponential weighting
            labelled EVaR-PG in this file.
        episodes: Number of training episodes.
        lr: Step size applied to every update.
        lam: Fixed λ of the exponential weighting (unused when risk-neutral).

    Returns:
        ``(theta, episode_returns)``: the learned table and an array holding
        the undiscounted total reward of each training episode.
    """
    th = np.zeros_like(theta0)
    episode_returns = []
    risk_neutral = method == "risk_neutral"

    for _ in range(episodes):
        traj = sample_episode(th)
        Gs = returns(traj)
        episode_returns.append(sum(r for _s, _a, r, _f in traj))

        # Baseline: mean return-to-go over this episode.
        b = np.mean(Gs)

        # Updates are applied in place step by step, so later steps of the
        # same episode already see the adjusted table.
        for G, (s, a, _r, _f) in zip(Gs, traj):
            row = s2i(s)
            p = softmax(th[row])
            one = np.zeros(nA)
            one[a] = 1
            if risk_neutral:
                w = G - b
            else:
                # Exponential weighting with λ fixed, centred on the baseline.
                w = -np.exp(-lam * G) + np.exp(-lam * b)
            # (one - p) is the gradient of log softmax w.r.t. this row.
            th[row] += lr * (one - p) * w

    return th, np.array(episode_returns)


# Train one policy per objective; train() runs with its default hyperparameters.
th_rn, mets_rn = train("risk_neutral")
th_ev, mets_ev = train("evar")

# Learning curves: total return of every training episode, both methods.
plt.figure()
for curve, curve_label in ((mets_rn, "REINFORCE"), (mets_ev, "EVaR-PG (λ fixed)")):
    plt.plot(curve, label=curve_label)
plt.title("Episode Returns during Training (CliffWalking)")
plt.xlabel("Episode")
plt.ylabel("Return")
plt.legend()
plt.show()

def evaluate(theta, nroll=3000, alpha=0.95):
    """Estimate return and risk metrics of a policy from fresh rollouts.

    Args:
        theta: Policy table handed to ``sample_episode``.
        nroll: Number of episodes to sample.
        alpha: Level used for the VaR/CVaR entries and their key names.

    Returns:
        Dict with ``mean_return``, ``VaR@alpha``, ``CVaR@alpha`` and
        ``fall_freq`` (share of episodes with at least one cliff fall).
    """
    totals = []
    falls = 0
    for _ in range(nroll):
        episode = sample_episode(theta)
        totals.append(sum(r for _s, _a, r, _f in episode))
        # An episode counts once, however many times the agent fell in it.
        falls += any(fell for *_, fell in episode)
    totals = np.array(totals)

    return {
        "mean_return": float(np.mean(totals)),
        f"VaR@{alpha}": var_alpha(totals, alpha),
        f"CVaR@{alpha}": cvar_alpha_returns(totals, alpha),
        "fall_freq": falls / nroll,
    }

# Build evaluation table for both alphas
# One row per (policy, alpha). Every evaluate() call samples its own rollouts.
rows = []
for a in alpha_list:
    ev_rn = evaluate(th_rn, alpha=a)
    ev_ev = evaluate(th_ev, alpha=a)
    rows.extend([
        {"Policy": "Risk-neutral", **ev_rn, "alpha": a},
        {"Policy": "EVaR-PG", **ev_ev, "alpha": a},
    ])

rl_metrics_df = pd.DataFrame(rows)
rl_metrics_df

In [None]:
# Render the RL metrics table inline, then draw one bar chart per metric.
from IPython.display import display
import matplotlib.pyplot as plt
import pandas as pd

# (optional) sort the rows so the table reads cleanly
view_cols = list(rl_metrics_df.columns)  # or pick specific columns
out_df = rl_metrics_df.sort_values(["Policy", "alpha"]).copy()

display(out_df.style.format(precision=3))

# Bar chart per metric: every numeric column except the identifier columns
# (adjust to your column names).
is_numeric = pd.api.types.is_numeric_dtype
num_cols = [
    col for col in rl_metrics_df.columns
    if col not in ["Policy", "alpha"] and is_numeric(rl_metrics_df[col])
]

# x-axis: one "policy | alpha" label per row of the (unsorted) metrics table
x_labels = rl_metrics_df[["Policy", "alpha"]].astype(str).agg(" | ".join, axis=1)

for metric in num_cols:
    plt.figure()
    plt.title(f"{metric} per policy (agent | alpha)")
    plt.xticks(rotation=45, ha="right")
    plt.bar(x_labels, rl_metrics_df[metric])   # one plot per figure, default colors
    plt.ylabel(metric)
    plt.tight_layout()
    plt.show()