<a href="https://colab.research.google.com/github/aslestia/ACS_2025/blob/main/Week7_CVaR_Pricing_Insurance.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>


# Week 7 Practice 2 — CVaR-based Pricing for Insurance Claims

Tujuan:
- Mengestimasi **VaR** dan **CVaR** pada data klaim.
- Membandingkan **premi berbasis Expected Loss** vs **premi berbasis CVaR**.
- Opsional: pemodelan ekor sederhana dengan metode *Peaks Over Threshold* (POT) jika dibutuhkan.

> Jika Anda memiliki file klaim asli (mis. Danish Fire), setel `USE_SIM = False` pada sel kode dan sesuaikan nama file di `pd.read_csv(...)`; kolom pertama file akan dipakai sebagai kolom `loss`.


In [None]:

import numpy as np, pandas as pd, math
import matplotlib.pyplot as plt

# Fixed seed for NumPy's global RNG so the simulated claims below are
# reproducible on every run; kept as a named constant so it is easy to tune.
SEED = 123
np.random.seed(SEED)

# ====== Helper: empirical VaR & CVaR for losses ======
def var_cvar_losses(losses: np.ndarray, alpha: float=0.95):
    """Estimate the empirical VaR and CVaR of a loss sample (larger = worse).

    VaR is the order statistic at index ``floor(alpha * (n - 1))`` of the
    ascending-sorted sample. CVaR is the mean of every observation from that
    index onward, so the VaR observation itself is part of the tail average.

    Args:
        losses: Array-like of loss amounts; it is flattened to 1-D before use.
        alpha: Confidence level in the closed interval [0, 1].

    Returns:
        Tuple ``(var, cvar)``.

    Raises:
        ValueError: If ``alpha`` is outside [0, 1] or ``losses`` is empty.
    """
    # A negative alpha would become a negative index and silently wrap around
    # to the top of the sample; alpha > 1 would index past the end.
    if not 0.0 <= alpha <= 1.0:
        raise ValueError(f"alpha must be in [0, 1], got {alpha!r}")

    # axis=None flattens first, so an (n, 1) column is sorted as one sample
    # instead of row by row.
    x = np.sort(losses, axis=None)  # ascending
    if x.size == 0:
        raise ValueError("losses must contain at least one observation")

    q_idx = int(math.floor(alpha * (x.size - 1)))
    var = x[q_idx]
    # The tail always holds at least x[q_idx], so its mean is well defined.
    cvar = x[q_idx:].mean()
    return var, cvar

# ====== Load or simulate data ======
USE_SIM = True  # flip to False to read your own CSV instead

if not USE_SIM:
    import pandas as pd
    df = pd.read_csv("claims.csv")  # change to your file name
    # Whatever the first column is called, treat it as the loss amount.
    first_column = df.columns[0]
    df = df.rename(columns={first_column: "loss"})
    losses = df["loss"].values
else:
    # Heavy-tailed simulation: a Lognormal + Pareto mixture meant to resemble
    # fire-claims data. The order of the three random draws is kept fixed
    # because they all consume NumPy's global random stream.
    n = 5000
    # Component indicator: 1 -> lognormal (85%), 0 -> Pareto heavy tail (15%).
    mix = np.random.binomial(1, 0.85, size=n)
    logn = np.random.lognormal(mean=8.5, sigma=0.9, size=n)  # arbitrary scale
    # Pareto with scale xm and shape alpha_shape, built from uniform draws.
    xm = 5e3
    alpha_shape = 1.5
    pareto = xm * (1/np.random.random(size=n))**(1/alpha_shape)
    # Pick the Pareto draw where mix == 0 and the lognormal draw where mix == 1.
    losses = (1-mix)*pareto + mix*logn
    df = pd.DataFrame({"loss": losses})

df.describe()



## Estimasi VaR & CVaR dan Perbandingan Premi


In [None]:
import numpy as np, pandas as pd, math

# ====== Helper: empirical VaR & CVaR for losses ======
def var_cvar_losses(losses: np.ndarray, alpha: float=0.95):
    """Estimate the empirical VaR and CVaR of a loss sample (larger = worse).

    VaR is the order statistic at index ``floor(alpha * (n - 1))`` of the
    ascending-sorted sample. CVaR is the mean of every observation from that
    index onward, so the VaR observation itself is part of the tail average.

    Args:
        losses: Array-like of loss amounts; it is flattened to 1-D before use.
        alpha: Confidence level in the closed interval [0, 1].

    Returns:
        Tuple ``(var, cvar)``.

    Raises:
        ValueError: If ``alpha`` is outside [0, 1] or ``losses`` is empty.
    """
    # A negative alpha would become a negative index and silently wrap around
    # to the top of the sample; alpha > 1 would index past the end.
    if not 0.0 <= alpha <= 1.0:
        raise ValueError(f"alpha must be in [0, 1], got {alpha!r}")

    # axis=None flattens first, so an (n, 1) column is sorted as one sample
    # instead of row by row.
    x = np.sort(losses, axis=None)  # ascending
    if x.size == 0:
        raise ValueError("losses must contain at least one observation")

    q_idx = int(math.floor(alpha * (x.size - 1)))
    var = x[q_idx]
    # The tail always holds at least x[q_idx], so its mean is well defined.
    cvar = x[q_idx:].mean()
    return var, cvar

# ====== Load or simulate data ======
USE_SIM = True  # set False and load your CSV

if USE_SIM:
    # Re-seed with the same value the first cell uses (123) so this cell
    # rebuilds the identical sample. Without this, every cell would draw a
    # fresh sample and the summary would describe different data than the
    # statistics shown earlier in the notebook.
    np.random.seed(123)

    # Heavy-tailed simulation: Lognormal + Pareto mixture resembling fire claims.
    n = 5000
    mix = np.random.binomial(1, 0.85, size=n)  # 1 -> lognormal (85%), 0 -> Pareto heavy tail (15%)
    logn = np.random.lognormal(mean=8.5, sigma=0.9, size=n)  # arbitrary scale
    # Pareto(scale xm, shape alpha_shape)
    xm, alpha_shape = 5e3, 1.5
    pareto = xm * (1/np.random.random(size=n))**(1/alpha_shape)
    losses = mix*logn + (1-mix)*pareto
    df = pd.DataFrame({"loss": losses})
else:
    df = pd.read_csv("claims.csv")  # change to your file name
    # Whatever the first column is called, treat it as the loss amount.
    df = df.rename(columns={df.columns[0]:"loss"})
    losses = df["loss"].values

# Confidence levels at which VaR/CVaR are reported.
alpha_list = [0.95, 0.99]
mean_loss = losses.mean()
prem_expected = mean_loss  # expected-loss premium (no loading yet)

# One record per confidence level; the frame is built once from the list.
rows = []
for a in alpha_list:
    var, cvar = var_cvar_losses(losses, alpha=a)
    rows.append({"alpha": a, "VaR": var, "CVaR": cvar})

summary = pd.DataFrame(rows)
# Same expected loss on every row so it can be compared with VaR/CVaR.
summary["Expected_Loss"] = mean_loss

In [None]:
import numpy as np, pandas as pd, math

# ====== Helper: empirical VaR & CVaR for losses ======
def var_cvar_losses(losses: np.ndarray, alpha: float=0.95):
    """Estimate the empirical VaR and CVaR of a loss sample (larger = worse).

    VaR is the order statistic at index ``floor(alpha * (n - 1))`` of the
    ascending-sorted sample. CVaR is the mean of every observation from that
    index onward, so the VaR observation itself is part of the tail average.

    Args:
        losses: Array-like of loss amounts; it is flattened to 1-D before use.
        alpha: Confidence level in the closed interval [0, 1].

    Returns:
        Tuple ``(var, cvar)``.

    Raises:
        ValueError: If ``alpha`` is outside [0, 1] or ``losses`` is empty.
    """
    # A negative alpha would become a negative index and silently wrap around
    # to the top of the sample; alpha > 1 would index past the end.
    if not 0.0 <= alpha <= 1.0:
        raise ValueError(f"alpha must be in [0, 1], got {alpha!r}")

    # axis=None flattens first, so an (n, 1) column is sorted as one sample
    # instead of row by row.
    x = np.sort(losses, axis=None)  # ascending
    if x.size == 0:
        raise ValueError("losses must contain at least one observation")

    q_idx = int(math.floor(alpha * (x.size - 1)))
    var = x[q_idx]
    # The tail always holds at least x[q_idx], so its mean is well defined.
    cvar = x[q_idx:].mean()
    return var, cvar

# ====== Load or simulate data ======
from IPython.display import display

USE_SIM = True  # set False and load your CSV

if USE_SIM:
    # Re-seed with the same value the first cell uses (123) so this cell
    # rebuilds the identical sample. Without this, every cell would draw a
    # fresh sample and the table below would describe different data than
    # the statistics shown earlier in the notebook.
    np.random.seed(123)

    # Heavy-tailed simulation: Lognormal + Pareto mixture resembling fire claims.
    n = 5000
    mix = np.random.binomial(1, 0.85, size=n)  # 1 -> lognormal (85%), 0 -> Pareto heavy tail (15%)
    logn = np.random.lognormal(mean=8.5, sigma=0.9, size=n)  # arbitrary scale
    # Pareto(scale xm, shape alpha_shape)
    xm, alpha_shape = 5e3, 1.5
    pareto = xm * (1/np.random.random(size=n))**(1/alpha_shape)
    losses = mix*logn + (1-mix)*pareto
    df = pd.DataFrame({"loss": losses})
else:
    df = pd.read_csv("claims.csv")  # change to your file name
    # Whatever the first column is called, treat it as the loss amount.
    df = df.rename(columns={df.columns[0]:"loss"})
    losses = df["loss"].values

# Confidence levels at which VaR/CVaR are reported.
alpha_list = [0.95, 0.99]
mean_loss = losses.mean()
prem_expected = mean_loss  # expected-loss premium (no loading yet)

# One record per confidence level; the frame is built once from the list.
rows = []
for a in alpha_list:
    var, cvar = var_cvar_losses(losses, alpha=a)
    rows.append({"alpha": a, "VaR": var, "CVaR": cvar})

summary = pd.DataFrame(rows)
# Same expected loss on every row so it can be compared with VaR/CVaR.
summary["Expected_Loss"] = mean_loss

print("Ringkasan Premi & Risiko:\n")
# Thousands separators and two decimals on the monetary columns only;
# the alpha column keeps its default rendering.
display(summary.style.format("{:,.2f}", subset=["VaR", "CVaR", "Expected_Loss"]))

In [None]:
import numpy as np, pandas as pd, math
import matplotlib.pyplot as plt

# ====== Helper: empirical VaR & CVaR for losses ======
def var_cvar_losses(losses: np.ndarray, alpha: float=0.95):
    """Estimate the empirical VaR and CVaR of a loss sample (larger = worse).

    VaR is the order statistic at index ``floor(alpha * (n - 1))`` of the
    ascending-sorted sample. CVaR is the mean of every observation from that
    index onward, so the VaR observation itself is part of the tail average.

    Args:
        losses: Array-like of loss amounts; it is flattened to 1-D before use.
        alpha: Confidence level in the closed interval [0, 1].

    Returns:
        Tuple ``(var, cvar)``.

    Raises:
        ValueError: If ``alpha`` is outside [0, 1] or ``losses`` is empty.
    """
    # A negative alpha would become a negative index and silently wrap around
    # to the top of the sample; alpha > 1 would index past the end.
    if not 0.0 <= alpha <= 1.0:
        raise ValueError(f"alpha must be in [0, 1], got {alpha!r}")

    # axis=None flattens first, so an (n, 1) column is sorted as one sample
    # instead of row by row.
    x = np.sort(losses, axis=None)  # ascending
    if x.size == 0:
        raise ValueError("losses must contain at least one observation")

    q_idx = int(math.floor(alpha * (x.size - 1)))
    var = x[q_idx]
    # The tail always holds at least x[q_idx], so its mean is well defined.
    cvar = x[q_idx:].mean()
    return var, cvar

# ====== Load or simulate data ======
USE_SIM = True  # set False and load your CSV

if USE_SIM:
    # Re-seed with the same value the first cell uses (123) so this cell
    # rebuilds the identical sample. Without this, every cell would draw a
    # fresh sample and the histogram would show different data than the
    # statistics reported earlier in the notebook.
    np.random.seed(123)

    # Heavy-tailed simulation: Lognormal + Pareto mixture resembling fire claims.
    n = 5000
    mix = np.random.binomial(1, 0.85, size=n)  # 1 -> lognormal (85%), 0 -> Pareto heavy tail (15%)
    logn = np.random.lognormal(mean=8.5, sigma=0.9, size=n)  # arbitrary scale
    # Pareto(scale xm, shape alpha_shape)
    xm, alpha_shape = 5e3, 1.5
    pareto = xm * (1/np.random.random(size=n))**(1/alpha_shape)
    losses = mix*logn + (1-mix)*pareto
    df = pd.DataFrame({"loss": losses})
else:
    df = pd.read_csv("claims.csv")  # change to your file name
    # Whatever the first column is called, treat it as the loss amount.
    df = df.rename(columns={df.columns[0]:"loss"})
    losses = df["loss"].values


# Histogram of the losses with the VaR/CVaR levels marked (alpha=0.95)
alpha = 0.95
var, cvar = var_cvar_losses(losses, alpha=alpha)

fig, ax = plt.subplots(figsize=(7.5, 4))
ax.hist(losses, bins=100)
# Explicit colours: by default both lines would share the bars' colour and
# be hard to tell apart from the histogram and from each other.
ax.axvline(var, linestyle="--", color="tab:orange", label=f"VaR {alpha:.2f}")
ax.axvline(cvar, linestyle="-.", color="tab:red", label=f"CVaR {alpha:.2f}")
ax.set_title("Histogram Losses with VaR and CVaR")
ax.set_xlabel("Loss amount")
ax.set_ylabel("Count")
ax.legend()
ax.set_yscale("log")  # log counts make the heavy tail easier to see
plt.show()