In [5]:
import numpy as np
import math, time, json, os
import torch
import torch.nn as nn
import torch.optim as optim
import pandas as pd

In [None]:
# Load the demonstration array from disk (placeholder path; point it at the real file).
path = "/path/to/noisy_dataset.npy"
arr = np.load(path)
# The rest of the notebook slices columns 0:2 as states and 2:4 as actions,
# so fail early if the file does not have the expected layout.
# (Comparing the full shape tuple already implies a 3-D array.)
assert arr.shape == (4, 300, 4), f"Expected (4,300,4), got {arr.shape}"
print(arr.shape)

In [None]:
import matplotlib.pyplot as plt

# Plot column 0 against column 1 of every trajectory in `arr` on one figure.
plt.figure(figsize=(5, 5))

for i, traj in enumerate(arr):
    plt.plot(
        traj[:, 0], traj[:, 1],
        linewidth=2.5,
        label=f"Trajectory {i+1}"
    )

# Bold title
# NOTE(review): the data comes from "noisy_dataset.npy" and the figure is saved as
# "noisy_trajectories.png", yet the title says "Clean" -- confirm which is intended.
plt.title("Clean Trajectories", fontsize=14, fontweight="bold")

# Bold axis labels
plt.xlabel("X axis", fontsize=12, fontweight="bold")
plt.ylabel("Y axis", fontsize=12, fontweight="bold")

# Bold tick labels
plt.xticks(fontsize=10, fontweight="bold")
plt.yticks(fontsize=10, fontweight="bold")

# Fix axis range to [-2, 2]
plt.xlim(-2, 2)
plt.ylim(-2, 2)

# Grid and aspect ratio.
# plt.axis("equal") would achieve equal scaling by *changing the axis limits*,
# undoing the fixed range set just above. Adjusting the box instead keeps the
# [-2, 2] limits and still draws both axes at the same scale.
plt.grid(True, linewidth=1.2)
plt.gca().set_aspect("equal", adjustable="box")

# plt.legend(fontsize=10, frameon=True)

# Save high-resolution image
plt.savefig("noisy_trajectories.png", dpi=600, bbox_inches="tight")
plt.show()


In [8]:
# One dict per trajectory: columns 0:2 are stored as "states" and columns 2:4
# as "actions_exec", both cast to float32.
demos = [
    {
        "states": traj[:, :2].astype(np.float32),
        "actions_exec": traj[:, 2:4].astype(np.float32),
    }
    for traj in arr
]



In [9]:
# Concatenate all demonstrations along the time axis.
# X / Y hold the stacked states / actions; demo_ids[r] is the index of the
# demonstration that row r came from (int64, so to_torch maps it to torch.long).
X = np.concatenate([d["states"] for d in demos], axis=0)
Y = np.concatenate([d["actions_exec"] for d in demos], axis=0)
demo_ids = np.concatenate(
    [np.full((d["states"].shape[0],), i, np.int64) for i, d in enumerate(demos)],
    axis=0,
)

In [10]:

# Normalize
def normalize_xy(X, Y):
    """Standardize inputs and targets column-wise.

    Each column of ``X`` and ``Y`` is shifted by its mean and divided by its
    standard deviation (plus 1e-6 so constant columns do not divide by zero).

    Returns:
        (Xn, Yn, stats) where ``stats`` is a dict with keys ``x_mean``,
        ``x_std``, ``y_mean`` and ``y_std`` holding the values used.
    """
    eps = 1e-6
    x_mean = X.mean(0)
    x_std = X.std(0) + eps
    y_mean = Y.mean(0)
    y_std = Y.std(0) + eps
    stats = {"x_mean": x_mean, "x_std": x_std, "y_mean": y_mean, "y_std": y_std}
    return (X - x_mean) / x_std, (Y - y_mean) / y_std, stats

# Standardize the stacked states/actions; `stats` keeps the means and stds so
# rollout_policy can map between normalized and original units.
Xn, Yn, stats = normalize_xy(X, Y)

In [11]:
def to_torch(*arrs, device="cpu"):
    """Convert each argument to a tensor on ``device``.

    NumPy arrays whose dtype is int64 become ``torch.long`` tensors (index
    arrays); every other input is converted to ``torch.float32``.

    Returns:
        A list of tensors, in the same order as the arguments.
    """
    def _as_tensor(a):
        is_index_array = isinstance(a, np.ndarray) and a.dtype == np.int64
        target_dtype = torch.long if is_index_array else torch.float32
        return torch.tensor(a, dtype=target_dtype, device=device)

    return [_as_tensor(a) for a in arrs]

# Seed both random number generators used in this notebook so repeated
# Restart-and-Run-All executions draw the same numbers.
np.random.seed(0)
torch.manual_seed(0)

# Device passed to the training and rollout helpers.
device = "cpu"

In [12]:
class MLP(nn.Module):
    """Feed-forward network with two hidden ReLU layers.

    Maps an input of size ``p`` to an output of size ``d`` through two hidden
    layers of ``width`` units each.
    """

    def __init__(self, p, d, width=128):
        super().__init__()
        layers = [
            nn.Linear(p, width),
            nn.ReLU(),
            nn.Linear(width, width),
            nn.ReLU(),
            nn.Linear(width, d),
        ]
        self.net = nn.Sequential(*layers)

    def forward(self, x):
        """Return the network output for input ``x``."""
        return self.net(x)

class GaussianMLP(nn.Module):
    """Network that outputs a diagonal Gaussian over a ``d``-dimensional target.

    Two independent MLPs (two hidden ReLU layers of ``width`` units each) map
    an input of size ``p`` to the mean and to a raw scale value.

    Args:
        p: input feature size.
        d: output (event) size.
        width: hidden layer width of both heads.
        min_std: constant added to the softplus-transformed scale so the
            standard deviation is bounded away from zero.
    """

    def __init__(self, p, d, width=128, min_std=1e-3):
        super().__init__()
        self.min_std = min_std
        self.mu_head = nn.Sequential(
            nn.Linear(p, width), nn.ReLU(),
            nn.Linear(width, width), nn.ReLU(),
            nn.Linear(width, d)
        )
        self.log_std_head = nn.Sequential(
            nn.Linear(p, width), nn.ReLU(),
            nn.Linear(width, width), nn.ReLU(),
            nn.Linear(width, d)
        )

    def forward(self, x):
        """Return an ``Independent(Normal)`` whose event is the last dimension."""
        mu = self.mu_head(x)
        # Despite the head's name, its output goes through softplus (not exp).
        raw_std = self.log_std_head(x)
        # Use fully-qualified torch names instead of the `F` / `D` aliases:
        # those aliases are only imported in a later notebook cell, so relying
        # on them makes this class depend on cell execution order.
        std = torch.nn.functional.softplus(raw_std) + self.min_std
        base = torch.distributions.Normal(loc=mu, scale=std)
        return torch.distributions.Independent(base, 1)

class GMMMLP(nn.Module):
    """Mixture-density network: an MLP trunk with Gaussian-mixture output heads.

    The trunk (two hidden ReLU layers of ``width`` units) feeds three linear
    heads producing, for each input, ``M`` mixture logits, ``M`` component
    means of size ``d`` and ``M`` raw component scales of size ``d``.

    Args:
        p: input feature size.
        d: output (event) size.
        M: number of mixture components.
        width: hidden layer width of the trunk.
        min_std: constant added to the softplus-transformed scales.
    """

    def __init__(self, p, d, M=5, width=128, min_std=1e-3):
        super().__init__()
        self.M = M
        self.d = d
        self.min_std = min_std
        self.net = nn.Sequential(
            nn.Linear(p, width), nn.ReLU(),
            nn.Linear(width, width), nn.ReLU()
        )
        self.pi_head = nn.Linear(width, M)
        self.mu_head = nn.Linear(width, M * d)
        self.log_std_head = nn.Linear(width, M * d)

    def forward(self, x):
        """Return a ``MixtureSameFamily`` of diagonal Gaussians for ``x``.

        ``x`` has shape ``(..., p)``; the returned distribution has batch shape
        ``(...)`` and event shape ``(d,)``.
        """
        h = self.net(x)
        # Keep every leading dimension of the input rather than assuming a
        # single batch axis, so both (B, p) and (B, T, p) inputs work.
        lead = h.shape[:-1]
        pi_logits = self.pi_head(h)
        mu = self.mu_head(h).reshape(*lead, self.M, self.d)
        # Despite the head's name, its output goes through softplus (not exp).
        raw_std = self.log_std_head(h).reshape(*lead, self.M, self.d)
        # Fully-qualified torch names: the `F` / `D` aliases are only imported
        # in a later notebook cell, so this class must not depend on them.
        std = torch.nn.functional.softplus(raw_std) + self.min_std

        comp_dist = torch.distributions.Independent(
            torch.distributions.Normal(loc=mu, scale=std), 1
        )
        mix_dist = torch.distributions.Categorical(logits=pi_logits)
        return torch.distributions.MixtureSameFamily(mix_dist, comp_dist)

class GRUPolicy(nn.Module):
    """Single-layer GRU followed by a linear read-out applied at every step.

    Args:
        p: input feature size per time step.
        d: output size per time step.
        h: GRU hidden size.
    """

    def __init__(self, p, d, h=128):
        super().__init__()
        self.gru = nn.GRU(p, h, batch_first=True)
        self.head = nn.Linear(h, d)

    def forward(self, x, h0=None):
        """Run the GRU over ``x`` (batch-first) starting from ``h0``.

        Returns:
            (y, h): per-step outputs of the linear head and the final GRU
            hidden state.
        """
        hidden_seq, h_last = self.gru(x, h0)
        return self.head(hidden_seq), h_last

class TransformerPolicy(nn.Module):
    """Transformer-encoder policy mapping a state sequence to per-step outputs.

    Inputs of size ``p`` are projected linearly, passed through a stack of
    ``nlayers`` encoder layers, and mapped to size ``d`` by a linear head.

    Note that ``dim_feedforward`` is used both as the encoder model width
    (``d_model``) and as the encoder's feed-forward width. ``context_length``
    is stored on the instance but is not used inside this class.
    """

    def __init__(self, p, d, nhead=2, nlayers=2, dim_feedforward=256, dropout=0.1, context_length=32):
        super().__init__()
        self.p = p
        self.d = d
        self.context_length = context_length

        model_width = dim_feedforward
        self.input_proj = nn.Linear(p, model_width)
        layer = nn.TransformerEncoderLayer(
            d_model=model_width,
            nhead=nhead,
            dim_feedforward=dim_feedforward,
            dropout=dropout,
            batch_first=True,
        )
        self.transformer_encoder = nn.TransformerEncoder(layer, num_layers=nlayers)
        self.output_head = nn.Linear(model_width, d)

    def forward(self, x):
        """Return per-step outputs for a batch-first input sequence ``x``."""
        projected = self.input_proj(x)
        encoded = self.transformer_encoder(projected)
        return self.output_head(encoded)

class ILEEDGMM(nn.Module):
    """Gaussian-mixture policy with a per-demonstration, state-dependent scalar ``rho``.

    Components:
      * ``feat`` + ``pi_head`` / ``mu_head`` / ``logsig_head``: a mixture of
        ``M`` diagonal Gaussians over a ``d``-dimensional action, given a
        ``p``-dimensional state.
      * ``embed``: maps a state to a ``k``-dimensional embedding ``z``.
      * ``omega``: one learnable ``k``-vector per demonstration.
      * ``trans``: predicts the next state's embedding from ``[z, a]``; used
        only by the auxiliary loss in ``nll_with_aux``.

    ``rho = sigmoid(<z, omega[demo]>)`` (clamped to ``[rho_min, 1]``) scales the
    squared-error term of every mixture component in ``logprob``.
    """
    def __init__(self, p, d, n_demos, M=5, k=16, width=128, rho_min=0.05,
                 l2_omega=1e-3, l2_embed=1e-4, aux_lambda=1e-2):
        """Build all sub-networks.

        Args:
            p: state size.
            d: action size.
            n_demos: number of demonstrations (rows of ``omega``).
            M: number of mixture components.
            k: embedding size shared by ``embed``, ``omega`` and ``trans``.
            width: hidden layer width of every sub-network.
            rho_min: lower clamp applied to ``rho``.
            l2_omega: weight of the mean-squared penalty on ``omega``.
            l2_embed: weight of the mean-squared penalty on the embeddings.
            aux_lambda: default weight of the auxiliary embedding-prediction loss.
        """
        super().__init__()
        self.p, self.d, self.M, self.k = p, d, M, k
        self.width = width
        self.rho_min = rho_min
        self.l2_omega = l2_omega
        self.l2_embed = l2_embed
        self.aux_lambda = aux_lambda
        hid = width
        # Shared trunk for the three mixture heads.
        self.feat = nn.Sequential(nn.Linear(p,hid), nn.ReLU(),
                                  nn.Linear(hid,hid), nn.ReLU())
        self.pi_head   = nn.Linear(hid, M)
        self.mu_head   = nn.Linear(hid, M*d)
        self.logsig_head = nn.Linear(hid, M*d)
        # State embedding and per-demonstration vectors used to compute rho.
        self.embed = nn.Sequential(nn.Linear(p,width), nn.ReLU(), nn.Linear(width,k))
        self.omega = nn.Parameter(torch.zeros(n_demos,k))
        nn.init.normal_(self.omega, std=0.1)
        # Embedding transition model: (z, a) -> predicted next embedding.
        self.trans = nn.Sequential(
            nn.Linear(k + d, width), nn.ReLU(),
            nn.Linear(width, width), nn.ReLU(),
            nn.Linear(width, k),
        )
        # Floor added to every component scale after the softplus.
        self.min_sigma = 1e-3
    def gmm_params(self, s):
        """Return ``(pi_logits, mu, sigma)`` for states ``s``.

        Shapes are ``(B, M)``, ``(B, M, d)`` and ``(B, M, d)`` with
        ``B = s.shape[0]``. The ``view`` calls assume ``s`` is 2-D ``(B, p)``
        -- TODO confirm no caller passes extra leading dimensions.
        """
        h = self.feat(s)
        B = s.shape[0]
        pi_logits = self.pi_head(h)
        mu = self.mu_head(h).view(B, self.M, self.d)
        # Named log_sigma, but it is mapped through softplus (not exp) below.
        log_sigma = self.logsig_head(h).view(B, self.M, self.d)
        sigma = torch.nn.functional.softplus(log_sigma) + self.min_sigma
        return pi_logits, mu, sigma
    def rho_from_ids(self, s_for_embed, demo_ids):
        """Return ``(rho, z)`` for states and their demonstration indices.

        ``z`` is the state embedding; ``rho`` has a trailing singleton
        dimension and lies in ``[rho_min, 1]``.
        """
        z = self.embed(s_for_embed)
        w = self.omega[demo_ids]
        # Inner product of the state embedding and the demonstration vector.
        rho = torch.sigmoid((z*w).sum(-1, keepdim=True))
        rho = torch.clamp(rho, min=self.rho_min, max=1.0)
        return rho, z
    def logprob(self, a, pi_logits, mu, sigma, rho):
        """Return the mixture log-likelihood of actions ``a``, one value per row.

        Each component's log-density is
        ``-0.5 * (d*log(2*pi) + sum(2*log(sigma)) - d*log(rho) + rho * sum(((a-mu)/sigma)**2))``,
        i.e. the squared-error term is multiplied by ``rho`` and the
        log-determinant term is reduced by ``d*log(rho)``. The components are
        combined with ``logsumexp`` using log-softmaxed ``pi_logits``.
        """
        B,M,d = a.shape[0], self.M, self.d
        # Broadcast the action against every mixture component.
        a_exp = a[:,None,:].expand(B,M,d)
        quad_base = ((a_exp - mu) / (sigma+1e-8))**2
        quad_base = quad_base.sum(-1)
        s2log = (2.0*torch.log(sigma+1e-8)).sum(-1)
        logdet_eff = s2log - d * torch.log(rho+1e-8)
        const = d * math.log(2*math.pi)
        logN = -0.5*(const + logdet_eff + rho*quad_base)
        logpi = torch.log_softmax(pi_logits, dim=-1)
        logmix = logpi + logN
        return torch.logsumexp(logmix, dim=-1)
    def nll_with_aux(self, s_policy, s_embed, a, demo_ids, s_next, aux_lambda=None):
        """Return the full training loss and a dict of its detached parts.

        ``loss = nll + aux_lambda * aux + reg`` where

          * ``nll`` is the negative mean of ``logprob``;
          * ``aux`` is the mean squared error between ``trans([z, a])`` and the
            embedding of ``s_next`` (the target is computed under ``no_grad``);
          * ``reg = l2_omega * mean(omega**2) + l2_embed * mean(z**2)``.

        ``aux_lambda=None`` falls back to ``self.aux_lambda``. The second
        return value has keys ``nll``, ``aux`` and ``reg``.
        """
        pi_logits, mu, sigma = self.gmm_params(s_policy)
        rho, z = self.rho_from_ids(s_embed, demo_ids)
        logp = self.logprob(a, pi_logits, mu, sigma, rho)
        nll = -logp.mean()
        if aux_lambda is None:
            aux_lambda = self.aux_lambda
        # The next-state embedding is a fixed target: no gradient flows through it.
        with torch.no_grad():
            z_next_target = self.embed(s_next)
        z_pred = self.trans(torch.cat([z, a], dim=-1))
        aux = ((z_pred - z_next_target)**2).mean()
        reg = self.l2_omega*(self.omega**2).mean() + self.l2_embed*(z**2).mean()
        return nll + aux_lambda*aux + reg, dict(nll=nll.detach(), aux=aux.detach(), reg=reg.detach())

In [13]:
def make_seq_dataset(demos, Xn, Yn, seq_len=32, seq_stride=16):
    """Cut each demonstration into sliding windows of at most ``seq_len`` steps.

    ``Xn`` / ``Yn`` are the row-wise concatenation of all demonstrations, in
    the same order as ``demos``; each demo's length is read from
    ``demo["states"].shape[0]``. Window starts advance by ``seq_stride``.
    A demonstration shorter than ``seq_len`` yields one (shorter) window.

    Returns:
        A list of ``(x_window, y_window)`` tuples.
    """
    windows = []
    cursor = 0
    for demo in demos:
        n_steps = demo["states"].shape[0]
        demo_x = Xn[cursor:cursor + n_steps]
        demo_y = Yn[cursor:cursor + n_steps]
        cursor += n_steps

        # At least one start (0) is always used, even for short demos.
        start_limit = max(1, n_steps - seq_len + 1)
        for start in range(0, start_limit, seq_stride):
            stop = min(n_steps, start + seq_len)
            windows.append((demo_x[start:stop], demo_y[start:stop]))
    return windows

In [14]:
def make_transition_dataset(demos, Xn, Yn):
    """Build (state, action, next state, demo index) arrays from stacked demos.

    ``Xn`` / ``Yn`` are the row-wise concatenation of all demonstrations, in
    the same order as ``demos``. For a demonstration of length ``T`` the first
    ``T-1`` rows are paired with the rows that follow them; demonstrations
    with fewer than two steps contribute nothing.

    Returns:
        ``(S, A, S_next, ids)``. When no demonstration has at least two
        steps, zero-row arrays with matching column counts are returned
        (float32 for the first three, int64 for ``ids``).
    """
    cur_states, cur_actions, next_states, owner_ids = [], [], [], []
    start = 0
    for demo_idx, demo in enumerate(demos):
        n_steps = demo["states"].shape[0]
        stop = start + n_steps
        if n_steps >= 2:
            cur_states.append(Xn[start:stop - 1])
            cur_actions.append(Yn[start:stop - 1])
            next_states.append(Xn[start + 1:stop])
            owner_ids.append(np.full((n_steps - 1,), demo_idx, np.int64))
        start = stop

    if not cur_states:
        state_dim, action_dim = Xn.shape[1], Yn.shape[1]
        return (
            np.zeros((0, state_dim), np.float32),
            np.zeros((0, action_dim), np.float32),
            np.zeros((0, state_dim), np.float32),
            np.zeros((0,), np.int64),
        )
    return (
        np.concatenate(cur_states, 0),
        np.concatenate(cur_actions, 0),
        np.concatenate(next_states, 0),
        np.concatenate(owner_ids, 0),
    )


In [15]:
def train_bc(Xn, Yn, width=128, W=None, lr=1e-3, epochs=120, batch=8192, device="cpu", clip=1.0):
    """Fit an ``MLP`` to ``(Xn, Yn)`` with a per-sample weighted squared error.

    The per-sample loss is the squared error summed over output dimensions,
    multiplied by that sample's weight; the batch loss is the mean of those.

    Args:
        Xn, Yn: input and target arrays with one row per sample.
        width: hidden width of the MLP.
        W: optional per-sample weights; all ones when ``None``.
        lr: Adam learning rate.
        epochs: number of passes over the data.
        batch: mini-batch size (capped at the number of samples).
        device: torch device for the model and data.
        clip: max gradient norm; a falsy value disables clipping.

    Returns:
        ``(net, loss)`` where ``loss`` is the same objective evaluated on the
        full training set after the last epoch.
    """
    in_dim, out_dim = Xn.shape[1], Yn.shape[1]
    net = MLP(in_dim, out_dim, width=width).to(device)
    opt = optim.Adam(net.parameters(), lr=lr)
    Xt, Yt = to_torch(Xn, Yn, device=device)
    if W is None:
        Wt = torch.ones(Xt.shape[0], device=device)
    else:
        Wt = torch.tensor(W, dtype=torch.float32, device=device)

    def weighted_sq_err(inputs, targets, weights):
        per_sample = ((net(inputs) - targets) ** 2).sum(-1)
        return (weights * per_sample).mean()

    n_samples = Xt.shape[0]
    step = min(batch, n_samples)
    for _ in range(epochs):
        order = torch.randperm(n_samples, device=device)
        for start in range(0, n_samples, step):
            rows = order[start:start + step]
            loss = weighted_sq_err(Xt[rows], Yt[rows], Wt[rows])
            opt.zero_grad()
            loss.backward()
            if clip:
                nn.utils.clip_grad_norm_(net.parameters(), clip)
            opt.step()

    # Final training loss (same objective), evaluated on the whole set.
    with torch.no_grad():
        final_loss = weighted_sq_err(Xt, Yt, Wt).item()
    return net, final_loss

In [16]:
def train_traj_bc(demos, Xn, Yn, width=128, lr=1e-3, epochs=120, batch=8192, device="cpu", clip=1.0):
    """Train ``train_bc`` with one weight per trajectory.

    Each demonstration's weight is a heuristic product of three optional
    fields read from its dict -- ``slip_width`` (default 0.25), ``clip_ratio``
    (default 0.05) and ``distance`` (default 8.0). Weights are rescaled to
    mean 1 across demonstrations and then repeated for every step of the
    corresponding demonstration.

    Returns:
        ``(net, loss, W)`` where ``W`` is the per-sample weight vector used.
    """
    def heuristic_weight(demo):
        slip_frac = float(demo.get("slip_width", 0.25))
        clip_ratio = float(demo.get("clip_ratio", 0.05))
        dist = float(demo.get("distance", 8.0))
        return (1.0 - 0.5*slip_frac) * (1.0 - 0.3*clip_ratio) * (1.0 + 0.02*dist)

    raw_w = np.array([heuristic_weight(d) for d in demos], np.float32)
    demo_w = raw_w / raw_w.mean()

    # Expand the per-trajectory weights to one weight per time step.
    per_step = [
        np.full(d["states"].shape[0], w, np.float32)
        for w, d in zip(demo_w, demos)
    ]
    W = np.concatenate(per_step, 0)

    net, loss = train_bc(
        Xn, Yn,
        width=width, W=W, lr=lr, epochs=epochs,
        batch=batch, device=device, clip=clip,
    )
    return net, loss, W

In [17]:
def train_ileed_paperfaithful(demos, Xn, Yn, demo_ids, width=128, k=16, M=5, rho_min=0.05,
                              l2_omega=1e-3, l2_embed=1e-4, aux_lambda=1e-2,
                              epochs_warm=60, epochs_joint=90, lr_warm=1e-3, lr_joint=6e-4,
                              batch=256, grad_clip=1.0, device="cpu"):
    """Train an ``ILEEDGMM`` in two phases and report its final loss.

    Phase 1 (warm-up): only the mixture trunk and heads are optimized, on all
    samples, with ``rho`` fixed to 1; ``embed``, ``omega`` and ``trans`` are
    frozen. Phase 2 (joint): all parameters are optimized on the transition
    dataset with ``ILEEDGMM.nll_with_aux``.

    Args:
        demos: list of demonstration dicts (each with a ``"states"`` array).
        Xn, Yn: stacked normalized states and actions, in ``demos`` order.
        demo_ids: accepted for interface compatibility but not read in this
            function; the ids are rebuilt by ``make_transition_dataset``.
        width, k, M, rho_min, l2_omega, l2_embed, aux_lambda: forwarded to
            ``ILEEDGMM``.
        epochs_warm, epochs_joint: number of epochs in each phase.
        lr_warm, lr_joint: Adam learning rate in each phase.
        batch: mini-batch size (capped at the dataset size of each phase).
        grad_clip: max gradient norm; a falsy value disables clipping.
        device: torch device for the model and data.

    Returns:
        ``(net, loss_val, components)``: the trained model, the joint
        objective on the full transition set, and a dict of its float parts.
    """
    p, d = Xn.shape[1], Yn.shape[1]; n_d = len(demos)
    net = ILEEDGMM(p, d, n_d, M=M, k=k, width=width, rho_min=rho_min,
                   l2_omega=l2_omega, l2_embed=l2_embed, aux_lambda=aux_lambda).to(device)
    # Transition tuples (s, a, s_next, demo index) used by the joint phase.
    S0, A0, S1, ids_tr = make_transition_dataset(demos, Xn, Yn)
    Xt, At, Xnext, Ids_tr = to_torch(S0, A0, S1, ids_tr, device=device)
    Ntr = Xt.shape[0]; B = min(batch, Ntr)
    # Warm
    # The warm-up uses every sample, including each demonstration's last step.
    Xall, Yall = to_torch(Xn, Yn, device=device)
    Nall = Xall.shape[0]; Bw = min(batch, Nall)
    # Freeze everything that is not part of the plain mixture policy.
    freezed = list(net.embed.parameters()) + [net.omega] + list(net.trans.parameters())
    for pmt in freezed: pmt.requires_grad_(False)
    opt_warm = optim.Adam(
        list(net.feat.parameters()) + list(net.pi_head.parameters())
        + list(net.mu_head.parameters()) + list(net.logsig_head.parameters()),
        lr=lr_warm
    )
    for _ in range(epochs_warm):
        idx = torch.randperm(Nall, device=device)
        for k0 in range(0, Nall, Bw):
            sel = idx[k0:k0+Bw]
            pi_logits, mu, sigma = net.gmm_params(Xall[sel])
            # rho == 1 reduces logprob to an ordinary Gaussian-mixture likelihood.
            rho_ones = torch.ones((sel.shape[0],1), device=device)
            logp = net.logprob(Yall[sel], pi_logits, mu, sigma, rho_ones)
            loss = -logp.mean()
            opt_warm.zero_grad(); loss.backward()
            if grad_clip: nn.utils.clip_grad_norm_(net.parameters(), grad_clip)
            opt_warm.step()
    # Joint
    # Unfreeze and optimize every parameter with a fresh optimizer.
    for pmt in freezed: pmt.requires_grad_(True)
    opt_joint = optim.Adam(net.parameters(), lr=lr_joint)
    for _ in range(epochs_joint):
        idx = torch.randperm(Ntr, device=device)
        for k0 in range(0, Ntr, B):
            sel = idx[k0:k0+B]
            # The same states feed both the policy trunk and the embedding.
            loss, _logs = net.nll_with_aux(
                s_policy= Xt[sel],
                s_embed = Xt[sel],
                a       = At[sel],
                demo_ids= Ids_tr[sel],
                s_next  = Xnext[sel]
            )
            opt_joint.zero_grad(); loss.backward()
            if grad_clip: nn.utils.clip_grad_norm_(net.parameters(), grad_clip)
            opt_joint.step()
    # Final training loss on transitions (same objective)
    with torch.no_grad():
        loss, logs = net.nll_with_aux(s_policy=Xt, s_embed=Xt, a=At, demo_ids=Ids_tr, s_next=Xnext)
        loss_val = float(loss.item())
        components = {k: float(v.item()) for k,v in logs.items()}
    return net, loss_val, components

In [18]:
def train_bc_gmm(Xn, Yn, width=128, M=5, lr=1e-3, epochs=120, batch=8192, device="cpu", clip=1.0):
    """Fit a ``GMMMLP`` to ``(Xn, Yn)`` by minimizing the negative log-likelihood.

    Args:
        Xn, Yn: input and target arrays with one row per sample.
        width: hidden width of the network trunk.
        M: number of mixture components.
        lr: Adam learning rate.
        epochs: number of passes over the data.
        batch: mini-batch size (capped at the number of samples).
        device: torch device for the model and data.
        clip: max gradient norm; a falsy value disables clipping.

    Returns:
        ``(net, loss)`` where ``loss`` is the mean negative log-likelihood on
        the full training set after the last epoch.
    """
    in_dim, out_dim = Xn.shape[1], Yn.shape[1]
    net = GMMMLP(in_dim, out_dim, M=M, width=width).to(device)
    opt = optim.Adam(net.parameters(), lr=lr)
    Xt, Yt = to_torch(Xn, Yn, device=device)

    def mean_nll(inputs, targets):
        return -net(inputs).log_prob(targets).mean()

    n_samples = Xt.shape[0]
    step = min(batch, n_samples)
    for _ in range(epochs):
        order = torch.randperm(n_samples, device=device)
        for start in range(0, n_samples, step):
            rows = order[start:start + step]
            loss = mean_nll(Xt[rows], Yt[rows])
            opt.zero_grad()
            loss.backward()
            if clip:
                nn.utils.clip_grad_norm_(net.parameters(), clip)
            opt.step()

    with torch.no_grad():
        final_loss = mean_nll(Xt, Yt).item()
    return net, final_loss

In [19]:
import torch.nn.functional as F
import torch.distributions as D
import torchdiffeq as ode

In [20]:
# ---------------- Train & report ----------------
# Train the four models back to back on the same normalized data and record a
# wall-clock timestamp between each, so the differences t1-t0, t2-t1, ... give
# the per-model training time. The models share the global RNG state, so the
# order of these calls affects the exact results.
# `p` and `d` are not used by the remaining statements in this cell.
p, d = Xn.shape[1], Yn.shape[1]
t0 = time.time()
bc_net, bc_loss = train_bc(Xn, Yn, epochs=120, device=device)
t1 = time.time()
traj_net, traj_loss, W_traj = train_traj_bc(demos, Xn, Yn, epochs=120, device=device)
t2 = time.time()
ileed_net, ileed_loss, ileed_logs = train_ileed_paperfaithful(
    demos, Xn, Yn, demo_ids, epochs_warm=60, epochs_joint=90, device=device
)
t3 = time.time()
bc_gmm_net, bc_gmm_loss = train_bc_gmm(Xn, Yn, epochs=120, device=device)
t4 = time.time()

In [21]:
# One summary row per trained model: objective, final training loss and
# wall-clock training time. The ILEED row additionally carries the individual
# loss components, prefixed with "ILEED_".
def summary_row(model, train_objective, final_loss, time_s, **extra):
    """Return one row of the results table as a dict (extras appended last)."""
    return {
        "model": model,
        "train_objective": train_objective,
        "final_loss": final_loss,
        "time_s": time_s,
        **extra,
    }


results = [
    summary_row("BC (MLP)", "Weighted MSE (here: unweighted)", bc_loss, t1-t0),
    summary_row("Traj-BC", "Weighted MSE (per-trajectory weights)", traj_loss, t2-t1),
    summary_row("ILEED ", "NLL + aux*lambda + reg", ileed_loss, t3-t2,
                **{f"ILEED_{k}": v for k,v in ileed_logs.items()}),
    summary_row("BC-GMM", "Negative Log-Likelihood", bc_gmm_loss, t4-t3),
]


# Display the updated results table and save to CSV
# df = pd.DataFrame(results)
# out_csv = "newlosses_summary_with_node.csv"
# df.to_csv(out_csv, index=False)
# print("Saved:", out_csv)
# print(df)

In [22]:
# df = pd.DataFrame(results)
# out_csv = "newlosses_summary_noisy.csv"
# df.to_csv(out_csv, index=False)
# print("Saved:", out_csv)
# print(df)

Trajectory generation / rollout code starts below.

In [23]:
def rollout_policy(model, stats, init_state, T=50, model_type="bc", device="cpu", dt=0.1):
    """Roll a trained policy forward under a simple integrator.

    At every step the policy's action for the current (normalized) state is
    computed, un-normalized, and integrated with
    ``next_state = state + action * dt`` in original units.

    Args:
        model: trained policy; how it is queried depends on ``model_type``.
        stats: dict with ``x_mean``, ``x_std``, ``y_mean``, ``y_std`` as
            returned by ``normalize_xy``.
        init_state: initial state in original (un-normalized) units.
        T: number of steps to simulate.
        model_type: one of
            ``"bc"`` / ``"traj_bc"`` -- ``model(s)`` is the action;
            ``"bc_gmm"`` -- the mean of the returned distribution is used;
            ``"ileed"`` -- the mean of the mixture component with the largest
            mixing logit from ``model.gmm_params`` is used.
        device: torch device on which the model is evaluated.
        dt: integration step size (defaults to the previously hard-coded 0.1).

    Returns:
        ``(states, actions)`` arrays with ``T`` rows each, in original units.
        ``states[t]`` is the state at which ``actions[t]`` was taken.

    Raises:
        ValueError: if ``model_type`` is not one of the supported values.
    """
    x_mean, x_std = stats["x_mean"], stats["x_std"]
    y_mean, y_std = stats["y_mean"], stats["y_std"]

    states = []
    actions = []

    # The rollout state is kept in normalized units throughout.
    s = (init_state - x_mean) / x_std

    for _ in range(T):
        s_torch = torch.tensor(s, dtype=torch.float32, device=device).unsqueeze(0)

        with torch.no_grad():
            if model_type in ("bc", "traj_bc"):
                a_norm = model(s_torch).cpu().numpy()[0]

            elif model_type == "bc_gmm":
                a_norm = model(s_torch).mean.cpu().numpy()[0]

            elif model_type == "ileed":
                pi_logits, mu, _sigma = model.gmm_params(s_torch)
                # log_softmax is monotonic, so the most likely component can
                # be read directly from the logits.
                k = torch.argmax(pi_logits, dim=-1).item()
                a_norm = mu[0, k].cpu().numpy()

            else:
                raise ValueError(f"Unknown model_type {model_type}")

        # Un-normalize the action and record the (un-normalized) pair.
        a = a_norm * y_std + y_mean
        states.append(s * x_std + x_mean)
        actions.append(a)

        # Simple dynamics assumption: next state = current state + action * dt.
        # The action is in original units, so dividing by x_std expresses the
        # step in normalized state units.
        s = s + (a / x_std) * dt

    return np.array(states), np.array(actions)

In [None]:
# Map the display names to the exact model_type strings used in rollout_policy
MODEL_TYPE_MAP = {
    "BC": "bc",
    "Traj-BC": "traj_bc",
    "ILEED": "ileed",
    "BC-GMM": "bc_gmm",
}

def perform_multiple_rollouts_and_save(model, stats, T, num_rollouts, display_name, device,
                                       base_init_state=None, delta_max=0.2):
    """Generate several perturbed rollouts of one policy and save them to disk.

    Each rollout starts from ``base_init_state`` plus an independent uniform
    perturbation in ``[-delta_max, delta_max]`` per coordinate (drawn from the
    global NumPy RNG). The rollouts are saved as a pickled object array of
    ``{"states", "actions"}`` dicts.

    Args:
        model: trained policy passed through to ``rollout_policy``.
        stats: normalization statistics passed through to ``rollout_policy``.
        T: rollout horizon (number of steps).
        num_rollouts: number of rollouts to generate.
        display_name: key of ``MODEL_TYPE_MAP`` identifying the model type.
        device: torch device passed through to ``rollout_policy``.
        base_init_state: unperturbed initial state in original units. When
            ``None`` the first row of the notebook-level array ``X`` is used,
            which was the previous (implicit) behavior.
        delta_max: half-width of the uniform perturbation.

    Returns:
        The path of the saved ``.npy`` file.

    Raises:
        ValueError: if ``display_name`` is not in ``MODEL_TYPE_MAP``.
    """
    # Validate first so a typo fails before any rollout work is done.
    model_type = MODEL_TYPE_MAP.get(display_name)
    if not model_type:
        raise ValueError(f"Display name '{display_name}' not found in model type map.")

    if base_init_state is None:
        base_init_state = X[0]

    print(f"Generating {num_rollouts} rollouts for {display_name}...")
    all_trajectories = []

    for _ in range(num_rollouts):
        # Add a random delta to the base initial state
        random_delta = np.random.uniform(-delta_max, delta_max, size=base_init_state.shape)
        init_state = base_init_state + random_delta

        states, actions = rollout_policy(model, stats, init_state, T=T, model_type=model_type, device=device)
        all_trajectories.append({"states": states, "actions": actions})

    # A list of dicts is stored as a pickled object array; loading it back
    # requires np.load(..., allow_pickle=True).
    save_path = f"{model_type.replace('_', '-')}_rollouts_noisy.npy"
    np.save(save_path, all_trajectories, allow_pickle=True)
    print(f"Saved {num_rollouts} trajectories to {save_path}")
    return save_path

# Rollout settings shared by every model.
num_rollouts_to_generate = 50
rollout_horizon = 60

# Generate and save rollouts for each trained policy, in a fixed order (the
# perturbations are drawn from the shared NumPy RNG, so order matters).
trained_policies = [
    ("BC", bc_net),
    ("Traj-BC", traj_net),
    ("ILEED", ileed_net),
    ("BC-GMM", bc_gmm_net),
]

saved_files = {}
for label, policy in trained_policies:
    saved_files[label] = perform_multiple_rollouts_and_save(
        policy, stats, rollout_horizon, num_rollouts_to_generate, label, device
    )

print("\nAll rollouts saved. The files are:")
for name, path in saved_files.items():
    print(f"- {name}: {path}")


In [None]:
import numpy as np
import matplotlib.pyplot as plt
import os

# Define the directory where your .npy files are saved.
# REPLACE THIS WITH YOUR ACTUAL PATH.
file_directory = "/path/to/your/saved/rollouts" 

# File name bases (without ".npy") written by perform_multiple_rollouts_and_save.
models_to_plot_names = [
    "bc_rollouts_noisy",
    "traj-bc_rollouts_noisy",
    "ileed_rollouts_noisy",
    "bc-gmm_rollouts_noisy",
]

# Suffix shared by every rollout file name base above.
ROLLOUT_SUFFIX = "_rollouts_noisy"


def rollout_display_name(model_name_base):
    """Turn a rollout file name base into an upper-case display name.

    The ``_rollouts_noisy`` suffix is removed and remaining ``_`` / ``-``
    separators become spaces, e.g. ``"traj-bc_rollouts_noisy" -> "TRAJ BC"``.
    (The suffix starts with an underscore; stripping ``"-rollouts_noisy"``
    never matched and left the suffix in titles and output file names.)
    """
    stem = model_name_base
    if stem.endswith(ROLLOUT_SUFFIX):
        stem = stem[:-len(ROLLOUT_SUFFIX)]
    return stem.replace("_", " ").replace("-", " ").upper()


for model_name_base in models_to_plot_names:

    file_path = os.path.join(file_directory, model_name_base + ".npy")

    if os.path.exists(file_path):

        fig = plt.figure(figsize=(10, 10))

        model_name = rollout_display_name(model_name_base)

        plt.title(f"{model_name} Rolled-out Trajectories", fontsize=14, fontweight="bold")
        plt.xlabel("X axis", fontsize=12, fontweight="bold")
        plt.ylabel("Y axis", fontsize=12, fontweight="bold")
        plt.xticks(fontsize=10, fontweight="bold")
        plt.yticks(fontsize=10, fontweight="bold")
        plt.xlim(-2, 2)
        plt.ylim(-2, 2)
        plt.grid(True, linewidth=1.2)
        plt.gca().set_aspect("equal")

        # Load the array of dictionaries from the .npy file.
        # NOTE: allow_pickle=True executes pickled content on load; only use
        # it on files produced by this notebook, never on untrusted files.
        all_rollouts = np.load(file_path, allow_pickle=True)
        print(f"Loaded {len(all_rollouts)} rollouts from {file_path}")

        # Generate a colormap with enough colors for all rollouts
        colors = plt.cm.viridis(np.linspace(0, 1, len(all_rollouts)))
        for i, rollout in enumerate(all_rollouts):
            states = rollout["states"]
            plt.plot(
                states[:, 0],
                states[:, 1],
                alpha=0.8,
                linewidth=1.5,
                color=colors[i] 
            )

        output_file = f"rollout_plot_{model_name.lower().replace(' ', '-')}.png"
        plt.savefig(output_file, dpi=600, bbox_inches="tight")

        plt.show()
        # Release the figure so looping over many models does not accumulate
        # open figures in memory.
        plt.close(fig)

    else:
        print(f"File not found: {file_path}")