## COMP5328 - Advanced Machine Learning
## Assignment 2: Title
----------------------------------------------------------------------------------------

In [68]:
# Common imports
import os
import glob
import numpy as np
import json
import time
from datetime import datetime

# Ploting
import matplotlib.pyplot as plt

from torch.utils.data import Dataset, DataLoader
from sklearn.model_selection import train_test_split
import torch
import torch.nn as nn
import torch.nn.functional as F
import os
import torch.optim as optim
import random
import scipy.stats as st

In [69]:
# Experiment variables
# Common
num_classes=3
dataset_folder = 'data/'
cifar_dataset = dataset_folder+'CIFAR.npz'
MNISTO3_dataset = dataset_folder+'FashionMNIST0.3.npz'
MNISTO6_dataset = dataset_folder+'FashionMNIST0.6.npz'


DATA_PATHS = {
    'fashion03': MNISTO3_dataset,
    'fashion06': MNISTO6_dataset,
    'cifar':     cifar_dataset
}

losses = ['forward','gce', 'forwardGCE']
datasets = ['cifar', "fashion03", "fashion06"]
base = {
    "runs":10,
    "epochs": 15,
    "loss":'forward',
    "batch_size":4096,
    "q":0.6,
    "est_epochs":10,
    "beta":0.2,
    "lr":1e-3,
    "device":'mps'
}


known_T_fashion_03 = np.array(  [[0.7,0.3,0.0],
                                [0.0,0.7,0.3],
                                [0.3,0.0,0.7]], dtype=np.float32)

known_T_fashion_06 = np.array(  [[0.4,0.3,0.3],
                                [0.3,0.4,0.3],
                                [0.3,0.3,0.4]], dtype=np.float32)

def pick_known_T(tag):
    if tag == 'fashion03':
        return known_T_fashion_03
    elif tag == 'fashion06':
        return known_T_fashion_06
    else:
        return None

def set_seed(seed=0):
    import random
    random.seed(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    torch.cuda.manual_seed_all(seed)
    torch.backends.cudnn.deterministic = True
    torch.backends.cudnn.benchmark = False

def sanity(T, name, dataset):
    print(f"\n{name}")
    print("row sums:", T.sum(axis=1))
    print("col sums:", T.sum(axis=0))
    if dataset == 'fashion03':
        T_true = known_T_fashion_03
    elif dataset == 'fashion06':
        T_true = known_T_fashion_06
    if dataset != 'cifar':
        # if you know T_true for this dataset:
        print("Fro:", np.linalg.norm(T - T_true, 'fro'))
        print("MAE:", np.mean(np.abs(T - T_true)))

## 1. Load Dataset

### 1.0 Data Folder

In [70]:
# Path to your dataset zip stored in Drive
zip_path = "datasets.zip"

# Unzip file
!unzip -o -q "$zip_path" 

unzip:  cannot find or open datasets.zip, datasets.zip.zip or datasets.zip.ZIP.


In [71]:
# The structure of data folder.
!ls -l data



total 172688
-rw-r--r--@ 1 jamie.saunders  staff  55440974 Oct  4  2019 CIFAR.npz
-rw-r--r--@ 1 jamie.saunders  staff  16485974 Oct 10  2021 FashionMNIST0.3.npz
-rw-r--r--@ 1 jamie.saunders  staff  16485974 Oct 10  2021 FashionMNIST0.6.npz


In [72]:
def load_npz(path):
    d = np.load(path)
    Xtr, Str = d['Xtr'], d['Str']
    Xts, Yts = d['Xts'], d['Yts']
    return Xtr, Str, Xts, Yts

# A helper class, it is used as an input of the DataLoader object.
class DatasetArray(Dataset):
    def __init__(self, data, labels=None, transform=None):
        if labels != None:
            self.data_arr = np.asarray(data).astype(np.float32)
            self.label_arr = np.asarray(labels).astype(np.long)
        else:
            tmp_arr = np.asarray(data)
            self.data_arr = tmp_arr[:,:-1].astype(np.float32)
            self.label_arr = tmp_arr[:,-1].astype(np.long)
        self.transform = transform
        
    def __len__(self):
        return len(self.data_arr)
    
    def __getitem__(self, index):
     
        data = self.data_arr[index]
        label = self.label_arr[index]
        
        if self.transform is not None:
            data = self.transform(data)
            
        return (data, label)
    
    
# Splitting the data into three parts.
def train_val_test_random_split(data, fracs=[0.7,0.1,0.2]):
    r"""Split the data into training, validation and test set.
    Args:
        fracs: a list of length three
    """
    assert len(fracs) == 3
    assert sum(fracs) == 1
    assert all(frac > 0 for frac in fracs)
    n = len(data)
    subset_lens = [int(n*frac) for frac in fracs]
    idxs = list(range(n))
    random.shuffle(idxs)
    data = np.array(data)
    new_data = []
    start_idx = 0
    for subset_len in subset_lens:
        end_idx = start_idx + subset_len
        cur_idxs = idxs[start_idx:end_idx]
        new_data.append(data[cur_idxs,:].tolist())
        start_idx = end_idx
    return new_data

# Preparation of the data for training, validation and testing a pytorch network. 
# Note that the test data is not in use for this lab.
def get_loader(batch_size =128, num_workers = 0, train_val_test_split = [0.7,0.1,0.2], data=None):
    r"""This function is used to read the data file and split the data into three subsets, i.e, 
    train data, validation data and test data. Their corresponding DataLoader objects are returned."""
    
    [train_data, val_data, test_data] = train_val_test_random_split(data, fracs = train_val_test_split)

    train_data = DatasetArray(data = train_data)
    val_data = DatasetArray(data = val_data)
    test_data = DatasetArray(data = test_data)

    #The pytorch built-in class DataLoader can help us to shuffle the data, draw mini-batch,
    #do transformations, etc. 
    train_loader = DataLoader(
        train_data,
        batch_size=batch_size,
        shuffle=True,
        num_workers=num_workers,
    )

    val_loader = DataLoader(
        val_data,
        batch_size=100,
        shuffle=False,
        num_workers=num_workers,
    )

    test_loader = DataLoader(
        test_data,
        batch_size=100,
        num_workers=num_workers,
        shuffle=False,
    )
    return train_loader, val_loader, test_loader

In [73]:
class NpzDataset(Dataset):
    def __init__(self, X, y, is_cifar=False):
        self.X = X.astype(np.float32)
        self.y = y.astype(np.int64)
        self.is_cifar = is_cifar

        # Normalize to [0,1]
        self.X = self.X / 255.0 if self.X.max() > 1.0 else self.X

    def __len__(self):
        return len(self.y)

    def __getitem__(self, idx):
        x = self.X[idx]
        if x.ndim == 1:
            # flat; try to infer shape 28x28 or 32x32x3
            if x.size == 28*28:
                x = x.reshape(1, 28, 28)
            elif x.size == 32*32*3:
                x = x.reshape(3, 32, 32)
            else:
                raise ValueError("Unknown flat image shape: {}".format(x.shape))
        else:
            # (H,W) or (H,W,C)
            if x.ndim == 2:
                x = x[None, ...]  # to (1,H,W)
            elif x.ndim == 3:
                # assume HWC -> CHW
                x = np.transpose(x, (2, 0, 1))
            else:
                raise ValueError(f"Unexpected image dims: {x.shape}")
        return torch.from_numpy(x), torch.tensor(self.y[idx])


def load_npz(path):
    d = np.load(path)
    Xtr, Str = d['Xtr'], d['Str']
    Xts, Yts = d['Xts'], d['Yts']
    return Xtr, Str, Xts, Yts


def make_loaders(Xtr, Str, batch_size=128, seed=0, test_size=0.2):
    # 80/20 split each repetition
    X_tr, X_val, y_tr, y_val = train_test_split(
        Xtr, Str, test_size=test_size, random_state=seed, stratify=Str
    )

    is_cifar = (X_tr.shape[-1] == 3) if X_tr.ndim == 4 else (X_tr.shape[-1] == 32*32*3)

    train_ds = NpzDataset(X_tr, y_tr, is_cifar)
    val_ds   = NpzDataset(X_val, y_val, is_cifar)

    train_loader = DataLoader(train_ds, batch_size=batch_size, shuffle=True, num_workers=0)
    val_loader   = DataLoader(val_ds,   batch_size=batch_size, shuffle=False, num_workers=0)

    return train_loader, val_loader, is_cifar

def make_test_loader(Xts, Yts, batch_size=256):
    is_cifar = (Xts.shape[-1] == 3) if Xts.ndim == 4 else (Xts.shape[-1] == 32*32*3)
    test_ds = NpzDataset(Xts, Yts, is_cifar)
    return DataLoader(test_ds, batch_size=batch_size, shuffle=False, num_workers=0)

In [74]:
class ForwardCorrectedCE(nn.Module):
    """
    Forward loss correction: minimizes CE between T^T p and noisy labels.
    T: class-transition matrix where T[i,j] = P(S=j | Y=i). Shape [C,C].
    """
    def __init__(self, T):
        super().__init__()
        self.register_buffer('T', T)  # [C,C]

    def forward(self, logits, y_noisy):
        # logits -> p(y|x)
        p = F.softmax(logits, dim=1)  # [B,C]
        # mix via T^T
        mixed = torch.clamp(p @ self.T.t(), 1e-6, 1.0)
        log_mixed = torch.log(mixed)
        return F.nll_loss(log_mixed, y_noisy)

class ForwardCorrectedGCE(nn.Module):
    def __init__(self, T, q=0.7):
        super().__init__()
        self.register_buffer('T', T)
        self.q = q
    def forward(self, logits, y_noisy):
        p_noisy = torch.clamp(F.softmax(logits,1) @ self.T, 1e-6, 1.0)
        p_s = p_noisy.gather(1, y_noisy.view(-1,1)).clamp(1e-6,1.0)
        return (-(p_s.log()) if self.q==1.0 else (1 - p_s**self.q)/self.q).mean()


class GeneralizedCrossEntropy(nn.Module):
    """
    GCE loss: L_q(p, y) = (1 - p_y^q) / q, with q in (0,1].
    q→1 recovers CE; smaller q is more robust to label noise.
    """
    def __init__(self, q=0.7):
        super().__init__()
        assert 0 < q <= 1
        self.q = q

    def forward(self, logits, y):
        p = F.softmax(logits, dim=1)
        p_y = p.gather(1, y.view(-1,1)).clamp(min=1e-6, max=1.0)
        if self.q == 1.0:
            return -torch.log(p_y).mean()
        return ((1 - p_y.pow(self.q)) / self.q).mean()

In [75]:
def conv_block(cin, cout):
    return nn.Sequential(
        nn.Conv2d(cin, cout, 3, padding=1),
        nn.BatchNorm2d(cout),
        nn.ReLU(inplace=True),
        nn.MaxPool2d(2)
    )

class SmallCNN28(nn.Module):
    """For 1×28×28 images."""
    def __init__(self):
        super().__init__()
        self.net = nn.Sequential(
            conv_block(1, 32),  # 14x14
            conv_block(32, 64), # 7x7
            nn.Flatten(),
            nn.Linear(64*7*7, 128),
            nn.ReLU(inplace=True),
            nn.Linear(128, 3)
        )
    def forward(self, x):
        return self.net(x)


class SmallCNNCifar(nn.Module):
    """For 3×32×32 images."""
    def __init__(self):
        super().__init__()
        self.net = nn.Sequential(
            conv_block(3, 32),   # 16x16
            conv_block(32, 64),  # 8x8
            conv_block(64, 128), # 4x4
            nn.Flatten(),
            nn.Linear(128*4*4, 256),
            nn.ReLU(inplace=True),
            nn.Linear(256, 3)
        )
    def forward(self, x):
        return self.net(x)


def make_model(is_cifar):
    return SmallCNNCifar() if is_cifar else SmallCNN28()

In [76]:
@torch.no_grad()
def predict_proba(model, loader, device):
    model.eval()
    p_arr = []
    ys = []
    for xb, yb in loader:
        xb = xb.to(device)
        output = model(xb)
        p = F.softmax(output, dim=1).cpu().numpy()
        p_arr.append(p)
        ys.append(yb.numpy())
    return np.concatenate(p_arr), np.concatenate(ys)


def estimate_transition_anchor(t, train_loader, is_cifar, q, device='cpu', epochs=5):
    """
    Simple anchor/confident-example estimator (Patrini et al., 2017 style):
    1) Train a base classifier on noisy data.
    2) Get p(y|x) on training set.
    3) For each clean class i, find indices whose predicted argmax == noisy label == i and with high confidence.
    4) For those indices, estimate column i of T as average of empirical noisy label distribution given model predicts i.
    Here: since we only have noisy labels S, we approximate T[:, i] ≈ E[ onehot(S) | argmax p = i, p_i >= τ ].
    Normalize columns to sum to 1.
    """

    device = torch.device(device)
    model = make_model(is_cifar).to(device)

    optimizer = optim.Adam(model.parameters(), lr=1e-3, weight_decay=1e-4)
    criterion = GeneralizedCrossEntropy(q=q)
    # quick warmup training on noisy labels
    for _ in range(epochs):
        train_one_epoch(model, train_loader, optimizer, criterion, device=device) 

    # collect probs & noisy labels
    p_arr, y_noisy = predict_proba(model, train_loader, device)
    preds = p_arr.argmax(axis=1)
    maxp = p_arr.max(axis=1)

    C = num_classes
    T = t
    # choose class-wise thresholds based on quantiles for stability
    for i in range(C):
        idx = np.where(preds == i)[0]
        if idx.size == 0:
            T[:, i] = np.ones(C) / C
            continue
        # high-confidence subset (top 30% by p_i)
        conf = maxp[idx]
        if conf.size > 50:
            tau = np.quantile(conf, 0.7)
        else:
            tau = np.min(conf)  # keep all if tiny
        keep = idx[conf >= tau]
        if keep.size == 0:
            keep = idx
        # empirical distribution of noisy labels in this confident set
        hist = np.bincount(y_noisy[keep], minlength=C).astype(np.float64)
        if hist.sum() == 0:
            T[:, i] = np.ones(C) / C
        else:
            T[:, i] = hist / hist.sum()

    # column-normalize
    colsum = T.sum(axis=0, keepdims=True)
    T = np.divide(T, np.maximum(colsum, 1e-8))
    return T.astype(np.float32)

In [77]:
def estimate_transition_trevision(T_init, train_loader, is_cifar, q, device="cpu", epochs=5, lambda_reg=1e-4, lr_t=5e-3, lr_model=1e-3, warmup_epochs=3, log_every=1
):
    """
    Refine transition matrix using T-Revision method (Patrini et al. style).

    Args:
        T_init (np.ndarray or torch.Tensor): initial transition matrix [C, C]
        train_loader: noisy dataloader (x, y_noisy)
        is_cifar (bool): dataset selector for model architecture
        device (str): 'cpu', 'cuda', or 'mps'
        epochs (int): number of refinement epochs for ΔT
        lambda_reg (float): regularization to keep T close to T_init
        lr_t (float): learning rate for ΔT
        lr_model (float): learning rate for model warm-up
        warmup_epochs (int): number of model warm-up epochs
        log_every (int): print interval

    Returns:
        np.ndarray: refined transition matrix [C, C]
    """

    device = torch.device(device)
    C = 3

    # ---------------------------
    # 1. Base model setup
    # ---------------------------
    model = make_model(is_cifar).to(device)
    opt_model = optim.Adam(model.parameters(), lr=lr_model, weight_decay=1e-4)
    criterion = GeneralizedCrossEntropy(q=q)

    # Warm-up (train classifier on noisy labels)
    #print(f"[Warm-up] training base classifier for {warmup_epochs} epochs...")
    for e in range(warmup_epochs):
        train_one_epoch(model, train_loader, opt_model, criterion=criterion, device=device)
        #print(f"  done epoch {e+1}/{warmup_epochs}")

    # ---------------------------
    # 2. Get predicted probabilities
    # ---------------------------
    probs, y_noisy = predict_proba(model, train_loader, device)
    p = torch.tensor(probs, dtype=torch.float32, device=device)
    y_t = torch.tensor(y_noisy, dtype=torch.long, device=device)

    # ---------------------------
    # 3. Initialize learnable ΔT (T-Revision)
    # ---------------------------
    T_init_torch = torch.tensor(T_init, dtype=torch.float32, device=device)
    delta_T = nn.Parameter(torch.zeros_like(T_init_torch))
    optimizer_T = optim.Adam([delta_T], lr=lr_t)

    #print(f"[Optimization] refining transition matrix for {epochs} epochs...")

    # ---------------------------
    # 4. Optimize ΔT
    # ---------------------------
    for ep in range(epochs):
        optimizer_T.zero_grad()

        # Proposed transition
        T_prime = T_init_torch + delta_T
        T_prime = torch.clamp(T_prime, min=1e-6)

        # Forward correction: p(y_noisy | x) = p(y|x) * T'
        noisy_pred = torch.clamp(p @ T_prime.t(), 1e-6, 1.0)
        log_noisy = torch.log(noisy_pred)

        # Loss = NLL + regularization
        loss_ce = nn.NLLLoss()(log_noisy, y_t)
        reg = lambda_reg * torch.norm(T_prime - T_init_torch, p="fro")
        loss = loss_ce + reg

        loss.backward()
        optimizer_T.step()

        if (ep + 1) % log_every == 0:
            grad_norm = delta_T.grad.abs().mean().item() if delta_T.grad is not None else 0
            #print(f"Epoch {ep+1}/{epochs} | loss={loss.item():.5f} | grad={grad_norm:.5e}")

    # ---------------------------
    # 5. Normalize once at the end
    # ---------------------------
    T_final = T_init_torch + delta_T.data
    T_final = torch.clamp(T_final, min=1e-6)
    T_final = T_final / T_final.sum(dim=0, keepdim=True)

    #print("\n[Done] Refined Transition Matrix:")
    #print(T_final.detach().cpu().numpy())

    return T_final.detach().cpu().numpy().astype(np.float32)


In [78]:
@torch.no_grad()
def accuracy(model, loader, device):
    model.eval()
    correct = 0
    total = 0
    for xb, yb in loader:
        xb, yb = xb.to(device), yb.to(device)
        logits = model(xb)
        pred = logits.argmax(dim=1)
        correct += (pred == yb).sum().item()
        total += yb.numel()
    return correct / max(total, 1)


def train_one_epoch(model, loader, optimizer, criterion, device='mps'):
    model.train()
    total_loss = 0.0
    for xb, yb in loader:
        xb, yb = xb.to(device), yb.to(device)
        optimizer.zero_grad(set_to_none=True)
        logits = model(xb)
        loss = criterion(logits, yb)
        loss.backward()
        optimizer.step()
        total_loss += loss.item() * yb.size(0)
    return total_loss / len(loader.dataset)


def fit_model(model, train_loader, val_loader, device, loss_name='gce', T=None, q=0.7, beta=0.2, epochs=10, lr=1e-3):

    if loss_name == 'forward':
        assert T is not None, "Forward correction requires known/estimated T"
        print('forward loss')
        criterion = ForwardCorrectedCE(torch.tensor(T, dtype=torch.float32, device=device))
    elif loss_name == 'forwardGCE':
        print('forwardGCE loss')
        criterion = ForwardCorrectedGCE(q=q)
    else:
        print('GCE loss')
        criterion = GeneralizedCrossEntropy(q=q)

    optimizer = optim.Adam(model.parameters(), lr=lr, weight_decay=1e-4)

    best_val = -np.inf
    best_state = None

    for _ in range(epochs):
        train_one_epoch(model, train_loader, optimizer, criterion, device)
        # early stopping on val accuracy (cheap)
        val_acc = accuracy(model, val_loader, device)
        if val_acc > best_val:
            best_val = val_acc
            best_state = {k: v.detach().cpu().clone() for k, v in model.state_dict().items()}

    if best_state is not None:
        model.load_state_dict(best_state)
    return model

In [79]:
def run_once(args, seed):
    set_seed(seed)

    if args['dataset']=='fashion03':
        q = 0.3
    elif args['dataset']=='fashion06':
        q = 0.6
    else:
        q = 0.7
    device = torch.device('mps')
    print(f'q:{q}')
    # load data
    Xtr, Str, Xts, Yts = load_npz(DATA_PATHS[args['dataset']])

    # loaders for this split
    train_loader, val_loader, is_cifar = make_loaders(Xtr, Str, batch_size=args['batch_size'], seed=seed)
    test_loader = make_test_loader(Xts, Yts, batch_size=512)
  
    # choose model
    model = make_model(is_cifar).to(device)

    # Transition matrix
    T = None
    if args['loss'] == 'forward' or args['loss'] == 'gce':
        if args['estimate_T'] or args['dataset']=='cifar':
            T = np.zeros((3, 3), dtype=np.float64)
            T = estimate_transition_anchor(T, train_loader, is_cifar, q, device=device, epochs=args['est_epochs'])
            sanity(T, 'Est T After Anchor point', args['dataset'])
            T = estimate_transition_trevision(T, train_loader, is_cifar, q, device=device, epochs=args['est_epochs'])
            sanity(T, 'Est T After T Revision', args['dataset'])
        else:
            T = pick_known_T(args['dataset'])
            if T is None:
                raise ValueError("Forward loss selected but no known T for this dataset; use --estimate_T.")

    # fit
    model = fit_model(
        model,
        train_loader,
        val_loader,
        device,
        loss_name=args['loss'],
        T=T,
        q=q,
        beta=args['beta'],
        epochs=args['epochs'],
        lr=args['lr'],
    )

    # evaluate on clean test set
    test_acc = accuracy(model, test_loader, device)
    return float(test_acc), (T.tolist() if T is not None else None)

In [80]:
now = datetime.now()
now = now.strftime("%Y-%m-%d-%H:%M")
folder = "results"+now
if os.path.exists(folder) and os.path.isdir(folder):
    os.rmdir(folder)
    os.mkdir(folder)
else:
    os.mkdir(folder)
datasets = ['fashion03']
# create each cfg
estimate = [True, False]
configs = []
for i, ds in enumerate(datasets):
    for loss in losses:
        for t in estimate:
            cfg = {**base, "dataset": ds, "out": folder+'/'+ds+'_'+loss+'_'+str(t)+'_'+now+'.json', "loss":loss, "estimate_T":t}
            if t and ds !='cifar':
                print(f"dataset: {ds}, estimate_T:{t}, loss:{loss}")
                configs.append(cfg)
            elif not t:
                print(f"dataset: {ds}, estimate_T:{t}, loss:{loss}")
                configs.append(cfg)


dataset: fashion03, estimate_T:True, loss:forward
dataset: fashion03, estimate_T:False, loss:forward
dataset: fashion03, estimate_T:True, loss:gce
dataset: fashion03, estimate_T:False, loss:gce
dataset: fashion03, estimate_T:True, loss:forwardGCE
dataset: fashion03, estimate_T:False, loss:forwardGCE


In [None]:
for cfg in configs:
    all_acc = []
    last_T = None
    t_arr = []
    for r in range(cfg['runs']):
        start = time.perf_counter()
        acc, T = run_once(cfg, seed=1000+r)

        all_acc.append(acc)
        if cfg['estimate_T'] or cfg['dataset']=='cifar':
            t_arr.append(T)
        last_T = T if T is not None else last_T
        print(f"Run {r+1:02d}/{cfg['runs']}: test acc = {acc*100:.2f}%")
        end = time.perf_counter()
        print(f"{cfg['device']}: {r+1} steps -> {end - start:.2f} sec | avg {1000*(end - start)/(r+1):.1f} ms/step")
    mean = float(np.mean(all_acc))
    std  = float(np.std(all_acc))

    summary = {
        'cfg':cfg,
        'dataset': cfg['dataset'],
        'loss': cfg['loss'],
        'estimate_T': bool(cfg['estimate_T']),
        'epochs': cfg['epochs'],
        'runs': cfg['runs'],
        'mean_test_acc': mean,
        'std_test_acc': std,
        'last_estimated_T': last_T,
        't_arr':t_arr,
        'per_run_acc': all_acc,
    }
    print("="*72)
    print(f"{cfg['dataset']} | {cfg['loss']} | mean±std over {cfg['runs']} runs: {mean*100:.2f}±{std*100:.2f}%")

    with open(cfg['out'], 'w') as f:
        json.dump(summary, f, indent=2)
    print(f"Saved summary to {cfg['out']}")

q:0.3

Est T After Anchor point
row sums: [1.0159502 0.9755118 1.008538 ]
col sums: [1. 1. 1.]
Fro: 0.7002944
MAE: 0.19909726

Est T After T Revision
row sums: [0.98345774 0.96573895 1.0508033 ]
col sums: [1.        1.        1.0000001]
Fro: 0.70211977
MAE: 0.19250295
forward loss
Run 01/10: test acc = 98.37%
mps: 1 steps -> 19.95 sec | avg 19947.8 ms/step
q:0.3

Est T After Anchor point
row sums: [0.9758113  0.98749506 1.0366936 ]
col sums: [0.99999994 1.         1.        ]
Fro: 0.6827116
MAE: 0.19896436

Est T After T Revision
row sums: [0.9469021 0.9766675 1.0764304]
col sums: [1.        1.        1.0000001]
Fro: 0.683123
MAE: 0.19226576
forward loss
Run 02/10: test acc = 98.40%
mps: 2 steps -> 18.57 sec | avg 9283.8 ms/step
q:0.3

Est T After Anchor point
row sums: [1.0210499 0.9751001 1.00385  ]
col sums: [1. 1. 1.]
Fro: 0.7091402
MAE: 0.19640109

Est T After T Revision
row sums: [0.9750798 1.0082357 1.0166847]
col sums: [1.        1.        1.0000001]
Fro: 0.69183856
MAE: 0.1962

In [None]:
for filename in os.listdir(folder):
    if filename.endswith(".json"):
        filepath = os.path.join(folder, filename)
        try:
            with open(filepath, "r", encoding="utf-8") as f:
                data = json.load(f)
            
            dataset = data.get("dataset", "N/A")
            loss = data.get("loss", "N/A")
            mean_acc = data.get("mean_test_acc", None)
            std_acc = data.get("std_test_acc", None)

            print(f"{filename}:")
            print(f"  dataset       = {dataset}")
            print(f"  loss          = {loss}")
            print(f"  mean_test_acc = {mean_acc:.4f}" if mean_acc is not None else "  mean_test_acc = N/A")
            print(f"  std_test_acc  = {std_acc:.4f}" if std_acc is not None else "  std_test_acc = N/A")
            print("-" * 60)

        except Exception as e:
            print(f"Error reading {filename}: {e}")

fashion03_forward_True_2025-10-30-09:05.json:
  dataset       = fashion03
  loss          = forward
  mean_test_acc = 0.4004
  std_test_acc  = 0.0798
------------------------------------------------------------
fashion06_forward_False_2025-10-30-09:05.json:
  dataset       = fashion06
  loss          = forward
  mean_test_acc = 0.3010
  std_test_acc  = 0.0589
------------------------------------------------------------
fashion03_gce_True_2025-10-30-09:05.json:
  dataset       = fashion03
  loss          = gce
  mean_test_acc = 0.5101
  std_test_acc  = 0.0464
------------------------------------------------------------
fashion03_forward_False_2025-10-30-09:05.json:
  dataset       = fashion03
  loss          = forward
  mean_test_acc = 0.3668
  std_test_acc  = 0.0373
------------------------------------------------------------
fashion03_gce_False_2025-10-30-09:05.json:
  dataset       = fashion03
  loss          = gce
  mean_test_acc = 0.4860
  std_test_acc  = 0.0492
-------------------

In [None]:
C = 3

# pattern for files starting with "name" and ending with ".json"
files = sorted(glob.glob(os.path.join(folder,"fashion03*True*.json")))

# pick the first matching file
first_file = files[0]
print("Loading:", first_file)

# load the JSON contents
with open(first_file, "r", encoding="utf-8") as f:
    data = json.load(f)

T_prime = np.array(data['last_estimated_T'])
T_true = pick_known_T('fashion03')

#checking recreation performance
print(T_prime)
print(T_true)
print(f"Fro error: {np.linalg.norm(T_prime - T_true, 'fro')}")
print(f"rre error: {np.linalg.norm(T_prime - T_true, 'fro') / np.linalg.norm(T_true, 'fro')}")
print(f"mae error: {np.mean(np.abs(T_prime - T_true))}")


corrs = [st.pearsonr(T_true[i], T_prime[i])[0] for i in range(C)]
print("Per-row correlations:", corrs)
print("Mean:", np.mean(corrs))


# pattern for files starting with "name" and ending with ".json"
files = sorted(glob.glob(os.path.join(folder,"fashion06*True**.json")))


# pick the first matching file
first_file = files[0]
print("\nLoading:", first_file)
# load the JSON contents
with open(first_file, "r", encoding="utf-8") as f:
    data = json.load(f)
print()
T_prime = np.array(data['last_estimated_T'])
T_true = pick_known_T('fashion06')

#checking recreation performance
print(T_prime)
print(T_true)
print(f"Fro error: {np.linalg.norm(T_prime - T_true, 'fro')}")
print(f"rre error: {np.linalg.norm(T_prime - T_true, 'fro') / np.linalg.norm(T_true, 'fro')}")
print(f"mae error: {np.mean(np.abs(T_prime - T_true))}")


corrs = [st.pearsonr(T_true[i], T_prime[i])[0] for i in range(C)]
print("Per-row correlations:", corrs)
print("Mean:", np.mean(corrs))

Loading: results2025-10-30-09:05/fashion03_forward_True_2025-10-30-09:05.json
[[0.37333062 0.35970184 0.27876514]
 [0.32170659 0.35969746 0.31239942]
 [0.30496281 0.28060067 0.40883544]]
[[0.7 0.3 0. ]
 [0.  0.7 0.3]
 [0.3 0.  0.7]]
Fro error: 0.755632206441637
rre error: 0.5728432934797962
mae error: 0.21291920873853895
Per-row correlations: [np.float64(0.8905745381555876), np.float64(0.8091376512635876), np.float64(0.966010180771437)]
Mean: 0.8885741233968707

Loading: results2025-10-30-09:05/fashion06_forward_True_2025-10-30-09:05.json

[[6.48417354e-01 9.09397670e-07 3.19373101e-01]
 [3.01015973e-01 7.01399863e-01 9.09404264e-07]
 [5.05666547e-02 2.98599243e-01 6.80626035e-01]]
[[0.4 0.3 0.3]
 [0.3 0.4 0.3]
 [0.3 0.3 0.4]]
Fro error: 0.6884026830183025
rre error: 0.6816202084504638
mae error: 0.18907384606246877
Per-row correlations: [np.float64(0.8702991343561096), np.float64(0.903904007053476), np.float64(0.9205048686529939)]
Mean: 0.8982360033541932
