In [1]:
from train_policy import prepare_data, filter_and_remap_edges, filter_dataset, extract_arrays_from_df, create_edge_weights_from_frequency
import torch

def safe_equal_check(tensor1, tensor2, name, tolerance=1e-6):
    """Compare two tensors and print a short human-readable report.

    The report contains the shapes and dtypes of both tensors, whether the
    values match and, on a mismatch, a summary of how much they differ.

    Args:
        tensor1: First tensor (reported as "V1").
        tensor2: Second tensor (reported as "V2").
        name: Heading printed above the report.
        tolerance: Used as both ``atol`` and ``rtol`` for floating-point
            comparisons; ignored for non-floating dtypes, which are compared
            exactly.

    Returns:
        bool: True when shapes agree and values match, False otherwise
        (including when the comparison itself raises).
    """
    print(f"\n{name}:")
    print(f"  V1 shape: {tensor1.shape}, V2 shape: {tensor2.shape}")
    print(f"  V1 dtype: {tensor1.dtype}, V2 dtype: {tensor2.dtype}")

    if tensor1.shape != tensor2.shape:
        print("  ❌ SHAPE MISMATCH!")
        return False

    try:
        if tensor1.dtype != tensor2.dtype:
            print("  ⚠️  Different dtypes, converting for comparison")
            # Promote BOTH operands to a common dtype instead of casting
            # tensor2 down to tensor1's dtype: a narrowing cast (e.g.
            # int64 -> int32) can wrap and hide real differences, and a
            # mixed int/float pair would otherwise not be converted at all.
            common_dtype = torch.promote_types(tensor1.dtype, tensor2.dtype)
            tensor1 = tensor1.to(common_dtype)
            tensor2 = tensor2.to(common_dtype)

        # Covers every floating dtype (half, bfloat16, float, double),
        # not only float32/float64.
        is_float = tensor1.is_floating_point()

        if is_float:
            equal = torch.allclose(tensor1, tensor2, atol=tolerance, rtol=tolerance)
        else:
            equal = torch.equal(tensor1, tensor2)

        if equal:
            print("  ✅ VALUES MATCH!")
        else:
            print("  ❌ VALUES DIFFER!")
            # Show some statistics about the differences.
            if is_float:
                diff = torch.abs(tensor1 - tensor2)
                print(f"    Max difference: {diff.max().item():.6f}")
                print(f"    Mean difference: {diff.mean().item():.6f}")
            else:
                diff_count = (tensor1 != tensor2).sum().item()
                print(f"    Different elements: {diff_count}/{tensor1.numel()}")

        return equal

    except Exception as e:
        # Best-effort helper: report the failure (e.g. tensors on different
        # devices) instead of aborting the whole notebook cell.
        print(f"  ❌ COMPARISON ERROR: {e}")
        return False

def shape_info(**args):
    """Print one ``label: shape`` line per keyword argument.

    Each value must expose a ``.shape`` attribute; lines are printed in the
    order the keyword arguments were passed. Returns None.
    """
    for label in args:
        print(f"{label}: {args[label].shape}")


# Check complete dataset

In [2]:
from train_policy import prepare_data, filter_and_remap_edges, filter_dataset, extract_arrays_from_df, create_edge_weights_from_frequency
import torch
import numpy as np

# Complete-data check: load one experiment file through both pipelines and
# verify that they produce the same tensors for the train+val nodes.
dataset_path = '/Users/jason/Documents/Coding Projects/2025_Claude/NetDeconf_main_hao/datasets/exps/BlogCatalog/p=0.0_k=9_seed=194.pt'

# --- Version 1: prepare_data, then restrict everything to train | val ---
X, A, T, Y, Y1, Y0, idx_train, idx_val, idx_test = prepare_data(dataset_path)
mask = idx_train | idx_val
X_v1 = X[mask]
T_v1 = T[mask]
Y_v1 = Y[mask]
Y1_v1 = Y1[mask]
Y0_v1 = Y0[mask]
idx_train_v1 = idx_train[mask]
idx_val_v1 = idx_val[mask]
A_v1, _ = filter_and_remap_edges(A, mask)

# --- Version 2: load the raw object and filter it with filter_dataset ---
# NOTE: weights_only=False unpickles arbitrary objects; only load trusted files.
data = torch.load(dataset_path, weights_only=False)
mask = data.train_mask | data.val_mask  # rebinds `mask` to the V2 train | val mask
data1 = filter_dataset(data, mask)

shape_info(X_v1=X_v1, T_v1=T_v1, Y_v1=Y_v1, Y1_v1=Y1_v1, Y0_v1=Y0_v1)

T_v2 = data1.treatment
Y_v2 = data1.outcome
Y1_v2 = torch.tensor(data1.arr_Y1, dtype=torch.float)
Y0_v2 = torch.tensor(data1.arr_Y0, dtype=torch.float)

# The feature matrix is rebuilt from the first half of edge_attr
# (presumably the second half mirrors it for the reverse edges — TODO confirm).
X_v2 = data1.edge_attr[:data1.edge_attr.shape[0] // 2].view(data1.n_units, data1.n_attrs)
idx_train_v2 = data1.train_mask
idx_val_v2 = data1.val_mask

# One pass/fail flag per compared quantity, evaluated in the listed order.
results = {
    'X': safe_equal_check(X_v1, X_v2, "Feature Matrix (X)"),
    'T': safe_equal_check(T_v1, T_v2, "Treatment (T)"),
    'Y': safe_equal_check(Y_v1, Y_v2, "Outcome (Y)"),
    'Y1': safe_equal_check(Y1_v1, Y1_v2, "Potential Outcome Y1"),
    'Y0': safe_equal_check(Y0_v1, Y0_v2, "Potential Outcome Y0"),
    'train_mask': safe_equal_check(idx_train_v1, idx_train_v2, "Training Mask"),
    'val_mask': safe_equal_check(idx_val_v1, idx_val_v2, "Validation Mask"),
}

print(results)

imputed version!!!!
train complete 3117	validating complete 1040
torch.Size([5196, 20]) torch.Size([5196]) torch.Size([5196]) torch.Size([5196]) torch.Size([2, 343486]) torch.Size([5196])
X_v1: torch.Size([4157, 20])
T_v1: torch.Size([4157])
Y_v1: torch.Size([4157])
Y1_v1: torch.Size([4157])
Y0_v1: torch.Size([4157])

Feature Matrix (X):
  V1 shape: torch.Size([4157, 20]), V2 shape: torch.Size([4157, 20])
  V1 dtype: torch.float32, V2 dtype: torch.float32
  ✅ VALUES MATCH!

Treatment (T):
  V1 shape: torch.Size([4157]), V2 shape: torch.Size([4157])
  V1 dtype: torch.int64, V2 dtype: torch.int32
  ⚠️  Different dtypes, converting for comparison
  ✅ VALUES MATCH!

Outcome (Y):
  V1 shape: torch.Size([4157]), V2 shape: torch.Size([4157])
  V1 dtype: torch.float32, V2 dtype: torch.float32
  ✅ VALUES MATCH!

Potential Outcome Y1:
  V1 shape: torch.Size([4157]), V2 shape: torch.Size([4157])
  V1 dtype: torch.float32, V2 dtype: torch.float32
  ✅ VALUES MATCH!

Potential Outcome Y0:
  V1 shape

# Incomplete Data

In [3]:
from train_policy import prepare_data, filter_and_remap_edges, filter_dataset, extract_arrays_from_df, create_edge_weights_from_frequency
import torch
import numpy as np

# Incomplete-data check: same comparison as the complete-data cell, but
# treatment/outcome are only compared where the dataset's own masks select them.
dataset_path = '/Users/jason/Documents/Coding Projects/2025_Claude/GCN_policy_ma/datasets/exps/Syn/grape/p=0.1_k=8_seed=532.pt'

# Version 2 results
# NOTE: weights_only=False unpickles arbitrary objects; only load trusted files.
data = torch.load(dataset_path, map_location='cpu', weights_only=False)
mask = data.train_mask | data.val_mask
data1 = filter_dataset(data, mask)

# Version 1 results
X, A, T, Y, Y1, Y0, idx_train, idx_val, idx_test = prepare_data(dataset_path)
mask = idx_train | idx_val  # from here on `mask` is the V1 train | val mask
X_v1, T_v1, Y_v1, Y1_v1, Y0_v1 = X[mask], T[mask], Y[mask], Y1[mask], Y0[mask]
idx_train_v1, idx_val_v1 = idx_train[mask], idx_val[mask]
A_v1, _ = filter_and_remap_edges(A, mask)


shape_info(X_v1=X_v1,
           T_v1=T_v1,
           Y_v1=Y_v1,
           Y1_v1=Y1_v1,
           Y0_v1=Y0_v1)

T_v2, Y_v2, Y1_v2, Y0_v2 = data1.treatment, data1.outcome, torch.tensor(data1.arr_Y1, dtype=torch.float), torch.tensor(data1.arr_Y0, dtype=torch.float)

# Label the V2 tensors as V2; they were previously printed under "*_v1"
# names, which made the two groups of shapes indistinguishable in the output.
shape_info(T_v2=T_v2,
           Y_v2=Y_v2,
           Y1_v2=Y1_v2,
           Y0_v2=Y0_v2)
# Feature-matrix reconstruction is disabled for this dataset:
# X_v2 = data1.edge_attr[:data1.edge_attr.shape[0]//2].view((data1.n_units, data1.n_attrs))

idx_train_v2 = data1.train_mask
idx_val_v2 = data1.val_mask

# Restrict the dataset's treatment/outcome masks to the train | val nodes
# (presumably they flag observed entries — TODO confirm against the dataset builder).
t_mask, y_mask = data.treatment_mask[mask], data.outcome_mask[mask]

results = {}
# results['X'] = safe_equal_check(X_v1, X_v2, "Feature Matrix (X)")
results['T'] = safe_equal_check(T_v1[t_mask], T_v2[t_mask], "Treatment (T)")
results['Y'] = safe_equal_check(Y_v1[y_mask], Y_v2[y_mask], "Outcome (Y)")
results['Y1'] = safe_equal_check(Y1_v1, Y1_v2, "Potential Outcome Y1")
results['Y0'] = safe_equal_check(Y0_v1, Y0_v2, "Potential Outcome Y0")
results['train_mask'] = safe_equal_check(idx_train_v1, idx_train_v2, "Training Mask")
results['val_mask'] = safe_equal_check(idx_val_v1, idx_val_v2, "Validation Mask")

print(results)


imputed version!!!!
train complete 2999	validating complete 1002
torch.Size([5000, 20]) torch.Size([5000]) torch.Size([5000]) torch.Size([5000]) torch.Size([2, 2362522]) torch.Size([5000])
X_v1: torch.Size([4001, 20])
T_v1: torch.Size([4001])
Y_v1: torch.Size([4001])
Y1_v1: torch.Size([4001])
Y0_v1: torch.Size([4001])
T_v1: torch.Size([4001])
Y_v1: torch.Size([4001])
Y1_v1: torch.Size([4001])
Y0_v1: torch.Size([4001])

Treatment (T):
  V1 shape: torch.Size([3612]), V2 shape: torch.Size([3612])
  V1 dtype: torch.int64, V2 dtype: torch.int32
  ⚠️  Different dtypes, converting for comparison
  ✅ VALUES MATCH!

Outcome (Y):
  V1 shape: torch.Size([3609]), V2 shape: torch.Size([3609])
  V1 dtype: torch.float32, V2 dtype: torch.float32
  ✅ VALUES MATCH!

Potential Outcome Y1:
  V1 shape: torch.Size([4001]), V2 shape: torch.Size([4001])
  V1 dtype: torch.float32, V2 dtype: torch.float32
  ✅ VALUES MATCH!

Potential Outcome Y0:
  V1 shape: torch.Size([4001]), V2 shape: torch.Size([4001])
  V1 