In [31]:
import importlib
import src.utils
import src.models
import src.counterfactual

importlib.reload(src.utils)
importlib.reload(src.models)
importlib.reload(src.counterfactual)

from src.utils import load_data, load_model, DatasetMetadata, clean_instance
from src.counterfactual import newton_op, distance
import torch
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
import sympy as sp
# str to sympy
from sympy.parsing.sympy_parser import parse_expr

from torch.utils.data import DataLoader
from src.models import LogisticModel
import warnings
warnings.filterwarnings("ignore")

device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")
# device = device if not torch.backends.mps.is_available() else torch.device("mps")


In [32]:
class State:
    """Bookkeeping container for one counterfactual optimisation run.

    The constructor stores its arguments unchanged and initialises an epoch
    counter plus two regularisation switches, both of which start disabled.
    """

    def __init__(self, model, metadata, max_epochs, dx_scaled, mean_scaled, upd_weights):
        # Progress counters.
        self.epochs: int = 0
        self.max_epochs: int = max_epochs

        # Regularisation switches; both are off until something enables them.
        self.apply_reg = False  # when to apply integer regularization
        self.reg_vars = False  # when to apply number-of-variables regularization

        # Model under study and the description of its dataset.
        self.model: LogisticModel = model
        self.metadata: DatasetMetadata = metadata

        # Tensors supplied by the caller.
        self.dx_scaled: torch.Tensor = dx_scaled
        self.mean_scaled: torch.Tensor = mean_scaled
        self.upd_weights: torch.Tensor = upd_weights  # columns to be updated


In [33]:
def unscale_instance(instance: torch.Tensor, metadata: DatasetMetadata, inplace: bool = False):
    """Map the scaler-managed entries of one instance back to original units.

    Applies ``x * scale_ + mean_`` to the entries selected by
    ``metadata.cols_for_scaler``; all other entries are left untouched.

    Args:
        instance: Tensor holding a single scaled instance (assumed 1-D, one
            entry per feature -- TODO confirm against callers).
        metadata: Object exposing ``cols_for_scaler`` (index or mask of the
            scaled entries) and ``scaler`` with ``mean_`` and ``scale_``.
        inplace: When True, ``instance`` itself is overwritten and returned;
            otherwise a clone is modified and returned.

    Returns:
        The tensor that received the unscaled values. It stays on the device
        of ``instance``.
    """
    cols = metadata.cols_for_scaler
    # Build the statistics on the instance's own device. Creating them on the
    # CPU (as before) makes the arithmetic below fail for CUDA instances.
    mean = torch.as_tensor(metadata.scaler.mean_, dtype=torch.float64, device=instance.device)
    std = torch.as_tensor(metadata.scaler.scale_, dtype=torch.float64, device=instance.device)
    # float64 arithmetic and detached values reproduce what the former
    # torch.tensor(<tensor>) round-trip yielded, without its UserWarning.
    unscaled_cols = instance[cols].detach().to(torch.float64) * std + mean
    target = instance if inplace else instance.clone()
    target[cols] = unscaled_cols.to(instance.dtype)
    return target
    
def scale_instance(instance: torch.Tensor, metadata: DatasetMetadata, inplace: bool = False):
    """Standardise the scaler-managed entries of one instance.

    Applies ``(x - mean_) / scale_`` to the entries selected by
    ``metadata.cols_for_scaler``; all other entries are left untouched.

    Args:
        instance: Tensor holding a single unscaled instance (assumed 1-D, one
            entry per feature -- TODO confirm against callers).
        metadata: Object exposing ``cols_for_scaler`` (index or mask of the
            scaled entries) and ``scaler`` with ``mean_`` and ``scale_``.
        inplace: When True, ``instance`` itself is overwritten and returned;
            otherwise a clone is modified and returned.

    Returns:
        The tensor that received the scaled values. It stays on the device of
        ``instance``.
    """
    cols = metadata.cols_for_scaler
    # Build the statistics on the instance's own device. Creating them on the
    # CPU (as before) makes the arithmetic below fail for CUDA instances.
    mean = torch.as_tensor(metadata.scaler.mean_, dtype=torch.float64, device=instance.device)
    std = torch.as_tensor(metadata.scaler.scale_, dtype=torch.float64, device=instance.device)
    # float64 arithmetic and detached values reproduce what the former
    # torch.tensor(<tensor>) round-trip yielded, without its UserWarning.
    scaled_cols = (instance[cols].detach().to(torch.float64) - mean) / std
    target = instance if inplace else instance.clone()
    target[cols] = scaled_cols.to(instance.dtype)
    return target
    
def round_instance(instance: torch.Tensor, metadata: DatasetMetadata):
    """Round the integer-flagged entries of an instance in original units.

    The instance is unscaled, the entries where ``metadata.int_cols == 1`` are
    rounded with ``torch.round``, and the result is scaled again.

    Returns:
        A new scaled tensor; ``instance`` is not modified because both helper
        calls run with their default ``inplace=False``.
    """
    raw = unscale_instance(instance, metadata)
    integer_mask = metadata.int_cols == 1
    raw[integer_mask] = torch.round(raw[integer_mask])
    return scale_instance(raw, metadata)


def unscale_batch(batch: torch.Tensor, metadata: DatasetMetadata, inplace: bool = False):
    """Map the scaler-managed columns of a batch back to original units.

    Applies ``x * scale_ + mean_`` to the columns selected by
    ``metadata.cols_for_scaler``; all other columns are left untouched.

    Args:
        batch: 2-D tensor indexed as ``batch[:, columns]`` (rows are
            instances).
        metadata: Object exposing ``cols_for_scaler`` (index or mask of the
            scaled columns) and ``scaler`` with ``mean_`` and ``scale_``.
        inplace: When True, ``batch`` itself is overwritten and returned;
            otherwise a clone is modified and returned.

    Returns:
        The tensor that received the unscaled values. It stays on the device
        of ``batch``.
    """
    cols = metadata.cols_for_scaler
    # Keep everything on the batch's own device. The earlier version mixed
    # CPU statistics with a result moved to the global ``device``, which
    # breaks whenever the batch lives elsewhere.
    mean = torch.as_tensor(metadata.scaler.mean_, dtype=torch.float32, device=batch.device)
    std = torch.as_tensor(metadata.scaler.scale_, dtype=torch.float32, device=batch.device)
    # detach() + float32 matches the former torch.tensor(<tensor>) copy
    # without triggering its copy-construct UserWarning.
    unscaled_cols = batch[:, cols].detach().to(torch.float32) * std + mean
    target = batch if inplace else batch.clone()
    target[:, cols] = unscaled_cols.to(batch.dtype)
    return target
    
def scale_batch(batch: torch.Tensor, metadata: DatasetMetadata, inplace: bool = False):
    """Standardise the scaler-managed columns of a batch.

    Applies ``(x - mean_) / scale_`` to the columns selected by
    ``metadata.cols_for_scaler``; all other columns are left untouched.

    Args:
        batch: 2-D tensor indexed as ``batch[:, columns]`` (rows are
            instances).
        metadata: Object exposing ``cols_for_scaler`` (index or mask of the
            scaled columns) and ``scaler`` with ``mean_`` and ``scale_``.
        inplace: When True, ``batch`` itself is overwritten and returned;
            otherwise a clone is modified and returned.

    Returns:
        The tensor that received the scaled values. It stays on the device of
        ``batch``.
    """
    cols = metadata.cols_for_scaler
    # Keep everything on the batch's own device. The earlier version mixed
    # CPU statistics with a result moved to the global ``device``, which
    # breaks whenever the batch lives elsewhere.
    mean = torch.as_tensor(metadata.scaler.mean_, dtype=torch.float32, device=batch.device)
    std = torch.as_tensor(metadata.scaler.scale_, dtype=torch.float32, device=batch.device)
    # detach() + float32 matches the former torch.tensor(<tensor>) copy
    # without triggering its copy-construct UserWarning.
    scaled_cols = (batch[:, cols].detach().to(torch.float32) - mean) / std
    target = batch if inplace else batch.clone()
    target[:, cols] = scaled_cols.to(batch.dtype)
    return target
    
def round_batch(batch: torch.Tensor, metadata: DatasetMetadata):
    """Round the integer-flagged columns of a batch in original units.

    The batch is unscaled, the columns where ``metadata.int_cols == 1`` are
    rounded with ``torch.round``, and the result is scaled again.

    Args:
        batch: 2-D tensor with one row per instance.
        metadata: Object exposing ``int_cols`` (assumed to be a per-feature
            flag vector, as it is combined element-wise with the weight
            vector elsewhere in this notebook) plus whatever
            ``unscale_batch`` / ``scale_batch`` need.

    Returns:
        A new scaled tensor; ``batch`` is not modified because both helper
        calls run with their default ``inplace=False``.
    """
    unscaled = unscale_batch(batch, metadata)
    integer_mask = metadata.int_cols == 1
    # The mask has one entry per feature, so it must select columns. Indexing
    # with it directly (as before) addressed rows of the batch instead.
    unscaled[:, integer_mask] = torch.round(unscaled[:, integer_mask])
    return scale_batch(unscaled, metadata)

In [34]:
# Dataset CSV and the name of the model to load.
filename = "data/Loan_default.csv"
model_name = "model_small"
# File that receives the model's state dict.
model_dict = f"models/{model_name}.pth"

In [35]:
# Load the dataset; only the test loader and the metadata are kept.
test_data: DataLoader
_, _, test_data, _, metadata = load_data(filename, batch_size=1024)

# First element of the first test batch, as float32 on the working device.
inputs = next(iter(test_data))[0].to(device=device, dtype=torch.float32)

# Load the model by name and move it to the same dtype/device.
model = load_model(model_name).to(device=device, dtype=torch.float32)

# Persist the weights so they can be re-loaded from `model_dict`.
torch.save(model.state_dict(), model_dict)


## Extract model equation

In [6]:
# import sympy as sp
# import torch

# def extract_symbolic_equation(model: torch.nn.Module, instance: torch.Tensor):
#     """
#     Extracts a symbolic equation from a trained PyTorch model.
#     Assumes a feedforward structure with linear layers and activations.
#     """
#     # Define symbolic variables for input features
#     x2, x3 = sp.symbols('x2 x3')  # Inputs
#     # constants = sp.symbols(f'c1:{model.input_dim + 1}')  # Constants for other features
    
#     # Build input vector with constants
#     x = [instance[i].item() if i not in [1, 2] else (x2 if i == 1 else x3) for i in range(model.input_dim)]
    
#     # Convert to a sympy matrix
#     X = sp.Matrix(x)
#     activations = []

#     # Iterate over layers
#     for layer in model.layers:
#         if isinstance(layer, torch.nn.Linear):
#             W = sp.Matrix(layer.weight.detach().numpy())  # Extract weight matrix
#             b = sp.Matrix(layer.bias.detach().numpy())    # Extract bias
#             X = W * X + b  # Apply linear transformation
#         elif isinstance(layer, torch.nn.ReLU):
#             activations.append(X)
#             X = X.applyfunc(lambda val: sp.Max(0, val))  # ReLU activation
#         elif isinstance(layer, torch.nn.Sigmoid):
#             activations.append(X)
#             X = X.applyfunc(lambda val: 1 / (1 + sp.exp(-val)))  # Sigmoid activation
#         # X.subs({sp.symbols(f'c{i+1}'): val for i, val in enumerate(inputs[0]) if i != 1 and i != 2})
#         print("Done: ", layer)
#         # print(X)


#     # Apply softmax at the end
#     denominator = sp.Add(*(sp.exp(e) for e in X))
#     softmax_expr = sp.Matrix([sp.exp(e) / denominator for e in X])

#     return softmax_expr, activations # .simplify()

# # Example usage
# model_sym = LogisticModel(inputs.shape[1], hidden_sizes=[16, 8])
# model_sym.load_state_dict(torch.load(model_dict))  # Load trained weights
# symbolic_eq, activations = extract_symbolic_equation(model_sym, inputs[0])
# model_eq = symbolic_eq[0]
# print(symbolic_eq)


In [7]:
# # with open('small.txt', 'w') as f:
# #     f.write(str(symbolic_eq))
# with open('small.txt', 'r') as f:
#     symbolic_eq1 = f.read()
#     symbolic_eq1 = parse_expr(symbolic_eq1)

## Training

In [38]:

# Start from the first row of the batch.
person: torch.Tensor = inputs[0].to(device=device, dtype=torch.float32)

# Predicted class per row; keep only the rows predicted as class 1.
outputs = torch.argmax(model(inputs), dim=1)
inputs_useful = inputs[outputs == 1]

# To restrict which columns may change, overwrite the mask first, e.g.:
# metadata.cols_for_mask = [False] * len(metadata.cols_for_mask)
# for col in range(1, 9):
#     metadata.cols_for_mask[col] = True

# Per-column weights derived from the mask in the metadata.
weights = torch.tensor(metadata.cols_for_mask, dtype=torch.float32, device=device)


In [39]:
# Use the first instance that the model assigns to class 1.
person = inputs_useful[0].to(device=device, dtype=torch.float32)
# weights = torch.tensor([0, 1, 1, 1, 0, 1, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], dtype=torch.int).to(device)

In [40]:
class Checks:
    """Empirical checks for a counterfactual generated for one instance.

    Calling the object runs, in order: a validity check (does the model's
    decision flip?), an optional plausibility check (is the point inside
    ``metadata.min_values`` / ``metadata.max_values``?), and a sampling-based
    minimality check (is there a sampled point that is predicted class 0 and
    is at least as close to the original instance?).

    Attributes:
        sorted_points: ``None`` after a call that passed or that failed before
            the minimality stage; otherwise the DataFrame of sampled points
            that violated minimality, sorted by distance.
    """

    def __init__(self, model: LogisticModel, metadata: DatasetMetadata, reg_int: bool = False, reg_clamp: bool = False, noise: float = 1e-3, n_points: int = 10000, noise_int: float = 2.5):
        """Store the configuration and switch the model to eval mode.

        Args:
            model: Model used for every prediction made by the checks.
            metadata: Dataset description (columns, bounds, integer flags,
                scaler) consumed by the checks.
            reg_int: Use the integer-aware minimality check.
            reg_clamp: Enforce and test the min/max bounds.
            noise: Half-width of the uniform noise added to each sample.
            n_points: Number of sampled points per minimality check.
            noise_int: Bound handed to ``np.random.randint`` for the integer
                columns. NOTE(review): the default 2.5 is not an integer --
                confirm the intended range.
        """
        self.model = model
        self.metadata = metadata
        self.reg_int = reg_int
        self.reg_clamp = reg_clamp
        self.noise = noise
        self.n_points = n_points
        self.noise_int = noise_int
        self.distance_threshold = 1e-4
        self.sorted_points = None
        self.model.eval()


    def __call__(self, person: torch.Tensor, person_new: torch.Tensor, weights: torch.Tensor):
        """
        Check if the new person is a valid counterfactual.

        Returns True only if every enabled check passes. On a minimality
        failure the offending samples are stored in ``self.sorted_points``.
        """
        # Drop results of an earlier call so a reused checker never exposes
        # stale samples that belong to a different instance.
        self.sorted_points = None

        # Check if the new person is a valid counterfactual
        if not self.validity_check(person, person_new):
            print("The new person is not a valid counterfactual.")
            return False

        # Check if the new person is plausible
        if self.reg_clamp and not self.plausibility_check(person_new):
            print("The new person is not plausible.")
            return False

        # Check if the new person is minimal
        if self.reg_int:
            valid, sorted_points = self.integer_minimality_check(person, person_new, weights)
            message = "The new person is not integer minimal."
        else:
            valid, sorted_points = self.minimality_check(person, person_new, weights)
            message = "The new person is not minimal."

        if not valid:
            print(message, "The length of the sorted points is: ", len(sorted_points))
            self.sorted_points = sorted_points
            return False

        return True


    def minimality_check(
        self,
        person: torch.Tensor,
        person_new: torch.Tensor,
        weights: torch.Tensor,
        ):
        """Sample around ``person_new`` looking for closer class-0 points.

        ``n_points`` points are drawn as ``person_new`` plus uniform noise in
        ``[-noise, noise]`` multiplied by ``weights``; they are clamped to the
        metadata bounds when ``reg_clamp`` is set.

        Returns:
            ``(is_minimal, violations)`` where ``violations`` is a DataFrame
            (scaled feature columns plus ``output`` and ``distance``) holding
            the samples predicted as class 0 whose distance to ``person`` is
            <= the distance of ``person_new``.
        """
        # .detach().cpu() lets this work for CUDA tensors and for tensors that
        # carry gradients; a bare .numpy() raises in both cases.
        weights_np = weights.detach().cpu().numpy()
        centre = person_new.detach().cpu().numpy().reshape(-1)
        noise = np.random.uniform(-self.noise, self.noise, (self.n_points, person_new.shape[0]))
        points = torch.tensor(noise * weights_np + centre, dtype=torch.float32).to(person_new.device)
        if self.reg_clamp:
            points = torch.clamp(points, self.metadata.min_values, self.metadata.max_values)

        # Use the model handed to the constructor, not a notebook global.
        with torch.no_grad():
            outputs = self.model(points)

        b = pd.DataFrame(points.detach().cpu().numpy(), columns=self.metadata.columns)
        b["output"] = torch.argmax(outputs, dim=1).detach().cpu().numpy()
        distances = torch.tensor([distance(person, p, weights) for p in points])
        b["distance"] = distances.detach().cpu().numpy()

        d = distance(person, person_new, weights).item()
        # One combined mask avoids pandas' "Boolean Series key will be
        # reindexed" warning raised by chained boolean indexing.
        sorted_b = b.loc[(b["distance"] <= d) & (b["output"] == 0)].sort_values(by="distance")
        # sorted_b = sorted_b[(sorted_b["distance"] - d) < self.distance_threshold]
        return len(sorted_b) == 0, sorted_b

    def integer_minimality_check(
        self,
        person: torch.Tensor,
        person_new_int: torch.Tensor,
        weights: torch.Tensor,
    ):
        """Minimality check that perturbs integer columns by whole steps.

        Sampling happens in unscaled units: non-integer active columns get
        uniform noise in ``[-noise, noise]``, integer active columns get
        ``np.random.randint(-noise_int, noise_int)`` steps, both multiplied by
        the matching entries of ``weights``. The samples are scaled back
        before being fed to the model.

        Returns:
            ``(is_minimal, violations)`` where ``violations`` is a DataFrame
            (unscaled feature columns plus ``output`` and ``distance``)
            holding the samples predicted as class 0 whose distance to
            ``person`` is strictly below the reference distance.
        """
        active = weights != 0
        continuous_weights = (active & ~self.metadata.int_cols) * weights
        integer_weights = (active & self.metadata.int_cols) * weights
        shape = (self.n_points, person_new_int.shape[0])

        # Convert the weight vectors to NumPy explicitly; multiplying NumPy
        # arrays by torch tensors directly is fragile and fails on CUDA.
        noise_tensor = np.random.uniform(-self.noise, self.noise, shape) * continuous_weights.detach().cpu().numpy()
        rounded_noise_tensor = np.random.randint(-self.noise_int, self.noise_int, shape) * integer_weights.detach().cpu().numpy()
        centre = unscale_instance(person_new_int, self.metadata).detach().cpu().numpy().reshape(-1)

        points = scale_batch(
            torch.tensor(noise_tensor + rounded_noise_tensor + centre, dtype=torch.float32),
            self.metadata,
        ).to(person_new_int.device)

        if self.reg_clamp:
            points = torch.clamp(points, self.metadata.min_values, self.metadata.max_values)

        # Use the model handed to the constructor, not a notebook global.
        with torch.no_grad():
            outputs = self.model(points)

        points_unscaled = unscale_batch(points, self.metadata)
        b = pd.DataFrame(points_unscaled.detach().cpu().numpy(), columns=self.metadata.columns)
        b["output"] = torch.argmax(outputs, dim=1).detach().cpu().numpy()

        distances = torch.tensor([distance(person, p, weights) for p in points])
        b["distance"] = distances.detach().cpu().numpy()

        # NOTE(review): the reference distance uses only the non-integer
        # weights while the sample distances above use the full weights --
        # confirm this asymmetry is intended.
        d = distance(person, person_new_int, continuous_weights).item()
        sorted_b = b.loc[(b["distance"] < d) & (b["output"] == 0)].sort_values(by="distance")

        return len(sorted_b) == 0, sorted_b

    def validity_check(
      self,
      person: torch.Tensor,
      person_new: torch.Tensor,
    ):
        """Return True when the model's decision differs for the two inputs.

        The decision is ``model(x)[0][0] >= 0.5`` evaluated on a batch of one.
        """
        return (
            (self.model(person_new.unsqueeze(0))[0][0].item() >= 0.5)
            != (self.model(person.unsqueeze(0))[0][0].item() >= 0.5)
        )

    def plausibility_check(
        self,
        person_new: torch.Tensor,
    ):
        """Return True when clamping to the metadata bounds changes nothing."""
        # Check if the new person is plausible
        return (
            torch.clamp(
                person_new,
                self.metadata.min_values,
                self.metadata.max_values,
            )
            == person_new
        ).all().item()
            

## Global check

In [11]:
# import src.counterfactual

# importlib.reload(src.counterfactual)
# from src.counterfactual import newton_op, distance
# person = inputs_useful[0]
# p_new, state_p = newton_op(model, person, metadata, weights, 0.1, reg_int=False, reg_vars=False, reg_clamp=True, print_=False)
# torch.manual_seed(torch.randint(0, 100, (1,)).item())
# n = 5
# num_points = 10000
# num_linspace = 5000
# indexes = torch.nonzero(weights).reshape(-1)
# print(indexes)
# sampled_indexes = torch.tensor([6, 8]) # indexes #[torch.randint(0, len(indexes), (n,))]
# print(sampled_indexes)

# for sample_var in sampled_indexes:
#     w = weights.clone()
#     w[sample_var] = 0
#     print("Sampled variable:", sample_var.item())
#     x = p_new.repeat(num_points*num_linspace, 1)
#     print("points repeated")
#     # print("x:", x[:, w != 0])

#     # print(x)
#     x[:, w != 0] = (torch.distributions.uniform.Uniform(metadata.min_values, metadata.max_values).sample((num_points,)) * w)[:, w != 0].repeat(num_linspace, 1)
#     print("points generated")

#     x[:, sample_var] = torch.linspace(metadata.min_values[sample_var], metadata.max_values[sample_var], num_linspace).repeat(num_points)
#     print("linspace generated")

#     x = x[model(x)[:, 0] > metadata.threshold]
#     print("model filtered")

#     # calculate the distance
#     dists = distance(person, x, weights, state_p, with_sum=False)
#     print(torch.min(dists), distance(person, p_new, weights, state_p))
#     x = x[dists < distance(person, p_new, weights, state_p)]
#     print("distance filtered")
#     print(len(x))

## Trials

### Only 1 person

In [49]:
import src.counterfactual

importlib.reload(src.counterfactual)
from src.counterfactual import newton_op

# Switches shared by the optimiser and the checker.
reg_int, reg_clamp = False, True

metadata.threshold = 0.5 + 1e-5
person = inputs_useful[48]
p_new, state_p = newton_op(
    model, person, metadata, weights, 0.2,
    reg_int=reg_int, reg_clamp=reg_clamp, print_=True,
)
check = Checks(model, metadata, reg_int=reg_int, reg_clamp=reg_clamp)
valid = check(person, p_new, weights)
print("Valid:", valid)

# Show the counterfactual in unscaled units, then any stored violating samples.
p_new_unscaled = unscale_instance(p_new, metadata).detach().numpy()
display(pd.DataFrame([p_new_unscaled], columns=metadata.columns))
display(check.sorted_points)
print(model(p_new.unsqueeze(0))[0][0].item(), model(p_new.unsqueeze(0)).argmax(dim=1))
if check.sorted_points is not None:
    # The last two columns ("output", "distance") are dropped to recover the
    # feature vectors of the samples.
    sampled_features = torch.tensor(check.sorted_points.to_numpy()[:, :-2]).float()
    minimal = torch.tensor(check.sorted_points.iloc[0, :-2].values).float()
    print(distance(person, p_new, weights).item(), distance(person, minimal, weights).item())
    display(pd.DataFrame(unscale_batch(sampled_features, metadata), columns=metadata.columns))

Epoch: 0
Using subhessian: tensor([-2.4860e-03, -7.7670e-04, -2.3549e-03,  6.9448e-04,  3.1606e-03,
         8.6982e-05,  5.2141e-04, -8.0432e-04, -1.2386e-03],
       grad_fn=<MvBackward0>)
Subhessiana
dist: 3.5420844142208807e-06 , threshold: 0.3744104206562042
Changes:  delta1: -0.0009308687294833362  delta_l: 0.0
Epoch: 1
Using subhessian: tensor([-1.5551e-03, -4.8583e-04, -1.4731e-03,  4.3443e-04,  1.9772e-03,
         5.4439e-05,  3.2615e-04, -5.0315e-04, -7.7485e-04],
       grad_fn=<MvBackward0>)
Subhessiana
dist: 9.358929673908278e-06 , threshold: 0.37438660860061646
Changes:  delta1: -0.0005822451203130186  delta_l: 0.0
Epoch: 2
Using subhessian: tensor([-9.7289e-04, -3.0393e-04, -9.2158e-04,  2.7177e-04,  1.2369e-03,
         3.4054e-05,  2.0403e-04, -3.1477e-04, -4.8475e-04],
       grad_fn=<MvBackward0>)
Subhessiana
dist: 1.4406736227101646e-05 , threshold: 0.3743717670440674
Changes:  delta1: -0.0003642385418061167  delta_l: 0.0
Epoch: 3
Using subhessian: tensor([-6.0871e

Unnamed: 0,Age,Income,LoanAmount,CreditScore,MonthsEmployed,NumCreditLines,InterestRate,LoanTerm,DTIRatio,Education_High School,...,EmploymentType_Unemployed,MaritalStatus_Married,MaritalStatus_Single,HasMortgage_Yes,HasDependents_Yes,LoanPurpose_Business,LoanPurpose_Education,LoanPurpose_Home,LoanPurpose_Other,HasCoSigner_Yes
0,19.0,28432.857422,200096.0,379.123413,34.081585,2.999224,17.339025,59.998528,0.27988,0.0,...,0.0,0.0,1.0,0.000804,0.001239,0.0,0.0,0.0,1.0,0.0


None

0.1256631314754486 tensor([1])


### 1 person, different weights


In [37]:
import src.counterfactual

importlib.reload(src.counterfactual)
from src.counterfactual import newton_op

reg_int=True
reg_clamp=False
check = Checks(model, metadata, reg_int=reg_int, reg_clamp=reg_clamp)

person = inputs_useful[1]

# Run 1: unmodified weights.
weights1 = weights.clone()
print(weights1)
p_new1, state_p = newton_op(model, person, metadata, weights1, 0.2, reg_int=reg_int, reg_clamp=reg_clamp, print_=True)
# Validate the result of THIS run with the weights it was optimised under.
# The earlier code checked `p_new` / `weights`, which are leftovers from a
# different cell and a different person.
valid1 = check(person, p_new1, weights1)
print("Valid1:", valid1)

# Run 2: same person, with the weight of column 1 doubled.
weights2 = weights.clone()
weights2[1] = 2
print(weights2)
p_new2, state_p = newton_op(model, person, metadata, weights2, 0.2, reg_int=reg_int, reg_clamp=reg_clamp, print_=True)
valid2 = check(person, p_new2, weights2)
print("Valid2:", valid2)

# Original instance and both counterfactuals, in unscaled units.
display(pd.DataFrame([unscale_instance(person, metadata).detach().numpy(), unscale_instance(p_new1, metadata).detach().numpy(), unscale_instance(p_new2, metadata).detach().numpy()], columns=metadata.columns))
# display(check.sorted_points)
# print(model(p_new.unsqueeze(0))[0][0].item(), model(p_new.unsqueeze(0)).argmax(dim=1))
# if check.sorted_points is not None:
#     minimal = torch.tensor(check.sorted_points.iloc[0, :-2].values).float()
#     print(distance(person, p_new, weights).item(), distance(person, minimal, weights).item())
#     display(pd.DataFrame(unscale_batch(torch.tensor(check.sorted_points.to_numpy()[:, :-2]).float(), metadata), columns=metadata.columns))

tensor([0., 1., 0., 1., 1., 1., 1., 1., 1., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
        0., 0., 0., 0., 0., 0.])
Epoch: 0
Jacobian is too small, using gradient descent: 0.12495197355747223
dist: 0.08864012360572815 , threshold: 0.25044816732406616
Changes:  delta1: -0.1524519920349121  delta_l: 0.0
Epoch: 1
Jacobian is too small, using gradient descent: 0.19856710731983185
dist: 0.300493448972702 , threshold: 0.18970298767089844
Changes:  delta1: -0.12824369966983795  delta_l: 0.0
Epoch: 2
Jacobian is too small, using gradient descent: 0.2916100323200226
dist: 0.5444599986076355 , threshold: 0.12619057297706604
Changes:  delta1: -0.09718967974185944  delta_l: 0.0
Epoch: 3
dist: 1.1451679468154907 , threshold: -0.025415658950805664
Changes:  delta1: -0.1708526760339737  delta_l: -4.92734432220459
Epoch: 4
dist: 26.333782196044922 , threshold: -0.00034743547439575195
Changes:  delta1: 4.4576592017619987e-07  delta_l: 1.4858976602554321
Epoch: 1
dist: 14.000776290893555 , threshold: -2.26

Unnamed: 0,Age,Income,LoanAmount,CreditScore,MonthsEmployed,NumCreditLines,InterestRate,LoanTerm,DTIRatio,Education_High School,...,EmploymentType_Unemployed,MaritalStatus_Married,MaritalStatus_Single,HasMortgage_Yes,HasDependents_Yes,LoanPurpose_Business,LoanPurpose_Education,LoanPurpose_Home,LoanPurpose_Other,HasCoSigner_Yes
0,19.0,29467.0,151769.0,606.0,33.0,1.0,6.63,24.0,0.48,1.0,...,1.0,0.0,1.0,1.0,0.0,1.0,0.0,0.0,0.0,0.0
1,19.0,50847.0,151769.0,635.0,51.0,1.0,2.167314,24.0,0.458714,1.0,...,1.0,0.0,1.0,1.0,0.0,1.0,0.0,0.0,0.0,0.0
2,19.0,41773.0,151769.0,640.0,54.0,1.0,1.461189,24.0,0.455444,1.0,...,1.0,0.0,1.0,1.0,0.0,1.0,0.0,0.0,0.0,0.0


### 1 person, different flags (clamp, int)

In [101]:
import src.counterfactual

importlib.reload(src.counterfactual)
# Note: this import rebinds `unscale_instance` / `scale_instance`, replacing
# the helpers of the same name defined earlier in this notebook.
from src.counterfactual import newton_op, distance, unscale_instance, scale_instance
person = inputs_useful[47]
person_new, state = newton_op(model, person, metadata, weights, 0.1, print_=True)
person_new_clamp, _ = newton_op(model, person, metadata, weights, 0.1, reg_clamp=True, print_=True)
# NOTE(review): unlike the other runs, this call does not pass 0.1 -- confirm
# that relying on newton_op's default here is intended.
person_new_int, _ = newton_op(model, person, metadata, weights, reg_int=True, print_=True)
person_new_clamp_int, _ = newton_op(model, person, metadata, weights, 0.1, reg_int=True, reg_clamp=True, print_=True)
# person_new_vars, _ = newton_op(model, person, metadata, weights, reg_vars=True, print_=True)
# person_new_int_vars, _ = newton_op(model, person, metadata, weights, 0.1, reg_int=True, reg_vars=True, print_=True)

# Explicit name -> tensor mapping instead of eval() on variable names: same
# rows in the same order, without evaluating strings as code.
candidates = {
    'person': person,
    'person_new': person_new,
    'person_new_clamp': person_new_clamp,
    'person_new_int': person_new_int,
    'person_new_clamp_int': person_new_clamp_int,
}
names = list(candidates)
ps = list(candidates.values())
outputs = [model(p.unsqueeze(0))[0][0].item() for p in ps]

distances = [distance(person, p, weights, state=state).item() for p in ps]

# One row per candidate, in unscaled units, plus model output and distance.
a = pd.DataFrame([unscale_instance(x, metadata).detach().cpu().numpy().reshape(-1) for x in ps], columns=metadata.columns)
a['output'] = outputs
a['distance'] = distances
# set index
a['names'] = names
a = a.set_index('names')
print(a.columns)
a

Epoch: 0
dist: 0.007501005195081234 , threshold: -0.0014559626579284668
Changes:  delta1: -0.044722311198711395  delta_l: 0.16280853748321533
Epoch: 1
dist: 0.007003797683864832 , threshold: -1.430511474609375e-06
Changes:  delta1: 0.0015100843738764524  delta_l: -0.0028347671031951904
Epoch: 2
dist: 0.007003319915384054 , threshold: 2.9802322387695312e-08
Changes:  delta1: 1.4378665582626127e-06  delta_l: 2.1329742594389245e-06
Epoch: 3
dist: 0.007003319915384054 , threshold: 2.9802322387695312e-08
Changes:  delta1: -3.407927451348769e-08  delta_l: -3.6754300936081563e-07
Epoch: 4
dist: 0.007003319915384054 , threshold: 2.9802322387695312e-08
Changes:  delta1: -3.268034021175481e-08  delta_l: -8.762540915085992e-09
Epoch: 5
dist: 0.007003319915384054 , threshold: 2.9802322387695312e-08
Changes:  delta1: -3.268034021175481e-08  delta_l: -8.762540915085992e-09
Epoch: 6
dist: 0.007003319915384054 , threshold: 2.9802322387695312e-08
Changes:  delta1: -3.268034021175481e-08  delta_l: -8.76

Unnamed: 0_level_0,Age,Income,LoanAmount,CreditScore,MonthsEmployed,NumCreditLines,InterestRate,LoanTerm,DTIRatio,Education_High School,...,MaritalStatus_Single,HasMortgage_Yes,HasDependents_Yes,LoanPurpose_Business,LoanPurpose_Education,LoanPurpose_Home,LoanPurpose_Other,HasCoSigner_Yes,output,distance
names,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
person,54.0,34432.0,209972.0,820.0,92.0,4.0,21.84,60.0,0.89,1.0,...,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,0.459618,0.0
person_new,54.0,36115.617188,209972.0,822.309204,93.438438,3.98668,21.476183,59.976562,0.888343,1.0,...,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,0.5,0.007003
person_new_clamp,54.0,36115.617188,209972.0,822.309204,93.438438,3.98668,21.476183,59.976562,0.888343,1.0,...,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,0.5,0.007003
person_new_int,54.0,36116.0,209972.0,822.0,93.0,4.0,21.393187,60.0,0.887965,1.0,...,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,0.5,0.00747
person_new_clamp_int,54.0,36116.0,209972.0,822.0,93.0,4.0,21.393187,60.0,0.887965,1.0,...,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,0.5,0.00747


### Batch

In [47]:
import src.counterfactual
importlib.reload(src.counterfactual)
from src.counterfactual import newton_op, distance, minimality_check, integer_minimality_check

reg_int = False
reg_clamp = True


# Counters accumulated over every instance predicted as class 1.
successes = 0
epochs = 0
bad_idxs = []
total = 0
check = Checks(model, metadata, reg_int=reg_int, reg_clamp=reg_clamp)
for idx, p in enumerate(inputs_useful):
    print("Person:", idx)
    # `ep` is the second value returned by newton_op; its `.epochs` attribute
    # is read below.
    p_new, ep = newton_op(model, p, metadata, weights, 0.2, reg_int=reg_int, reg_clamp=reg_clamp)
    # TODO: add the minimality check
    valid = check(p, p_new, weights)
    successes += valid # and (((state_p.metadata.max_values < p_new) | (state_p.metadata.min_values > p_new)).sum() == 0))
    # print("Person:", idx, "Rate of grad desc:",minimality_check(p, p_new, weights, ep, model))
    epochs += ep.epochs
    total += 1
    if not valid:
        bad_idxs.append(idx)
        # print(idx, valid)
print("Successes:", successes, "Total:", total)
# Guard the averages: with no class-1 instance in the batch `total` is 0 and
# the divisions below would raise ZeroDivisionError.
if total:
    print("Average epochs:", epochs / total)
    print("Success rate:", successes / total)
else:
    print("No instances were evaluated; averages are undefined.")

Person: 0
The new person is not minimal. The length of the sorted points is:  38
Person: 1
The new person is not minimal. The length of the sorted points is:  34
Person: 2
The new person is not minimal. The length of the sorted points is:  55
Person: 3
Person: 4
Subhessiana
Subhessiana
Subhessiana
Subhessiana
Subhessiana
Subhessiana
Subhessiana
Subhessiana
Subhessiana
Subhessiana
Subhessiana
Subhessiana
Subhessiana
Subhessiana
Subhessiana
Subhessiana
Subhessiana
Subhessiana
Subhessiana
Subhessiana
Subhessiana
Subhessiana
Subhessiana
Subhessiana
Subhessiana
Subhessiana
Subhessiana
Subhessiana
Subhessiana
Subhessiana
Subhessiana
Subhessiana
Subhessiana
Subhessiana
Subhessiana
Subhessiana
Subhessiana
Subhessiana
Subhessiana
Subhessiana
Subhessiana
Subhessiana
Subhessiana
Subhessiana
Subhessiana
Subhessiana
Subhessiana
Subhessiana
Subhessiana
Subhessiana
Subhessiana
Subhessiana
Subhessiana
Subhessiana
Subhessiana
Subhessiana
Subhessiana
Subhessiana
Subhessiana
Subhessiana
Subhessiana
Subhe

KeyboardInterrupt: 

### All batches

In [189]:
# successes = 0
# bad_idxs = []
# total = 0
# for i, inputs in enumerate(test_data):
#     print(i, end='\r')
#     outputs = model(inputs[0]).argmax(dim=1)
#     inputs_useful = inputs[0][outputs == 1]
#     for idx, p in enumerate(inputs_useful):
#         _, ep = newton_op(model, p, weights, 0.1) #if idx not in [103, 105, 237, 406, 417, 450] else None
#         # print("Person:", idx, "Success:", not ep)
#         successes += ep
#         total += 1
#         # if not ep:
#         #     bad_idxs.append(idx)
#     print(successes/total)
# print("Successes:", successes, "Total:", total)
# print("Success rate:", successes / total)