In [1]:
import sys
sys.path.append('../')
from aqua.data.preset_dataloaders import load_credit_fraud, load_adult
from aqua.evaluation.uniform_noise import UniformNoise
from aqua.configs import data_configs
from sklearn.metrics import confusion_matrix
import numpy as np

ImportError: libffi.so.7: cannot open shared object file: No such file or directory

In [8]:
# Synthetic multi-class dataset: uniform random features in [0, 1) and class
# labels drawn uniformly from {0, ..., k-1}.
n = 10000 # Number of data points
k = 5 # Number of classes
f = 64 # Number of features
noise_rate = 0.2

# A dedicated, seeded Generator makes the data identical after every kernel
# restart and leaves NumPy's global RNG state untouched.
SEED = 0
rng = np.random.default_rng(SEED)

X = rng.random((n, f))
# Sample class indices directly rather than arg-maxing n one-hot multinomial draws.
y = rng.integers(0, k, size=n)

In [5]:
# Testing script: sanity-check UniformNoise on a small synthetic binary problem.

# Seed NumPy's global RNG so the statistical assertions below do not fail
# intermittently from sampling variation.
# NOTE(review): assumes UniformNoise draws from np.random's global state — TODO confirm.
np.random.seed(0)

# Generate fake data
n = 1000 # Number of data points
k = 2 # Number of classes
f = 25 # Number of features
noise_rate = 0.2

X = np.random.random((n, f))
# Balanced integer class labels: the first half is class 1, the rest class 0.
# Built as int up front so every call below receives the same label dtype.
y = np.zeros(n, dtype=int)
y[:n//2] = 1

# Noise injection
noise_obj = UniformNoise(n_classes=k, noise_rate=noise_rate)
print('Noise transition matrix:\n', noise_obj.noise_transition_matrix)

# Add noise
noisy_X, noisy_y = noise_obj.add_noise(X=X, y=y)

assert np.allclose(X, noisy_X), "Features should not change"

estimated_noise_rate = noise_obj.estimate_noise_rate(y=y, noisy_y=noisy_y)
print(f'Estimated noise rate: {estimated_noise_rate}')
# NOTE(review): this cell's saved output shows a fraction (0.177), but a later
# cell's saved output shows estimate_noise_rate returning ~20.5 for the same
# 0.2 noise rate — confirm which scale the current implementation uses.
assert np.abs(estimated_noise_rate - noise_rate) < 0.05, "Added and estimate noise rate should be close"

empirical_noise_transition_matrix = noise_obj.estimate_noise_transition_matrix(y=y, noisy_y=noisy_y)
print(f'Estimated noise transition matrix:\n {empirical_noise_transition_matrix}')
assert np.allclose(empirical_noise_transition_matrix , noise_obj.noise_transition_matrix, atol=0.05), "Added and estimated noise transition matrices should be close"

Noise transition matrix:
 [[0.8 0.2]
 [0.2 0.8]]
Estimated noise rate: 0.177
Estimated noise transition matrix:
 [[0.81481481 0.18518519]
 [0.16837782 0.83162218]]




In [24]:
# Load the Adult train/test splits and read the class count from its config.
train_data, test_data = load_adult(cfg=data_configs['adult'])
n_classes = data_configs['adult']['out_classes']

# Synthetic balanced labels (not the dataset's own labels): 1.0 for the first
# half of the training rows, 0.0 for the remainder.
y = (np.arange(len(train_data.labels)) < len(train_data.labels) // 2).astype(float)

In [26]:
# UniformNoise instance sized to the Adult config's class count, with a 0.2 noise rate.
noise_obj = UniformNoise(n_classes=n_classes, noise_rate=0.2)

In [27]:
# Display the transition matrix held by the noise object.
noise_obj.noise_transition_matrix

array([[0.8, 0.2],
       [0.2, 0.8]])

In [28]:
# Pass the training features and the synthetic labels `y` through add_noise,
# which returns a (features, labels) pair.
noisy_X, noisy_y = noise_obj.add_noise(X=train_data.data, y=y)



In [29]:
# Check that the features returned by add_noise are numerically equal to the inputs.
np.allclose(train_data.data, noisy_X)

True

In [30]:
# Cross-tabulate the clean labels (rows) against the noisy labels (columns).
confusion_matrix(y_true=y, y_pred=noisy_y)

array([[12948,  3333],
       [ 3351, 12929]])

In [32]:
# Noise rate estimated from the clean and noisy label vectors.
# NOTE(review): the saved output of this cell is ~20.5, whereas the earlier
# testing cell's saved output for the same 0.2 rate is 0.177 — confirm whether
# the method returns a fraction or a percentage.
noise_obj.estimate_noise_rate(y=y, noisy_y=noisy_y)

20.52762507294002

In [33]:
# Per-sample indicator: 1 where the noisy label differs from the clean one, else 0.
# The comparison must use `y`, the label vector that was actually passed to
# add_noise; `train_data.labels` is a different vector, so differences against
# it would not identify which labels were corrupted.
noise_or_not = (y != noisy_y).astype(int)

In [38]:
# Empirical transition matrix from clean vs. noisy labels. `y` was created
# with np.zeros (float), so it is cast to int for this call.
empirical_noise_transition_matrix = noise_obj.estimate_noise_transition_matrix(y=y.astype(int), noisy_y=noisy_y)

In [39]:
# Show the empirical transition matrix rounded to three decimal places.
np.round(empirical_noise_transition_matrix, decimals=3)

array([[0.794, 0.206],
       [0.205, 0.795]])