# Synthetic Validations

## Setup Environment

In [3]:
import pandas as pd
import tensorflow as tf
import tensorflow_probability as tfp

## Reasonable Validation

In [4]:
def gen_reasonable_validation(confidence):
    alpha = tfp.distributions.Normal(loc=0, scale=0.1).sample(confidence.shape)
    beta = tfp.distributions.Normal(loc=-3, scale=0.01).sample(confidence.shape)
    
    logistic_p = 1./(1.+tf.exp(beta * confidence + alpha))
    validation = tfp.distributions.Bernoulli(probs=logistic_p).sample()
    
    return validation
    

In [10]:
gen_reasonable_validation(tf.constant([1.]*10)).numpy()

array([1, 1, 1, 1, 1, 1, 1, 1, 1, 1], dtype=int32)

In [14]:
gen_reasonable_validation(tf.constant([0.5]*10)).numpy()

array([1, 1, 1, 1, 1, 0, 1, 1, 0, 0], dtype=int32)

In [22]:
gen_reasonable_validation(tf.constant([0.]*10)).numpy()

array([0, 0, 1, 0, 0, 0, 1, 1, 1, 0], dtype=int32)

## Random Validation

In [23]:
def gen_random_validation(confidence):
    validation = tfp.distributions.Binomial(
        total_count=1,
        probs=.5,
    ).sample(confidence.shape)
    
    return validation

In [24]:
gen_random_validation(tf.constant([1.]*10)).numpy()

array([1., 1., 1., 1., 1., 1., 1., 1., 0., 1.], dtype=float32)

In [25]:
gen_random_validation(tf.constant([0.5]*10)).numpy()

array([0., 0., 0., 1., 1., 1., 0., 1., 0., 1.], dtype=float32)

In [26]:
gen_random_validation(tf.constant([0.]*10)).numpy()

array([1., 1., 1., 0., 1., 0., 0., 1., 0., 1.], dtype=float32)

## Generate Data

In [27]:
NUM_SAMPLES = 1000

In [28]:
uniform = tfp.distributions.Uniform(
    low=0.,
    high=1.,
)

In [29]:
reasonable_confidence = uniform.sample(NUM_SAMPLES)
reasonable_validation = gen_reasonable_validation(reasonable_confidence)

print('Confidence:\n', reasonable_confidence.numpy()[:10])
print('Validation:\n', reasonable_validation.numpy()[:10])

Confidence:
 [0.11102808 0.5427574  0.17898357 0.8609053  0.6965505  0.07935023
 0.36490715 0.15408278 0.9153838  0.68575895]
Validation:
 [0 1 1 1 1 1 1 1 1 1]


In [30]:
random_confidence = uniform.sample(NUM_SAMPLES)
random_validation = gen_random_validation(random_confidence)

print('Confidence:\n', random_confidence.numpy()[:10])
print('Validation:\n', random_validation.numpy()[:10])

Confidence:
 [0.9305465  0.10123158 0.3935758  0.51589155 0.48255086 0.25689507
 0.82058    0.24733055 0.7059243  0.03914046]
Validation:
 [0. 0. 1. 1. 0. 1. 0. 1. 0. 1.]


### Write output

In [31]:
reasonable_data = pd.DataFrame(
    {
        "confidence": reasonable_confidence,
        "validation": reasonable_validation,
    },
)
reasonable_data

Unnamed: 0,confidence,validation
0,0.111028,0
1,0.542757,1
2,0.178984,1
3,0.860905,1
4,0.696550,1
...,...,...
995,0.276820,1
996,0.175565,0
997,0.323033,1
998,0.715153,1


In [32]:
random_data = pd.DataFrame(
    {
        "confidence": random_confidence,
        "validation": random_validation,
    },
)
random_data

Unnamed: 0,confidence,validation
0,0.930547,0.0
1,0.101232,0.0
2,0.393576,1.0
3,0.515892,1.0
4,0.482551,0.0
...,...,...
995,0.343078,1.0
996,0.117733,1.0
997,0.862322,1.0
998,0.969050,0.0


In [33]:
reasonable_data.to_csv("validations/reasonable.csv", index=False)
random_data.to_csv("validations/random.csv", index=False)