# Synthetic Validations

## Setup Environment

In [1]:
import pandas as pd
import tensorflow as tf
import tensorflow_probability as tfp

## Reasonable Validation

In [2]:
def gen_reasonable_validation(confidence):
    alpha = tfp.distributions.Normal(loc=0, scale=1).sample(confidence.shape)
    beta = tfp.distributions.Normal(loc=-3, scale=0.1).sample(confidence.shape)
    
    logistic_p = 1./(1.+tf.exp(beta * confidence + alpha))
    validation = tfp.distributions.Bernoulli(probs=logistic_p).sample()
    
    return validation
    

In [3]:
gen_reasonable_validation(tf.constant([1.]*10)).numpy()

array([1, 1, 0, 1, 1, 1, 0, 1, 1, 1], dtype=int32)

In [4]:
gen_reasonable_validation(tf.constant([0.5]*10)).numpy()

array([0, 1, 1, 0, 1, 1, 1, 1, 1, 1], dtype=int32)

In [5]:
gen_reasonable_validation(tf.constant([0.]*10)).numpy()

array([1, 0, 1, 1, 0, 1, 0, 0, 1, 0], dtype=int32)

## Random Validation

In [6]:
def gen_random_validation(confidence):
    validation = tfp.distributions.Binomial(
        total_count=1,
        probs=.5,
    ).sample(confidence.shape)
    
    return validation

In [7]:
gen_random_validation(tf.constant([1.]*10)).numpy()

array([0., 1., 1., 1., 1., 1., 1., 0., 0., 1.], dtype=float32)

In [8]:
gen_random_validation(tf.constant([0.5]*10)).numpy()

array([0., 1., 1., 0., 0., 0., 0., 0., 1., 1.], dtype=float32)

In [9]:
gen_random_validation(tf.constant([0.]*10)).numpy()

array([0., 1., 1., 0., 1., 0., 1., 1., 1., 1.], dtype=float32)

## Generate Data

In [10]:
NUM_SAMPLES = 1000

In [11]:
uniform = tfp.distributions.Uniform(
    low=0.,
    high=1.,
)

In [12]:
reasonable_confidence = uniform.sample(NUM_SAMPLES)
reasonable_validation = gen_reasonable_validation(reasonable_confidence)

print('Confidence:\n', reasonable_confidence.numpy()[:10])
print('Validation:\n', reasonable_validation.numpy()[:10])

Confidence:
 [0.0399965  0.6036886  0.86811566 0.13329482 0.36443937 0.5811448
 0.8931091  0.08046353 0.4354422  0.15751529]
Validation:
 [0 1 1 1 1 0 1 1 1 0]


In [13]:
random_confidence = uniform.sample(NUM_SAMPLES)
random_validation = gen_random_validation(random_confidence)

print('Confidence:\n', random_confidence.numpy()[:10])
print('Validation:\n', random_validation.numpy()[:10])

Confidence:
 [0.4533124  0.45111668 0.24680614 0.00626028 0.9502162  0.32123518
 0.29199398 0.90561545 0.2606982  0.36115336]
Validation:
 [0. 0. 0. 1. 0. 0. 0. 0. 0. 1.]


### Write output

In [14]:
reasonable_data = pd.DataFrame(
    {
        "confidence": reasonable_confidence,
        "validation": reasonable_validation,
    },
)
reasonable_data

Unnamed: 0,confidence,validation
0,0.039997,0
1,0.603689,1
2,0.868116,1
3,0.133295,1
4,0.364439,1
...,...,...
995,0.767571,1
996,0.313861,1
997,0.196307,0
998,0.718977,1


In [15]:
random_data = pd.DataFrame(
    {
        "confidence": random_confidence,
        "validation": random_validation,
    },
)
random_data

Unnamed: 0,confidence,validation
0,0.453312,0.0
1,0.451117,0.0
2,0.246806,0.0
3,0.006260,1.0
4,0.950216,0.0
...,...,...
995,0.974103,1.0
996,0.179485,1.0
997,0.802218,1.0
998,0.994755,1.0


In [16]:
reasonable_data.to_csv("validations/reasonable.csv")
random_data.to_csv("validations/random.csv")