# Frequency Test
## Validating the Randomness of the Bit Arrays

In [25]:
import numpy as np
import matplotlib.pyplot as plt
from scipy.integrate import quad


import logging

In [2]:
# Mount Google Drive at /content/drive; the data folder used in this file
# lives under that mount point.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [7]:
# Directory prefix of the processed bit files; it ends with "/" because file
# names are appended to it by plain string concatenation.
FOLDER_PATH = r"/content/drive/MyDrive/LAB6/data/02_processed/"

In [8]:
# File names (relative to FOLDER_PATH) of the bit sequences under test.
# NOTE(review): "25mm"/"65mm" presumably denote the cesium source distance —
# confirm against the acquisition notes.
BACKGROUND_FILE = "bit_fundo.csv"
CESIUM_25MM_FILE = "bit_25mm_cs.csv"
CESIUM_65MM_FILE = "bit_65mm_cs.csv"

In [86]:
# Read the background file line by line; every character that remains after
# stripping surrounding whitespace from a line is converted with int() and
# collected, in file order, into one flat array.
with open(FOLDER_PATH + BACKGROUND_FILE, "r") as my_file:
    bit_background = np.array(
        [int(symbol) for row in my_file for symbol in row.strip()]
    )

In [58]:
# Parse the space-delimited integer file and collapse it into a 1-D array.
bit_25mm_cs = np.loadtxt(
    f"{FOLDER_PATH}{CESIUM_25MM_FILE}", delimiter=" ", dtype=int
).flatten()

In [60]:
# Parse the space-delimited integer file and collapse it into a 1-D array.
bit_65mm_cs = np.loadtxt(
    f"{FOLDER_PATH}{CESIUM_65MM_FILE}", delimiter=" ", dtype=int
).flatten()

In [34]:
def erfc(s):
    """Complementary error function.

    Evaluates ``erfc(s) = (2 / sqrt(pi)) * integral from s to infinity of
    exp(-u**2) du``.

    Args:
        s: Real scalar lower limit of the integral.

    Returns:
        The value of ``erfc(s)`` as a float.
    """
    # Local import keeps this block self-contained. The standard-library
    # routine evaluates the same function directly, instead of re-running an
    # adaptive quadrature over an infinite range on every call.
    import math

    return math.erfc(s)

In [74]:
def frequency_test(bit_array: np.ndarray, n_bit_streams: int = 2) -> np.ndarray:
    """Run the frequency (monobit) test on equal-length slices of a bit sequence.

    The input is split into ``n_bit_streams`` consecutive streams of equal
    length. For each stream the statistic
    ``s_obs = |#ones - #zeros| / sqrt(stream length)`` is computed and
    converted to a p-value with ``erfc(s_obs / sqrt(2))``.

    Args:
        bit_array: Bit sequence; anything ``np.asarray`` accepts. The input
            is not modified.
        n_bit_streams: Number of equal-length streams to split the input into.

    Returns:
        1-D float array holding one p-value per stream, in stream order.

    Raises:
        ValueError: If the input cannot be reshaped into ``n_bit_streams``
            rows of equal length (raised by ``reshape``).
    """
    streams = np.asarray(bit_array).reshape(n_bit_streams, -1)

    # Count inside NumPy: the builtin sum() would walk every element of the
    # boolean arrays as an individual Python object.
    ones = np.count_nonzero(streams == 1, axis=1)
    zeros = np.count_nonzero(streams == 0, axis=1)
    s_obs = np.abs(ones - zeros) / np.sqrt(streams.shape[1])

    # erfc is called once per stream because it is only relied on to accept
    # a scalar argument.
    pvalue_array = np.zeros(n_bit_streams, dtype=float)
    for i, statistic in enumerate(s_obs / np.sqrt(2)):
        pvalue_array[i] = erfc(statistic)

    return pvalue_array

In [97]:
# P-values of the frequency test with the background bits split into 10 streams.
frequency_test(bit_background, n_bit_streams=10)

array([0.31731051, 0.76417716, 0.84148058, 0.80258735, 0.34211225,
       0.68915652, 0.88076462, 0.61707508, 0.61707508, 0.51569222])

In [101]:
# Count how many of the 1000 streams have a p-value above 0.01.
sum(frequency_test(bit_25mm_cs, n_bit_streams=1000)>0.01)

997

In [100]:
# Count how many of the 1000 streams have a p-value above 0.01.
sum(frequency_test(bit_65mm_cs, n_bit_streams=1000)>0.01)

995