# L4 Distribution Visualization
Visualize what the L4 distribution looks like and run some sanity checks on it.

In [11]:
from pathlib import Path
import json
import pandas as pd

# Locate the pickled L4 distribution table. The path is resolved relative to the
# current working directory (two levels up, then data/processed_data).
l4_dist_data_path = Path().resolve().parent.parent / "data" / "processed_data" / "l4_bins.pkl"
# NOTE(review): unpickling can execute arbitrary code -- only load files from a trusted source.
l4_dist_term = pd.read_pickle(l4_dist_data_path)

# The JSON config sits next to the pickle and shares its file stem.
l4_dist_config_path = l4_dist_data_path.with_suffix('.json')
with open(l4_dist_config_path, 'r') as file:
    l4_dist_config = json.load(file)
chosen_detector_index = l4_dist_config['chosen_detector_index']

# Split by wavelength
# Feature names from the config are converted to str before being used as column labels.
l4_dist_columns = [str(x) for x in l4_dist_config['features']]
selected_columns = ['Maternal Wall Thickness'] + l4_dist_columns
# A single .loc selection followed by .copy() yields independent frames, so adding
# columns to them later neither triggers SettingWithCopyWarning nor touches l4_dist_term.
wv1_l4_dist = l4_dist_term.loc[l4_dist_term['Wave Int'] == 1, selected_columns].copy()
wv2_l4_dist = l4_dist_term.loc[l4_dist_term['Wave Int'] == 2, selected_columns].copy()

In [12]:
# Preview the first five rows of the wavelength-1 table.
wv1_l4_dist.head(n=5)

Unnamed: 0,Maternal Wall Thickness,0.0,1.45708,2.7891,5.33881,10.21937,19.56159,37.44415,71.67437,137.19674,262.61751
2,6,0.312103,0.013937,0.022342,0.032796,0.045969,0.061152,0.076766,0.089899,0.095177,0.088883
3,16,0.767299,0.004674,0.007378,0.010559,0.01416,0.018154,0.022445,0.026212,0.02877,0.029018
6,11,0.597701,0.00859,0.013639,0.019443,0.026307,0.03397,0.041664,0.048322,0.051934,0.050519
7,18,0.810505,0.003702,0.005788,0.008276,0.011113,0.014291,0.017639,0.020824,0.02311,0.023594
12,31,0.938607,0.000931,0.001449,0.002086,0.002882,0.003761,0.004784,0.005836,0.006846,0.007516


In [13]:
# Preview the first five rows of the wavelength-2 table.
wv2_l4_dist.head(n=5)

Unnamed: 0,Maternal Wall Thickness,0.0,1.45708,2.7891,5.33881,10.21937,19.56159,37.44415,71.67437,137.19674,262.61751
0,14,0.707548,0.00589,0.009654,0.014133,0.019081,0.024692,0.030225,0.035007,0.037641,0.037014
1,8,0.414523,0.0121,0.020084,0.029892,0.041073,0.054029,0.066543,0.076601,0.079856,0.074127
4,7,0.351096,0.013348,0.022004,0.032939,0.045684,0.060485,0.075219,0.086315,0.089818,0.082412
5,29,0.931399,0.001036,0.001699,0.002474,0.003462,0.004518,0.005767,0.007014,0.008006,0.008657
8,13,0.671919,0.006681,0.010967,0.016061,0.021746,0.02821,0.034467,0.039901,0.042486,0.0414


# The Excess (1 - Sum of All Bins) Should Always Be a Small Positive Number

In [17]:
# Calculate the excess for both of these distributions.
# Excess = 1 - (row-wise sum of the bin columns). `.assign` returns a new frame
# instead of writing a column into a frame that may be a slice of another one,
# which avoids SettingWithCopyWarning and keeps this cell safe to re-run.
wv1_l4_dist = wv1_l4_dist.assign(Excess=1.0 - wv1_l4_dist[l4_dist_columns].sum(axis=1))
wv2_l4_dist = wv2_l4_dist.assign(Excess=1.0 - wv2_l4_dist[l4_dist_columns].sum(axis=1))

# Sanity check: the bins of a row must never sum to more than 1.
assert (wv1_l4_dist["Excess"] >= 0).all(), "Negative excess found in wavelength-1 distribution"
assert (wv2_l4_dist["Excess"] >= 0).all(), "Negative excess found in wavelength-2 distribution"

# Describe the excess
wv1_l4_dist["Excess"].describe()

count    31.000000
mean      0.073413
std       0.048739
min       0.021194
25%       0.033640
50%       0.056663
75%       0.103566
max       0.181847
Name: Excess, dtype: float64

In [18]:
# Summary statistics of the excess column for the wavelength-2 table.
wv2_l4_dist.loc[:, "Excess"].describe()

count    31.000000
mean      0.068099
std       0.045237
min       0.019047
25%       0.030624
50%       0.052474
75%       0.098418
max       0.161026
Name: Excess, dtype: float64