In [1]:
import TFO_dataset
from math import pi
from sklearn.gaussian_process import *
from inverse_modelling_tfo.data.intensity_interpolation import interpolate_exp_chunk, get_interpolate_fit_params
from inverse_modelling_tfo.data import normalize_zero_mean 
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
import numpy as np
from sklearn import preprocessing
from typing import Union, List

In [12]:
# Load the simulated intensity table.
# Alternative input file, kept for reference:
# train_data = pd.read_pickle(r'/home/rraiyan/personal_projects/tfo_inverse_modelling/data/intensity/intensity_averaged_sim_data.pkl')
train_data = pd.read_pickle(
    r'/home/rraiyan/personal_projects/tfo_inverse_modelling/data/intensity/intensity_summed_sim_data_equidistance_detector.pkl')

# Only for intensity_summed_sim_data_equidistance_detector.pkl:
# every SDD has its own number of detectors, so each intensity is divided by the
# detector count that belongs to its SDD.
detector_count = [11, 16, 22, 27, 32, 38, 43, 48, 53,
                  59, 64, 69, 75, 80, 85, 90, 96, 101, 106, 111]
# The SDD values of the first 20 rows are paired with the counts above by position.
# NOTE(review): this assumes those 20 rows hold the 20 distinct SDDs in matching order - confirm.
sdd = train_data['SDD'].to_numpy()[:20]
sdd_to_detector_count_map = dict(zip(sdd, detector_count))
train_data['Intensity'] /= train_data['SDD'].map(sdd_to_detector_count_map)

# For the other cases
# train_data['Intensity'] /= 20   # Normalize by the number of detectors per ring

# Photon count/Initial intensity
train_data['Intensity'] /= 1e9

# Fit parameters (alpha0..alpha3) for each simulated configuration
interpolated_training_data = get_interpolate_fit_params(train_data, weights=[1, -2])

interpolated_training_data.head()


Unnamed: 0,Wave Int,Uterus Thickness,Maternal Wall Thickness,Maternal Mu_a,Fetal Mu_a,alpha0,alpha1,alpha2,alpha3
0,2.0,5.0,26.0,0.005,0.05,-16.959114,1.036984,-29.212411,42.008351
1,2.0,5.0,26.0,0.007,0.05,-18.635541,1.097803,-31.440929,45.713937
2,2.0,5.0,26.0,0.009,0.05,-20.16195,1.15422,-33.478903,49.096596
3,2.0,5.0,26.0,0.005,0.07,-16.95921,1.036986,-29.212498,42.008514
4,2.0,5.0,26.0,0.007,0.07,-18.635603,1.097804,-31.440986,45.714042


In [13]:
# Summary statistics for every column of the fit-parameter table
interpolated_training_data.describe()

Unnamed: 0,Wave Int,Uterus Thickness,Maternal Wall Thickness,Maternal Mu_a,Fetal Mu_a,alpha0,alpha1,alpha2,alpha3
count,342.0,342.0,342.0,342.0,342.0,342.0,342.0,342.0,342.0
mean,1.5,5.0,20.0,0.007,0.07,-21.351539,1.161318,-34.392205,50.957188
std,0.500733,0.0,10.970502,0.001635,0.016354,4.335894,0.082025,4.378542,7.949997
min,1.0,5.0,2.0,0.005,0.05,-37.637443,1.023883,-50.998421,41.231932
25%,1.0,5.0,10.0,0.005,0.05,-22.552118,1.099423,-35.70411,45.769656
50%,1.5,5.0,20.0,0.007,0.07,-20.266092,1.154008,-33.536098,49.251434
75%,2.0,5.0,30.0,0.009,0.09,-18.65028,1.204571,-31.477528,52.981612
max,2.0,5.0,38.0,0.009,0.09,-16.532142,1.460359,-28.776783,81.243376


In [3]:

# Long format -> wide format: one row per tissue configuration, with the fit parameters of
# both wavelengths side by side (columns become (parameter, 'Wave Int') pairs).
# Note: this overwrites interpolated_training_data, so the cell expects the long-format table as input.
interpolated_training_data = (
    interpolated_training_data
    .pivot_table(
        index=['Uterus Thickness', 'Maternal Wall Thickness', 'Maternal Mu_a', 'Fetal Mu_a'],
        columns='Wave Int',
        values=['alpha0', 'alpha1', 'alpha2', 'alpha3'],
    )
    .reset_index()
)

# Show the two-level column index produced by the pivot
print(interpolated_training_data.columns)

def _renaming_func(x, y):
    if y == '':
        return f'{x}'
    else:
        return f'{x}_{int(y)}'


# Collapse every (name, wave int) column pair into a flat label such as 'alpha0_1'
interpolated_training_data.columns = [
    _renaming_func(param_name, wave_int)
    for (param_name, wave_int) in interpolated_training_data.columns
]
interpolated_training_data.head()


MultiIndex([(       'Uterus Thickness',  ''),
            ('Maternal Wall Thickness',  ''),
            (          'Maternal Mu_a',  ''),
            (             'Fetal Mu_a',  ''),
            (                 'alpha0', 1.0),
            (                 'alpha0', 2.0),
            (                 'alpha1', 1.0),
            (                 'alpha1', 2.0),
            (                 'alpha2', 1.0),
            (                 'alpha2', 2.0),
            (                 'alpha3', 1.0),
            (                 'alpha3', 2.0)],
           names=[None, 'Wave Int'])


Unnamed: 0,Uterus Thickness,Maternal Wall Thickness,Maternal Mu_a,Fetal Mu_a,alpha0_1,alpha0_2,alpha1_1,alpha1_2,alpha2_1,alpha2_2,alpha3_1,alpha3_2
0,5.0,2.0,0.005,0.05,-18.731813,-20.444467,1.136306,1.165762,-33.474455,-38.806152,48.387194,56.459243
1,5.0,2.0,0.005,0.07,-19.993693,-23.640858,1.140056,1.240522,-34.389651,-41.728256,50.292328,61.884206
2,5.0,2.0,0.005,0.09,-21.041806,-25.905711,1.147346,1.294433,-35.192921,-43.812067,51.919289,65.743294
3,5.0,2.0,0.007,0.05,-18.779575,-20.207374,1.150033,1.170677,-33.72377,-38.781909,48.693747,56.276399
4,5.0,2.0,0.007,0.07,-20.033502,-23.426282,1.153174,1.24565,-34.626898,-41.720371,50.580324,61.734832


In [4]:
# Fit on features taken from both wavelengths

# Features: ratio of the two alpha0 terms plus the remaining fit parameters, standardized
interpolated_training_data['Bias Ratio'] = (
    interpolated_training_data['alpha0_1'] / interpolated_training_data['alpha0_2'])

X = interpolated_training_data[
    ['Bias Ratio', 'alpha1_1', 'alpha1_2', 'alpha2_1', 'alpha2_2', 'alpha3_1', 'alpha3_2']
].to_numpy()
alpha_scaler = preprocessing.StandardScaler()
X = alpha_scaler.fit_transform(X)

# Target: 'Fetal Mu_a', standardized with its own scaler
# Alternative target, kept for reference:
# y = interpolated_training_data[['Maternal Mu_a']].to_numpy().flatten()
y = interpolated_training_data[['Fetal Mu_a']].to_numpy()
y_scaler = preprocessing.StandardScaler()
y = y_scaler.fit_transform(y).ravel()

# Shuffle the sample indices with a fixed seed, then split them
rng = np.random.RandomState(1)
random_indices = rng.choice(np.arange(y.size), size=y.size, replace=False)
# The factor is 1, so ALL samples go to training and the test split below is empty
training_count = int(y.size * 1)
training_indices, test_indices = random_indices[:training_count], random_indices[training_count:]

X_train, X_test = X[training_indices], X[test_indices]
y_train, y_test = y[training_indices], y[test_indices]

In [5]:
# Gaussian-process regressor fitted on (X_train, y_train).
# Alternative kernel, kept for reference:
# kernel = 1 * kernels.RBF(length_scale=1.0, length_scale_bounds=(1e-4, 1e1))
# NOTE(review): the inputs are standardized, yet the length scale is confined to [1e-6, 1e-2].
# Confirm this range is intended; consider widening the bounds if predictions on new inputs
# come back equal to the prior mean.
kernel = 1 * kernels.Matern(length_scale=1.0, length_scale_bounds=(1e-6, 1e-2))
# random_state pins the starting points drawn for the optimizer restarts, so the fitted
# hyper-parameters are identical on every Restart & Run All.
gp = GaussianProcessRegressor(kernel=kernel, n_restarts_optimizer=9, random_state=1)
gp.fit(X_train, y_train)
gp.kernel_

1**2 * Matern(length_scale=8.82e-06, nu=1.5)

In [6]:
# # Predict on Simulation
# X_test = X_train
# y_test = y_train
# mean_prediction, std_prediction = gp.predict(X_test, return_std=True)
# mae = np.abs(mean_prediction - y_test)
# mse = np.square(mean_prediction - y_test)
# df = pd.DataFrame(
#     {
#         'True a0' : X_test[:, 0],
#         'True a1' : X_test[:, 1],
#         'True a2' : X_test[:, 2],
#         'True a3' : X_test[:, 3],
#         'True y'  : y_test,
#         'Prediction' : mean_prediction,
#         'Confidence' : std_prediction,
#         'MAE(%)' : mae * 100,
#         'MSE(%)' : mse * 100,
#     }
# )
# pd.set_option('display.max_rows', 1200)
# df

In [7]:
# df['MAE(%)'].describe()

In [8]:
def prepare_patient_ppg(ppg_data : pd.DataFrame, sample_number : Union[int, List], SDD = [15, 30, 45, 70, 100]) -> np.ndarray:
    """Prepare PPG data to be used directly in the GPR prediction.

    For every requested sample the row is scaled by ``4 * pi``, split into a first half
    (wavelength 1) and a second half (wavelength 2), and each half is passed together with
    the detector distances to ``interpolate_exp_chunk``. The returned fit parameters are
    combined into one feature row:
    ``[a0_wv1 / a0_wv2, a1_wv1, a1_wv2, a2_wv1, a2_wv2, a3_wv1, a3_wv2]``.

    Args:
        ppg_data (pd.DataFrame): PPG data Dataframe. You can feed data directly from the
            TFO_dataset package. (Note: This should ideally be the optically normalized data.)
            Each row must hold ``2 * len(SDD)`` values: wavelength 1 first, then wavelength 2.
        sample_number (Union[int, List]): positional row index/indices to use. Either a single
            integer (Python or NumPy) or an iterable of integers.
        SDD (list, optional): Detector distances in the TFO device (in mm).
            Defaults to [15, 30, 45, 70, 100].

    Returns:
        np.ndarray: one row of 7 features per requested sample, in the order given above.

    Raises:
        ValueError: if a selected row does not contain exactly ``2 * len(SDD)`` values.
    """
    # Accept a lone index as well; NumPy integers are not instances of `int`, so check both
    if isinstance(sample_number, (int, np.integer)):
        sample_number = [sample_number]

    detector_distances = list(SDD)  # private copy; the default list is never modified
    n_detectors = len(detector_distances)

    patient_features = []
    for sample_point in sample_number:
        # Pick a point in time; copy so that the caller's frame is left untouched
        spatial_intensity = ppg_data.iloc[sample_point].copy()
        spatial_intensity *= pi * 4   # from unit area -> pi r^2 area -> match simulation
        row_values = spatial_intensity.to_numpy()
        if row_values.size != 2 * n_detectors:
            raise ValueError(
                f'Expected {2 * n_detectors} intensity values per sample '
                f'(2 wavelengths x {n_detectors} detectors), got {row_values.size}')

        # Reshape ppg data to fit the format: one (SDD, Intensity) table per wavelength
        spatial_intensity_wv1 = pd.DataFrame(data={
            'SDD' : detector_distances,
            'Intensity' : row_values[:n_detectors]
        })
        spatial_intensity_wv2 = pd.DataFrame(data={
            'SDD' : detector_distances,
            'Intensity' : row_values[n_detectors:]
        })
        alpha_wv1 = interpolate_exp_chunk(spatial_intensity_wv1, weights=[1.0, -2.0], return_alpha=True).flatten()
        alpha_wv2 = interpolate_exp_chunk(spatial_intensity_wv2, weights=[1.0, -2.0], return_alpha=True).flatten()
        # Ratio of the two alpha0 terms first, then the remaining parameters interleaved by wavelength
        patient_features.append([alpha_wv1[0]/alpha_wv2[0], alpha_wv1[1], alpha_wv2[1], alpha_wv1[2], alpha_wv2[2], alpha_wv1[3], alpha_wv2[3]])

    return np.array(patient_features)
    

In [9]:
# Predict on real-life data: pick one sheep recording and print its identifying tuple
sheep_id = 23
data = TFO_dataset.SheepData('iq_demod_optical').get(sheep_id)
print(TFO_dataset.SheepData('iq_demod_optical').get_tuple(sheep_id))

# Feature rows for three samples of the recording
features = prepare_patient_ppg(data, [1000, 2000, 3000])

# Show the raw (not yet standardized) features in a labelled table
print('Non- normalized Features')
feature_names = [f'f{i + 1}' for i in range(7)]
feature_df = pd.DataFrame(features, columns=feature_names)
print(feature_df)

# Standardize with the scaler fitted on the training features; `features` is overwritten
features = alpha_scaler.transform(features)

(2, 1, 'sp2021', 'Recovery')
Non- normalized Features
         f1        f2        f3          f4          f5          f6  \
0  0.922549 -7.762497 -8.428254  432.869728  469.762372 -918.673743   
1  0.952050 -8.270747 -8.690338  460.292467  483.691475 -975.992748   
2  0.913166 -7.338907 -8.055188  409.441753  449.065345 -869.434792   

            f7  
0  -996.156135  
1 -1025.166854  
2  -952.622172  


In [10]:
# With return_cov=True, `confidence` holds the covariance matrix across the query rows
estimate, confidence = gp.predict(features, return_cov=True)
# estimate, confidence = gp.predict(X_train[0, :].reshape(1, -1), return_std=True)

# Map the standardized predictions back to the original target scale before printing
print(y_scaler.inverse_transform(np.asarray(estimate).reshape(-1, 1)))
print(confidence)

[[0.07]
 [0.07]
 [0.07]]
[[1.00164536 0.         0.        ]
 [0.         1.00164536 0.        ]
 [0.         0.         1.00164536]]


In [11]:
# Per-feature mean and variance stored in alpha_scaler after fitting on the training features
print(f'alpha means (From training) : {alpha_scaler.mean_}')
print(f'alpha variance (From training) : {alpha_scaler.var_}')

alpha means (From training) : [  0.94523291   1.16150141   1.16113513 -33.69234246 -35.0920684
  49.50365467  52.41072202]
alpha variance (From training) : [1.36340464e-02 2.78140933e-03 1.06352380e-02 5.24406179e+00
 3.20074684e+01 1.63604967e+01 1.05449276e+02]
