In [1]:
import TFO_dataset
from math import pi
from sklearn.gaussian_process import *
from inverse_modelling_tfo.data.intensity_interpolation import interpolate_exp_chunk, get_interpolate_fit_params
from inverse_modelling_tfo.data import normalize_zero_mean 
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
import numpy as np
from sklearn import preprocessing
from typing import Union, List

In [2]:
train_data = pd.read_pickle(r'/home/rraiyan/personal_projects/tfo_inverse_modelling/data/intensity/intensity_averaged_sim_data.pkl')
train_data['Intensity'] /= 20   # Normalize by the number of detectors per ring

# Make the TMPs zero mean

train_data.head()
interpolated_training_data = get_interpolate_fit_params(train_data, weights=[1, -2])

# Incorporate both wavelengths by moving to a Wide Format from Long Format
interpolated_training_data = interpolated_training_data.pivot_table(index=['Uterus Thickness', 'Maternal Wall Thickness', 'Maternal Mu_a', 'Fetal Mu_a'], columns='Wave Int', values=['alpha0', 'alpha1', 'alpha2', 'alpha3']).reset_index()
def _renaming_func(x, y):
    if y == '':
        return f'{x}'
    else:
        return f'{x}_{int(y)}'
interpolated_training_data.columns = [_renaming_func(x, y) for x,y in interpolated_training_data.columns]
interpolated_training_data.head()

Unnamed: 0,Uterus Thickness,Maternal Wall Thickness,Maternal Mu_a,Fetal Mu_a,alpha0_1,alpha0_2,alpha1_1,alpha1_2,alpha2_1,alpha2_2,alpha3_1,alpha3_2
0,5.0,2.0,0.005,0.05,-12.592608,3.395675,0.373749,0.413286,-14.666515,-10.940015,23.954485,10.568332
1,5.0,2.0,0.005,0.055,-13.102171,2.622967,0.388989,0.431987,-15.231653,-11.677655,24.949038,11.923494
2,5.0,2.0,0.005,0.06,-13.573916,1.912768,0.402962,0.449012,-15.751852,-12.352166,25.866022,13.164688
3,5.0,2.0,0.005,0.065,-14.013569,1.251053,0.41586,0.464721,-16.233896,-12.977252,26.717161,14.316873
4,5.0,2.0,0.005,0.07,-14.42541,0.62806,0.427827,0.479371,-16.682951,-13.562583,27.511341,15.39759


In [3]:
# Fitting only on both WV

# Create Features & Normalize the fitting params
interpolated_training_data['Bias Ratio'] = interpolated_training_data['alpha0_1'] / interpolated_training_data['alpha0_2'] 

X = interpolated_training_data[['Bias Ratio', 'alpha1_1', 'alpha1_2', 'alpha2_1', 'alpha2_2', 'alpha3_1', 'alpha3_2']].to_numpy()
alpha_scaler = preprocessing.StandardScaler().fit(X)
X = alpha_scaler.transform(X)

y = interpolated_training_data[['Maternal Wall Thickness']].to_numpy().flatten()
# y = interpolated_training_data[['Maternal Mu_a']].to_numpy().flatten()
y_scaler = preprocessing.StandardScaler().fit(y.reshape(-1, 1))
y = y_scaler.transform(y.reshape(-1, 1)).flatten()

rng = np.random.RandomState(1)
random_indices = rng.choice(np.arange(y.size), size=y.size, replace=False)
training_count = int(y.size * 1)  # 80% Training Data
training_indices = random_indices[:training_count]
test_indices = random_indices[training_count:]

X_train, y_train = X[training_indices], y[training_indices]
X_test, y_test = X[test_indices], y[test_indices]

In [4]:
# kernel = 1 * kernels.RBF(length_scale=1.0, length_scale_bounds=(1e-4, 1e1))
kernel = 1 * kernels.Matern(length_scale=1.0, length_scale_bounds=(1e-4, 1e1))
gp = GaussianProcessRegressor(kernel=kernel, n_restarts_optimizer=9)
gp.fit(X_train, y_train)
gp.kernel_

62.5**2 * Matern(length_scale=6.47, nu=1.5)

In [5]:
# # PRedict on Simulation
# X_test = X_train
# y_test = y_train
# mean_prediction, std_prediction = gp.predict(X_test, return_std=True)
# mae = np.abs(mean_prediction - y_test)
# mse = np.square(mean_prediction - y_test)
# df = pd.DataFrame(
#     {
#         'True a0' : X_test[:, 0],
#         'True a1' : X_test[:, 1],
#         'True a2' : X_test[:, 2],
#         'True a3' : X_test[:, 3],
#         'True y'  : y_test,
#         'Prediction' : mean_prediction,
#         'Confidence' : std_prediction,
#         'MAE(%)' : mae * 100,
#         'MSE(%)' : mse * 100,
#     }
# )
# pd.set_option('display.max_rows', 1200)
# df

In [6]:
# df['MAE(%)'].describe()

In [7]:
def prepare_patient_ppg(ppg_data : pd.DataFrame, sample_number : Union[int, List], SDD = [15, 30, 45, 70, 100]) -> np.ndarray:
    """Prepare PPG data to be used directly into the GPR prediction.

    Args:
        ppg_data (pd.DataFrame): PPG data Dataframe. You can feed data directly from the the TFO_dataset package.
        (Note: This should ideally be the optically normalized data)
        sample_number (int): which sample to choose. You can either pass a single integer or an array
        SDD (_type_, optional): Detector distances in TFO device(in mm). Defaults to SDD=[15, 30, 45, 70, 100].
    """
    # The code is generalized to run on any array. make necessary conversions 
    if isinstance(sample_number, int):
        sample_number = [sample_number]
    
    patient_features = []
    for sample_point in sample_number:
        # Pick a point in time
        spatial_intensity = ppg_data.iloc[sample_point].copy()  # at 300s with 80Hz sampling freq.
        spatial_intensity *=  pi * 4   # from unit area -> pi r^2 area -> match simulation
        # Reshape ppg data to fit the format
        spatial_intensity_wv1 = pd.DataFrame(data={
            'SDD' : SDD,
            'Intensity' : spatial_intensity.to_numpy()[:5]
        })
        spatial_intensity_wv2 = pd.DataFrame(data={
            'SDD' : SDD,
            'Intensity' : spatial_intensity.to_numpy()[5:]
        })
        alpha_wv1 = interpolate_exp_chunk(spatial_intensity_wv1, weights=[1.0, -2.0], return_alpha=True).flatten()
        alpha_wv2 = interpolate_exp_chunk(spatial_intensity_wv2, weights=[1.0, -2.0], return_alpha=True).flatten()
        patient_features.append([alpha_wv1[0]/alpha_wv2[0], alpha_wv1[1], alpha_wv2[1], alpha_wv1[2], alpha_wv2[2], alpha_wv1[3], alpha_wv2[3]])
        
    return np.array(patient_features)
    

In [8]:
# Predict on reallife data
sheep_id = 21
data = TFO_dataset.SheepData('iq_demod_optical').get(sheep_id)
print(TFO_dataset.SheepData('iq_demod_optical').get_tuple(sheep_id))

features = prepare_patient_ppg(data, [100 * 80, 200 * 80, 1000 * 80, 2000 * 80])

# Create a DF for better viz.
print('Non- normalized Features')
feature_names = [f'f{i + 1}' for i in range(7)]
feature_df = pd.DataFrame(columns=feature_names, data=features)
print(feature_df)
features = alpha_scaler.transform(features)

(2, 1, 'sp2022')
Non- normalized Features
         f1        f2        f3         f4        f5         f6         f7
0  1.804691 -0.388345 -0.222053  13.789534  6.878564 -29.756532 -16.434083
1  1.812935 -0.381013 -0.217476  13.375781  6.627063 -28.893555 -15.907345
2  1.879090 -0.389864 -0.223130  13.761774  6.786435 -29.636649 -16.093875
3  2.215944 -0.382401 -0.193924  13.267262  5.322043 -28.588720 -13.226143


In [9]:
estimate, confidence = gp.predict(features, return_cov=True)
# estimate, confidence = gp.predict(X_train[0, :].reshape(1, -1), return_std=True)
print(y_scaler.inverse_transform(np.array(estimate).reshape(-1, 1)))
print(confidence)

[[106.67518907]
 [107.54110061]
 [104.0299961 ]
 [109.51145205]]
[[141.64484029 134.21474545 140.86727447 130.61922064]
 [134.21474545 127.47718692 133.50134026 124.27737817]
 [140.86727447 133.50134026 140.112953   130.01853325]
 [130.61922064 124.27737817 130.01853325 122.32902487]]


In [10]:
print(f'alpha means (From training) : {alpha_scaler.mean_}')
print(f'alpha variance (From training) : {alpha_scaler.var_}')

alpha means (From training) : [ 1.04939786  0.01790294 -0.03600841 -6.99034763 -6.16144278 13.70085319
 12.28273509]
alpha variance (From training) : [6.57791855e+03 9.80856727e-02 3.21627942e-01 1.40932083e+02
 4.86134570e+02 4.77191920e+02 1.64217933e+03]
