# Get started

### Imports

In [None]:
#! pip install -U ipywidgets
#! pip install matplotlib 
#! pip install scikit-learn 
#! pip install ray 
#! pip install fsspec 
#! pip install pyarrow 
#! pip install sqlalchemy
#! pip install torchinfo

In [1]:
import sys

# Set path to root directory
sys.path.append(r'/home/rlfowler/Documents/research/tfo_inverse_modelling')

from pathlib import Path
from torch.optim import Adam, SGD
import torch.nn as nn
import torch
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import os
from sklearn import preprocessing
from inverse_modelling_tfo.data import (
    generate_data_loaders,
    config_based_normalization,
)
from inverse_modelling_tfo.data.intensity_interpolation import (
    interpolate_exp,
    get_interpolate_fit_params,
    exp_piecewise_affine,
)
from inverse_modelling_tfo.data.interpolation_function_zoo import *
from inverse_modelling_tfo.models import RandomSplit, ValidationMethod, HoldOneOut, CVSplit, CombineMethods
from inverse_modelling_tfo.models.custom_models import (
    SplitChannelCNN,
    PerceptronReLU,
    PerceptronBN,
    PerceptronDO,
    PerceptronBD,
)
from inverse_modelling_tfo.features.build_features import (
    FetalACFeatureBuilder,
    RowCombinationFeatureBuilder,
    TwoColumnOperationFeatureBuilder,
    FetalACbyDCFeatureBuilder,
    LogTransformFeatureBuilder,
    ConcatenateFeatureBuilder,
)
from inverse_modelling_tfo.features.data_transformations import (
    LongToWideIntensityTransformation,
    ToFittingParameterTransformation,
)
from ray import tune
from ray.tune import CLIReporter
from ray.tune.schedulers import ASHAScheduler
import torchinfo
from inverse_modelling_tfo.misc.misc_training import set_seed

# Set my GPU
os.environ["CUDA_VISIBLE_DEVICES"] = "1"

In [2]:
#DATA_PATH = r'/home/rraiyan/simulations/tfo_sim/data/compiled_intensity/dan_iccps_pencil.pkl'
#DATA_PATH = r'/home/rlfowler/Documents/research/tfo_sim/data/compiled_intensity/dan_iccps_RD1_nojson.pkl'
DATA_PATH = r'/home/rlfowler/Documents/research/tfo_sim/data/compiled_intensity/dan_iccps_RD1_1.pkl'
CONFIG_PATH = Path(r'/home/rlfowler/Documents/research/tfo_sim/data/compiled_intensity/dan_iccps_RD1_nojson.json')
#CONFIG_PATH = Path(r'/home/rraiyan/simulations/tfo_sim/data/compiled_intensity/dan_iccps_pencil.json')

# DATA_PATH = r'/home/rraiyan/simulations/tfo_sim/data/compiled_intensity/weitai_data.pkl'
# CONFIG_PATH = r'/home/rraiyan/simulations/tfo_sim/data/compiled_intensity/weitai_data.json'

DATA_PATH = r'/home/rlfowler/Documents/research/tfo_sim/data/compiled_intensity/dan_iccps_RD1_5.pkl'
CONFIG_PATH = Path(r'/home/rlfowler/Documents/research/tfo_sim/data/compiled_intensity/randall_data.json')

# Load data
data = pd.read_pickle(DATA_PATH)

print(data.shape)
data.head()
# (19906560, 9)
# (655331160, 9)

(651817320, 11)


Unnamed: 0,Wave Int,SDD,Uterus Thickness,Maternal Wall Thickness,Fetal Radius,Fetal Displacement,Maternal Hb Concentration,Maternal Saturation,Fetal Hb Concentration,Fetal Saturation,Intensity
0,1.0,10,5.0,6.0,190.0,15.0,11.0,0.9,11.0,0.1,364222.6875
1,1.0,15,5.0,6.0,190.0,15.0,11.0,0.9,11.0,0.1,33945.113281
2,1.0,19,5.0,6.0,190.0,15.0,11.0,0.9,11.0,0.1,6615.960449
3,1.0,24,5.0,6.0,190.0,15.0,11.0,0.9,11.0,0.1,1106.256714
4,1.0,28,5.0,6.0,190.0,15.0,11.0,0.9,11.0,0.1,309.963318


In [3]:
data = data.astype(np.float32)

In [12]:
print(data["Maternal Wall Thickness"].value_counts())

14.0    15000
32.0    15000
12.0    15000
27.0    15000
20.0    15000
24.0    15000
22.0    15000
30.0    15000
15.0    15000
21.0    15000
10.0    15000
25.0    15000
19.0    15000
28.0    15000
34.0    15000
26.0    15000
8.0     15000
9.0     15000
17.0    15000
31.0    15000
4.0     15000
23.0    15000
33.0    15000
13.0    15000
18.0    15000
11.0    15000
29.0    15000
7.0     15000
16.0    15000
6.0     15000
5.0     15000
Name: Maternal Wall Thickness, dtype: int64


In [4]:
# Normalize data using the json file
config_based_normalization(data, CONFIG_PATH) # May need to change this for my own code

# # Drop Uterus Thickness for now
data = data.drop(columns="Uterus Thickness")

print(data.shape)
data.head()

(651817320, 10)


Unnamed: 0,Wave Int,SDD,Maternal Wall Thickness,Fetal Radius,Fetal Displacement,Maternal Hb Concentration,Maternal Saturation,Fetal Hb Concentration,Fetal Saturation,Intensity
0,1.0,10.0,6.0,190.0,15.0,11.0,0.9,11.0,0.1,2.898392e-05
1,1.0,15.0,6.0,190.0,15.0,11.0,0.9,11.0,0.1,2.701266e-06
2,1.0,19.0,6.0,190.0,15.0,11.0,0.9,11.0,0.1,5.264814e-07
3,1.0,24.0,6.0,190.0,15.0,11.0,0.9,11.0,0.1,8.803311e-08
4,1.0,28.0,6.0,190.0,15.0,11.0,0.9,11.0,0.1,2.46661e-08


In [5]:
print(data.columns)
data.head()

Index(['Wave Int', 'SDD', 'Maternal Wall Thickness', 'Fetal Radius',
       'Fetal Displacement', 'Maternal Hb Concentration',
       'Maternal Saturation', 'Fetal Hb Concentration', 'Fetal Saturation',
       'Intensity'],
      dtype='object')


Unnamed: 0,Wave Int,SDD,Maternal Wall Thickness,Fetal Radius,Fetal Displacement,Maternal Hb Concentration,Maternal Saturation,Fetal Hb Concentration,Fetal Saturation,Intensity
0,1.0,10,8.0,90.0,40.0,11.0,0.9,11.0,0.1,2.807128e-05
1,1.0,15,8.0,90.0,40.0,11.0,0.9,11.0,0.1,2.363534e-06
2,1.0,19,8.0,90.0,40.0,11.0,0.9,11.0,0.1,3.988691e-07
3,1.0,24,8.0,90.0,40.0,11.0,0.9,11.0,0.1,5.007371e-08
4,1.0,28,8.0,90.0,40.0,11.0,0.9,11.0,0.1,1.084429e-08


#### This is for file with no json and lack of columns for radius and displacement

In [5]:
from glob import glob
from pathlib import Path

# Load the raw data
data_folder = r"/home/rlfowler/Documents/research/tfo_sim/data/raw_dan_iccps_RD1"
all_pickle_files = glob(f"{data_folder}/*.pkl")
num_files = 746
data_per_file = data.shape[0] / num_files
print(f"Data per file: {data_per_file}")
print(f"Fixing {num_files} out of {len(all_pickle_files)}")
radius = []
displacement = []
for file in all_pickle_files[0:num_files]:
    r = file.split("_")[-3]
    d = file.split("_")[-1].split(".")[0]
    for i in range(int(data_per_file)):
        radius.append(r)
        displacement.append(d)

data.insert(7, "Fetal Radius", radius)
data.insert(8, "Fetal Displacement", displacement)

Data per file: 878460.0
Fixing 746 out of 3726


In [6]:
print(data.columns)

Index(['Wave Int', 'SDD', 'Maternal Wall Thickness',
       'Maternal Hb Concentration', 'Maternal Saturation',
       'Fetal Hb Concentration', 'Fetal Saturation', 'Fetal Radius',
       'Fetal Displacement', 'Intensity'],
      dtype='object')


This typically takes 6 or 7 minutes to finish. It will put the data into the format of (number of samples, number of detectors * number of wavelengths + basic info).

Typically, basic info includes 5 things of maternal wall thickeness, maternal concentration, maternal saturation, fetal concentration, and fetal saturation. In other words, fetal depth, fetal and maternal concentrations, and fetal and maternal saturations.

(497664,45) for example run

In [5]:
data_transformer = LongToWideIntensityTransformation()

data = data_transformer.transform(data)
labels = data_transformer.get_label_names()
intensity_columns = data_transformer.get_feature_names()

print(data.shape)
data.head()

(29648025, 47)


Unnamed: 0,Maternal Wall Thickness,Fetal Radius,Fetal Displacement,Maternal Hb Concentration,Maternal Saturation,Fetal Hb Concentration,Fetal Saturation,10.0_1.0,15.0_1.0,19.0_1.0,...,55.0_2.0,59.0_2.0,64.0_2.0,68.0_2.0,72.0_2.0,77.0_2.0,81.0_2.0,86.0_2.0,90.0_2.0,94.0_2.0
0,2.0,50.0,10.0,11.0,0.9,10.725,0.1,,,,...,2.908946e-14,2.965748e-15,1.338872e-16,3.1825410000000005e-17,1.1386100000000001e-17,6.159060999999999e-19,7.462819e-21,6.054328999999999e-19,1.412835e-22,4.3751380000000004e-23
1,2.0,50.0,10.0,11.0,0.9,10.725,0.15,,,,...,2.867838e-14,2.878566e-15,1.297553e-16,3.097944e-17,1.102948e-17,5.874455e-19,7.067106e-21,6.002877e-19,1.336257e-22,4.1011060000000004e-23
2,2.0,50.0,10.0,11.0,0.9,10.725,0.2,,,,...,2.828155e-14,2.795001e-15,1.258003e-16,3.0171140000000003e-17,1.0689100000000001e-17,5.612817999999999e-19,6.693608e-21,5.951876e-19,1.264041e-22,3.846733e-23
3,2.0,50.0,10.0,11.0,0.9,10.725,0.25,,,,...,2.789824e-14,2.714879e-15,1.220133e-16,2.9398300000000005e-17,1.0364130000000001e-17,5.372165e-19,6.341025e-21,5.901316999999999e-19,1.195928e-22,3.6102710000000005e-23
4,2.0,50.0,10.0,11.0,0.9,10.725,0.3,,,,...,2.752785e-14,2.638016e-15,1.183861e-16,2.8658920000000003e-17,1.0053800000000001e-17,5.150687e-19,6.008132e-21,5.851198e-19,1.1316760000000001e-22,3.3901690000000005e-23


In [6]:
# Drop rows that contain NULL values
data.dropna(inplace=True)

print(data.shape)
data.head()

(2942841, 47)


Unnamed: 0,Maternal Wall Thickness,Fetal Radius,Fetal Displacement,Maternal Hb Concentration,Maternal Saturation,Fetal Hb Concentration,Fetal Saturation,10.0_1.0,15.0_1.0,19.0_1.0,...,55.0_2.0,59.0_2.0,64.0_2.0,68.0_2.0,72.0_2.0,77.0_2.0,81.0_2.0,86.0_2.0,90.0_2.0,94.0_2.0
922383,2.0,210.0,15.0,11.0,0.9,10.725,0.1,4e-05,5e-06,1e-06,...,4.932045e-13,1.012942e-13,7.496905e-15,2.977679e-16,1.666331e-17,6.03479e-19,2.769264e-19,1.8891849999999998e-20,7.600356000000001e-23,4.8543710000000007e-23
922384,2.0,210.0,15.0,11.0,0.9,10.725,0.15,4e-05,5e-06,1e-06,...,4.884339e-13,9.934039e-14,7.398276e-15,2.872794e-16,1.55485e-17,5.444254999999999e-19,2.666606e-19,1.61362e-20,6.722763e-23,3.8665680000000006e-23
922385,2.0,210.0,15.0,11.0,0.9,10.725,0.2,4e-05,5e-06,1e-06,...,4.837939e-13,9.74468e-14,7.302288e-15,2.772875e-16,1.452903e-17,4.918943999999999e-19,2.5703769999999996e-19,1.378956e-20,5.959611000000001e-23,3.0878620000000003e-23
922386,2.0,210.0,15.0,11.0,0.9,10.725,0.25,4e-05,5e-06,1e-06,...,4.792785e-13,9.561098e-14,7.208776e-15,2.67759e-16,1.3595930000000001e-17,4.451096e-19,2.4799389999999996e-19,1.1790449999999999e-20,5.294528000000001e-23,2.4735250000000003e-23
922387,2.0,210.0,15.0,11.0,0.9,10.725,0.3,4e-05,5e-06,1e-06,...,4.748812e-13,9.383029e-14,7.117585e-15,2.586638e-16,1.274117e-17,4.0339129999999996e-19,2.3947309999999997e-19,1.008672e-20,4.7136350000000007e-23,1.98844e-23


Use this data with normalizations.

Use Log Tranform feature builder

Chaining feature builders will perform multiple feature building on the same data

## Build Features

What is the fetal conc group?

In [2]:
import numpy as np
import pandas as pd

data = pd.read_pickle(r'/home/rlfowler/Documents/research/tfo_inverse_modelling/Randalls Folder/data/randall_data_intensities.pkl')

In [24]:
print(np.unique(data["Fetal Hb Concentration"]))
print(len(np.unique(data["Fetal Hb Concentration"])))

[10.725  11.     11.2125 11.275  11.5    11.7    11.7875 12.     12.1875
 12.3    12.5    12.675  12.8125 13.     13.1625 13.325  13.5    13.65
 13.8375 14.     14.1375 14.35   14.5    14.625  14.8625 15.     15.1125
 15.375  15.5    15.6    15.8875 16.     16.4   ]
33


In [71]:
print(data["Fetal Hb Concentration"].shape)

(35929014,)


In [3]:
def get_fetal_concetration_group():
    f_c_range = (11., 16.)
    count = 11
    perc = 0.025
    fetal_concentrations = np.linspace(f_c_range[0], f_c_range[1], count, endpoint=True)
    fetal_conc_group_mapping = {}
    for i, f_c in enumerate(fetal_concentrations): 
        fetal_conc_group_mapping[np.round(f_c*(1-perc),4)] = i
        fetal_conc_group_mapping[f_c] = i
        fetal_conc_group_mapping[np.round(f_c*(1+perc),4)] = i
    return fetal_conc_group_mapping

def get_FconcCenters(fetal_conc):
    f_c_range = (11., 16.)
    count = 11
    perc = 0.026
    fetal_concentrations = np.linspace(f_c_range[0], f_c_range[1], count, endpoint=True)
    FconcCenters = np.ones(fetal_conc.shape, dtype=np.float32)*np.Inf

    for i, f_c in enumerate(fetal_concentrations): 
        FconcCenters[np.bitwise_and(fetal_conc >= np.round(f_c*(1-perc),4), fetal_conc <= np.round(f_c*(1+perc),4))] = i
    return FconcCenters

# fetal_conc_group_mapping = get_fetal_concetration_group()
# print(fetal_conc_group_mapping)

data['FconcCenters'] = get_FconcCenters(data['Fetal Hb Concentration'])
print(np.unique(data['FconcCenters']))


[ 0.  1.  2.  3.  4.  5.  6.  7.  8.  9. 10.]


In [68]:
fetal_conc_group_mapping = {10.725: 0, 11.0: 0, 11.275: 0, 11.2125: 1, 11.5: 1, 11.7875: 1, 11.7: 2, 12.0: 2, 12.3: 2, 12.1875: 3, 12.5: 3, 12.8125: 3, 12.675: 4, 13.0: 4, 13.325: 4, 13.1625: 5, 13.5: 5, 13.8375: 5, 13.65: 6, 14.0: 6, 14.35: 6, 14.1375: 7, 14.5: 7, 14.8625: 7, 14.625: 8, 15.0: 8, 15.375: 8, 15.1125: 9, 15.5: 9, 15.8875: 9, 15.6: 10, 16.0: 10, 16.4: 10}

data["Fetal Hb Concentration Group"] = np.round(data["Fetal Hb Concentration"],4).map(fetal_conc_group_mapping)

In [66]:
# fetal_conc_group_mapping = {
#     10.45 : 0,
#     10.88 : 0,
#     11. : 0,
#     11.31: 1,
#     11.45: 1,
#     11.55: 1,
#     11.75: 2,
#     11.91: 2,
#     12.03: 2,
#     12.18: 3,
#     12.36: 3,
#     12.5: 3,
#     12.61: 4,
#     12.82: 4,
#     12.98: 4,
#     13.04: 5,
#     13.27: 5,
#     13.46: 5,
#     13.47: 6,
#     13.73: 6,
#     13.9: 6,
#     13.94: 7,
#     14.18: 7,
#     14.34: 7,
#     14.41: 8,
#     14.64: 8,
#     14.77: 8,
#     14.89: 9,
#     15.09: 9,
#     15.2: 9,
#     15.37: 10,
#     15.55: 10,
#     15.85: 10,
#     16.: 11,
#     16.32: 11,
#     16.8: 11, 
# }
# print(fetal_conc_group_mapping.keys())
#data['FconcCenters'] = data['Fetal Hb Concentration'].round(4).map(fetal_conc_group_mapping)
data['FconcCenters'] = np.round(data['Fetal Hb Concentration'],4).map(fetal_conc_group_mapping)
print(data.shape)
data.head()

(35929014, 48)


Unnamed: 0,Maternal Wall Thickness,Fetal Radius,Fetal Displacement,Maternal Hb Concentration,Maternal Saturation,Fetal Hb Concentration,Fetal Saturation,10.0_1.0,15.0_1.0,19.0_1.0,...,59.0_2.0,64.0_2.0,68.0_2.0,72.0_2.0,77.0_2.0,81.0_2.0,86.0_2.0,90.0_2.0,94.0_2.0,FconcCenters
43923,2.0,50.0,5.0,11.0,0.9,10.725,0.1,4.1e-05,5e-06,1e-06,...,5.848743e-15,2.763929e-16,1.500358e-17,3.3986070000000002e-18,1.015295e-18,7.130596999999999e-20,8.710054e-22,4.0124559999999997e-20,2.3557480000000003e-22,
43924,2.0,50.0,5.0,11.0,0.9,10.725,0.15,4.1e-05,5e-06,1e-06,...,5.768395e-15,2.710312e-16,1.4636420000000003e-17,3.212955e-18,9.575111e-19,6.831443999999999e-20,8.155644e-22,3.8358879999999996e-20,2.151092e-22,
43925,2.0,50.0,5.0,11.0,0.9,10.725,0.2,4.1e-05,5e-06,1e-06,...,5.689761e-15,2.658178e-16,1.4287270000000002e-17,3.039417e-18,9.030256999999999e-19,6.545149e-20,7.6462310000000005e-22,3.66709e-20,1.9642150000000002e-22,
43926,2.0,50.0,5.0,11.0,0.9,10.725,0.25,4.1e-05,5e-06,1e-06,...,5.612798e-15,2.607455e-16,1.3955180000000002e-17,2.877101e-18,8.516512999999999e-19,6.271132e-20,7.177746000000001e-22,3.505719e-20,1.793575e-22,
43927,2.0,50.0,5.0,11.0,0.9,10.725,0.3,4.1e-05,5e-06,1e-06,...,5.537444e-15,2.558081e-16,1.3639310000000001e-17,2.725183e-18,8.032089999999999e-19,6.008842e-20,6.746510000000001e-22,3.35145e-20,1.637759e-22,


In [70]:
print(np.unique(data['Fetal Hb Concentration'].round(4)))
s = set()
#b = np.unique(np.round(data['Fetal Hb Concentration'], 4))
for n in np.unique(data['Fetal Hb Concentration'].round(4)):
    if n not in fetal_conc_group_mapping.keys():
        s.add(n)
print(s)
print(len(s))
print(fetal_conc_group_mapping)

[10.725  11.     11.2125 11.275  11.5    11.7    11.7875 12.     12.1875
 12.3    12.5    12.675  12.8125 13.     13.1625 13.325  13.5    13.65
 13.8375 14.     14.1375 14.35   14.5    14.625  14.8625 15.     15.1125
 15.375  15.5    15.6    15.8875 16.     16.4   ]
{10.725, 11.275, 11.7, 11.2125, 11.7875, 12.3, 12.675, 13.1625, 13.325, 13.65, 13.8375, 14.1375, 14.35, 14.8625, 15.1125, 15.8875, 16.4, 15.6}
18
{10.725: 0, 11.0: 0, 11.275: 0, 11.2125: 1, 11.5: 1, 11.7875: 1, 11.7: 2, 12.0: 2, 12.3: 2, 12.1875: 3, 12.5: 3, 12.8125: 3, 12.675: 4, 13.0: 4, 13.325: 4, 13.1625: 5, 13.5: 5, 13.8375: 5, 13.65: 6, 14.0: 6, 14.35: 6, 14.1375: 7, 14.5: 7, 14.8625: 7, 14.625: 8, 15.0: 8, 15.375: 8, 15.1125: 9, 15.5: 9, 15.8875: 9, 15.6: 10, 16.0: 10, 16.4: 10}


Group id for getting AC/DC

In [69]:
# print(data['FconcCenters'])
print(np.unique(data['FconcCenters']))
print(np.unique(data[data['FconcCenters'] == 0]['Fetal Hb Concentration']))

[ 0.  1.  2.  3.  4.  5.  6.  7.  8.  9. 10. nan]
(array([], dtype=int64),)
[11.]


Typically takes 10 or 11 minutes to run.

In [7]:
intensity_columns = data.columns[7:].tolist()
labels = data.columns[0:7].tolist()
print(intensity_columns)
print(labels)

['10.0_1.0', '15.0_1.0', '19.0_1.0', '24.0_1.0', '28.0_1.0', '33.0_1.0', '37.0_1.0', '41.0_1.0', '46.0_1.0', '50.0_1.0', '55.0_1.0', '59.0_1.0', '64.0_1.0', '68.0_1.0', '72.0_1.0', '77.0_1.0', '81.0_1.0', '86.0_1.0', '90.0_1.0', '94.0_1.0', '10.0_2.0', '15.0_2.0', '19.0_2.0', '24.0_2.0', '28.0_2.0', '33.0_2.0', '37.0_2.0', '41.0_2.0', '46.0_2.0', '50.0_2.0', '55.0_2.0', '59.0_2.0', '64.0_2.0', '68.0_2.0', '72.0_2.0', '77.0_2.0', '81.0_2.0', '86.0_2.0', '90.0_2.0', '94.0_2.0', 'FconcCenters']
['Maternal Wall Thickness', 'Fetal Radius', 'Fetal Displacement', 'Maternal Hb Concentration', 'Maternal Saturation', 'Fetal Hb Concentration', 'Fetal Saturation']


In [8]:

fb1 = FetalACbyDCFeatureBuilder('FconcCenters', 'perm', intensity_columns, labels, "max")
data = fb1(data)

print(data.shape)
data.head()

KeyboardInterrupt: 

In [32]:
labels = fb1.get_label_names()
print(labels)

['Maternal Wall Thickness', 'Maternal Hb Concentration', 'Maternal Saturation', 'Fetal Saturation', 'FconcCenters', 'Fetal Hb Concentration 0', 'Fetal Hb Concentration 1']


In [41]:
x_columns = fb1.get_feature_names()
y_columns = ["Fetal Saturation"]
print(x_columns)
print(len(x_columns))

['MAX_ACbyDC_WV1_0', 'MAX_ACbyDC_WV1_1', 'MAX_ACbyDC_WV1_2', 'MAX_ACbyDC_WV1_3', 'MAX_ACbyDC_WV1_4', 'MAX_ACbyDC_WV1_5', 'MAX_ACbyDC_WV1_6', 'MAX_ACbyDC_WV1_7', 'MAX_ACbyDC_WV1_8', 'MAX_ACbyDC_WV1_9', 'MAX_ACbyDC_WV1_10', 'MAX_ACbyDC_WV1_11', 'MAX_ACbyDC_WV1_12', 'MAX_ACbyDC_WV1_13', 'MAX_ACbyDC_WV1_14', 'MAX_ACbyDC_WV1_15', 'MAX_ACbyDC_WV1_16', 'MAX_ACbyDC_WV1_17', 'MAX_ACbyDC_WV1_18', 'MAX_ACbyDC_WV1_19', 'MAX_ACbyDC_WV2_0', 'MAX_ACbyDC_WV2_1', 'MAX_ACbyDC_WV2_2', 'MAX_ACbyDC_WV2_3', 'MAX_ACbyDC_WV2_4', 'MAX_ACbyDC_WV2_5', 'MAX_ACbyDC_WV2_6', 'MAX_ACbyDC_WV2_7', 'MAX_ACbyDC_WV2_8', 'MAX_ACbyDC_WV2_9', 'MAX_ACbyDC_WV2_10', 'MAX_ACbyDC_WV2_11', 'MAX_ACbyDC_WV2_12', 'MAX_ACbyDC_WV2_13', 'MAX_ACbyDC_WV2_14', 'MAX_ACbyDC_WV2_15', 'MAX_ACbyDC_WV2_16', 'MAX_ACbyDC_WV2_17', 'MAX_ACbyDC_WV2_18', 'MAX_ACbyDC_WV2_19']
40


In [34]:
## Scale y
y_scaler = preprocessing.StandardScaler()
data[y_columns] = y_scaler.fit_transform(data[y_columns])

## Scale x
x_scaler = preprocessing.StandardScaler()
data[x_columns] = x_scaler.fit_transform(data[x_columns])

StandardScaler()
StandardScaler()


In [35]:
IN_FEATURES = len(x_columns)
OUT_FEATURES = len(y_columns)
print("In Features :", IN_FEATURES)  
print("Out Features:", OUT_FEATURES)

In Features : 40
Out Features: 1


Stored to save time...

In [7]:
data.to_pickle('dan_iccps_RD1_5_intensities.pkl')


## Load data just processed

In [37]:
data = pd.read_pickle('rishad_data.pkl')
data.head()

Unnamed: 0,Maternal Wall Thickness,Maternal Hb Concentration,Maternal Saturation,Fetal Saturation,FconcCenters,Fetal Hb Concentration 0,Fetal Hb Concentration 1,MAX_ACbyDC_WV1_0,MAX_ACbyDC_WV2_0,MAX_ACbyDC_WV1_1,...,MAX_ACbyDC_WV1_15,MAX_ACbyDC_WV2_15,MAX_ACbyDC_WV1_16,MAX_ACbyDC_WV2_16,MAX_ACbyDC_WV1_17,MAX_ACbyDC_WV2_17,MAX_ACbyDC_WV1_18,MAX_ACbyDC_WV2_18,MAX_ACbyDC_WV1_19,MAX_ACbyDC_WV2_19
0,4.0,11.0,0.9,-1.593255,0.0,10.45,10.881818,4.065299,4.941459,4.430259,...,2.037649,1.805695,1.300163,1.648643,0.802957,1.698323,1.521571,1.645864,1.362108,1.319674
1,4.0,11.0,0.9,-1.593255,0.0,10.45,11.0,5.11694,6.229803,5.61621,...,2.578698,2.281599,1.644967,2.082326,1.016351,2.145934,1.925202,2.078976,1.720388,1.667477
2,4.0,11.0,0.9,-1.593255,0.0,10.881818,10.45,-4.065299,-4.941459,-4.430259,...,-2.037649,-1.805695,-1.300163,-1.648643,-0.802957,-1.698323,-1.521571,-1.645864,-1.362108,-1.319674
3,4.0,11.0,0.9,-1.593255,0.0,10.881818,11.0,1.051686,1.289497,1.18666,...,0.549395,0.488265,0.34977,0.446102,0.217127,0.460549,0.410825,0.445024,0.366479,0.356184
4,4.0,11.0,0.9,-1.593255,0.0,11.0,10.45,-5.11694,-6.229803,-5.61621,...,-2.578698,-2.281599,-1.644967,-2.082326,-1.016351,-2.145934,-1.925202,-2.078976,-1.720388,-1.667477


In [43]:
x_columns = data.columns[7:].tolist()
y_columns = ["Fetal Saturation"]
print(x_columns)
print(len(x_columns))

['MAX_ACbyDC_WV1_0', 'MAX_ACbyDC_WV2_0', 'MAX_ACbyDC_WV1_1', 'MAX_ACbyDC_WV2_1', 'MAX_ACbyDC_WV1_2', 'MAX_ACbyDC_WV2_2', 'MAX_ACbyDC_WV1_3', 'MAX_ACbyDC_WV2_3', 'MAX_ACbyDC_WV1_4', 'MAX_ACbyDC_WV2_4', 'MAX_ACbyDC_WV1_5', 'MAX_ACbyDC_WV2_5', 'MAX_ACbyDC_WV1_6', 'MAX_ACbyDC_WV2_6', 'MAX_ACbyDC_WV1_7', 'MAX_ACbyDC_WV2_7', 'MAX_ACbyDC_WV1_8', 'MAX_ACbyDC_WV2_8', 'MAX_ACbyDC_WV1_9', 'MAX_ACbyDC_WV2_9', 'MAX_ACbyDC_WV1_10', 'MAX_ACbyDC_WV2_10', 'MAX_ACbyDC_WV1_11', 'MAX_ACbyDC_WV2_11', 'MAX_ACbyDC_WV1_12', 'MAX_ACbyDC_WV2_12', 'MAX_ACbyDC_WV1_13', 'MAX_ACbyDC_WV2_13', 'MAX_ACbyDC_WV1_14', 'MAX_ACbyDC_WV2_14', 'MAX_ACbyDC_WV1_15', 'MAX_ACbyDC_WV2_15', 'MAX_ACbyDC_WV1_16', 'MAX_ACbyDC_WV2_16', 'MAX_ACbyDC_WV1_17', 'MAX_ACbyDC_WV2_17', 'MAX_ACbyDC_WV1_18', 'MAX_ACbyDC_WV2_18', 'MAX_ACbyDC_WV1_19', 'MAX_ACbyDC_WV2_19']
40


## Create Model