# Regression analysis and training

In [1]:
import glob
import os
import os.path
import numpy as np
import pandas as pd
import pickle
import matplotlib.pyplot as plt
import logging
from sklearn.preprocessing import StandardScaler
from bokeh.io import output_notebook, show
from bokeh.plotting import figure
from bokeh.models.annotations import Title
from maweight import model_selection
from config import elastix_params
import pickle
output_notebook()

from config import leg_features_path, loin_features_path, belly_features_path, groin_features_path
from config import shoulder_features_path, body_features_path
from config import xls_path, path_prefix_results

import warnings
warnings.filterwarnings('ignore')

# setting the logging format
FORMAT = '%(asctime)-15s %(clientip)s %(user)-8s %(message)s'
logging.basicConfig(format=FORMAT, level=logging.INFO)

Executables being used: /opt/elastix-5.1.0-linux/bin/elastix /opt/elastix-5.1.0-linux/bin/transformix


## Constructing the training dataframe

In [2]:
# Reading the extracted features
["leg", "loin", "belly", "groin", "shoulder", "body"]
leg_features= pd.read_csv(leg_features_path)
loin_features= pd.read_csv(loin_features_path)
belly_features= pd.read_csv(belly_features_path)
groin_features= pd.read_csv(groin_features_path)
shoulder_features= pd.read_csv(shoulder_features_path)
body_features= pd.read_csv(body_features_path)

# Determining the ids and positions
leg_features['id']= leg_features['filename'].apply(lambda x: x.split(os.sep)[-1]).apply(lambda x: x[:-7] if x.endswith(".nii.gz") else x[:-4]) 
loin_features['id']= loin_features['filename'].apply(lambda x: x.split(os.sep)[-1]).apply(lambda x: x[:-7] if x.endswith(".nii.gz") else x[:-4])
belly_features['id']= belly_features['filename'].apply(lambda x: x.split(os.sep)[-1]).apply(lambda x: x[:-7] if x.endswith(".nii.gz") else x[:-4]) 
groin_features['id']= groin_features['filename'].apply(lambda x: x.split(os.sep)[-1]).apply(lambda x: x[:-7] if x.endswith(".nii.gz") else x[:-4])
shoulder_features['id']= shoulder_features['filename'].apply(lambda x: x.split(os.sep)[-1]).apply(lambda x: x[:-7] if x.endswith(".nii.gz") else x[:-4]) 
body_features['id']= body_features['filename'].apply(lambda x: x.split(os.sep)[-1]).apply(lambda x: x[:-7] if x.endswith(".nii.gz") else x[:-4])

# Reading the XLS data
xls_data= pd.read_excel(xls_path, engine='openpyxl')

# Extracting the ground truth data
target= pd.DataFrame(data= {'ct_num': xls_data[u'id'].astype(str).str.zfill(3), 
                            'leg': xls_data[u'HAM'].astype(float),
                            'loin': xls_data[u'LOIN'].astype(float),
                            'belly': xls_data[u'BELLY'].astype(float),
                            'groin': xls_data[u'VENTRA_ PART_BELLY'].astype(float), 
                            'shoulder': xls_data[u'SHOULDER'].astype(float),
                            'body': xls_data[u'L_CW'].astype(float)})

target['id']= target['ct_num']
target= target.sort_values('id')
target= target.reset_index()

# Merging the skull features with the ground truth data
leg_target= target[['id', 'leg']]
loin_target= target[['id', 'loin']]
belly_target= target[['id', 'belly']]
groin_target= target[['id', 'groin']]
shoulder_target= target[['id', 'shoulder']]
body_target= target[['id', 'body']]

leg_data= pd.merge(leg_features, leg_target, how='inner', on='id').dropna()
loin_data= pd.merge(loin_features, loin_target, how='inner', on='id').dropna()
belly_data= pd.merge(belly_features, belly_target, how='inner', on='id').dropna()
groin_data= pd.merge(groin_features, groin_target, how='inner', on='id').dropna()
shoulder_data= pd.merge(shoulder_features, shoulder_target, how='inner', on='id').dropna()
body_data= pd.merge(body_features, body_target, how='inner', on='id').dropna()

In [3]:
leg_target= leg_data['leg']
leg_features= leg_data.drop(['filename', 'id', 'leg'], axis='columns')
loin_target= loin_data['loin']
loin_features= loin_data.drop(['filename', 'id', 'loin'], axis='columns')
belly_target= belly_data['belly']
belly_features= belly_data.drop(['filename', 'id', 'belly'], axis='columns')
groin_target= groin_data['groin']
groin_features= groin_data.drop(['filename', 'id', 'groin'], axis='columns')
shoulder_target= shoulder_data['shoulder']
shoulder_features= shoulder_data.drop(['filename', 'id', 'shoulder'], axis='columns')
body_target= body_data['body']
body_features= body_data.drop(['filename', 'id', 'body'], axis='columns')

# encoding the positions
#skull_features['pos']= skull_features['pos'].apply(lambda x: {'a': 0, 'k': 1, 'f': 2}[x])
#head_features['pos']= head_features['pos'].apply(lambda x: {'a': 0, 'k': 1, 'f': 2}[x])

## Model selection with feature selection

In [4]:
masks= np.unique([c.split('-')[1] for c in leg_features.columns if ('mean_mask' not in c and len(c) > 10) and len(c.split('-')[1]) == 4])

In [5]:
masks

array(['p004', 'p029', 'p032', 'p041', 'p048'], dtype='<U4')

In [6]:
results= []
results_no_fs= []

### leg using all features

In [None]:
results.append(model_selection(leg_features, leg_target, dataset='leg', type='all'))

Objective KNNR_Objective:


 18%|███████▏                               | 1472/8000 [03:04<13:36,  8.00it/s]
75it [00:00, 747.85it/s]

iterations: 1472
Number of used features: 24
Used features: ['Unnamed: 0', 'num-p004-leg.nii.gz-0.500000', 'std-p004-leg.nii.gz-0.500000', 'hist-2-p004-leg.nii.gz-0.500000', 'num-p029-leg.nii.gz-0.500000', 'skew-p029-leg.nii.gz-0.500000', 'hist-1-p029-leg.nii.gz-0.500000', 'hist-5-p029-leg.nii.gz-0.500000', 'hist-10-p029-leg.nii.gz-0.500000', 'hist-17-p029-leg.nii.gz-0.500000', 'hist-9-p032-leg.nii.gz-0.500000', 'hist-13-p032-leg.nii.gz-0.500000', 'hist-14-p032-leg.nii.gz-0.500000', 'hist-17-p032-leg.nii.gz-0.500000', 'num-p041-leg.nii.gz-0.500000', 'hist-12-p041-leg.nii.gz-0.500000', 'hist-13-p041-leg.nii.gz-0.500000', 'std-p048-leg.nii.gz-0.500000', 'hist-8-p048-leg.nii.gz-0.500000', 'hist-14-p048-leg.nii.gz-0.500000', 'hist-16-p048-leg.nii.gz-0.500000', 'num-0.500000-mean_mask', 'hist-1-0.500000-mean_mask', 'hist-7-0.500000-mean_mask'] 
Score: -0.897328883702748


200it [00:00, 747.76it/s]
  0%|                                                  | 0/8000 [00:00<?, ?it/s]

1
1 0.90442231799878
Objective LinearRegression_Objective:


  2%|▊                                       | 158/8000 [00:16<12:36, 10.37it/s]

### leg using the mean mask features

In [None]:
results.append(model_selection(leg_features[[c for c in leg_features.columns if 'mean_mask' in c or c == 'type']], leg_target, dataset='leg', type='mean_mask'))

### leg using the features of the individual masks

In [None]:
for m in masks:
    results.append(model_selection(leg_features[[c for c in leg_features.columns if m in c or c == 'type']], skull_target, dataset='leg', type=m))

### loin using all features

In [None]:
results.append(model_selection(loin_features, loin_target, dataset='head', type='all'))

### head using the mean mask features

In [None]:
results.append(model_selection(head_features[[c for c in head_features.columns if 'mean_mask' in c or c == 'type']], head_target, dataset='head', type='mean_mask'))

### head using the features of the individual masks

In [None]:
for m in masks:
    results.append(model_selection(head_features[[c for c in head_features.columns if m in c or c == 'type']], head_target, dataset='head', type=m))

### Saving the results

In [None]:
results= pd.concat(results)
results.to_csv(os.path.join(path_prefix_results,'results.csv'), index=False)
pickle.dump(results, open(os.path.join(path_prefix_results,'results.pickle'), 'wb'))

## Without feature selection

### skull with all features

In [None]:
results_no_fs.append(model_selection(skull_features, skull_target, dataset='skull', type='all', disable_feature_selection=True))

### skull mean mask

In [None]:
results_no_fs.append(model_selection(skull_features[[c for c in skull_features.columns if 'mean_mask' in c or c == 'type']], skull_target, dataset='skull', type='mean_mask', disable_feature_selection=True))

### skull individual masks

In [None]:
for m in masks:
    results_no_fs.append(model_selection(skull_features[[c for c in skull_features.columns if m in c or c == 'type']], skull_target, dataset='skull', type=m, disable_feature_selection=True))

### head all features

In [None]:
results_no_fs.append(model_selection(head_features, head_target, dataset='head', type='all', disable_feature_selection=True))

### head mean mask

In [None]:
results_no_fs.append(model_selection(head_features[[c for c in head_features.columns if 'mean_mask' in c or c == 'type']], head_target, dataset='head', type='mean_mask', disable_feature_selection=True))

### head individual masks

In [None]:
for m in masks:
    results_no_fs.append(model_selection(head_features[[c for c in head_features.columns if m in c or c == 'type']], head_target, dataset='head', type=m, disable_feature_selection=True))

### Saving the resulst

In [None]:
results_no_fs= pd.concat(results_no_fs)
results_no_fs.to_csv(os.path.join(path_prefix_results,'results_no_fs.csv'), index=False)
pickle.dump(results_no_fs, open(os.path.join(path_prefix_results,'results_no_fs.pickle'), 'wb'))