# Regression analysis and training

In [1]:
import glob
import os
import os.path
import numpy as np
import pandas as pd
import pickle
import matplotlib.pyplot as plt
import logging
from sklearn.preprocessing import StandardScaler
from bokeh.io import output_notebook, show
from bokeh.plotting import figure
from bokeh.models.annotations import Title
from maweight import model_selection
import pickle
output_notebook()

from config import thigh_features_path, breast_features_path, xls_path, path_prefix_results

import warnings
warnings.filterwarnings('ignore')

# Configure the layout of the log lines. Only attributes that every LogRecord
# carries are referenced: custom fields such as %(clientip)s or %(user)s must
# be supplied through `extra=` on each logging call, and without them the
# handler cannot format the record, so the message is lost.
FORMAT = '%(asctime)-15s %(levelname)-8s %(message)s'
logging.basicConfig(format=FORMAT, level=logging.INFO)

Executables being used: /opt/elastix-5.1.0-linux/bin/elastix /opt/elastix-5.1.0-linux/bin/transformix


## Constructing the training dataframe

In [2]:
# Load the feature tables extracted for the two regions
thigh_features = pd.read_csv(thigh_features_path)
breast_features = pd.read_csv(breast_features_path)

# Derive the record id from the file name: keep the last path component and
# cut its trailing 7 characters
# NOTE(review): the 7 characters are presumably a '.nii.gz' suffix -- confirm
thigh_features['id'] = thigh_features['filename'].apply(
    lambda path: path.split(os.sep)[-1][:-7]
)
breast_features['id'] = breast_features['filename'].apply(
    lambda path: path.split(os.sep)[-1][:-7]
)

# Load the spreadsheet holding the ground truth
xls_data = pd.read_excel(xls_path, engine='openpyxl')

# Assemble the ground truth table. The id is the CT number (zero-padded to
# 3 characters) followed by the position; rows are ordered by that id and the
# previous index is kept as a regular 'index' column by reset_index().
target = (
    pd.DataFrame({
        'ct_num': xls_data['CT_num'].astype(str).str.zfill(3),
        'pos': xls_data['position'].astype(str),
        'thigh': xls_data['thigh'].astype(float),
        'breast': xls_data['breast'].astype(float),
    })
    .assign(id=lambda frame: frame['ct_num'] + frame['pos'])
    .sort_values('id')
    .reset_index()
)

# Per-region view of the ground truth: id, target column and position
thigh_target = target[['id', 'thigh', 'pos']]
breast_target = target[['id', 'breast', 'pos']]

# Inner-join the features with the ground truth on the id and discard every
# row that contains a missing value
thigh_data = pd.merge(thigh_features, thigh_target, on='id', how='inner').dropna()
breast_data = pd.merge(breast_features, breast_target, on='id', how='inner').dropna()

FileNotFoundError: [Errno 2] No such file or directory: 'data/chicken_data.xlsx'

In [None]:
# Regression targets, one series per region
thigh_target = thigh_data['thigh']
breast_target = breast_data['breast']

# Predictors: everything except the bookkeeping columns and the target itself
thigh_features = thigh_data.drop(columns=['filename', 'id', 'thigh'])
breast_features = breast_data.drop(columns=['filename', 'id', 'breast'])

# Replace the position letters by integer codes; a letter outside the table
# raises KeyError
thigh_features['pos'] = thigh_features['pos'].apply(
    lambda letter: {'a': 0, 'k': 1, 'f': 2}[letter]
)
breast_features['pos'] = breast_features['pos'].apply(
    lambda letter: {'a': 0, 'k': 1, 'f': 2}[letter]
)

## Model selection with feature selection

In [None]:
# Collect the distinct mask identifiers from the thigh feature column names:
# the second '-'-separated token of every column that is longer than 10
# characters, is not a mean mask feature, and whose token is 4 characters long
masks = np.unique([
    column.split('-')[1]
    for column in thigh_features.columns
    if 'mean_mask' not in column
    and len(column) > 10
    and len(column.split('-')[1]) == 4
])

In [None]:
# display the mask identifiers collected from the thigh feature columns
masks

In [None]:
# Accumulators for the model selection outputs of the individual experiments:
# `results` for the runs with feature selection, `results_no_fs` for the runs
# with feature selection disabled
results, results_no_fs = [], []

### thigh using all features

In [None]:
# thigh target, every predictor column
results.append(
    model_selection(
        thigh_features,
        thigh_target,
        dataset='thigh',
        type='all',
    )
)

### thigh using the mean mask features

In [None]:
# thigh target, mean mask features plus the encoded position.
# The position column of the feature frame is named 'pos'; the filter used to
# look only for a column called 'type', so the position never made it into
# this subset. 'type' is still accepted in case the feature CSV provides it.
results.append(
    model_selection(
        thigh_features[[c for c in thigh_features.columns
                        if 'mean_mask' in c or c in ('pos', 'type')]],
        thigh_target,
        dataset='thigh',
        type='mean_mask',
    )
)

### thigh using the features of the individual masks

In [None]:
# thigh target, one run per mask: the columns whose name contains the mask
# identifier plus the encoded position.
# The position column of the feature frame is named 'pos'; the filter used to
# look only for a column called 'type', so the position never made it into
# these subsets. 'type' is still accepted in case the feature CSV provides it.
for m in masks:
    results.append(
        model_selection(
            thigh_features[[c for c in thigh_features.columns
                            if m in c or c in ('pos', 'type')]],
            thigh_target,
            dataset='thigh',
            type=m,
        )
    )

### breast using all features

In [None]:
# breast target, every predictor column
results.append(
    model_selection(
        breast_features,
        breast_target,
        dataset='breast',
        type='all',
    )
)

### breast using the mean mask features

In [None]:
# breast target, mean mask features plus the encoded position.
# The position column of the feature frame is named 'pos'; the filter used to
# look only for a column called 'type', so the position never made it into
# this subset. 'type' is still accepted in case the feature CSV provides it.
results.append(
    model_selection(
        breast_features[[c for c in breast_features.columns
                         if 'mean_mask' in c or c in ('pos', 'type')]],
        breast_target,
        dataset='breast',
        type='mean_mask',
    )
)

### breast using the features of the individual masks

In [None]:
# breast target, one run per mask: the columns whose name contains the mask
# identifier plus the encoded position.
# The position column of the feature frame is named 'pos'; the filter used to
# look only for a column called 'type', so the position never made it into
# these subsets. 'type' is still accepted in case the feature CSV provides it.
for m in masks:
    results.append(
        model_selection(
            breast_features[[c for c in breast_features.columns
                             if m in c or c in ('pos', 'type')]],
            breast_target,
            dataset='breast',
            type=m,
        )
    )

### Saving the results

In [None]:
# Combine the per-experiment outputs into one frame. The guard keeps this cell
# re-runnable: after the first run `results` already is the concatenated
# DataFrame and must not be passed to pd.concat again.
if not isinstance(results, pd.DataFrame):
    results = pd.concat(results)

results.to_csv(os.path.join(path_prefix_results, 'results.csv'), index=False)

# The context manager closes the pickle file as soon as the dump is finished
# instead of leaving the handle open until garbage collection.
with open(os.path.join(path_prefix_results, 'results.pickle'), 'wb') as pickle_file:
    pickle.dump(results, pickle_file)

## Without feature selection

### thigh with all features

In [None]:
# thigh target, every predictor column, feature selection switched off
results_no_fs.append(
    model_selection(
        thigh_features,
        thigh_target,
        dataset='thigh',
        type='all',
        disable_feature_selection=True,
    )
)

### thigh mean mask

In [None]:
# thigh target, mean mask features plus the encoded position, feature
# selection switched off.
# The position column of the feature frame is named 'pos'; the filter used to
# look only for a column called 'type', so the position never made it into
# this subset. 'type' is still accepted in case the feature CSV provides it.
results_no_fs.append(
    model_selection(
        thigh_features[[c for c in thigh_features.columns
                        if 'mean_mask' in c or c in ('pos', 'type')]],
        thigh_target,
        dataset='thigh',
        type='mean_mask',
        disable_feature_selection=True,
    )
)

### thigh individual masks

In [None]:
# thigh target, one run per mask with feature selection switched off: the
# columns whose name contains the mask identifier plus the encoded position.
# The position column of the feature frame is named 'pos'; the filter used to
# look only for a column called 'type', so the position never made it into
# these subsets. 'type' is still accepted in case the feature CSV provides it.
for m in masks:
    results_no_fs.append(
        model_selection(
            thigh_features[[c for c in thigh_features.columns
                            if m in c or c in ('pos', 'type')]],
            thigh_target,
            dataset='thigh',
            type=m,
            disable_feature_selection=True,
        )
    )

### breast all features

In [None]:
# breast target, every predictor column, feature selection switched off
results_no_fs.append(
    model_selection(
        breast_features,
        breast_target,
        dataset='breast',
        type='all',
        disable_feature_selection=True,
    )
)

### breast mean mask

In [None]:
# breast target, mean mask features plus the encoded position, feature
# selection switched off.
# The position column of the feature frame is named 'pos'; the filter used to
# look only for a column called 'type', so the position never made it into
# this subset. 'type' is still accepted in case the feature CSV provides it.
results_no_fs.append(
    model_selection(
        breast_features[[c for c in breast_features.columns
                         if 'mean_mask' in c or c in ('pos', 'type')]],
        breast_target,
        dataset='breast',
        type='mean_mask',
        disable_feature_selection=True,
    )
)

### breast individual masks

In [None]:
# breast target, one run per mask with feature selection switched off: the
# columns whose name contains the mask identifier plus the encoded position.
# The position column of the feature frame is named 'pos'; the filter used to
# look only for a column called 'type', so the position never made it into
# these subsets. 'type' is still accepted in case the feature CSV provides it.
for m in masks:
    results_no_fs.append(
        model_selection(
            breast_features[[c for c in breast_features.columns
                             if m in c or c in ('pos', 'type')]],
            breast_target,
            dataset='breast',
            type=m,
            disable_feature_selection=True,
        )
    )

### Saving the results

In [None]:
# Combine the per-experiment outputs into one frame. The guard keeps this cell
# re-runnable: after the first run `results_no_fs` already is the concatenated
# DataFrame and must not be passed to pd.concat again.
if not isinstance(results_no_fs, pd.DataFrame):
    results_no_fs = pd.concat(results_no_fs)

results_no_fs.to_csv(os.path.join(path_prefix_results, 'results_no_fs.csv'), index=False)

# The context manager closes the pickle file as soon as the dump is finished
# instead of leaving the handle open until garbage collection.
with open(os.path.join(path_prefix_results, 'results_no_fs.pickle'), 'wb') as pickle_file:
    pickle.dump(results_no_fs, pickle_file)