## <font color='green'> Regression (Dense)<font>

### <font color='green'> 1. Description<font>

Relative location of CT slices on axial axis data set using 53500 CT images from 74 different patients (43 male, 31 female).
Dataset can be downloaded from https://archive.ics.uci.edu/ml/machine-learning-databases/00206/slice_localization_data.zip

Each CT slice is described by two histograms in polar space. The first histogram describes the location of bone structures in the image, the second the location of air inclusions inside of the body.

The class variable (relative location of an image on the axial axis) was constructed by manually annotating up to 10 different distinct landmarks in each CT Volume with known location. The location of slices in between landmarks was interpolated.

### <font color='green'> 2. Data Preprocessing <font>

In [1]:
import os
import time
import numpy as np
import pandas as pd
from collections import OrderedDict

In [2]:
def preprocess_data(fname):
    '''
    For CT slices regression we will perform some data preparation and data cleaning steps.
    '''
    df = pd.read_csv(fname)
    df = df.drop(columns=["patientId"])
    x = df.iloc[:,:-1]
    y = df.iloc[:, -1]
    from sklearn.model_selection import train_test_split
    return train_test_split(x, y, test_size=0.20, random_state=42)

In [3]:
#---- Data Preparation ----
# Please uncomment the below lines to download and unzip the dataset.
#!wget -N https://archive.ics.uci.edu/ml/machine-learning-databases/00206/slice_localization_data.zip
#!unzip -o slice_localization_data.zip
#!mv slice_localization_data.csv datasets

DATA_FILE = "datasets/slice_localization_data.csv"
x_train, x_test, y_train, y_test = preprocess_data(DATA_FILE)
print("shape of train data: {}".format(x_train.shape))
print("shape of test data: {}".format(x_test.shape))

shape of train data: (42800, 384)
shape of test data: (10700, 384)


### <font color='green'> 3. Algorithm Evaluation <font>

In [4]:
train_time = []
test_time = []
train_score = []
test_score = []
estimator_name = []

In [5]:
def evaluate(estimator, estimator_nm,
             x_train, y_train,
             x_test, y_test):
    '''
    To generate performance report for both frovedis and sklearn estimators
    '''
    estimator_name.append(estimator_nm)

    start_time = time.time()
    estimator.fit(x_train, y_train)
    train_time.append(round(time.time() - start_time, 4))

    start_time = time.time()
    train_score.append(estimator.score(x_train, y_train))
    test_score.append(estimator.score(x_test, y_test))
    test_time.append(round(time.time() - start_time, 4))

#### 3.1 LinearRegression

In [6]:
TARGET = "lnr"
import frovedis
from frovedis.exrpc.server import FrovedisServer
FrovedisServer.initialize("mpirun -np 8 " +  os.environ["FROVEDIS_SERVER"])
from frovedis.mllib.linear_model import LinearRegression as fLNR
f_est = fLNR()
E_NM = TARGET + "_frovedis_" + frovedis.__version__
evaluate(f_est, E_NM, x_train, y_train, x_test, y_test)
f_est.release()
FrovedisServer.shut_down()

import sklearn
from sklearn.linear_model import LinearRegression as sLNR
s_est = sLNR()
E_NM = TARGET + "_sklearn_" + sklearn.__version__
evaluate(s_est, E_NM, x_train, y_train, x_test, y_test)

#### 3.2 SGDRegressor

In [7]:
TARGET = "sgd"
import frovedis
from frovedis.exrpc.server import FrovedisServer
FrovedisServer.initialize("mpirun -np 8 " +  os.environ["FROVEDIS_SERVER"])
from frovedis.mllib.linear_model import SGDRegressor as fSGDReg
f_est = fSGDReg(loss="squared_loss", penalty="l2",
                eta0=0.00001)
E_NM = TARGET + "_frovedis_" + frovedis.__version__
evaluate(f_est, E_NM, x_train, y_train, x_test, y_test)
f_est.release()
FrovedisServer.shut_down()

import sklearn
from sklearn.linear_model import SGDRegressor as sSGDReg
s_est = sSGDReg(loss="squared_loss", penalty="l2",
                eta0=0.00001, random_state=42)
E_NM = TARGET + "_sklearn_" + sklearn.__version__
evaluate(s_est, E_NM, x_train, y_train, x_test, y_test)



#### 3.3 KNNRegressor

In [8]:
TARGET = "knn"
import frovedis
from frovedis.exrpc.server import FrovedisServer
FrovedisServer.initialize("mpirun -np 8 " +  os.environ["FROVEDIS_SERVER"])
from frovedis.mllib.neighbors import KNeighborsRegressor as fKNR
f_est = fKNR(n_neighbors=1000, metric="euclidean",
             algorithm="brute")
E_NM = TARGET + "_frovedis_" + frovedis.__version__
evaluate(f_est, E_NM, x_train, y_train, x_test, y_test)
f_est.release()
FrovedisServer.shut_down()

import sklearn
from sklearn.neighbors import KNeighborsRegressor as sKNR
s_est = sKNR(n_neighbors=1000, metric="euclidean",
             algorithm="brute")
E_NM = TARGET + "_sklearn_" + sklearn.__version__
evaluate(s_est, E_NM, x_train, y_train, x_test, y_test)

#### 3.4 RidgeRegressor

In [9]:
TARGET = "ridge"
import frovedis
from frovedis.exrpc.server import FrovedisServer
FrovedisServer.initialize("mpirun -np 8 " +  os.environ["FROVEDIS_SERVER"])
from frovedis.mllib.linear_model import Ridge as fRR
f_est = fRR(alpha=0.001, lr_rate=1e-05, max_iter=200)
E_NM = TARGET + "_frovedis_" + frovedis.__version__
evaluate(f_est, E_NM, x_train, y_train, x_test, y_test)
f_est.release()
FrovedisServer.shut_down()

import sklearn
from sklearn.linear_model import Ridge as sRR
s_est = sRR(alpha=0.001, solver='sag', max_iter=200)
E_NM = TARGET + "_sklearn_" + sklearn.__version__
evaluate(s_est, E_NM, x_train, y_train, x_test, y_test)

#### 3.5 LinearSupportVectorRegressor

In [10]:
TARGET = "svr"
import frovedis
from frovedis.exrpc.server import FrovedisServer
FrovedisServer.initialize("mpirun -np 8 " +  os.environ["FROVEDIS_SERVER"])
from frovedis.mllib.svm import LinearSVR as fLSVR
f_est = fLSVR(lr_rate=0.0008)
E_NM = TARGET + "_frovedis_" + frovedis.__version__
evaluate(f_est, E_NM, x_train, y_train, x_test, y_test)
f_est.release()
FrovedisServer.shut_down()

import sklearn
from sklearn import svm as sSVMR
s_est = sSVMR.SVR(epsilon=0.0, max_iter=1000, tol=0.0001)
E_NM = TARGET + "_sklearn_" + sklearn.__version__
evaluate(s_est, E_NM, x_train, y_train, x_test, y_test)



#### 3.6 DecisionTreeRegressor

In [11]:
TARGET = "dtr"
import frovedis
from frovedis.exrpc.server import FrovedisServer
FrovedisServer.initialize("mpirun -np 8 " +  os.environ["FROVEDIS_SERVER"])
from frovedis.mllib.tree import DecisionTreeRegressor as fDTR
f_est = fDTR()
E_NM = TARGET + "_frovedis_" + frovedis.__version__
evaluate(f_est, E_NM, x_train, y_train, x_test, y_test)
f_est.release()
FrovedisServer.shut_down()

import sklearn
from sklearn.tree import DecisionTreeRegressor as sDTR
s_est = sDTR(max_depth=5)
E_NM = TARGET + "_sklearn_" + sklearn.__version__
evaluate(s_est, E_NM, x_train, y_train, x_test, y_test)

#### 3.7 LassoRegressor

In [12]:
TARGET = "lasso"
import frovedis
from frovedis.exrpc.server import FrovedisServer
FrovedisServer.initialize("mpirun -np 8 " +  os.environ["FROVEDIS_SERVER"])
from frovedis.mllib.linear_model import Lasso as fLR
f_est = fLR(alpha=0.001, lr_rate=1e-05, max_iter=800)
E_NM = TARGET + "_frovedis_" + frovedis.__version__
evaluate(f_est, E_NM, x_train, y_train, x_test, y_test)
f_est.release()
FrovedisServer.shut_down()

import sklearn
from sklearn.linear_model import Lasso as sLR
s_est = sLR(alpha=0.001, max_iter=800)
E_NM = TARGET + "_sklearn_" + sklearn.__version__
evaluate(s_est, E_NM, x_train, y_train, x_test, y_test)

### <font color='green'> 4. Performance Summary <font>

In [13]:
summary = pd.DataFrame(OrderedDict({ "estimator": estimator_name,
                                     "train time": train_time,
                                     "test time": test_time,
                                     "train-score": train_score,
                                     "test-score": test_score
                                  }))
print(summary)

                estimator  train time  test time  train-score  test-score
0     lnr_frovedis_0.9.10      0.7077     0.2203     0.864830    0.861450
1      lnr_sklearn_0.24.1      0.4939     0.1314     0.864834    0.861452
2     sgd_frovedis_0.9.10      0.2518     0.2311     0.829700    0.829750
3      sgd_sklearn_0.24.1     49.3236     0.0634     0.815368    0.814967
4     knn_frovedis_0.9.10      0.2209     3.5279     0.821526    0.828232
5      knn_sklearn_0.24.1      0.0204    43.3279     0.821526    0.828232
6   ridge_frovedis_0.9.10      0.2197     0.2426     0.825246    0.825292
7    ridge_sklearn_0.24.1      9.2877     0.0414     0.864844    0.861442
8     svr_frovedis_0.9.10      0.5324     0.2557     0.776946    0.778071
9      svr_sklearn_0.24.1     24.5304    32.9193     0.772782    0.770263
10    dtr_frovedis_0.9.10      0.3235     0.2687     0.790788    0.786716
11     dtr_sklearn_0.24.1      1.6639     0.0454     0.792260    0.786357
12  lasso_frovedis_0.9.10      0.4499 