# 1_PRREDICT LONGITUDE

# Import libraries

In [1]:
# basic
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

# Regression Models
from sklearn.tree import DecisionTreeRegressor
from sklearn.svm import SVR
from sklearn.ensemble import RandomForestRegressor
from sklearn.ensemble import GradientBoostingRegressor
from sklearn.neighbors import KNeighborsRegressor
from sklearn.neural_network import MLPRegressor
from sklearn.linear_model import LinearRegression, Ridge, Lasso

# tools
from sklearn.model_selection import train_test_split

# regression model metrics
from sklearn.metrics import mean_squared_error
from sklearn.metrics import r2_score
from sklearn.model_selection import cross_val_score

#Hyper parameter tuning
from sklearn.model_selection import RandomizedSearchCV, GridSearchCV

# To save the ML models
import joblib

# FUNCTIONS

In [2]:
def regressionModel_GSCV (model, cv = 1, param_dist = None):
    
    GSCV = GridSearchCV(model, param_dist, cv=cv)
    
    GSCV.fit(X_train,y_train.values.ravel())

    # Print the tuned parameters and score
    print("Tuned Decision Parameters: {}".format(GSCV.best_params_))    
    
    # Make Predictions
    predictions = GSCV.predict(X_test)

    # Evaluate Predictions
    print("Mean Square Error (MSE): {}".format(mean_squared_error(y_test, predictions)))
    
    return GSCV

In [2]:
def save_model (model, file_name):
    try:
        path = '/home/ale/Dropbox/UBIQUM/4.DeepAnalytics&Visualization/T4M3.WiFiLocationing/MLmodels/'
 
        # Save the model as a pickle in a file
        joblib.dump(model, path + file_name)
        print('Model Saved')
    except:
        print('Model NOT!! Saved')

In [3]:
def load_model (file_name):
    path = '/home/ale/Dropbox/UBIQUM/4.DeepAnalytics&Visualization/T4M3.WiFiLocationing/MLmodels/'
 
    # Load the model from the file
    model_from_joblib = joblib.load(path + file_name)
    
    return model_from_joblib

# Import Data

In [3]:
path = '/home/ale/Dropbox/UBIQUM/4.DeepAnalytics&Visualization/T4M3.WiFiLocationing/Data/'
 
file = "0_DataPrepro.csv"

Tdata = pd.read_csv(path + file)

Tdata.head()




Unnamed: 0,WAP001,WAP002,WAP003,WAP004,WAP005,WAP006,WAP007,WAP008,WAP009,WAP010,...,WAP520,LONGITUDE,LATITUDE,FLOOR,BUILDINGID,SPACEID,RELATIVEPOSITION,USERID,PHONEID,TIMESTAMP
0,100,100,100,100,100,100,100,100,100,100,...,100,-7541.2643,4864921.0,2,1,106,2,2,23,1371713733
1,100,100,100,100,100,100,100,100,100,100,...,100,-7536.6212,4864934.0,2,1,106,2,2,23,1371713691
2,100,100,100,100,100,100,100,-97,100,100,...,100,-7519.1524,4864950.0,2,1,103,2,2,23,1371714095
3,100,100,100,100,100,100,100,100,100,100,...,100,-7524.5704,4864934.0,2,1,102,2,2,23,1371713807
4,100,100,100,100,100,100,100,100,100,100,...,100,-7632.1436,4864982.0,0,0,122,2,11,13,1369909710


# Cascade Model

Also remember the cascade model thing. Predict first one thing, then another, then another etc.. in every iteration we add the features we want to predict. 

Steps
1. Keep Useful features
2. dummify
3. sample to iterate, but just to trial error 
4. X-y Split
5. Multicolinearity check
6. train/test Split
7. apply the model & performace metrics

##  2. Predict Longitude 

As i'm using the cascade method, there will be one model for each feature i want to predict. each one will include a new feature. 

- **Predictors / Input**  > WAPS[0..520), BUILDINGID, FLOOR, Latitude
- **Predicted / Output** > Longitude

## 2.1 Keep Usefull features

In [4]:
# make a copy of the dataset, keeping only the predictor/predicted features
Tdata_LON = Tdata.drop(columns = ['SPACEID', 'RELATIVEPOSITION', 'USERID', 'PHONEID', 'TIMESTAMP'])

Tdata_LON.head()

Unnamed: 0,WAP001,WAP002,WAP003,WAP004,WAP005,WAP006,WAP007,WAP008,WAP009,WAP010,...,WAP515,WAP516,WAP517,WAP518,WAP519,WAP520,LONGITUDE,LATITUDE,FLOOR,BUILDINGID
0,100,100,100,100,100,100,100,100,100,100,...,100,100,100,100,100,100,-7541.2643,4864921.0,2,1
1,100,100,100,100,100,100,100,100,100,100,...,100,100,100,100,100,100,-7536.6212,4864934.0,2,1
2,100,100,100,100,100,100,100,-97,100,100,...,100,100,100,100,100,100,-7519.1524,4864950.0,2,1
3,100,100,100,100,100,100,100,100,100,100,...,100,100,100,100,100,100,-7524.5704,4864934.0,2,1
4,100,100,100,100,100,100,100,100,100,100,...,100,100,100,100,100,100,-7632.1436,4864982.0,0,0


### 2.2 Dummify

Now, as there is a categorical variable that will be used as a predictor (input) is important to dummify (binarize) it. 

In [5]:
Tdata_LON = pd.get_dummies(Tdata_LON, columns = ['BUILDINGID', 'FLOOR'])
Tdata_LON.head()

Unnamed: 0,WAP001,WAP002,WAP003,WAP004,WAP005,WAP006,WAP007,WAP008,WAP009,WAP010,...,LONGITUDE,LATITUDE,BUILDINGID_0,BUILDINGID_1,BUILDINGID_2,FLOOR_0,FLOOR_1,FLOOR_2,FLOOR_3,FLOOR_4
0,100,100,100,100,100,100,100,100,100,100,...,-7541.2643,4864921.0,0,1,0,0,0,1,0,0
1,100,100,100,100,100,100,100,100,100,100,...,-7536.6212,4864934.0,0,1,0,0,0,1,0,0
2,100,100,100,100,100,100,100,-97,100,100,...,-7519.1524,4864950.0,0,1,0,0,0,1,0,0
3,100,100,100,100,100,100,100,100,100,100,...,-7524.5704,4864934.0,0,1,0,0,0,1,0,0
4,100,100,100,100,100,100,100,100,100,100,...,-7632.1436,4864982.0,1,0,0,1,0,0,0,0


## 2.3 Sample Data

As there is a lot of data, is a good idea to fail faster with a reduced proportion of the data

In [6]:
sample = Tdata_LON

In [7]:
Tdata_LON = sample.sample(frac =.45, random_state= 200)

print("Len of the DF :" + str(len(Tdata_LON)))

#Tdata_Building.head()

Len of the DF :8972


## 2.4 X/y Split

- Input  > WAPS[0..520), BUILDINGID, FLOOR, LAT
- Output > LON

In [8]:
y_lon = Tdata_LON.iloc[:,520:521]
X_lon = Tdata_LON.drop(columns = ['LONGITUDE'])


X_lon.head() # check predictors

Unnamed: 0,WAP001,WAP002,WAP003,WAP004,WAP005,WAP006,WAP007,WAP008,WAP009,WAP010,...,WAP520,LATITUDE,BUILDINGID_0,BUILDINGID_1,BUILDINGID_2,FLOOR_0,FLOOR_1,FLOOR_2,FLOOR_3,FLOOR_4
998,100,100,100,100,100,100,100,100,100,100,...,100,4864867.0,0,1,0,0,0,1,0,0
2013,100,100,100,100,100,100,100,100,100,100,...,100,4864838.0,0,0,1,0,0,0,1,0
1665,100,100,100,100,100,100,100,100,100,100,...,100,4864774.0,0,0,1,0,0,1,0,0
6188,100,100,100,100,100,100,100,100,100,100,...,100,4864769.0,0,0,1,1,0,0,0,0
2671,100,100,100,100,100,100,100,100,100,100,...,100,4864786.0,0,0,1,0,0,0,1,0


In [9]:
y_lon.head() # check predicted

Unnamed: 0,LONGITUDE
998,-7562.1862
2013,-7352.3397
1665,-7375.183
6188,-7333.0691
2671,-7398.1108


## 2.5 Multicolinearity

For reference, the goal of regression is to isolate the relationship between each independent variable and the dependent variable. Multicollinearity weakens the statistical power of your model, thus leaving you unable to trust the p-values identifying which independent variables are statistically significant. In summary, multicollinearity won’t let you know the true effect of each variable.

### Calculate the Variance Inflation Factor (VIF).
VIF measures the collinearity among independent variables within a regression model. Then remove the multicorrelated features. 

https://kaiserm.medium.com/how-to-tackle-multicollinearity-79afe58e9479

In [10]:
from statsmodels.stats.outliers_influence import variance_inflation_factor


# Compute VIF
vif = pd.DataFrame()
vif["variables"] = X_lon.columns
vif["VIF"] = [variance_inflation_factor(X_lon.values, i) for i in range(X_lon.shape[1])]

vif


  return 1 - self.ssr/self.centered_tss
  vif = 1. / (1. - r_squared_i)


Unnamed: 0,variables,VIF
0,WAP001,1.240558
1,WAP002,1.594728
2,WAP003,0.000000
3,WAP004,0.000000
4,WAP005,1.474042
...,...,...
524,FLOOR_0,inf
525,FLOOR_1,inf
526,FLOOR_2,inf
527,FLOOR_3,inf


### create a list with the features that exceeds the VIF treshold value
The [documentation](https://www.statsmodels.org/stable/generated/statsmodels.stats.outliers_influence.variance_inflation_factor.html) states that an independent variable is highly collinear with other independent variables when VIF > 5, and the parameter estimates will have large standard errors because of this.

In [11]:
count = 0
VIF_blackList = []

correlated_columns = []
for i in range(len(vif)):
    if vif.loc[i,"VIF"] >= 5:
        VIF_blackList.append(vif.loc[i,'variables'])
#        print(vif.loc[i,'variables'],vif.loc[i,'VIF'] )
        count += 1
print("Wap's with a VIF >= 5  -> " + str(count))

# uncomment to see the list of features to remove
#VIF_blackList

Wap's with a VIF >= 5  -> 98


### remove from the VIF_blackList

In [12]:
X_lon = X_lon.drop(columns = VIF_blackList)
X_lon.head()

Unnamed: 0,WAP001,WAP002,WAP003,WAP004,WAP005,WAP006,WAP007,WAP008,WAP009,WAP010,...,WAP509,WAP510,WAP511,WAP512,WAP515,WAP516,WAP517,WAP518,WAP519,WAP520
998,100,100,100,100,100,100,100,100,100,100,...,100,100,100,100,100,100,100,100,100,100
2013,100,100,100,100,100,100,100,100,100,100,...,100,100,100,100,100,100,100,100,100,100
1665,100,100,100,100,100,100,100,100,100,100,...,100,100,100,100,100,100,100,100,100,100
6188,100,100,100,100,100,100,100,100,100,100,...,100,100,100,100,100,-83,-85,100,100,100
2671,100,100,100,100,100,100,100,100,100,100,...,100,100,100,100,100,100,100,100,100,100


# Save PreProcessed Data

In [13]:
path = '/home/ale/Dropbox/UBIQUM/4.DeepAnalytics&Visualization/T4M3.WiFiLocationing/Data/'
file_X = "4_DataPrepro_LON_X.csv"
file_y = "4_DataPrepro_LON_y.csv"

X_lon.to_csv(path + file_X, index = False)
y_lon.to_csv(path + file_y, index = False)

# Preprocess VALIDATION Data

In [38]:
# Load File
path = '/home/ale/Dropbox/UBIQUM/4.DeepAnalytics&Visualization/T4M3.WiFiLocationing/Data/UJIndoorLoc/'
file = 'TestData.csv'

Val_data = pd.read_csv(path + file)
                                     
# make a copy of the dataset, keeping only the predictor/predicted features
Vdata_LON = Val_data.drop(columns = ['SPACEID', 'RELATIVEPOSITION', 'USERID', 'PHONEID', 'TIMESTAMP'])

# X/y split
y_Vd_LON = Vdata_LON.iloc[:,520:521]
X_Vd_LON = Vdata_LON.drop(columns = ['LONGITUDE'])

# dummify Building
X_Vd_LON = pd.get_dummies(X_Vd_LON, columns = ['BUILDINGID', 'FLOOR'])

# same process than the training set. Must drop the same features from the VIFblacklist
X_Vd_LON = X_Vd_LON.drop(columns = VIF_blackList)

print(y_Vd_LON.head())
X_Vd_LON.head()

     LONGITUDE
0 -7515.916799
1 -7383.867221
2 -7374.302080
3 -7365.824883
4 -7641.499303


Unnamed: 0,WAP001,WAP002,WAP003,WAP004,WAP005,WAP006,WAP007,WAP008,WAP009,WAP010,...,WAP509,WAP510,WAP511,WAP512,WAP515,WAP516,WAP517,WAP518,WAP519,WAP520
0,100,100,100,100,100,100,100,100,100,100,...,100,100,100,100,100,100,100,100,100,100
1,100,100,100,100,100,100,100,100,100,100,...,100,100,100,100,100,100,100,100,100,100
2,100,100,100,100,100,100,100,100,100,100,...,100,100,100,100,100,100,100,100,100,100
3,100,100,100,100,100,100,100,100,100,100,...,100,100,100,100,100,100,100,100,100,100
4,100,100,100,100,100,100,100,100,100,100,...,100,100,100,100,100,100,100,100,100,100


In [39]:
# save files
path = '/home/ale/Dropbox/UBIQUM/4.DeepAnalytics&Visualization/T4M3.WiFiLocationing/Data/'
file_X = "4_Val_DataPrepro_LON_X.csv"
file_y = "4_Val_DataPrepro_LON_y.csv"

X_Vd_LON.to_csv(path + file_X, index = False)
y_Vd_LON.to_csv(path + file_y, index = False)

# Load TRAINING preprocessed Data

In [14]:
path = '/home/ale/Dropbox/UBIQUM/4.DeepAnalytics&Visualization/T4M3.WiFiLocationing/Data/'
file_X = "4_DataPrepro_LON_X.csv"
file_y = "4_DataPrepro_LON_y.csv"

Saved_X = pd.read_csv(path + file_X)
Saved_y = pd.read_csv(path + file_y)

print(Saved_y.head())
Saved_X.head()

   LONGITUDE
0 -7562.1862
1 -7352.3397
2 -7375.1830
3 -7333.0691
4 -7398.1108


Unnamed: 0,WAP001,WAP002,WAP003,WAP004,WAP005,WAP006,WAP007,WAP008,WAP009,WAP010,...,WAP509,WAP510,WAP511,WAP512,WAP515,WAP516,WAP517,WAP518,WAP519,WAP520
0,100,100,100,100,100,100,100,100,100,100,...,100,100,100,100,100,100,100,100,100,100
1,100,100,100,100,100,100,100,100,100,100,...,100,100,100,100,100,100,100,100,100,100
2,100,100,100,100,100,100,100,100,100,100,...,100,100,100,100,100,100,100,100,100,100
3,100,100,100,100,100,100,100,100,100,100,...,100,100,100,100,100,-83,-85,100,100,100
4,100,100,100,100,100,100,100,100,100,100,...,100,100,100,100,100,100,100,100,100,100


## 2.6 Train/Test Split

In [18]:
# define X and y
X = Saved_X
y = Saved_y

# define train/test_ratio
train_ratio = 0.75
test_ratio = 0.25

# split data
X_train, X_test, y_train, y_test = train_test_split(X_lon, y_lon, test_size=(1-train_ratio) )

# check dimensions
print('DF: ', Tdata_LON.shape)
#print('X:', X.shape,'y:', y.shape)
print('X_train:', X_train.shape,'y_train:', y_train.shape)
print('X_test: ', X_test.shape,' y_test: ', y_test.shape)

DF:  (8972, 530)
X_train: (6729, 431) y_train: (6729, 1)
X_test:  (2243, 431)  y_test:  (2243, 1)


## 3.7 Models

### 3.7.1 sklearn.tree.DecisionTreeRegressor¶

In [20]:
model_DTR = DecisionTreeRegressor()

# print the full list of parametrizable parameters
# model.get_params(deep=True)

In [22]:
#Setup the parameters and distributions to sample from: param_dist
cv_DTR = 10

param_dist_DTR = {'ccp_alpha': [0.03, 0.045, 0.06] ,
                 'criterion': ["mse", "friedman_mse"] ,
                 #'max_depth': None,
                 #'max_features': None,
                 #'max_leaf_nodes': None,
                 #'min_impurity_decrease': 0.0,
                 #'min_impurity_split': None,
                 #'min_samples_leaf': 1,
                 #'min_samples_split': 2,
                 #'min_weight_fraction_leaf': 0.0,
                 #'random_state': None,
                 #'splitter': 'best'
                }

DTR = regressionModel_GSCV(model = model_DTR, cv = cv_DTR, param_dist = param_dist_DTR)

Tuned Decision Parameters: {'ccp_alpha': 0.045, 'criterion': 'mse'}
Mean Square Error (MSE): 227.8139255449943


### save DTR

**Pickled** model as a file using joblib: Joblib is the replacement of pickle as it is more efficient on objects that carry large numpy arrays. These functions also accept file-like object instead of filenames.
     

- joblib.dump to serialize an object hierarchy 
- joblib.load to deserialize a data stream

In [23]:
file = "LON_DTR.joblib"
model = DTR

save_model(model, file)

['/home/ale/Dropbox/UBIQUM/4.DeepAnalytics&Visualization/T4M3.WiFiLocationing/LON_DTR.joblib']

### Load DRT

In [24]:
file = 'LON_DTR.joblib'

# Load the model from the file
DTR_from_joblib = load_model(file)

# Use the loaded model to make predictions
print("MSE from loaded model : {}".format(mean_squared_error(y_test, DTR_from_joblib.predict(X_test))))

MSE from loaded model : 227.8139255449943


### 3.7.1 KNeighborsRegressor

In [25]:
model = KNeighborsRegressor()

# print the full list of parametrizable parameters
#model.get_params(deep=True)

In [26]:
model = KNeighborsRegressor()

#Setup the parameters and distributions to sample from: param_dist
cv_KNreg = 10

param_dist_KNreg = {# 'algorithm': 'auto',
                     'leaf_size': [1,2,4,6] ,
                    # 'metric': 'minkowski',
                    # 'metric_params': None,
                     'n_jobs': [-1],
                     'n_neighbors': [1,2,4,8,10] ,
                     #'p': [2]
                     'weights': ["uniform", "distance"]
                   }

KNreg = regressionModel_GSCV(model, cv_KNreg, param_dist_KNreg)
#KNreg = regressionModel_GSCV(model = model, cv = cv_DTR, param_dist = param_dist_KNreg)

Tuned Decision Parameters: {'leaf_size': 1, 'n_jobs': -1, 'n_neighbors': 2, 'weights': 'distance'}
Mean Square Error (MSE): 165.78515863042858


In [27]:
model = KNeighborsRegressor()

#Setup the parameters and distributions to sample from: param_dist
cv_KNreg = 10

param_dist_KNreg = {# 'algorithm': 'auto',
                     'leaf_size': [1,2,4] ,
                    # 'metric': 'minkowski',
                    # 'metric_params': None,
                     'n_jobs': [-1],
                     'n_neighbors': [1,2,4] ,
                     #'p': [2]
                     'weights': ["uniform", "distance"]
                   }

KNreg = regressionModel_GSCV(model, cv_KNreg, param_dist_KNreg)
#KNreg = regressionModel_GSCV(model = model, cv = cv_DTR, param_dist = param_dist_KNreg)

Tuned Decision Parameters: {'leaf_size': 1, 'n_jobs': -1, 'n_neighbors': 2, 'weights': 'distance'}
Mean Square Error (MSE): 165.78515863042858


### save KNreg

In [28]:
file = "LON_KNreg.joblib"
model = KNreg

save_model(model, file)

['/home/ale/Dropbox/UBIQUM/4.DeepAnalytics&Visualization/T4M3.WiFiLocationing/LON_KNreg.joblib']

### Load kNreg

In [29]:
file = 'LON_KNreg.joblib'

# Load the model from the file
KNreg_from_joblib = load_model(file)

# Use the loaded model to make predictions
print("MSE from loaded model : {}".format(mean_squared_error(y_test, KNreg_from_joblib.predict(X_test))))

MSE from loaded model : 165.78515863042858


### 3.7.3 Multi-layer Perceptron regressor

In [30]:
model = MLPRegressor()

# print the full list of parametrizable parameters
#model.get_params(deep=True)

In [31]:
model = MLPRegressor()

#Setup the parameters and distributions to sample from: param_dist
cv_MLP = 10

param_dist_MLP ={#'activation': ['logistic', "identity", "logistic", "tanh", "relu"],
                 #'alpha': 0.0001,
                 #'batch_size': 'auto',
                 #'beta_1': 0.9,
                 #'beta_2': 0.999,
                 'early_stopping': [True] ,
                 #'epsilon': 1e-08,
                 #'hidden_layer_sizes': (100,),
                 'learning_rate': ['invscaling'],
                 'learning_rate_init': [0.001, 0.01, 0,1, 1],
                 #'max_fun': 15000,
                 'max_iter': [800],
                 #'momentum': 0.9,
                 'n_iter_no_change': [5],
                 #'nesterovs_momentum': True,
                 #'power_t': 0.5,
                 'random_state': [1],
                 #'shuffle': True,
                 #'solver': 'adam',
                 #'tol': 0.0001,
                 #'validation_fraction': 0.1,
                 'verbose': [False] ,
                 #'warm_start': False
                }

MLP = regressionModel_GSCV(model, cv_MLP, param_dist_MLP)


Traceback (most recent call last):
  File "/home/ale/anaconda3/lib/python3.8/site-packages/sklearn/model_selection/_validation.py", line 598, in _fit_and_score
    estimator.fit(X_train, y_train, **fit_params)
  File "/home/ale/anaconda3/lib/python3.8/site-packages/sklearn/neural_network/_multilayer_perceptron.py", line 673, in fit
    return self._fit(X, y, incremental=False)
  File "/home/ale/anaconda3/lib/python3.8/site-packages/sklearn/neural_network/_multilayer_perceptron.py", line 357, in _fit
    self._validate_hyperparameters()
  File "/home/ale/anaconda3/lib/python3.8/site-packages/sklearn/neural_network/_multilayer_perceptron.py", line 420, in _validate_hyperparameters
    raise ValueError("learning_rate_init must be > 0, got %s." %
ValueError: learning_rate_init must be > 0, got invscaling.

Traceback (most recent call last):
  File "/home/ale/anaconda3/lib/python3.8/site-packages/sklearn/model_selection/_validation.py", line 598, in _fit_and_score
    estimator.fit(X_train,

Tuned Decision Parameters: {'early_stopping': True, 'learning_rate': 'invscaling', 'learning_rate_init': 0.001, 'max_iter': 800, 'n_iter_no_change': 5, 'random_state': 1, 'verbose': False}
Mean Square Error (MSE): 1995.0154303723807


### save MLP

**Pickled** model as a file using joblib: Joblib is the replacement of pickle as it is more efficient on objects that carry large numpy arrays. These functions also accept file-like object instead of filenames.
     

- joblib.dump to serialize an object hierarchy 
- joblib.load to deserialize a data stream

In [32]:
file = "LON_MLP.joblib"
model = MLP
 
# Save the model as a pickle in a file
save_model(model,file) 

['/home/ale/Dropbox/UBIQUM/4.DeepAnalytics&Visualization/T4M3.WiFiLocationing/LON_MLP.joblib']

### Load k-NN

In [33]:
file = 'LON_MLP.joblib'

# Load the model from the file
MLP_from_joblib = load_model(file)

# Use the loaded model to make predictions
print("MSE from loaded model : {}".format(mean_squared_error(y_test, MLP_from_joblib.predict(X_test))))

MSE from loaded model : 1995.0154303723807


### 3.7.4 SVR

In [34]:
model = SVR()

# print the full list of parametrizable parameters
#model.get_params(deep=True)

In [35]:
from sklearn.model_selection import RandomizedSearchCV, GridSearchCV
from sklearn.svm import SVR

model = SVR()

#Setup the parameters and distributions to sample from: param_dist
cv_SVR = 10

param_dist_SVR ={'C': [50, 100, 200, 400],
                 #'cache_size': 200,
                 #'coef0': 0.0,
                 #'degree': [2],
                 #'epsilon': 1,
                 #'gamma': 'scale',
                 'kernel': ['rbf'], #'rbf',
                 #'max_iter': -1,
                 #'shrinking': True,
                 #'tol': 0.001,
                 'verbose': [True]
                }

SVR = regressionModel_GSCV(model, cv_SVR, param_dist_SVR)


[LibSVM][LibSVM][LibSVM][LibSVM][LibSVM][LibSVM][LibSVM][LibSVM][LibSVM][LibSVM][LibSVM][LibSVM][LibSVM][LibSVM][LibSVM][LibSVM][LibSVM][LibSVM][LibSVM][LibSVM][LibSVM][LibSVM][LibSVM][LibSVM][LibSVM][LibSVM][LibSVM][LibSVM][LibSVM][LibSVM][LibSVM][LibSVM][LibSVM][LibSVM][LibSVM][LibSVM][LibSVM][LibSVM][LibSVM][LibSVM][LibSVM]Tuned Decision Parameters: {'C': 400, 'kernel': 'rbf', 'verbose': True}
Mean Square Error (MSE): 210.9955555808584


### save DTR

**Pickled** model as a file using joblib: Joblib is the replacement of pickle as it is more efficient on objects that carry large numpy arrays. These functions also accept file-like object instead of filenames.
     

- joblib.dump to serialize an object hierarchy 
- joblib.load to deserialize a data stream

In [36]:
file = "LON_SVR.joblib"
model = SVR
 
# Save the model as a pickle in a file
save_model(model,file)

['/home/ale/Dropbox/UBIQUM/4.DeepAnalytics&Visualization/T4M3.WiFiLocationing/LON_SVR.joblib']

### Load DTR

In [37]:
file = 'LON_SVR.joblib'

# Load the model from the file
SVR_from_joblib = load_model(file)

# Use the loaded model to make predictions
print("MSE from loaded model : {}".format(mean_squared_error(y_test, SVR_from_joblib.predict(X_test))))

MSE from loaded model : 210.9955555808584
