# 1_PRREDICT LATITUDE

# Import libraries

In [1]:
# basic
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

# Regression Models
from sklearn.tree import DecisionTreeRegressor
from sklearn.svm import SVR
from sklearn.ensemble import RandomForestRegressor
from sklearn.ensemble import GradientBoostingRegressor
from sklearn.neighbors import KNeighborsRegressor
from sklearn.neural_network import MLPRegressor
#from sklearn.linear_model import LinearRegression, Ridge, Lasso

# tools
from sklearn.model_selection import train_test_split

# regression model metrics
from sklearn.metrics import mean_squared_error
from sklearn.metrics import r2_score
from sklearn.model_selection import cross_val_score

#Hyper parameter tuning
from sklearn.model_selection import RandomizedSearchCV, GridSearchCV

# To save the ML models
import joblib

# FUNCTIONS

In [2]:
def makePredictions(loaded_model, X_test, y_test):
    model_predictions = loaded_model.predict(X_test)
    print(accuracy_score(y_test, model_predictions))
    print(confusion_matrix(y_test, model_predictions))

In [3]:
def regressionModel_GSCV (model, cv = 1, param_dist = None):
    
    GSCV = GridSearchCV(model, param_dist, cv=cv)
    
    GSCV.fit(X_train,y_train.values.ravel())

    # Print the tuned parameters and score
    print("Tuned Decision Parameters: {}".format(GSCV.best_params_))    
    
    # Make Predictions
    predictions = GSCV.predict(X_test)

    # Evaluate Predictions
    print("Mean Square Error (MSE): {}".format(mean_squared_error(y_test, predictions)))
    
    return GSCV

In [2]:
def save_model (model, file_name):
    try:
        path = '/home/ale/Dropbox/UBIQUM/4.DeepAnalytics&Visualization/T4M3.WiFiLocationing/MLmodels/'
 
        # Save the model as a pickle in a file
        joblib.dump(model, path + file_name)
        print('Model Saved')
    except:
        print('Model NOT!! Saved')

In [3]:
def load_model (file_name):
    path = '/home/ale/Dropbox/UBIQUM/4.DeepAnalytics&Visualization/T4M3.WiFiLocationing/MLmodels/'
 
    # Load the model from the file
    model_from_joblib = joblib.load(path + file_name)
    
    return model_from_joblib

# Import Data

In [4]:
path = '/home/ale/Dropbox/UBIQUM/4.DeepAnalytics&Visualization/T4M3.WiFiLocationing/Data/'
 
file = "0_DataPrepro.csv"

Tdata = pd.read_csv(path + file)

Tdata.head()




Unnamed: 0,WAP001,WAP002,WAP003,WAP004,WAP005,WAP006,WAP007,WAP008,WAP009,WAP010,...,WAP520,LONGITUDE,LATITUDE,FLOOR,BUILDINGID,SPACEID,RELATIVEPOSITION,USERID,PHONEID,TIMESTAMP
0,100,100,100,100,100,100,100,100,100,100,...,100,-7541.2643,4864921.0,2,1,106,2,2,23,1371713733
1,100,100,100,100,100,100,100,100,100,100,...,100,-7536.6212,4864934.0,2,1,106,2,2,23,1371713691
2,100,100,100,100,100,100,100,-97,100,100,...,100,-7519.1524,4864950.0,2,1,103,2,2,23,1371714095
3,100,100,100,100,100,100,100,100,100,100,...,100,-7524.5704,4864934.0,2,1,102,2,2,23,1371713807
4,100,100,100,100,100,100,100,100,100,100,...,100,-7632.1436,4864982.0,0,0,122,2,11,13,1369909710


# Cascade Model

Also remember the cascade model thing. Predict first one thing, then another, then another etc.. in every iteration we add the features we want to predict. 

Steps
1. Keep Useful features
2. dummify
3. sample to iterate, but just to trial error 
4. X-y Split
5. Multicolinearity check
6. train/test Split
7. apply the model & performace metrics

##  2. Predict Floor 

As i'm using the cascade method, there will be one model for each feature i want to predict. each one will include a new feature. 

- **Predictors / Input**  > WAPS[0..520), BUILDINGID, FLOOR
- **Predicted / Output** > LATITUDE

## 3.1 Keep Usefull features

In [5]:
# make a copy of the dataset, keeping only the predictor/predicted features
Tdata_LAT = Tdata.drop(columns = ['LONGITUDE', 'SPACEID', 'RELATIVEPOSITION', 'USERID', 'PHONEID', 'TIMESTAMP'])

Tdata_LAT.head()

Unnamed: 0,WAP001,WAP002,WAP003,WAP004,WAP005,WAP006,WAP007,WAP008,WAP009,WAP010,...,WAP514,WAP515,WAP516,WAP517,WAP518,WAP519,WAP520,LATITUDE,FLOOR,BUILDINGID
0,100,100,100,100,100,100,100,100,100,100,...,100,100,100,100,100,100,100,4864921.0,2,1
1,100,100,100,100,100,100,100,100,100,100,...,100,100,100,100,100,100,100,4864934.0,2,1
2,100,100,100,100,100,100,100,-97,100,100,...,100,100,100,100,100,100,100,4864950.0,2,1
3,100,100,100,100,100,100,100,100,100,100,...,100,100,100,100,100,100,100,4864934.0,2,1
4,100,100,100,100,100,100,100,100,100,100,...,100,100,100,100,100,100,100,4864982.0,0,0


### 3.2 Dummify

Now, as there is a categorical variable that will be used as a predictor (input) is important to dummify (binarize) it. 

In [6]:
Tdata_LAT = pd.get_dummies(Tdata_LAT, columns = ['BUILDINGID', 'FLOOR'])
Tdata_LAT.head()

Unnamed: 0,WAP001,WAP002,WAP003,WAP004,WAP005,WAP006,WAP007,WAP008,WAP009,WAP010,...,WAP520,LATITUDE,BUILDINGID_0,BUILDINGID_1,BUILDINGID_2,FLOOR_0,FLOOR_1,FLOOR_2,FLOOR_3,FLOOR_4
0,100,100,100,100,100,100,100,100,100,100,...,100,4864921.0,0,1,0,0,0,1,0,0
1,100,100,100,100,100,100,100,100,100,100,...,100,4864934.0,0,1,0,0,0,1,0,0
2,100,100,100,100,100,100,100,-97,100,100,...,100,4864950.0,0,1,0,0,0,1,0,0
3,100,100,100,100,100,100,100,100,100,100,...,100,4864934.0,0,1,0,0,0,1,0,0
4,100,100,100,100,100,100,100,100,100,100,...,100,4864982.0,1,0,0,1,0,0,0,0


## 3.3 Sample Data

As there is a lot of data, is a good idea to fail faster with a reduced proportion of the data

In [7]:
sample = Tdata_LAT

In [8]:
Tdata_LAT = sample.sample(frac =.45, random_state= 200)

print("Len of the DF :" + str(len(Tdata_LAT)))

#Tdata_Building.head()

Len of the DF :8972


## 3.4 X/y Split

- Input  > WAPS[0..520), BUILDINGID, FLOOR
- Output > LAT

In [9]:
y_lat = Tdata_LAT.iloc[:,520:521]
X_lat = Tdata_LAT.drop(columns = ['LATITUDE'])


X_lat.head() # check predictors

Unnamed: 0,WAP001,WAP002,WAP003,WAP004,WAP005,WAP006,WAP007,WAP008,WAP009,WAP010,...,WAP519,WAP520,BUILDINGID_0,BUILDINGID_1,BUILDINGID_2,FLOOR_0,FLOOR_1,FLOOR_2,FLOOR_3,FLOOR_4
998,100,100,100,100,100,100,100,100,100,100,...,100,100,0,1,0,0,0,1,0,0
2013,100,100,100,100,100,100,100,100,100,100,...,100,100,0,0,1,0,0,0,1,0
1665,100,100,100,100,100,100,100,100,100,100,...,100,100,0,0,1,0,0,1,0,0
6188,100,100,100,100,100,100,100,100,100,100,...,100,100,0,0,1,1,0,0,0,0
2671,100,100,100,100,100,100,100,100,100,100,...,100,100,0,0,1,0,0,0,1,0


In [10]:
y_lat.head() # check predicted

Unnamed: 0,LATITUDE
998,4864867.0
2013,4864838.0
1665,4864774.0
6188,4864769.0
2671,4864786.0


## 3.5 Multicolinearity

For reference, the goal of regression is to isolate the relationship between each independent variable and the dependent variable. Multicollinearity weakens the statistical power of your model, thus leaving you unable to trust the p-values identifying which independent variables are statistically significant. In summary, multicollinearity won’t let you know the true effect of each variable.

### Calculate the Variance Inflation Factor (VIF).
VIF measures the collinearity among independent variables within a regression model. Then remove the multicorrelated features. 

https://kaiserm.medium.com/how-to-tackle-multicollinearity-79afe58e9479

In [11]:
from statsmodels.stats.outliers_influence import variance_inflation_factor


# Compute VIF
vif = pd.DataFrame()
vif["variables"] = X_lat.columns
vif["VIF"] = [variance_inflation_factor(X_lat.values, i) for i in range(X_lat.shape[1])]

vif


  return 1 - self.ssr/self.centered_tss
  vif = 1. / (1. - r_squared_i)


Unnamed: 0,variables,VIF
0,WAP001,1.240412
1,WAP002,1.594707
2,WAP003,0.000000
3,WAP004,0.000000
4,WAP005,1.473705
...,...,...
523,FLOOR_0,inf
524,FLOOR_1,inf
525,FLOOR_2,inf
526,FLOOR_3,inf


### create a list with the features that exceeds the VIF treshold value
The [documentation](https://www.statsmodels.org/stable/generated/statsmodels.stats.outliers_influence.variance_inflation_factor.html) states that an independent variable is highly collinear with other independent variables when VIF > 5, and the parameter estimates will have large standard errors because of this.

In [12]:
count = 0
VIF_blackList = []

correlated_columns = []
for i in range(len(vif)):
    if vif.loc[i,"VIF"] >= 5:
        VIF_blackList.append(vif.loc[i,'variables'])
#        print(vif.loc[i,'variables'],vif.loc[i,'VIF'] )
        count += 1
print("Wap's with a VIF >= 5  -> " + str(count))

# uncomment to see the list of features to remove
#VIF_blackList

Wap's with a VIF >= 5  -> 97


### remove from the VIF_blackList

In [13]:
X_lat = X_lat.drop(columns = VIF_blackList)
X_lat.head()

Unnamed: 0,WAP001,WAP002,WAP003,WAP004,WAP005,WAP006,WAP007,WAP008,WAP009,WAP010,...,WAP509,WAP510,WAP511,WAP512,WAP515,WAP516,WAP517,WAP518,WAP519,WAP520
998,100,100,100,100,100,100,100,100,100,100,...,100,100,100,100,100,100,100,100,100,100
2013,100,100,100,100,100,100,100,100,100,100,...,100,100,100,100,100,100,100,100,100,100
1665,100,100,100,100,100,100,100,100,100,100,...,100,100,100,100,100,100,100,100,100,100
6188,100,100,100,100,100,100,100,100,100,100,...,100,100,100,100,100,-83,-85,100,100,100
2671,100,100,100,100,100,100,100,100,100,100,...,100,100,100,100,100,100,100,100,100,100


# Save PreProcessed Data

In [14]:
path = '/home/ale/Dropbox/UBIQUM/4.DeepAnalytics&Visualization/T4M3.WiFiLocationing/Data/'
file_X = "3_DataPrepro_LAT_X.csv"
file_y = "3_DataPrepro_LAT_y.csv"

X_lat.to_csv(path + file_X, index = False)
y_lat.to_csv(path + file_y, index = False)

# Preprocess VALIDATION Data

In [33]:
# Load File
path = '/home/ale/Dropbox/UBIQUM/4.DeepAnalytics&Visualization/T4M3.WiFiLocationing/Data/UJIndoorLoc/'
file = 'TestData.csv'

Val_data = pd.read_csv(path + file)
                                     
# make a copy of the dataset, keeping only the predictor/predicted features
Vdata_LAT = Val_data.drop(columns = ['LONGITUDE', 'SPACEID', 'RELATIVEPOSITION', 'USERID', 'PHONEID', 'TIMESTAMP'])

# X/y split
y_Vd_LAT = Vdata_LAT.iloc[:,520:521]
X_Vd_LAT = Vdata_LAT.drop(columns = ['LATITUDE'])

# dummify Building
X_Vd_LAT = pd.get_dummies(X_Vd_LAT, columns = ['BUILDINGID', 'FLOOR'])

# same process than the training set. Must drop the same features from the VIFblacklist
X_Vd_LAT = X_Vd_LAT.drop(columns = VIF_blackList)

print(y_Vd_LAT.head())
X_Vd_LAT.head()

       LATITUDE
0  4.864890e+06
1  4.864840e+06
2  4.864847e+06
3  4.864843e+06
4  4.864922e+06


Unnamed: 0,WAP001,WAP002,WAP003,WAP004,WAP005,WAP006,WAP007,WAP008,WAP009,WAP010,...,WAP509,WAP510,WAP511,WAP512,WAP515,WAP516,WAP517,WAP518,WAP519,WAP520
0,100,100,100,100,100,100,100,100,100,100,...,100,100,100,100,100,100,100,100,100,100
1,100,100,100,100,100,100,100,100,100,100,...,100,100,100,100,100,100,100,100,100,100
2,100,100,100,100,100,100,100,100,100,100,...,100,100,100,100,100,100,100,100,100,100
3,100,100,100,100,100,100,100,100,100,100,...,100,100,100,100,100,100,100,100,100,100
4,100,100,100,100,100,100,100,100,100,100,...,100,100,100,100,100,100,100,100,100,100


In [34]:
# save files
path = '/home/ale/Dropbox/UBIQUM/4.DeepAnalytics&Visualization/T4M3.WiFiLocationing/Data/'
file_X = "3_Val_DataPrepro_LAT_X.csv"
file_y = "3_Val_DataPrepro_LAT_y.csv"

X_Vd_LAT.to_csv(path + file_X, index = False)
y_Vd_LAT.to_csv(path + file_y, index = False)

# Load TRAINING preProces Data

In [15]:
path = '/home/ale/Dropbox/UBIQUM/4.DeepAnalytics&Visualization/T4M3.WiFiLocationing/Data/'
file_X = "3_DataPrepro_LAT_X.csv"
file_y = "3_DataPrepro_LAT_y.csv"

Saved_X = pd.read_csv(path + file_X)
Saved_y = pd.read_csv(path + file_y)

print(Saved_y.head())
Saved_X.head()

       LATITUDE
0  4.864867e+06
1  4.864838e+06
2  4.864774e+06
3  4.864769e+06
4  4.864786e+06


Unnamed: 0,WAP001,WAP002,WAP003,WAP004,WAP005,WAP006,WAP007,WAP008,WAP009,WAP010,...,WAP509,WAP510,WAP511,WAP512,WAP515,WAP516,WAP517,WAP518,WAP519,WAP520
0,100,100,100,100,100,100,100,100,100,100,...,100,100,100,100,100,100,100,100,100,100
1,100,100,100,100,100,100,100,100,100,100,...,100,100,100,100,100,100,100,100,100,100
2,100,100,100,100,100,100,100,100,100,100,...,100,100,100,100,100,100,100,100,100,100
3,100,100,100,100,100,100,100,100,100,100,...,100,100,100,100,100,-83,-85,100,100,100
4,100,100,100,100,100,100,100,100,100,100,...,100,100,100,100,100,100,100,100,100,100


## 3.6 Train/Test Split

In [16]:
# define X and y
Saved_X = X_lat
Saved_y = y_lat

# define train/test_ratio
train_ratio = 0.75
test_ratio = 0.25

# split data
X_train, X_test, y_train, y_test = train_test_split(X_lat, y_lat, test_size=(1-train_ratio) )

# check dimensions
print('DF: ', Tdata_LAT.shape)
#print('X:', X.shape,'y:', y.shape)
print('X_train:', X_train.shape,'y_train:', y_train.shape)
print('X_test: ', X_test.shape,' y_test: ', y_test.shape)

DF:  (8972, 529)
X_train: (6729, 431) y_train: (6729, 1)
X_test:  (2243, 431)  y_test:  (2243, 1)


## 3.7 Models

### 3.7.1 sklearn.tree.DecisionTreeRegressor¶

In [17]:
model_DTR = DecisionTreeRegressor()

# print the full list of parametrizable parameters
# model.get_params(deep=True)

In [18]:
#Setup the parameters and distributions to sample from: param_dist
cv_DTR = 10

param_dist_DTR = {#'ccp_alpha': 0.0,
                 #'criterion': 'mse',
                 #'max_depth': None,
                 #'max_features': None,
                 #'max_leaf_nodes': None,
                 #'min_impurity_decrease': 0.0,
                 #'min_impurity_split': None,
                 #'min_samples_leaf': 1,
                 #'min_samples_split': 2,
                 #'min_weight_fraction_leaf': 0.0,
                 #'random_state': None,
                 #'splitter': 'best'
                }

DTR = regressionModel_GSCV(model = model_DTR, cv = cv_DTR, param_dist = param_dist_DTR)

Tuned Decision Parameters: {}
Mean Square Error (MSE): 113.87586213019428


### save DTR

**Pickled** model as a file using joblib: Joblib is the replacement of pickle as it is more efficient on objects that carry large numpy arrays. These functions also accept file-like object instead of filenames.
     

- joblib.dump to serialize an object hierarchy 
- joblib.load to deserialize a data stream

In [1]:
file = "LAT_DTR.joblib"
model = DTR

save_model(model, file)

NameError: name 'DTR' is not defined

### Load DRT

In [20]:
file = 'LAT_DTR.joblib'

# Load the model from the file
DTR_from_joblib = load_model(file)

# Use the loaded model to make predictions
print("MSE from loaded model : {}".format(mean_squared_error(y_test, DTR_from_joblib.predict(X_test))))

MSE from loaded model : 113.87586213019428


### 3.7.1 KNeighborsRegressor

In [21]:
model = KNeighborsRegressor()

# print the full list of parametrizable parameters
model.get_params(deep=True)

{'algorithm': 'auto',
 'leaf_size': 30,
 'metric': 'minkowski',
 'metric_params': None,
 'n_jobs': None,
 'n_neighbors': 5,
 'p': 2,
 'weights': 'uniform'}

In [22]:
model = KNeighborsRegressor()

#Setup the parameters and distributions to sample from: param_dist
cv_KNreg = 10

param_dist_KNreg = {# 'algorithm': 'auto',
                     'leaf_size': [1,2],#,4,6] ,
                    # 'metric': 'minkowski',
                    # 'metric_params': None,
                     'n_jobs': [-1],
                     'n_neighbors': [1,2,4,8,10] ,
                     #'p': [2]
                     'weights': ["uniform", "distance"]
                   }

KNreg = regressionModel_GSCV(model, cv_KNreg, param_dist_KNreg)
#KNreg = regressionModel_GSCV(model = model, cv = cv_DTR, param_dist = param_dist_KNreg)

Tuned Decision Parameters: {'leaf_size': 1, 'n_jobs': -1, 'n_neighbors': 4, 'weights': 'distance'}
Mean Square Error (MSE): 72.99491445150633


### save KNreg

**Pickled** model as a file using joblib: Joblib is the replacement of pickle as it is more efficient on objects that carry large numpy arrays. These functions also accept file-like object instead of filenames.
     

- joblib.dump to serialize an object hierarchy 
- joblib.load to deserialize a data stream

In [23]:
file = "LAT_KNreg.joblib"
model = KNreg

save_model(model, file)

['/home/ale/Dropbox/UBIQUM/4.DeepAnalytics&Visualization/T4M3.WiFiLocationing/LAT_KNreg.joblib']

### Load kNreg

In [24]:
file = 'LAT_KNreg.joblib'

# Load the model from the file
KNreg_from_joblib = load_model(file)

# Use the loaded model to make predictions
print("MSE from loaded model : {}".format(mean_squared_error(y_test, KNreg_from_joblib.predict(X_test))))

MSE from loaded model : 72.99491445150633


### 3.7.3 Multi-layer Perceptron regressor

In [25]:
model = MLPRegressor()

# print the full list of parametrizable parameters
model.get_params(deep=True)

{'activation': 'relu',
 'alpha': 0.0001,
 'batch_size': 'auto',
 'beta_1': 0.9,
 'beta_2': 0.999,
 'early_stopping': False,
 'epsilon': 1e-08,
 'hidden_layer_sizes': (100,),
 'learning_rate': 'constant',
 'learning_rate_init': 0.001,
 'max_fun': 15000,
 'max_iter': 200,
 'momentum': 0.9,
 'n_iter_no_change': 10,
 'nesterovs_momentum': True,
 'power_t': 0.5,
 'random_state': None,
 'shuffle': True,
 'solver': 'adam',
 'tol': 0.0001,
 'validation_fraction': 0.1,
 'verbose': False,
 'warm_start': False}

In [26]:
model = MLPRegressor()

#Setup the parameters and distributions to sample from: param_dist
cv_MLP = 10

param_dist_MLP ={'activation': ['logistic'], #‘identity’, ‘logistic’, ‘tanh’, ‘relu’
                 #'alpha': 0.0001,
                 #'batch_size': 'auto',
                 #'beta_1': 0.9,
                 #'beta_2': 0.999,
                 'early_stopping': [True] ,
                 #'epsilon': 1e-08,
                 #'hidden_layer_sizes': (100,),
                 'learning_rate': ['invscaling'],
                 'learning_rate_init': [0.01],
                 #'max_fun': 15000,
                 'max_iter': [600],
                 #'momentum': 0.9,
                 'n_iter_no_change': [5],
                 #'nesterovs_momentum': True,
                 #'power_t': 0.5,
                 'random_state': [1],
                 #'shuffle': True,
                 #'solver': 'adam',
                 #'tol': 0.0001,
                 #'validation_fraction': 0.1,
                 'verbose': [False] ,
                 #'warm_start': False
                }

MLP = regressionModel_GSCV(model, cv_MLP, param_dist_MLP)




Tuned Decision Parameters: {'activation': 'logistic', 'early_stopping': True, 'learning_rate': 'invscaling', 'learning_rate_init': 0.01, 'max_iter': 600, 'n_iter_no_change': 5, 'random_state': 1, 'verbose': False}
Mean Square Error (MSE): 23559693771685.45




### save DTR

**Pickled** model as a file using joblib: Joblib is the replacement of pickle as it is more efficient on objects that carry large numpy arrays. These functions also accept file-like object instead of filenames.
     

- joblib.dump to serialize an object hierarchy 
- joblib.load to deserialize a data stream

In [27]:
file = "LAT_MLP.joblib"
model = MLP

save_model(model, file)

['/home/ale/Dropbox/UBIQUM/4.DeepAnalytics&Visualization/T4M3.WiFiLocationing/LAT_MLP.joblib']

### Load MLP

In [28]:
file = 'LAT_MLP.joblib'

# Load the model from the file
MLP_from_joblib = load_model(file)

# Use the loaded model to make predictions
print("MSE from loaded model : {}".format(mean_squared_error(y_test, MLP_from_joblib.predict(X_test))))

MSE from loaded model : 23559693771685.45


### 3.7.4 Support Vector Regressor 

In [29]:
model = SVR()

# print the full list of parametrizable parameters
model.get_params(deep=True)

{'C': 1.0,
 'cache_size': 200,
 'coef0': 0.0,
 'degree': 3,
 'epsilon': 0.1,
 'gamma': 'scale',
 'kernel': 'rbf',
 'max_iter': -1,
 'shrinking': True,
 'tol': 0.001,
 'verbose': False}

In [30]:
from sklearn.model_selection import RandomizedSearchCV, GridSearchCV
from sklearn.svm import SVR

model = SVR()

#Setup the parameters and distributions to sample from: param_dist
cv_SVR = 10

param_dist_SVR ={'C': [145],
                 #'cache_size': 200,
                 #'coef0': 0.0,
                 'degree': [2],
                 #'epsilon': 1,
                 #'gamma': 'scale',
                 'kernel': ['rbf'], #'rbf',
                 #'max_iter': -1,
                 #'shrinking': True,
                 #'tol': 0.001,
                 'verbose': [True]
                }

SVR = regressionModel_GSCV(model, cv_SVR, param_dist_SVR)


[LibSVM][LibSVM][LibSVM][LibSVM][LibSVM][LibSVM][LibSVM][LibSVM][LibSVM][LibSVM][LibSVM]Tuned Decision Parameters: {'C': 145, 'degree': 2, 'kernel': 'rbf', 'verbose': True}
Mean Square Error (MSE): 69.63717895715183


### save DTR

**Pickled** model as a file using joblib: Joblib is the replacement of pickle as it is more efficient on objects that carry large numpy arrays. These functions also accept file-like object instead of filenames.
     

- joblib.dump to serialize an object hierarchy 
- joblib.load to deserialize a data stream

In [31]:
file = "LAT_SVR.joblib"
model = SVR

save_model(model, file)

['/home/ale/Dropbox/UBIQUM/4.DeepAnalytics&Visualization/T4M3.WiFiLocationing/LAT_SVR.joblib']

### Load SVR

In [32]:
file = 'LAT_SVR.joblib'

# Load the model from the file
SVR_from_joblib = load_model(file)

# Use the loaded model to make predictions
print("MSE from loaded model : {}".format(mean_squared_error(y_test, SVR_from_joblib.predict(X_test))))

MSE from loaded model : 69.63717895715183
