# with autoencoder

In [1]:
from sklearn.datasets import make_regression
from sklearn.preprocessing import MinMaxScaler
from sklearn.model_selection import train_test_split
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Input
from tensorflow.keras.layers import Dense
from tensorflow.keras.layers import ReLU
from tensorflow.keras.layers import BatchNormalization
from tensorflow.keras.utils import plot_model
import matplotlib

import tensorflow as tf 
#from matplotlib import pyplot as plt

# define dataset
#X, y = make_regression(n_samples=1000, n_features=100, n_informative=10, noise=0.1, random_state=1)
# number of input columns
n_inputs = X.shape[1]
# split into train test sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.33, random_state=1)
# scale data
t = MinMaxScaler()
t.fit(X_train)
X_train = t.transform(X_train)
X_test = t.transform(X_test)
# define encoder
visible = Input(shape=(n_inputs,))
e = Dense(n_inputs*2)(visible)
e = BatchNormalization()(e)
e = ReLU()(e)
# define bottleneck
n_bottleneck = n_inputs
bottleneck = Dense(n_bottleneck)(e)
# define decoder
d = Dense(n_inputs*2)(bottleneck)
d = BatchNormalization()(d)
d = ReLU()(d)

# output layer
output = Dense(n_inputs, activation='linear')(d)
# define autoencoder model
model = Model(inputs=visible, outputs=output)
# compile autoencoder model
model.compile(optimizer='adam', loss='mse')
# plot the autoencoder
plot_model(model, 'autoencoder.png', show_shapes=True)
# fit the autoencoder model to reconstruct input
history = model.fit(X_train, X_train, epochs=400, batch_size=16, verbose=2, validation_data=(X_test,X_test))

# plot loss
#plt.plot(history.history['loss'], label='train')
#plt.plot(history.history['val_loss'], label='test')
#plt.legend()
#plt.show()
# define an encoder model (without the decoder)

encoder = Model(inputs=visible, outputs=bottleneck)


converter = tf.lite.TFLiteConverter.from_keras_model(encoder)
tflite_model = converter.convert()

# Save the model.
with open('encoder_model.tflite', 'wb') as f:
    f.write(tflite_model)


plot_model(encoder, 'encoder.png', show_shapes=True)
# save the encoder to file
encoder.save('model\\encoder.h5')

NameError: name 'X' is not defined

In [20]:
from sklearn.datasets import make_regression
from sklearn.preprocessing import MinMaxScaler
from sklearn.model_selection import train_test_split
from sklearn.svm import SVR
from sklearn.metrics import mean_absolute_error
from tensorflow.keras.models import load_model
# define dataset
#X, y = make_regression(n_samples=1000, n_features=100, n_informative=10, noise=0.1, random_state=1)
# split into train test sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.33, random_state=1)
# reshape target variables so that we can transform them
y_train = y_train.reshape((len(y_train), 1))
y_test = y_test.reshape((len(y_test), 1))
# scale input data
trans_in = MinMaxScaler()
trans_in.fit(X_train)
X_train = trans_in.transform(X_train)
X_test = trans_in.transform(X_test)
# scale output data
trans_out = MinMaxScaler()
trans_out.fit(y_train)
y_train = trans_out.transform(y_train)
y_test = trans_out.transform(y_test)
# load the model from file

#encoder =tf.lite.TFLiteConverter.from_keras_model(export_dir)


#encoder = load_model('encoder.h5')
# encode the train data
X_train_encode = encoder.predict(X_train)
# encode the test data
X_test_encode = encoder.predict(X_test)
# define model
model = SVR()
# fit model on the training dataset
model.fit(X_train_encode, y_train)
# make prediction on test set
yhat = model.predict(X_test_encode)
# invert transforms so we can calculate errors
yhat = yhat.reshape((len(yhat), 1))

yhat = trans_out.inverse_transform(yhat)
y_test = trans_out.inverse_transform(y_test)
# calculate error
score = mean_absolute_error(y_test, yhat)
print(score)

0.07115929080016616


  y = column_or_1d(y, warn=True)


# with a multilayer perceptron regressor

In [80]:
def CompareRegressors(datapath): 
    import numpy as np
    import pandas as pd
    
    from sklearn.neural_network import MLPRegressor    
    from sklearn.model_selection import train_test_split
    from sklearn.ensemble import GradientBoostingRegressor
    from sklearn.gaussian_process import GaussianProcessRegressor
    
    from sklearn.gaussian_process.kernels import RBF
    from sklearn.gaussian_process.kernels import ExpSineSquared
    from sklearn.gaussian_process.kernels import RationalQuadratic
    from sklearn.gaussian_process.kernels import ConstantKernel
    from sklearn.gaussian_process.kernels import DotProduct, WhiteKernel

        
    df=pd.read_excel(datapath)
    #C:\articles\to_upload\Vizcarra
    #print(df)

    X_raw = (df["S2_NDVI"].to_numpy())
    #print(X_raw.shape)
    X=np.reshape(X_raw,(X_raw.shape[0], 1))
    y=(df["drone_NDVI"].to_numpy()).ravel()

    X_train, X_test, Y_train, Y_test = train_test_split(X, y)
    regresssion_models=[]
    
    r_score=0
    
    ############################## MLPR ###################################################
    for activation_function in ("identity", "logistic", "tanh", "relu"):
        
        reg = MLPRegressor(random_state=1, max_iter=500,activation=activation_function).fit(X_train, Y_train)
        regresssion_models.append(("MLP",activation_function,reg,reg.score(X_test, Y_test)))
    
    
    ########################  "BOOSTED REGRESSION TREES"  ###############################
    for n_estimators in (20,50,100,200,300,400,500):
    
        reg = GradientBoostingRegressor(n_estimators=n_estimators,
                                    max_depth=3,
                                    learning_rate=0.1,
                                    min_samples_split=3)
        reg.fit(X_train, Y_train)
        regresssion_models.append(("BOOSTED REGRESSION TREES",n_estimators, reg, reg.score(X_test, Y_test)))
        
        
    ##################"Gaussian Process Regressor"############################
    kernel_types=[]
    kernel1 = 1.0 * RBF(length_scale=1.0, length_scale_bounds=(1e-1, 10.0))
    kernel2= 1.0 * RationalQuadratic(length_scale=1.0, alpha=0.1, alpha_bounds=(1e-5, 1e15))
    kernel3= 1.0 * ExpSineSquared(length_scale=1.0,periodicity=3.0,length_scale_bounds=(0.1, 10.0),periodicity_bounds=(1.0, 10.0),)
    kernel4 = DotProduct() + WhiteKernel()
    
    kernel_types.append(kernel1)
    kernel_types.append(kernel2)
    kernel_types.append(kernel3)
    kernel_types.append(kernel4)
    
    for kernel in kernel_types:
    
        reg = GaussianProcessRegressor(kernel=kernel, random_state=0)
        reg.fit(X_train, Y_train)
        # R^2 score for the model (on the test data)
        regresssion_models.append(("Gaussian Process Regressor",kernel, reg, reg.score(X_test, Y_test)))
        
    print(len(regression_models))
        
    ########################now, save the best model ##############################
    
    import joblib
    max_r_score=0
    for i in range(len(regression_models)):
        print(regression_models[i][3])
        model_r=float(regression_models[i][3])
        if(model_r > max_r_score):
            max_r_score=model_r
        
        
    for k in range(len(regression_models)): 
        if(regression_models[k][3] == max_r_score):
            # this is our best model
            print(regression_models[k])
            joblib.dump(regression_models[k][2],"best_regr_model.jl")
            break
        
    return regresssion_models[k]

In [81]:
datapath='C:\\articles\\to_upload\\with_soil_Viz_MB.xlsx'

regression_models=CompareRegressors(datapath)
#len(regression_models)

15
0.7948526650519683
-0.5925448634435388
0.7869562619389425
0.678582293665494
0.9203309220632988
0.9242615812785122
0.9018344483293508
0.8914942888921626
0.8897262630495237
0.8891312580453189
0.8889003191495918
0.9418523209364604
0.7958215990507918
0.6455117591573742
0.9121056644932372
('Gaussian Process Regressor', 1**2 * RBF(length_scale=1), GaussianProcessRegressor(alpha=1e-10, copy_X_train=True,
                         kernel=1**2 * RBF(length_scale=1),
                         n_restarts_optimizer=0, normalize_y=False,
                         optimizer='fmin_l_bfgs_b', random_state=0), 0.9418523209364604)


In [65]:
regression_models[:][3]

0.9530842281978945

In [43]:
print(len(regression_models))
for i in range (len(regression_models)):
    print(type(regression_models[i]))

4
<class 'str'>
<class 'sklearn.gaussian_process.kernels.Sum'>
<class 'sklearn.gaussian_process._gpr.GaussianProcessRegressor'>
<class 'numpy.float64'>


In [44]:
regression_models[3][3]

IndexError: invalid index to scalar variable.

In [6]:
import pandas as pd
df=pd.read_excel("C:\\articles\\to_upload\\Vizcarra\\Vizcarra_no_soil_grouped_NDVI.xlsx")

In [43]:
from sklearn.gaussian_process.kernels import ExpSineSquared
from sklearn.gaussian_process import GaussianProcessRegressor
#import matplotlib.pyplot as plt

kernel = 1.0 * ExpSineSquared(
    length_scale=1.0,
    periodicity=3.0,
    length_scale_bounds=(0.1, 10.0),
    periodicity_bounds=(1.0, 10.0),
)
gpr = GaussianProcessRegressor(kernel=kernel, random_state=0)

#fig, axs = plt.subplots(nrows=2, sharex=True, sharey=True, figsize=(10, 8))

# plot prior
#plot_gpr_samples(gpr, n_samples=n_samples, ax=axs[0])
#axs[0].set_title("Samples from prior distribution")

# plot posterior
gpr.fit(X_train, y_train)
#plot_gpr_samples(gpr, n_samples=n_samples, ax=axs[1])
#axs[1].scatter(X_train[:, 0], y_train, color="red", zorder=10, label="Observations")
#axs[1].legend(bbox_to_anchor=(1.05, 1.5), loc="upper left")
#axs[1].set_title("Samples from posterior distribution")

#fig.suptitle("Periodic kernel", fontsize=18)
#plt.tight_layout()

ModuleNotFoundError: No module named 'matplotlib.pyplot'