In [None]:
import pandas as pd
import numpy as np
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.svm import SVR
from sklearn.metrics import mean_squared_error, r2_score
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Input, Dense
from tensorflow.keras import regularizers

# Load and prepare the dataset
df = pd.read_csv("soilmoisture_dataset.csv", index_col=0)
df = df.dropna().reset_index(drop=True)
X = df.drop(['soil_temperature', 'datetime', 'soil_moisture'], axis=1)
y = df['soil_moisture']

# Standardize features
scaler = StandardScaler()
X_standardized = scaler.fit_transform(X)

# Define the autoencoder model
def create_autoencoder(input_dim, encoding_dim):
    input_layer = Input(shape=(input_dim,))
    encoded = Dense(encoding_dim, activation='leaky_relu', activity_regularizer=regularizers.l2(0.01))(input_layer)
    decoded = Dense(input_dim, activation='linear')(encoded)
    autoencoder = Model(inputs=input_layer, outputs=decoded)
    encoder = Model(inputs=input_layer, outputs=encoded)  # Encoder model for dimensionality reduction
    return autoencoder, encoder

# Set the target number of features for dimensionality reduction
# Perform PLS and model training in a loop
for n_features in range(1, 16): 
    n_features = n_features  # Set desired number of reduced dimensions

    # Build and compile the autoencoder
    input_dim = X_standardized.shape[1]
    autoencoder, encoder = create_autoencoder(input_dim, n_features)
    autoencoder.compile(optimizer='adam', loss='mean_squared_error')

    # Train the autoencoder
    autoencoder.fit(X_standardized, X_standardized, epochs=100, batch_size=32, shuffle=True, validation_split=0.3, verbose=0)

    # Use the encoder to transform data to lower-dimensional space
    X_reduced = encoder.predict(X_standardized)

    # Convert reduced features to DataFrame
    df_reduced = pd.DataFrame(X_reduced, columns=[f'feature_{i+1}' for i in range(n_features)])

    # Split data for regression
    X_train, X_test, y_train, y_test = train_test_split(df_reduced, y, test_size=0.5, random_state=42)

    # Train regression model on reduced features
    model = SVR(kernel='rbf', C=1000, gamma='scale')
    model.fit(X_train, y_train)

    # Predict and evaluate
    y_pred = model.predict(X_test)

    # Calculate RMSE and Adjusted R-squared
    rmse = np.sqrt(mean_squared_error(y_test, y_pred))

    def adjusted_r2_score(y_true, y_pred, n_features):
        r2 = r2_score(y_true, y_pred)
        n = len(y_true)
        adj_r2 = 1 - (1 - r2) * (n - 1) / (n - n_features - 1)
        return adj_r2

    adj_r2 = adjusted_r2_score(y_test, y_pred, n_features)

    print(f"Iteration {n_features} (using {n_features} latent features):")
    print(f"RMSE: {rmse}")
    print(f"Adjusted R-squared: {adj_r2}")
    print("-" * 40)


Iteration 1 (using 1 latent features):
RMSE: 2.2395145912846335
Adjusted R-squared: 0.6337600260092544
----------------------------------------
Iteration 2 (using 2 latent features):
RMSE: 2.0393444014405686
Adjusted R-squared: 0.6954028039969973
----------------------------------------
Iteration 3 (using 3 latent features):
RMSE: 1.2224937361533703
Adjusted R-squared: 0.8902186265819485
----------------------------------------
Iteration 4 (using 4 latent features):
RMSE: 1.2383695969916597
Adjusted R-squared: 0.8870124979402823
----------------------------------------
Iteration 5 (using 5 latent features):
RMSE: 1.3941550012545223
Adjusted R-squared: 0.8563682978494614
----------------------------------------
Iteration 6 (using 6 latent features):
RMSE: 1.1208463590597013
Adjusted R-squared: 0.9068843692840189
----------------------------------------
Iteration 7 (using 7 latent features):
RMSE: 1.2235837235964913
Adjusted R-squared: 0.8886977481967464
---------------------------------

In [43]:
# Print a summary of the autoencoder architecture
print("Autoencoder Model Summary:")
autoencoder.summary()


Autoencoder Model Summary:
Model: "model_516"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 input_259 (InputLayer)      [(None, 125)]             0         
                                                                 
 dense_524 (Dense)           (None, 15)                1890      
                                                                 
 dense_525 (Dense)           (None, 125)               2000      
                                                                 
Total params: 3,890
Trainable params: 3,890
Non-trainable params: 0
_________________________________________________________________


In [None]:
from tensorflow.keras.layers import Input, Dense
from tensorflow.keras.models import Model

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.5, random_state=42, shuffle=True)

# Define the dimensionality of the input and the compressed representation
input_dim = 125  # e.g., for a 28x28 image flattened to a vector
encoding_dim = 15  # target dimensionality for reduced data

# Encoder
input_layer = Input(shape=(input_dim,))
encoded = Dense(128, activation='relu')(input_layer)
encoded = Dense(64, activation='relu')(encoded)
encoded_output = Dense(encoding_dim, activation='relu')(encoded)

# Decoder (to reconstruct the original input)
decoded = Dense(64, activation='relu')(encoded_output)
decoded = Dense(128, activation='relu')(decoded)
decoded_output = Dense(input_dim, activation='sigmoid')(decoded)

# Combine Encoder and Decoder into an Autoencoder Model
autoencoder = Model(inputs=input_layer, outputs=decoded_output)

# Separate Encoder Model (for dimensionality reduction)
encoder = Model(inputs=input_layer, outputs=encoded_output)


In [None]:
# Compile the model
autoencoder.compile(optimizer='adam', loss='mse')

# Fit the model to the data
history = autoencoder.fit(X_train, X_train,
                          epochs=50,
                          batch_size=256,
                          shuffle=True,
                          validation_data=(X_test, X_test))
