In [28]:
import pandas as pd
from sklearn.linear_model import ElasticNet
from sklearn.svm import SVR
from sklearn.model_selection import train_test_split, KFold
from sklearn.metrics import mean_squared_error, r2_score
from sklearn.preprocessing import StandardScaler
from math import sqrt
import warnings

warnings.filterwarnings("ignore", category=FutureWarning)

# Read the dataset (first CSV column becomes the index), discard rows that
# contain missing values, renumber the remaining rows from 0 without keeping
# the old index, and drop the 'soil_temperature' and 'datetime' columns.
df = (
    pd.read_csv("soilmoisture_dataset.csv", index_col=0)
    .dropna()
    .reset_index(drop=True)
    .drop(columns=['soil_temperature', 'datetime'])
)


# Hold out 20% of the rows as `test` and keep the remaining 80% as `train_val`
# (test_size=0.2). The fixed random_state makes the shuffle reproducible.
train_val, test = train_test_split(df, test_size=0.2, random_state=42)
# Separate the predictors from the 'soil_moisture' target in both partitions
X_cross_val = train_val.drop(columns='soil_moisture')
y_cross_val = train_val['soil_moisture']
X_test2 = test.drop(columns='soil_moisture')
y_test2 = test['soil_moisture']


# Define the adjusted R-squared function
def adjR2(r2, n_features, n_samples):
    """Return the adjusted R-squared for a plain R-squared value.

    Computes ``1 - (1 - r2) * (n_samples - 1) / (n_samples - n_features - 1)``.

    Parameters
    ----------
    r2 : plain coefficient of determination.
    n_features : number of predictors used by the model.
    n_samples : number of observations the score was computed on.

    The denominator is ``n_samples - n_features - 1``; it must be non-zero
    for the division to be defined.
    """
    unexplained = 1 - r2
    total_dof = n_samples - 1
    residual_dof = n_samples - n_features - 1
    return 1 - unexplained * total_dof / residual_dof


# ElasticNet regressor (combined L1 and L2 regularization).
# alpha and l1_ratio are the knobs to adjust as needed.
elastic_net = ElasticNet(
    alpha=0.01,
    l1_ratio=0.5,
    max_iter=10000,
    random_state=42,
)
# Support-vector regressor with an RBF kernel
svr_model = SVR(
    kernel='rbf',
    C=10000,
    gamma='scale',
)


k = 15  # number of total features to compare
# Never sweep past the number of available columns: the slice below would
# stop growing and later iterations would silently repeat the same candidates.
n_candidates = min(k, X_cross_val.shape[1])
# Initialize a list to store results
results = []
# 10-fold splitter. With shuffle=True and an integer random_state every call
# to split() yields the same folds, so one instance serves all iterations.
kf = KFold(n_splits=10, shuffle=True, random_state=42)

# Use ElasticNet for feature selection and train the SVR model on the resulting dataset
for n in range(n_candidates):
    # Candidate set: the first n+1 columns of X_cross_val
    selected_columns = X_cross_val.columns[:n + 1]

    # Fit ElasticNet on the candidates and keep the columns whose coefficient
    # is non-zero.
    # NOTE(review): the selection is fitted on all of X_cross_val, including
    # rows later used as validation folds, so the CV scores below are probably
    # optimistic - consider selecting inside each fold.
    elastic_net.fit(X_cross_val[selected_columns], y_cross_val)
    selected_features = [col for col, coef in zip(selected_columns, elastic_net.coef_) if coef != 0]

    # If every coefficient was shrunk to zero there is nothing to train on;
    # an empty feature matrix cannot be scaled or fitted, so skip explicitly.
    if not selected_features:
        print(f"ElasticNet kept no features for iteration {n + 1}; skipping")
        continue

    # The column subset is the same for every fold, so take it once
    X_selected = X_cross_val[selected_features]
    rmse_list = []
    adjR_list = []

    for train_index, val_index in kf.split(X_cross_val):
        # Split rows into the training part and the held-out validation fold
        X_train, X_val = X_selected.iloc[train_index], X_selected.iloc[val_index]
        y_train, y_val = y_cross_val.iloc[train_index], y_cross_val.iloc[val_index]

        # Fit the scaler on the training part only, then apply it to the fold
        scaler = StandardScaler()
        X_train_scaled = scaler.fit_transform(X_train)
        X_val_scaled = scaler.transform(X_val)

        # Train the SVR model on the scaled data and predict the fold
        svr_model.fit(X_train_scaled, y_train)
        y_pred = svr_model.predict(X_val_scaled)

        # Root mean squared error for this fold
        rmse_list.append(sqrt(mean_squared_error(y_val, y_pred)))

        # Adjusted R squared for this fold (features used, rows in the fold)
        r2 = r2_score(y_val, y_pred)
        adjR_list.append(adjR2(r2, X_val.shape[1], X_val.shape[0]))

    # Average both metrics across folds
    average_rmse = sum(rmse_list) / len(rmse_list)
    average_adjR = sum(adjR_list) / len(adjR_list)

    # Print results
    print(n+1, ": Average RMSE:", round(average_rmse, 4), "Average Adjusted R-squared:", round(average_adjR, 4))

    # Append the results to the list
    results.append({
        'Iteration': n + 1,
        'Selected Columns': ', '.join(selected_features),  # Store the selected features as a string
        'Average RMSE': round(average_rmse, 4),
        'Average Adjusted R²': round(average_adjR, 4)
    })

# Convert results to a DataFrame
results_df = pd.DataFrame(results)

# Save results to Excel. The path lives in one variable so the confirmation
# message always names the file that was actually written.
output_path = "ElasticNet_results.xlsx"
results_df.to_excel(output_path, index=False)

print(f"Results saved to '{output_path}'")


1 : Average RMSE: 2.4292 Average Adjusted R-squared: 0.5194
2 : Average RMSE: 2.2154 Average Adjusted R-squared: 0.5876
3 : Average RMSE: 2.1302 Average Adjusted R-squared: 0.6136
4 : Average RMSE: 1.6171 Average Adjusted R-squared: 0.7707
5 : Average RMSE: 1.5414 Average Adjusted R-squared: 0.7872
6 : Average RMSE: 1.467 Average Adjusted R-squared: 0.8008
7 : Average RMSE: 1.4733 Average Adjusted R-squared: 0.7958
8 : Average RMSE: 1.4865 Average Adjusted R-squared: 0.7854
9 : Average RMSE: 1.5777 Average Adjusted R-squared: 0.7504
10 : Average RMSE: 1.5895 Average Adjusted R-squared: 0.7411
11 : Average RMSE: 1.5492 Average Adjusted R-squared: 0.7475
12 : Average RMSE: 1.5941 Average Adjusted R-squared: 0.7278
13 : Average RMSE: 1.554 Average Adjusted R-squared: 0.7367
14 : Average RMSE: 1.4686 Average Adjusted R-squared: 0.7537
15 : Average RMSE: 1.3693 Average Adjusted R-squared: 0.7802
Results saved to 'ElasticNet_SVR_results.xlsx'


In [None]:
import pandas as pd
from sklearn.linear_model import ElasticNet
from sklearn.svm import SVR
from sklearn.model_selection import train_test_split, KFold
from sklearn.metrics import mean_squared_error, r2_score
from sklearn.preprocessing import StandardScaler
from math import sqrt
import warnings
import numpy as np

warnings.filterwarnings("ignore", category=FutureWarning)

# Load the CSV using its first column as the index, then in one chain:
# remove rows with missing values, rebuild a 0-based index (the old one is
# discarded), and drop the 'soil_temperature' and 'datetime' columns.
df = (
    pd.read_csv("soilmoisture_dataset.csv", index_col=0)
    .dropna()
    .reset_index(drop=True)
    .drop(columns=['soil_temperature', 'datetime'])
)


# Hold out 20% of the rows as `test` and keep the remaining 80% as `train_val`
# (test_size=0.2). The fixed random_state makes the shuffle reproducible.
train_val, test = train_test_split(df, test_size=0.2, random_state=42)
# Separate the predictors from the 'soil_moisture' target in both partitions
X_cross_val = train_val.drop(columns='soil_moisture')
y_cross_val = train_val['soil_moisture']
X_test2 = test.drop(columns='soil_moisture')
y_test2 = test['soil_moisture']


# Define the adjusted R-squared function
def adjR2(r2, n_features, n_samples):
    """Adjust an R-squared score for the number of predictors.

    Evaluates ``1 - (1 - r2) * (n_samples - 1) / (n_samples - n_features - 1)``.

    Parameters
    ----------
    r2 : unadjusted coefficient of determination.
    n_features : count of predictors in the model.
    n_samples : count of observations behind ``r2``.

    ``n_samples - n_features - 1`` is used as the divisor and therefore must
    not be zero.
    """
    residual_dof = n_samples - n_features - 1
    penalty = (1 - r2) * (n_samples - 1) / residual_dof
    return 1 - penalty


# ElasticNet regressor (combined L1 and L2 regularization).
# alpha and l1_ratio are the knobs to adjust as needed.
elastic_net = ElasticNet(
    alpha=0.001,
    l1_ratio=0.5,
    max_iter=100000,
    random_state=42,
)
# Support-vector regressor with an RBF kernel
svr_model = SVR(
    kernel='rbf',
    C=10000,
    gamma='scale',
)


k = 15  # number of total features to compare
# Initialize a list to store results
results = []

# Fit ElasticNet once on every predictor column of the cross-validation set
elastic_net.fit(X_cross_val, y_cross_val)

# Order the columns from the largest to the smallest absolute coefficient:
# argsort gives ascending positions, the reversed view makes them descending.
coef_magnitude = np.abs(elastic_net.coef_)
feature_ranking = np.argsort(coef_magnitude)[::-1]
ranked_columns = X_cross_val.columns[feature_ranking]
selected_features = ranked_columns.tolist()

# Cap the sweep at the number of ranked features: slicing past the end of
# `selected_features` would re-evaluate the same set while reporting a larger
# 'Number of Features'.
max_features = min(k, len(selected_features))
# 10-fold splitter. With shuffle=True and an integer random_state every call
# to split() yields the same folds, so one instance serves all iterations.
kf = KFold(n_splits=10, shuffle=True, random_state=42)

# Iterate through top 'n' ranked features
for n in range(1, max_features + 1):
    current_features = selected_features[:n]
    print(f"Evaluating top {n} features: {current_features}")

    # The column subset is the same for every fold, so take it once
    X_current = X_cross_val[current_features]
    rmse_list = []
    adjR_list = []

    for train_index, test_index in kf.split(X_cross_val):
        # Split rows into the training part and the held-out validation fold
        X_train, X_val = X_current.iloc[train_index], X_current.iloc[test_index]
        y_train, y_val = y_cross_val.iloc[train_index], y_cross_val.iloc[test_index]

        # Fit the scaler on the training part only, then apply it to the fold
        scaler = StandardScaler()
        X_train_scaled = scaler.fit_transform(X_train)
        X_val_scaled = scaler.transform(X_val)

        svr_model.fit(X_train_scaled, y_train)
        y_pred = svr_model.predict(X_val_scaled)

        # Root mean squared error for this fold
        mse = mean_squared_error(y_val, y_pred)
        rmse_list.append(sqrt(mse))

        # Adjusted R squared for this fold (features used, rows in the fold)
        r2 = r2_score(y_val, y_pred)
        adjR_list.append(adjR2(r2, len(current_features), len(y_val)))

    # Average both metrics across folds
    average_rmse = np.mean(rmse_list)
    average_adjR = np.mean(adjR_list)

    results.append({
        'Number of Features': n,
        'Selected Columns': ', '.join(current_features),
        'Average RMSE': round(average_rmse, 4),
        'Average Adjusted R²': round(average_adjR, 4)
    })




Evaluating top 1 features: ['950']
Evaluating top 2 features: ['950', '946']
Evaluating top 3 features: ['950', '946', '942']
Evaluating top 4 features: ['950', '946', '942', '938']
Evaluating top 5 features: ['950', '946', '942', '938', '494']
Evaluating top 6 features: ['950', '946', '942', '938', '494', '498']
Evaluating top 7 features: ['950', '946', '942', '938', '494', '498', '462']
Evaluating top 8 features: ['950', '946', '942', '938', '494', '498', '462', '506']
Evaluating top 9 features: ['950', '946', '942', '938', '494', '498', '462', '506', '934']
Evaluating top 10 features: ['950', '946', '942', '938', '494', '498', '462', '506', '934', '502']
Evaluating top 11 features: ['950', '946', '942', '938', '494', '498', '462', '506', '934', '502', '490']
Evaluating top 12 features: ['950', '946', '942', '938', '494', '498', '462', '506', '934', '502', '490', '486']
Evaluating top 13 features: ['950', '946', '942', '938', '494', '498', '462', '506', '934', '502', '490', '486', '4

PermissionError: [Errno 13] Permission denied: 'ElasticNet_results.xlsx'

In [35]:

# Convert results to a DataFrame
results_df = pd.DataFrame(results)

# Save results to Excel. The path lives in one variable so the confirmation
# message always names the file that was actually written.
output_path = "ElasticNet_results.xlsx"
results_df.to_excel(output_path, index=False)

print(f"Results saved to '{output_path}'")

Results saved to 'ElasticNet_SVR_results.xlsx'
