In [1]:
import pandas as pd
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.ensemble import RandomForestClassifier
from sklearn.preprocessing import LabelEncoder
from sklearn.metrics import accuracy_score, classification_report
import joblib

# File path for dataset
DATA_FILE_PATH = "soil_conditions_data.csv"

# Load dataset
df = pd.read_csv(DATA_FILE_PATH)

# Function to encode categorical labels
def encode_labels(df, columns):
    label_encoders = {}
    for col in columns:
        le = LabelEncoder()
        df[col] = le.fit_transform(df[col])
        label_encoders[col] = le  # Store encoders for inverse transform if needed
    return df, label_encoders

# Encode categorical labels
categorical_columns = ["Erosion_Level", "Irrigation_Need", "Soil_Condition"]
df, label_encoders = encode_labels(df, categorical_columns)

# Define features and targets
features = ["Soil_pH", "Moisture_Content(%)", "Soil_Temperature(°C)", "Nitrogen_Content(ppm)", "Phosphorus_Content(ppm)", "Potassium_Content(ppm)"]
targets = {
    "Erosion_Level": df["Erosion_Level"],
    "Irrigation_Need": df["Irrigation_Need"],
    "Soil_Condition": df["Soil_Condition"],
}

# Function to train and evaluate models
def train_and_evaluate_model(X, y, target_name):
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
    
    # Define the model
    model = RandomForestClassifier(random_state=42)
    
    # Hyperparameter tuning using GridSearchCV
    param_grid = {
        'n_estimators': [50, 100, 200],
        'max_depth': [None, 10, 20, 30],
        'min_samples_split': [2, 5, 10]
    }
    grid_search = GridSearchCV(model, param_grid, cv=5, n_jobs=-1, scoring='accuracy')
    grid_search.fit(X_train, y_train)
    
    # Best model
    best_model = grid_search.best_estimator_
    y_pred = best_model.predict(X_test)
    
    print(f"\nModel Performance for {target_name}:")
    print("Best Parameters:", grid_search.best_params_)
    print("Accuracy:", accuracy_score(y_test, y_pred))
    print(classification_report(y_test, y_pred))
    
    return best_model

# Train and evaluate models for each target
models = {}
for target_name, y in targets.items():
    models[target_name] = train_and_evaluate_model(df[features], y, target_name)

# Save trained models
for target_name, model in models.items():
    joblib.dump(model, f"{target_name}_model.pkl")
    print(f"Model for {target_name} saved as {target_name}_model.pkl")


Model Performance for Erosion_Level:
Best Parameters: {'max_depth': None, 'min_samples_split': 2, 'n_estimators': 200}
Accuracy: 1.0
              precision    recall  f1-score   support

           0       1.00      1.00      1.00         7
           1       1.00      1.00      1.00        44
           2       1.00      1.00      1.00        49

    accuracy                           1.00       100
   macro avg       1.00      1.00      1.00       100
weighted avg       1.00      1.00      1.00       100


Model Performance for Irrigation_Need:
Best Parameters: {'max_depth': None, 'min_samples_split': 2, 'n_estimators': 50}
Accuracy: 0.99
              precision    recall  f1-score   support

           0       1.00      0.97      0.99        36
           1       0.98      1.00      0.99        64

    accuracy                           0.99       100
   macro avg       0.99      0.99      0.99       100
weighted avg       0.99      0.99      0.99       100


Model Performance for