In [14]:
from joblib import load
import pandas as pd
from sklearn.model_selection import train_test_split

# --- Load the persisted pipeline --------------------------------------------
# NOTE(review): joblib.load unpickles the file, so only point this at model
# artifacts from a trusted source.
model_path = '../models/trained/mdmc_modelv5.2.joblib'  # adjust to your model location
pipeline = load(model_path)
print("Model loaded successfully!")

# --- Read the dataset (CSV) ---------------------------------------------------
data_path = '../data/test_data.csv'  # adjust to your dataset location
data = pd.read_csv(data_path)

# --- Derived features ---------------------------------------------------------
# The three columns are appended in this exact order (pH_squared, DO_EC_ratio,
# NPK_sum) so the frame keeps the same column layout as before.
# presumably these mirror the features used at training time -- verify
# against the training code.
data['pH_squared'] = data['pH'].pow(2)
data['DO_EC_ratio'] = data['Dissolved Oxygen'].div(data['Electrical Conductivity'])
data['NPK_sum'] = data['Nitrogen'].add(data['Phosphorus']).add(data['Potassium'])

# --- Separate target from features --------------------------------------------
y = data['Label']  # target column; rename if yours differs
X = data.drop(columns='Label')

# --- Hold-out split -----------------------------------------------------------
# A fixed random_state keeps the shuffle reproducible, so the same row is
# picked as the example on every run. Only X_test is consumed in this cell.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

# --- Single-row demonstration -------------------------------------------------
print("\nExample prediction:")

# Slice (rather than index) so the example stays a one-row DataFrame, which is
# what gets passed to pipeline.predict below.
example = X_test.iloc[:1]
prediction = pipeline.predict(example)

# Report the input parameters alongside a human-readable verdict.
example_parameters = example.to_dict(orient='records')[0]
if prediction[0] == 1:
    predicted_label = 'Great for lettuce'
else:
    predicted_label = 'Not great for lettuce'

print(f"Water quality parameters: {example_parameters}")
print(f"Predicted label: {predicted_label}")


Model loaded successfully!

Example prediction:
Water quality parameters: {'pH': 6.101115011743209, 'Temperature': 20.591670111852693, 'Dissolved Oxygen': 7.962072844310212, 'Electrical Conductivity': 2.184233026512157, 'Turbidity': 1.0467102793432796, 'Nitrogen': 10.45227288910538, 'Phosphorus': 3.2236519310393126, 'Potassium': 18.584657285442727, 'Water Hardness': 77.74425485152653, 'pH_squared': 37.22360438651834, 'DO_EC_ratio': 3.6452488116729325, 'NPK_sum': 32.26058210558742}
Predicted label: Great for lettuce
