In [7]:
import xgboost as xgb
import numpy as np
import pandas as pd
from sklearn.preprocessing import LabelEncoder
from xgboost import XGBClassifier
import joblib

In [14]:
def _load_booster(model_path):
    """Create an ``xgb.Booster`` and load the saved model at ``model_path`` into it."""
    booster = xgb.Booster()
    booster.load_model(model_path)
    return booster


def _load_label_encoder(encoder_path, name):
    """Load an encoder with joblib and verify that it is a ``LabelEncoder``.

    ``name`` is only used to build the error message.
    """
    # SECURITY: joblib.load unpickles the file, which can execute arbitrary code.
    # Only point this at encoder files that come from a trusted source.
    encoder = joblib.load(encoder_path)
    if not isinstance(encoder, LabelEncoder):
        raise TypeError(f"{name} is not a LabelEncoder instance")
    return encoder


def predict_all(input_data):
    """
    Predict organik_madde, saturasyon and kirec, then pick the top 2 urun (products)
    using the saved XGBoost models and label encoders.

    The predictions are chained: organik_madde is predicted first, it is fed into the
    saturasyon model, saturasyon is fed into the kirec model, and all three predictions
    are fed into the product classifier.

    The caller's dictionary is never modified; encoding and predictions are applied to
    an internal copy, so the same dictionary can safely be passed again.

    Parameters:
    - input_data: A dictionary with the keys 'ilce_encoded' and 'tarim_sekli_encoded'
      (holding the original, un-encoded string values) plus 'potasyum', 'fosfor' and 'ph'.

    Returns:
    - A dictionary with the encoded and original categorical values, the input
      measurements, the three predictions (each floored at 0) and 'urun', a list with
      the two most probable product labels, most probable first.

    Raises:
    - KeyError: if a required key is missing from input_data.
    - TypeError: if a loaded encoder file does not contain a LabelEncoder.
    - Whatever the label encoders raise when a categorical value cannot be transformed.
    """
    base_features = ['ilce_encoded', 'tarim_sekli_encoded', 'potasyum', 'fosfor', 'ph']

    # Fail early, before any model file is read, when the input is incomplete
    missing_keys = [key for key in base_features if key not in input_data]
    if missing_keys:
        raise KeyError(f"input_data is missing required keys: {missing_keys}")

    # Work on a shallow copy so the caller's dictionary keeps its original string values
    features = dict(input_data)

    # Load the models
    model_organik = _load_booster('xgb_model_organik_madde.model')
    model_saturasyon = _load_booster('xgb_model_saturasyon.model')
    model_kirec = _load_booster('xgb_model_kirec.model')

    xgb_model = XGBClassifier()
    xgb_model.load_model('xgb_classifier.model')

    # Load the label encoders (each one is checked to be a LabelEncoder)
    label_encoder_ilce = _load_label_encoder('label_encoder_ilce.pkl', 'label_encoder_ilce')
    label_encoder_tarim = _load_label_encoder('label_encoder_tarim.pkl', 'label_encoder_tarim')
    label_encoder_urun = _load_label_encoder('label_encoder_urun.pkl', 'label_encoder_urun')

    # Keep the original string values so they can be returned next to the encoded ones
    original_ilce = features['ilce_encoded']
    original_tarim_sekli = features['tarim_sekli_encoded']

    # Encode the categorical values using the label encoders
    features['ilce_encoded'] = label_encoder_ilce.transform([original_ilce])[0]
    features['tarim_sekli_encoded'] = label_encoder_tarim.transform([original_tarim_sekli])[0]

    # Single-row DataFrame: DMatrix and the classifier both take column-selected frames
    input_df = pd.DataFrame([features])

    # Step 1: Predict 'organik_madde' from the base features
    predicted_organik_madde = model_organik.predict(xgb.DMatrix(input_df[base_features]))

    # Step 2: Predict 'saturasyon' using the predicted 'organik_madde'
    input_df['organik_madde'] = predicted_organik_madde
    predicted_saturasyon = model_saturasyon.predict(
        xgb.DMatrix(input_df[base_features + ['organik_madde']])
    )

    # Step 3: Predict 'kirec' using the predicted 'saturasyon'
    # NOTE(review): 'organik_madde' is not part of this feature list, exactly as in the
    # previous version of this function - confirm that matches how the kirec model was trained.
    input_df['saturasyon'] = predicted_saturasyon
    predicted_kirec = model_kirec.predict(
        xgb.DMatrix(input_df[base_features + ['saturasyon']])
    )

    # Step 4: Classify 'urun' on the base features plus all three predictions
    input_df['kirec'] = predicted_kirec
    classification_features = input_df[base_features + ['organik_madde', 'saturasyon', 'kirec']]
    class_probabilities = xgb_model.predict_proba(classification_features)

    # argsort is ascending: take the last two indices and reverse them so that the
    # most probable class comes first
    top_2_indices = np.argsort(class_probabilities[0])[-2:][::-1]

    # Decode the top 2 class indices back to their 'urun' labels
    top_2_urun = label_encoder_urun.inverse_transform(top_2_indices)

    # Return both encoded and original versions of ilce and tarim sekli with predictions
    return {
        'ilce_encoded': features['ilce_encoded'],
        'ilce': original_ilce,
        'tarim_sekli_encoded': features['tarim_sekli_encoded'],
        'tarim_sekli': original_tarim_sekli,
        'potasyum': features['potasyum'],
        'fosfor': features['fosfor'],
        'ph': features['ph'],
        # Negative predictions are replaced by 0
        'organikMadde': max(0, predicted_organik_madde[0]),
        'saturasyon': max(0, predicted_saturasyon[0]),
        'kirec': max(0, predicted_kirec[0]),
        'urun': top_2_urun.tolist()  # Top 2 predicted products
    }


In [15]:
# Sample request for predict_all: the two categorical fields carry their raw
# string values (predict_all runs them through the label encoders itself).
input_data = dict(
    ilce_encoded="aksu",         # raw string, not yet encoded
    tarim_sekli_encoded="sulu",  # raw string, not yet encoded
    potasyum=120,
    fosfor=70,
    ph=7.5,
)

# Run the whole prediction chain in one call
output_data = predict_all(input_data)

# Show the heading and the resulting dictionary on separate lines
print("Predicted Output Data:", output_data, sep="\n")

Predicted Output Data:
{'ilce_encoded': 0, 'ilce': 'aksu', 'tarim_sekli_encoded': 1, 'tarim_sekli': 'sulu', 'potasyum': 120, 'fosfor': 70, 'ph': 7.5, 'organikMadde': 3.073654, 'saturasyon': 58.109818, 'kirec': 0, 'urun': ['biber', 'cim']}


