# Prediction with Gradient Boosting

In [7]:
import joblib
import pandas as pd
from sklearn.preprocessing import OneHotEncoder
from sklearn.compose import ColumnTransformer

# Input columns grouped by kind; predict_yield selects them in this order
# (categorical first, then numerical).
categorical_features = [
    'Item',
    'Area',
]
numerical_features = [
    'Pesticides',
    'avg_temp',
    'avg_precipitation',
]

# Restore the persisted artifacts from disk. Paths are relative to the
# current working directory.
# NOTE(review): joblib.load unpickles the file, so only load artifacts from
# a trusted source.
gb_loaded = joblib.load('../models/gradient_boosting_model.pkl')
preprocessor = joblib.load('../models/preprocessor.pkl')

# Function to preprocess and predict new data
def predict_yield(new_data: pd.DataFrame):
    """Predict with the loaded model for the rows of *new_data*.

    The frame is narrowed to the columns named in ``categorical_features``
    followed by ``numerical_features`` (in that order), passed through the
    loaded ``preprocessor``, and the result is handed to ``gb_loaded``.

    Args:
        new_data: Frame containing at least the columns listed in
            ``categorical_features`` and ``numerical_features``. Extra
            columns are ignored by the selection.

    Returns:
        Whatever ``gb_loaded.predict`` returns for the transformed rows
        (presumably one prediction per input row -- confirm against the
        saved model).

    Raises:
        KeyError: If any expected column is absent from *new_data*.
    """
    # Select and order the columns the preprocessor is fed.
    expected_columns = categorical_features + numerical_features
    model_input = new_data[expected_columns]

    # Transform only -- the loaded preprocessor is not re-fitted here.
    return gb_loaded.predict(preprocessor.transform(model_input))

# Example: predict for a single hand-written observation.
new_data = pd.DataFrame(
    [
        {
            'Item': 'Cassava',
            'Area': 'Angola',
            'Pesticides': 70,
            'avg_temp': 24.1,
            'avg_precipitation': 1010,
        },
    ]
)

# Run the one-row frame through the helper and show the result.
predicted_yield = predict_yield(new_data)
print("Predicted Yield:", predicted_yield)


Predicted Yield: [36836.4172741]
