In [None]:
import pandas as pd
import joblib
import numpy as np
import matplotlib.pyplot as plt
import io
import base64
from sklearn.model_selection import cross_val_score, KFold, GridSearchCV
from sklearn.metrics import mean_absolute_error, mean_squared_error
from sklearn.ensemble import RandomForestRegressor
from sklearn.preprocessing import StandardScaler

# Function to load the pre-trained model and scaler
def load_model():
    """Load the persisted regression model and its feature scaler.

    Reads ``crop_yield_model.pkl`` and ``scaler.pkl`` (relative paths) with
    ``joblib.load``.

    Returns:
        tuple: ``(model, scaler)`` as they were saved.

    Raises:
        FileNotFoundError: If either file is missing. The underlying error
            is attached as ``__cause__`` so the traceback still shows which
            file could not be opened.
    """
    # NOTE(review): joblib.load unpickles arbitrary objects; only load
    # artifacts produced by this project, never files from untrusted sources.
    try:
        model = joblib.load("crop_yield_model.pkl")
        scaler = joblib.load("scaler.pkl")
    except FileNotFoundError as err:
        raise FileNotFoundError(
            "Model or scaler file not found! Train and save your model first."
        ) from err
    return model, scaler

# Function to clean dataset
def clean_data(data):
    """Return the rows of *data* that are complete and within range.

    Rows containing any missing value are dropped first. Of the remaining
    rows, one is kept when ``Pesticides_Usage`` is non-negative,
    ``Avg_Temperature`` lies in the inclusive range [-50, 50], and
    ``Avg_Rainfall`` is non-negative. The input frame is not modified.
    """
    complete_rows = data.dropna()

    # One boolean mask per rule keeps each criterion readable on its own.
    pesticide_ok = complete_rows["Pesticides_Usage"] >= 0
    temperature_ok = complete_rows["Avg_Temperature"].between(-50, 50)
    rainfall_ok = complete_rows["Avg_Rainfall"] >= 0

    return complete_rows[pesticide_ok & temperature_ok & rainfall_ok]

# Function to calculate confidence intervals
def get_confidence_interval(model, X, confidence=0.95):
    """Derive per-sample bounds from the spread of the ensemble members.

    Each estimator in ``model.estimators_`` predicts on ``X``. For every
    sample, the bounds are the percentiles of those individual predictions
    that leave ``(1 - confidence) / 2`` of them in each tail.

    Returns:
        tuple: ``(lower, upper)`` arrays with one entry per row of ``X``.
    """
    # Rows are estimators, columns are samples.
    per_estimator = np.array(
        [estimator.predict(X) for estimator in model.estimators_]
    )
    lower_pct = (1 - confidence) / 2 * 100
    upper_pct = (1 + confidence) / 2 * 100
    # Both percentiles are taken across estimators (axis 0) in one call.
    lower, upper = np.percentile(per_estimator, [lower_pct, upper_pct], axis=0)
    return lower, upper

# Function to interpret the crop yield predictions
def interpret_yield(yield_value):
    """Map a predicted yield onto a short advisory message.

    Values below 2 are reported as low, values from 2 through 5 inclusive
    as moderate, and anything else as high.
    """
    if yield_value < 2:
        return "Low yield. Possible issues: insufficient pesticide use, extreme temperatures, or inadequate rainfall."
    # The lower bound (>= 2) is re-checked explicitly so a value that fails
    # every comparison (e.g. NaN) falls through to the final message.
    if 2 <= yield_value <= 5:
        return "Moderate yield. Consider fine-tuning pesticide application and irrigation."
    return "High yield. Conditions are good, but optimization can improve efficiency."

# Function to suggest improvements based on input conditions
def suggest_improvements(pesticides, temperature, rainfall):
    """Build an advisory string from the raw input conditions.

    A suggestion is produced when pesticide usage is below 50, when the
    temperature is below 15 or above 35, and when rainfall is below 20 or
    above 100. The triggered suggestions are joined with "; "; if none is
    triggered, a message stating that conditions are optimal is returned.
    """
    # (condition, message) pairs, in the order they appear in the output.
    rules = (
        (
            pesticides < 50,
            "Increase pesticide usage to prevent potential crop diseases.",
        ),
        (
            temperature < 15 or temperature > 35,
            "Monitor temperature conditions. Extreme temperatures may affect crop growth.",
        ),
        (
            rainfall < 20 or rainfall > 100,
            "Adjust irrigation based on rainfall levels.",
        ),
    )
    triggered = [message for condition, message in rules if condition]

    if not triggered:
        return "Current conditions are optimal."
    return "Suggested improvements: " + "; ".join(triggered)

# Function to make predictions on a single data point
def predict_yield(pesticides, temperature, rainfall):
    """Predict the yield for a single set of conditions.

    Args:
        pesticides: Pesticide usage value.
        temperature: Average temperature value.
        rainfall: Average rainfall value.

    Returns:
        tuple: ``(predicted_yield, lower, upper, interpretation,
        improvements)`` where ``lower``/``upper`` come from
        ``get_confidence_interval`` and the two strings come from
        ``interpret_yield`` and ``suggest_improvements``.

    Raises:
        FileNotFoundError: Propagated from ``load_model`` when the saved
            model or scaler is missing.
    """
    model, scaler = load_model()

    # The training code in this file fits the scaler on a DataFrame with
    # these column names. Passing a bare ndarray makes scikit-learn warn
    # about missing feature names, so a one-row frame with the same columns
    # is used here (matching what batch_predict_yield passes).
    input_frame = pd.DataFrame(
        [[pesticides, temperature, rainfall]],
        columns=["Pesticides_Usage", "Avg_Temperature", "Avg_Rainfall"],
    )
    input_scaled = scaler.transform(input_frame)

    predicted_yield = model.predict(input_scaled)[0]
    lower, upper = get_confidence_interval(model, input_scaled)

    yield_interpretation = interpret_yield(predicted_yield)
    # Suggestions are based on the raw (unscaled) inputs.
    improvements = suggest_improvements(pesticides, temperature, rainfall)
    return predicted_yield, lower[0], upper[0], yield_interpretation, improvements

# Function to predict yield for a dataset (CSV file)
def batch_predict_yield(csv_file):
    """Predict yields for every valid row of a CSV file.

    The file must contain the columns ``Pesticides_Usage``,
    ``Avg_Temperature`` and ``Avg_Rainfall``. Rows are filtered with
    ``clean_data`` before prediction.

    Args:
        csv_file: Path to the CSV file (the ``.csv`` suffix is matched
            case-insensitively).

    Returns:
        pandas.DataFrame: The cleaned rows with four added columns:
        ``Predicted_Crop_Yield``, ``Lower_Bound``, ``Upper_Bound`` and
        ``Yield_Interpretation``.

    Raises:
        ValueError: If the path does not end in ``.csv`` or no rows remain
            after cleaning.
        KeyError: If a required column is missing.
        FileNotFoundError: Propagated from ``load_model`` or
            ``pandas.read_csv``.
    """
    required_columns = ["Pesticides_Usage", "Avg_Temperature", "Avg_Rainfall"]

    # Cheap argument validation first, before any file is touched.
    if not str(csv_file).lower().endswith('.csv'):
        raise ValueError("Invalid file format! Please upload a CSV file.")

    model, scaler = load_model()
    data = pd.read_csv(csv_file)

    # Must run before clean_data(): clean_data indexes these columns itself,
    # so checking afterwards could never produce this message.
    for col in required_columns:
        if col not in data.columns:
            raise KeyError(f"Missing column: {col} in the dataset")

    # Copy so the column assignments below act on an independent frame
    # rather than on a filtered slice (avoids SettingWithCopyWarning).
    data = clean_data(data).copy()
    if data.empty:
        raise ValueError("No valid rows left in the dataset after cleaning.")

    X_new_scaled = scaler.transform(data[required_columns])
    predictions = model.predict(X_new_scaled)
    lower, upper = get_confidence_interval(model, X_new_scaled)

    data["Predicted_Crop_Yield"] = predictions
    data["Lower_Bound"] = lower
    data["Upper_Bound"] = upper
    data["Yield_Interpretation"] = data["Predicted_Crop_Yield"].apply(interpret_yield)

    return data

# Function to display feature importance
def feature_importance():
    """Render the model's feature importances as a PNG bar chart.

    Returns:
        io.BytesIO: In-memory PNG image, positioned at offset 0 so it can
        be read immediately.

    Raises:
        FileNotFoundError: Propagated from ``load_model``.
    """
    model, _ = load_model()
    feature_importances = model.feature_importances_
    feature_names = ["Pesticides_Usage", "Avg_Temperature", "Avg_Rainfall"]

    fig, ax = plt.subplots(figsize=(6, 4))
    try:
        ax.barh(feature_names, feature_importances, color='skyblue')
        ax.set_xlabel("Importance Score")
        ax.set_ylabel("Feature")
        ax.set_title("Feature Importance in Yield Prediction")
        img = io.BytesIO()
        fig.savefig(img, format='png')
    finally:
        # pyplot keeps every figure alive until it is closed; without this
        # each call would leak one figure. The chart is returned as bytes,
        # so the live figure is no longer needed.
        plt.close(fig)

    img.seek(0)
    return img

# Function to train and save the model (Run this once)
def train_and_save_model(csv_file=None):
    """Train a random-forest regressor and persist it with its scaler.

    The data is cleaned with ``clean_data``, the three feature columns are
    standardized, and a ``RandomForestRegressor`` (100 trees,
    ``random_state=42``) is fitted on ``Crop_Yield``. The model is written
    to ``crop_yield_model.pkl`` and the scaler to ``scaler.pkl``.

    Args:
        csv_file: Path to the training CSV. When omitted, the path is
            requested interactively so the existing no-argument call from
            the menu keeps working.
    """
    if csv_file is None:
        csv_file = input("Enter the training CSV file path: ")

    # The original assigned the pd.read_csv function object itself instead
    # of calling it, so clean_data() failed on a non-DataFrame.
    data = pd.read_csv(csv_file)
    data = clean_data(data)

    X = data[["Pesticides_Usage", "Avg_Temperature", "Avg_Rainfall"]]
    y = data["Crop_Yield"]

    scaler = StandardScaler()
    X_scaled = scaler.fit_transform(X)

    model = RandomForestRegressor(n_estimators=100, random_state=42)
    model.fit(X_scaled, y)

    joblib.dump(model, "crop_yield_model.pkl")
    joblib.dump(scaler, "scaler.pkl")

    print("Model and scaler saved successfully!")

# Function for cross-validating the model
def cross_validate_model(csv_file=None):
    """Report 5-fold cross-validation metrics for the saved model.

    Loads ``scaler.pkl`` and ``crop_yield_model.pkl``, then prints the
    per-fold R² scores, their mean, the mean absolute error and the mean
    of the per-fold root mean squared errors. Any error is caught and
    printed rather than raised.

    Args:
        csv_file: Path to the CSV containing the feature columns and
            ``Crop_Yield``. When omitted, the path is requested
            interactively so the existing no-argument call keeps working.
    """
    # Function-scope import: cross_validate is not among the names imported
    # at the top of the file.
    from sklearn.model_selection import cross_validate

    try:
        if csv_file is None:
            csv_file = input("Enter the training CSV file path: ")

        # The original assigned the pd.read_csv function object itself
        # instead of calling it.
        data = pd.read_csv(csv_file)
        data = clean_data(data)

        X = data[["Pesticides_Usage", "Avg_Temperature", "Avg_Rainfall"]]
        y = data["Crop_Yield"]

        # NOTE(review): the scaler is loaded pre-fitted, presumably on the
        # full dataset, so the folds are not fully independent of it -
        # confirm whether that is acceptable for this evaluation.
        scaler = joblib.load("scaler.pkl")
        X_scaled = scaler.transform(X)

        model = joblib.load("crop_yield_model.pkl")
        kf = KFold(n_splits=5, shuffle=True, random_state=42)

        # One pass computes all three metrics on the same folds, instead of
        # refitting the model separately for each metric.
        results = cross_validate(
            model,
            X_scaled,
            y,
            cv=kf,
            scoring=("r2", "neg_mean_absolute_error", "neg_mean_squared_error"),
        )
        scores = results["test_r2"]
        # scikit-learn reports error metrics negated; flip the sign back.
        mae = -results["test_neg_mean_absolute_error"]
        rmse = np.sqrt(-results["test_neg_mean_squared_error"])

        print(f"Cross-validation R² scores: {scores}")
        print(f"Mean R² score: {np.mean(scores):.4f}")
        print(f"Mean Absolute Error (MAE): {np.mean(mae):.4f}")
        print(f"Root Mean Squared Error (RMSE): {np.mean(rmse):.4f}")

    except Exception as e:
        print(f"Error during cross-validation: {e}")

# Function for tuning hyperparameters with GridSearchCV
def tune_hyperparameters(csv_file=None):
    """Grid-search random-forest hyperparameters and save the best model.

    Runs ``GridSearchCV`` (5-fold, R² scoring, all CPU cores) over
    ``n_estimators``, ``max_depth`` and ``min_samples_split``, prints the
    best parameters and score, and overwrites ``crop_yield_model.pkl`` and
    ``scaler.pkl`` with the best estimator and the freshly fitted scaler.
    Any error is caught and printed rather than raised.

    Args:
        csv_file: Path to the training CSV. When omitted, the path is
            requested interactively so the existing no-argument call keeps
            working.
    """
    try:
        if csv_file is None:
            csv_file = input("Enter the training CSV file path: ")

        # The original assigned the pd.read_csv function object itself
        # instead of calling it.
        data = pd.read_csv(csv_file)
        data = clean_data(data)

        X = data[["Pesticides_Usage", "Avg_Temperature", "Avg_Rainfall"]]
        y = data["Crop_Yield"]

        scaler = StandardScaler()
        X_scaled = scaler.fit_transform(X)

        param_grid = {
            'n_estimators': [50, 100, 200],
            'max_depth': [None, 10, 20],
            'min_samples_split': [2, 5, 10]
        }

        model = RandomForestRegressor(random_state=42)
        grid_search = GridSearchCV(model, param_grid, cv=5, scoring='r2', n_jobs=-1)
        grid_search.fit(X_scaled, y)

        print(f"Best Parameters: {grid_search.best_params_}")
        print(f"Best R² Score: {grid_search.best_score_:.4f}")

        # Model and scaler are saved together so they stay a matching pair.
        joblib.dump(grid_search.best_estimator_, "crop_yield_model.pkl")
        joblib.dump(scaler, "scaler.pkl")
        print("Tuned model and scaler saved successfully!")

    except Exception as e:
        print(f"Error during hyperparameter tuning: {e}")

# Main function to test predictions
def main():
    """Run the interactive text menu.

    Prompts for one of five options (batch CSV prediction, manual
    prediction, training, cross-validation, hyperparameter tuning) and
    dispatches to the matching function. Errors are reported on stdout
    instead of surfacing as tracebacks.
    """
    print("Choose an option:")
    print("1. Test with a CSV file")
    print("2. Manual input")
    print("3. Train & Save Model (Run once)")
    print("4. Cross-validate the model")
    print("5. Tune hyperparameters")

    # Strip so stray whitespace around the digit does not fall through to
    # the "Invalid choice" branch.
    choice = input("Enter 1, 2, 3, 4, or 5: ").strip()

    if choice == '1':
        file_path = input("Enter the CSV file path: ").strip()
        try:
            results = batch_predict_yield(file_path)
            print(results)
        except Exception as e:
            print(f"Error: {e}")

    elif choice == '2':
        # Only the numeric parsing is guarded by ValueError, so a failure
        # inside the prediction itself is not misreported as bad input.
        try:
            pesticides = float(input("Enter Pesticide Usage: "))
            temperature = float(input("Enter Average Temperature: "))
            rainfall = float(input("Enter Average Rainfall: "))
        except ValueError:
            print("Invalid input. Please enter numeric values.")
            return

        try:
            predicted_yield, lower, upper, interpretation, improvements = predict_yield(pesticides, temperature, rainfall)
        except Exception as e:
            # E.g. the model/scaler files have not been created yet.
            print(f"Error: {e}")
            return

        # NOTE(review): the sample run captured with this script prints a
        # value of 89352.17 here - confirm that "tons/hectare" and the
        # thresholds in interpret_yield match the unit of Crop_Yield.
        print(f"Predicted Yield: {predicted_yield:.2f} tons/hectare")
        print(f"Confidence Interval: ({lower:.2f}, {upper:.2f})")
        print(interpretation)
        print(improvements)

    elif choice == '3':
        # The other options report failures as a message; do the same here.
        try:
            train_and_save_model()
        except Exception as e:
            print(f"Error: {e}")

    elif choice == '4':
        cross_validate_model()

    elif choice == '5':
        tune_hyperparameters()

    else:
        print("Invalid choice. Please enter 1, 2, 3, 4, or 5.")

# Start the interactive menu only when this file is executed directly;
# importing it as a module just defines the functions above.
if __name__ == "__main__":
    main()


Choose an option:
1. Test with a CSV file
2. Manual input
3. Train & Save Model (Run once)
4. Cross-validate the model
5. Tune hyperparameters


Enter 1, 2, 3, 4, or 5:  2
Enter Pesticide Usage:  66
Enter Average Temperature:  55
Enter Average Rainfall:  44


Predicted Yield: 89352.17 tons/hectare
Confidence Interval: (81138.00, 98881.51)
High yield. Conditions are good, but optimization can improve efficiency.
Suggested improvements: Monitor temperature conditions. Extreme temperatures may affect crop growth.


