In [28]:
import joblib
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, r2_score
from tensorflow.keras.models import load_model

# 1. Crop Recommendation
# Load the pickled crop classifier and score it on a 20% hold-out carved
# out of Crop_recommendation.csv. The split is seeded (random_state=42),
# so the same rows are selected on every run.
# NOTE(review): whether these rows were unseen during training depends on
# how the model was fitted, which this script does not show — confirm.
print("📊 Crop Recommendation Model Evaluation")
df_crop = pd.read_csv("../../../data/Crop_recommendation.csv")
model_crop = joblib.load("../models/crop_rec.pkl")

# "label" is the target; every remaining column is fed to the model.
y = df_crop["label"]
X = df_crop.drop("label", axis=1)

# Only the test portion is used here; the training portion is discarded.
_, X_test, _, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

crop_predictions = model_crop.predict(X_test)
acc = accuracy_score(y_test, crop_predictions)
print(f"🌾 Crop Accuracy: {acc*100:.2f}%\n")


# 2. Fertilizer Recommendation
# Load the pickled fertilizer classifier, reshape data_core.csv into the
# column layout the model reports via `feature_names_in_`, and score it on
# a seeded 20% hold-out.
print("🧪 Evaluating Fertilizer Recommendation Model...")
model_fert = joblib.load("../models/fert_rec.pkl")

# Raw CSV header -> feature name used by the model. Columns not listed here
# keep their original names and are dropped by the feature selection below
# unless the model asks for them.
# NOTE(review): "Moisture" is relabelled as "rainfall" — confirm this
# matches how the model was trained.
fert_column_aliases = {
    "Temparature": "temperature",  # header is misspelled in the CSV
    "Humidity": "humidity",
    "Moisture": "rainfall",
    "Nitrogen": "N",
    "Phosphorous": "P",
    "Potassium": "K",
}
df_fert = pd.read_csv("../../../data/data_core.csv").rename(
    columns=fert_column_aliases
)

# Select exactly the model's features, in the model's own order.
expected = list(model_fert.feature_names_in_)
print("Model expects features:", expected)
X = df_fert.loc[:, expected]

# Target column holding the fertilizer class.
label_col = "Fertilizer Name"
y = df_fert[label_col]

# Keep only the test portion of the split and score the predictions on it.
_, X_test, _, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)
fert_predictions = model_fert.predict(X_test)
acc = accuracy_score(y_test, fert_predictions)
print(f"🧪 Fertilizer Accuracy: {acc*100:.2f}%")





# 3. Disease Detection (CNN)
# Load the saved Keras CNN and evaluate it on the "validation" subset that
# ImageDataGenerator derives from the augmented image directory.
from tensorflow.keras.models import load_model
from tensorflow.keras.preprocessing.image import ImageDataGenerator

print("🌿 Evaluating Disease Detection Model (CNN)…")

model_disease = load_model("../models/disease_cnn.h5")

# Path is relative to the notebook location (src/app/testing/); the
# augmented images live under
# notebooks/data/Plant_leave_diseases_dataset_with_augmentation/.
test_dir = "../../../notebooks/data/Plant_leave_diseases_dataset_with_augmentation"

# Pixel values are scaled to [0, 1]; validation_split reserves 20% of the
# files in each class folder for the "validation" subset used below.
test_datagen = ImageDataGenerator(validation_split=0.2, rescale=1./255)

flow_options = dict(
    directory=test_dir,
    target_size=(224, 224),   # per the original note: same size as training
    batch_size=32,
    class_mode="categorical",
    subset="validation",      # the 20% slice defined by validation_split
    shuffle=False,            # iterate files in a fixed order
)
test_generator = test_datagen.flow_from_directory(**flow_options)

# evaluate() returns the loss followed by the compiled metrics; this script
# unpacks exactly two values (loss, accuracy).
loss, acc = model_disease.evaluate(test_generator, verbose=1)
print(f"🌿 Disease Detection Accuracy: {acc*100:.2f}%")




# 4. Market Price Prediction (optional)
# Sanity-check the three cleaned market CSVs: preview each table, list its
# columns, summarise the price column(s), and list the distinct commodities
# and markets. Problems are collected in `market_problems`, so the final
# success line is printed only when every check actually passed.
print("📈 Market Dashboard Data Check…\n")

# Load all three cleaned tables
area       = pd.read_csv("../../../market_price/data/area.csv")
prices     = pd.read_csv("../../../market_price/data/prices.csv")
production = pd.read_csv("../../../market_price/data/production.csv")

market_problems = []

# 1) Show their heads
for table_name, table in (
    ("area.csv", area),
    ("prices.csv", prices),
    ("production.csv", production),
):
    print(f"▶️ {table_name} (first 5 rows):")
    print(table.head(), "\n")

# 2) Basic sanity checks
print("▶️ area.csv columns:", area.columns.tolist())
print("▶️ prices.csv columns:", prices.columns.tolist())
print("▶️ production.csv columns:", production.columns.tolist(), "\n")

# 3) Price summary
if "modal_price" in prices.columns:
    print("▶️ modal_price stats:\n", prices["modal_price"].describe(), "\n")
else:
    # Fall back to any column whose name mentions "price".
    price_cols = [c for c in prices.columns if "price" in c.lower()]
    print(f"▶️ Found price columns: {price_cols}")
    if not price_cols:
        market_problems.append("prices.csv has no price column")
    for col in price_cols:
        print(f"{col} stats:\n", prices[col].describe(), "\n")

# 4) Unique commodities & markets for the sidebar
# Guard each lookup so a missing column is reported instead of aborting the
# whole check with a bare KeyError.
for column, heading in (
    ("commodity_name", "▶️ Unique commodities:"),
    ("market_name", "▶️ Unique markets:"),
):
    if column in prices.columns:
        print(heading, prices[column].unique())
    else:
        market_problems.append(f"prices.csv is missing column '{column}'")

if market_problems:
    print("\n⚠️ Market data check found problems:")
    for problem in market_problems:
        print(f"   - {problem}")
else:
    print("\n✅ Market data looks good! Your Streamlit dashboard should be able to load these files.")

📊 Crop Recommendation Model Evaluation
🌾 Crop Accuracy: 100.00%

🧪 Evaluating Fertilizer Recommendation Model...
Model expects features: ['N', 'P', 'K', 'temperature', 'humidity', 'rainfall']




🧪 Fertilizer Accuracy: 83.25%
🌿 Evaluating Disease Detection Model (CNN)…
Found 12289 images belonging to 39 classes.


  self._warn_if_super_not_called()


385/385 ━━━━━━━━━━━━━━━━━━━━ 78s 202ms/step - accuracy: 0.7685 - loss: 2.4317
🌿 Disease Detection Accuracy: 73.97%
📈 Market Dashboard Data Check…

▶️ area.csv (first 5 rows):
  state_name         district     year commodity_name    area
0  Karnataka         BAGALKOT  2001-02       Arecanut     4.0
1  Karnataka         BAGALKOT  2003-04       Arecanut     6.0
2  Karnataka  BANGALORE RURAL  2001-02       Arecanut  1828.0
3  Karnataka  BANGALORE RURAL  2003-04       Arecanut  1998.0
4  Karnataka          BELGAUM  2003-04       Arecanut     2.0 

▶️ prices.csv (first 5 rows):
       commodity_name     market_name arrival_date  modal_price
0       Alasande+Gram       Bagalakot   22-05-2019        46.46
1       Alasande+Gram       Bangalore   22-05-2019        50.50
2       Alasande+Gram      Laxmeshwar   22-05-2019        36.54
3            Antawala  Chikkamagalore   22-05-2019        20.00
4  Arhar+Dal(Tur+Dal)       Bangalore   22-05-2019        83.00 

▶