In [1]:
import os
import numpy as np
import tensorflow as tf
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.applications import DenseNet121
from tensorflow.keras.applications.densenet import preprocess_input
from tensorflow.keras.layers import GlobalAveragePooling2D, Dense, BatchNormalization
from tensorflow.keras.models import Sequential
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.callbacks import EarlyStopping

SEED = 42
IMG_SIZE = (256, 256)
BATCH = 32

SOURCE = "/kaggle/input/paddy-disease-classification/train_images"
TRAIN = "/kaggle/working/train"
VAL   = "/kaggle/working/val"

# ------------------------------------
# 1. Train/Validation Split (80/20)
# ------------------------------------
from sklearn.model_selection import train_test_split
import shutil

os.makedirs(TRAIN, exist_ok=True)
os.makedirs(VAL, exist_ok=True)

for cls in os.listdir(SOURCE):
    cls_path = os.path.join(SOURCE, cls)
    if not os.path.isdir(cls_path): continue

    files = os.listdir(cls_path)
    train_f, val_f = train_test_split(files, test_size=0.2, random_state=SEED)

    os.makedirs(os.path.join(TRAIN, cls), exist_ok=True)
    os.makedirs(os.path.join(VAL, cls), exist_ok=True)

    for f in train_f:
        shutil.copy(os.path.join(cls_path, f), os.path.join(TRAIN, cls, f))
    for f in val_f:
        shutil.copy(os.path.join(cls_path, f), os.path.join(VAL, cls, f))

print("Split done.")

# ------------------------------------
# 2. Image Generators
# ------------------------------------
train_gen = ImageDataGenerator(
    preprocessing_function=preprocess_input,
    rotation_range=25,
    zoom_range=0.2,
    horizontal_flip=True,
    brightness_range=[0.8, 1.2]
)

val_gen = ImageDataGenerator(preprocessing_function=preprocess_input)

train_ds = train_gen.flow_from_directory(
    TRAIN,
    target_size=IMG_SIZE,
    batch_size=BATCH,
    class_mode="categorical",
    shuffle=True,
    seed=SEED
)

val_ds = val_gen.flow_from_directory(
    VAL,
    target_size=IMG_SIZE,
    batch_size=BATCH,
    class_mode="categorical",
    shuffle=False
)

NUM_CLASSES = len(train_ds.class_indices)

# ------------------------------------
# 3. DenseNet121 Model
# ------------------------------------
base = DenseNet121(
    weights="imagenet",
    include_top=False,
    input_shape=(256, 256, 3)
)
base.trainable = False   # freeze base for stability in JEI setting

model = Sequential([
    base,
    GlobalAveragePooling2D(),
    BatchNormalization(),
    Dense(256, activation="relu"),
    BatchNormalization(),
    Dense(NUM_CLASSES, activation="softmax")
])

model.compile(
    optimizer=Adam(1e-4),
    loss="categorical_crossentropy",
    metrics=["accuracy"]
)

# ------------------------------------
# 4. Train Model
# ------------------------------------
early_stop = EarlyStopping(
    monitor="val_loss",
    patience=5,
    restore_best_weights=True
)

history = model.fit(
    train_ds,
    validation_data=val_ds,
    epochs=15,
    callbacks=[early_stop],
    verbose=2
)

model.save("/kaggle/working/densenet_bio_tool.h5")
print("DenseNet model saved!")


2025-11-23 08:53:26.108019: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:477] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
E0000 00:00:1763888006.308713      19 cuda_dnn.cc:8310] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
E0000 00:00:1763888006.370605      19 cuda_blas.cc:1418] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered


Split done.
Found 8323 images belonging to 10 classes.
Found 2084 images belonging to 10 classes.


I0000 00:00:1763888093.807383      19 gpu_device.cc:2022] Created device /job:localhost/replica:0/task:0/device:GPU:0 with 13942 MB memory:  -> device: 0, name: Tesla T4, pci bus id: 0000:00:04.0, compute capability: 7.5
I0000 00:00:1763888093.808139      19 gpu_device.cc:2022] Created device /job:localhost/replica:0/task:0/device:GPU:1 with 13942 MB memory:  -> device: 1, name: Tesla T4, pci bus id: 0000:00:05.0, compute capability: 7.5


Downloading data from https://storage.googleapis.com/tensorflow/keras-applications/densenet/densenet121_weights_tf_dim_ordering_tf_kernels_notop.h5
[1m29084464/29084464[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 0us/step


  self._warn_if_super_not_called()


Epoch 1/15


I0000 00:00:1763888118.973333      76 service.cc:148] XLA service 0x7fe7c4003ce0 initialized for platform CUDA (this does not guarantee that XLA will be used). Devices:
I0000 00:00:1763888118.974144      76 service.cc:156]   StreamExecutor device (0): Tesla T4, Compute Capability 7.5
I0000 00:00:1763888118.974164      76 service.cc:156]   StreamExecutor device (1): Tesla T4, Compute Capability 7.5
I0000 00:00:1763888121.801160      76 cuda_dnn.cc:529] Loaded cuDNN version 90300
I0000 00:00:1763888133.201875      76 device_compiler.h:188] Compiled cluster using XLA!  This line is logged at most once for the lifetime of the process.


261/261 - 212s - 812ms/step - accuracy: 0.3511 - loss: 1.9764 - val_accuracy: 0.4765 - val_loss: 1.5834
Epoch 2/15
261/261 - 150s - 576ms/step - accuracy: 0.5363 - loss: 1.4165 - val_accuracy: 0.5787 - val_loss: 1.2915
Epoch 3/15
261/261 - 148s - 569ms/step - accuracy: 0.6028 - loss: 1.2151 - val_accuracy: 0.6392 - val_loss: 1.1218
Epoch 4/15
261/261 - 154s - 592ms/step - accuracy: 0.6392 - loss: 1.1070 - val_accuracy: 0.6751 - val_loss: 1.0091
Epoch 5/15
261/261 - 147s - 562ms/step - accuracy: 0.6752 - loss: 1.0117 - val_accuracy: 0.7025 - val_loss: 0.9293
Epoch 6/15
261/261 - 150s - 576ms/step - accuracy: 0.6957 - loss: 0.9496 - val_accuracy: 0.7212 - val_loss: 0.8898
Epoch 7/15
261/261 - 149s - 569ms/step - accuracy: 0.7065 - loss: 0.9012 - val_accuracy: 0.7260 - val_loss: 0.8500
Epoch 8/15
261/261 - 149s - 571ms/step - accuracy: 0.7214 - loss: 0.8371 - val_accuracy: 0.7361 - val_loss: 0.8248
Epoch 9/15
261/261 - 147s - 564ms/step - accuracy: 0.7449 - loss: 0.7962 - val_accuracy: 0.

In [2]:
# ============================================================
# FINAL, FULLY-CORRECTED SEVERITY EXTRACTION PIPELINE
# ============================================================
import numpy as np
import tensorflow as tf
import pandas as pd
import cv2
import os
from tensorflow.keras.models import load_model
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.applications.densenet import preprocess_input
import matplotlib.pyplot as plt

# --------------------------
# 1. LOAD MODEL
# --------------------------
model_path = "/kaggle/working/densenet_bio_tool.h5"
model = load_model(model_path)
print("Model loaded!")

# Force graph build
_ = model.predict(np.zeros((1,256,256,3)))
print("Model graph initialized!")

# Split into backbone + head
backbone = model.layers[0]       # DenseNet121
gap_layer = model.layers[1]
bn1       = model.layers[2]
dense1    = model.layers[3]
bn2       = model.layers[4]
dense2    = model.layers[5]

# --------------------------
# 2. LOAD METADATA
# --------------------------
meta = pd.read_csv("/kaggle/input/paddy-disease-classification/train.csv")

# --------------------------
# 3. LOAD VAL IMAGES
# --------------------------
IMG_SIZE = (256,256)
VAL_DIR = "/kaggle/working/val"

datagen = ImageDataGenerator(preprocessing_function=preprocess_input)

val_ds = datagen.flow_from_directory(
    VAL_DIR,
    target_size=IMG_SIZE,
    class_mode=None,
    shuffle=False,
    batch_size=1
)

index_to_class = {v:k for k,v in val_ds.class_indices.items()}

# --------------------------
# 4. FULLY-WORKING GRAD-CAM
# --------------------------
def get_gradcam(img_array, layer_name="conv5_block16_concat"):

    # Build model: DenseNet input -> conv layer + backbone output
    grad_model = tf.keras.models.Model(
        inputs=backbone.input,
        outputs=[
            backbone.get_layer(layer_name).output,
            backbone.output
        ]
    )

    with tf.GradientTape() as tape:
        conv_out, backbone_feats = grad_model(img_array)

        # Manually pass through classification head
        x = gap_layer(backbone_feats)
        x = bn1(x, training=False)
        x = dense1(x)
        x = bn2(x, training=False)
        preds = dense2(x)

        class_idx = tf.argmax(preds[0])
        class_score = preds[:, class_idx]

    # Compute gradients
    grads = tape.gradient(class_score, conv_out)
    pooled_grads = tf.reduce_mean(grads, axis=(0,1,2))

    conv_out = conv_out[0]

    # Weighted sum of activation maps
    heatmap = tf.reduce_sum(conv_out * pooled_grads, axis=-1)

    heatmap = np.maximum(heatmap, 0)
    heatmap /= (np.max(heatmap) + 1e-10)

    return heatmap   # <- FIXED

# --------------------------
# 5. SEVERITY EXTRACTION LOOP
# --------------------------
records = []

for i in range(len(val_ds)):
    img = val_ds[i]  # shape (1,256,256,3)

    # prediction
    preds = model.predict(img, verbose=0)
    pred_idx = np.argmax(preds)
    pred_label = index_to_class[pred_idx]

    # filename → metadata mapping
    filename = val_ds.filenames[i]
    img_id = filename.split("/")[-1]

    row = meta[meta["image_id"] == img_id]
    if row.empty:
        continue

    age = int(row["age"].values[0])
    true_label = row["label"].values[0]

    # Grad-CAM heatmap
    heatmap = get_gradcam(img)

    # Convert to mask
    heatmap_resized = cv2.resize(heatmap, IMG_SIZE)
    mask = (heatmap_resized > 0.4).astype(np.uint8)
    severity = mask.sum() / mask.size

    records.append([img_id, age, true_label, pred_label, severity])

    if i % 200 == 0:
        print(f"Processed {i} images...")

# --------------------------
# 6. SAVE CSV
# --------------------------
df = pd.DataFrame(records, columns=["image_id", "age", "true_label", "pred_label", "severity"])
df.to_csv("/kaggle/working/severity_scores.csv", index=False)

print("\nSeverity extraction complete!")
df.head()


Model loaded!
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m11s[0m 11s/step
Model graph initialized!
Found 2084 images belonging to 10 classes.
Processed 0 images...
Processed 200 images...
Processed 400 images...
Processed 600 images...
Processed 800 images...
Processed 1000 images...
Processed 1200 images...
Processed 1400 images...
Processed 1600 images...
Processed 1800 images...
Processed 2000 images...

Severity extraction complete!


Unnamed: 0,image_id,age,true_label,pred_label,severity
0,100169.jpg,65,bacterial_leaf_blight,bacterial_leaf_blight,0.565765
1,100365.jpg,45,bacterial_leaf_blight,blast,0.095276
2,100382.jpg,45,bacterial_leaf_blight,brown_spot,0.434875
3,100513.jpg,65,bacterial_leaf_blight,bacterial_leaf_blight,0.565765
4,100632.jpg,45,bacterial_leaf_blight,bacterial_leaf_blight,0.484528


In [3]:
# ================================
# JEI ANALYSIS + PLOTS CELL
# ================================

import os
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from scipy import stats

# ---------------------------------------
# 1. LOAD SEVERITY CSV
# ---------------------------------------
csv_path = "/kaggle/working/severity_scores.csv"
assert os.path.exists(csv_path), "CSV not found! Did you run severity extraction?"
df = pd.read_csv(csv_path)

df['age'] = pd.to_numeric(df['age'], errors='coerce')
df['severity'] = pd.to_numeric(df['severity'], errors='coerce')
df = df.dropna(subset=['age','severity'])

print("Rows:", len(df))
print(df.head())

# ---------------------------------------
# 2. SUMMARY STATISTICS
# ---------------------------------------
print("\n=== SUMMARY ===")
print(df[['age','severity']].describe())

print("\nAge range:", df['age'].min(), "-", df['age'].max())
print("Severity range:", df['severity'].min(), "-", df['severity'].max())

# ---------------------------------------
# 3. CORRELATIONS (JEI-SAFE)
# ---------------------------------------
pearson_r, pearson_p = stats.pearsonr(df['age'], df['severity'])
spearman_r, spearman_p = stats.spearmanr(df['age'], df['severity'])

print(f"\nPearson r = {pearson_r:.4f}, p = {pearson_p:.4g}")
print(f"Spearman rho = {spearman_r:.4f}, p = {spearman_p:.4g}")

# ---------------------------------------
# 4. SIMPLE LINEAR REGRESSION
# ---------------------------------------
slope, intercept, r_val, p_val, std_err = stats.linregress(df['age'], df['severity'])
print(f"\nLinear regression: severity = {slope:.6f}*age + {intercept:.6f}")
print(f"R² = {r_val**2:.4f}, p = {p_val:.4g}")

# ---------------------------------------
# 5. KRUSKAL–WALLIS (JEI-friendly ANOVA)
# ---------------------------------------
groups = [grp['severity'].values for name, grp in df.groupby('true_label')]
kw_stat, kw_p = stats.kruskal(*groups)
print(f"\nKruskal–Wallis H = {kw_stat:.4f}, p = {kw_p:.4g}")

# ---------------------------------------
# 6. AGE BINS
# ---------------------------------------
bins = [0,20,40,60,80,200]
labels = ['0–20','21–40','41–60','61–80','81+']
df['age_bin'] = pd.cut(df['age'], bins=bins, labels=labels, include_lowest=True)

print("\nSeverity by age bin:")
print(df.groupby('age_bin').severity.agg(['count','mean','median','std']))

# ---------------------------------------
# 7. PLOT DIRECTORY
# ---------------------------------------
out_dir = "/kaggle/working/jei_plots"
os.makedirs(out_dir, exist_ok=True)

# ---------------------------------------
# 8. FIGURE 1: Age vs Severity (scatter + line)
# ---------------------------------------
plt.figure(figsize=(6,5))
plt.scatter(df['age'], df['severity'], alpha=0.35)
x = np.linspace(df['age'].min(), df['age'].max(), 300)
y = intercept + slope*x
plt.plot(x, y)
plt.xlabel("Plant Age (days)")
plt.ylabel("Severity (fraction of leaf area)")
plt.title("Age vs Severity (scatter + trendline)")
plt.tight_layout()
plt.savefig(f"{out_dir}/figure_age_vs_severity.png", dpi=150)
plt.close()

# ---------------------------------------
# 9. FIGURE 2: Severity by disease (top diseases)
# ---------------------------------------
counts = df['true_label'].value_counts()
diseases = counts[counts >= 20].index.tolist()  # only stable categories

if len(diseases) > 0:
    data = [df[df['true_label']==d]['severity'].values for d in diseases]
    plt.figure(figsize=(10,4))
    plt.boxplot(data, labels=diseases, vert=True, showfliers=False)
    plt.xticks(rotation=60)
    plt.ylabel("Severity")
    plt.title("Severity distribution across disease types")
    plt.tight_layout()
    plt.savefig(f"{out_dir}/figure_severity_by_disease.png", dpi=150)
    plt.close()

# ---------------------------------------
# 10. FIGURE 3: Mean severity per disease
# ---------------------------------------
mean_sev = df.groupby('true_label').severity.mean().sort_values(ascending=False)
plt.figure(figsize=(9,4))
plt.bar(mean_sev.index, mean_sev.values)
plt.xticks(rotation=60)
plt.ylabel("Mean Severity")
plt.title("Mean severity per disease type")
plt.tight_layout()
plt.savefig(f"{out_dir}/figure_mean_severity.png", dpi=150)
plt.close()

# ---------------------------------------
# 11. FIGURE 4: Age-bin severity
# ---------------------------------------
agebin_stats = df.groupby('age_bin').severity.mean()
plt.figure(figsize=(6,4))
plt.bar(agebin_stats.index.astype(str), agebin_stats.values)
plt.xlabel("Age Bin (days)")
plt.ylabel("Mean Severity")
plt.title("Severity vs Plant Age Group")
plt.tight_layout()
plt.savefig(f"{out_dir}/figure_agebin_severity.png", dpi=150)
plt.close()

print("\nAll plots saved in:", out_dir)
print("Done!")


Rows: 2084
     image_id  age             true_label             pred_label  severity
0  100169.jpg   65  bacterial_leaf_blight  bacterial_leaf_blight  0.565765
1  100365.jpg   45  bacterial_leaf_blight                  blast  0.095276
2  100382.jpg   45  bacterial_leaf_blight             brown_spot  0.434875
3  100513.jpg   65  bacterial_leaf_blight  bacterial_leaf_blight  0.565765
4  100632.jpg   45  bacterial_leaf_blight  bacterial_leaf_blight  0.484528

=== SUMMARY ===
               age     severity
count  2084.000000  2084.000000
mean     63.750000     0.273944
std       9.083037     0.172880
min      45.000000     0.000000
25%      57.000000     0.146893
50%      67.000000     0.233810
75%      70.000000     0.376171
max      82.000000     0.921097

Age range: 45 - 82
Severity range: 0.0 - 0.9210968017578124

Pearson r = 0.0187, p = 0.3936
Spearman rho = 0.0422, p = 0.05383

Linear regression: severity = 0.000356*age + 0.251259
R² = 0.0003, p = 0.3936

Kruskal–Wallis H = 225.760

  print(df.groupby('age_bin').severity.agg(['count','mean','median','std']))
  has_large_values = (abs_vals > 1e6).any()
  has_small_values = ((abs_vals < 10 ** (-self.digits)) & (abs_vals > 0)).any()
  has_small_values = ((abs_vals < 10 ** (-self.digits)) & (abs_vals > 0)).any()



All plots saved in: /kaggle/working/jei_plots
Done!


  agebin_stats = df.groupby('age_bin').severity.mean()
