# Import Libraries and Setup

In [14]:
import os
import numpy as np
import pandas as pd
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers
import warnings
from PIL import Image
import joblib
import json
from pathlib import Path
from tqdm import tqdm

# Quiet the session: TensorFlow log output first, then Python warnings.
# NOTE(review): tensorflow is already imported above; TF_CPP_MIN_LOG_LEVEL may
# need to be set before that import to also silence import-time messages — confirm.
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'  # TensorFlow C++ log threshold
tf.get_logger().setLevel('ERROR')  # TensorFlow Python logger: errors only
warnings.filterwarnings("ignore")

# Seed every random number generator used in this notebook with the same value
import random
np.random.seed(45)
random.seed(45)
tf.random.set_seed(45)

# Define Constants

In [15]:
# Size (in pixels) that images are resized to before being passed to a model
img_height = img_width = 256
batch_size = 32

# Vision Transformer geometry: inside the model, images are resized to
# image_size x image_size and cut into patch_size x patch_size patches
image_size = 72
patch_size = 6
num_patches = pow(image_size // patch_size, 2)

# Transformer encoder hyper-parameters
projection_dim = 64
num_heads = 4
transformer_units = [projection_dim * 2, projection_dim]  # MLP widths inside each block
transformer_layers = 8
mlp_head_units = [2048, 1024]  # MLP widths of the prediction head

# Model input: RGB image at the preprocessing size
input_shape = (img_height, img_width, 3)
num_classes = 10  # Output width of both the disease and the variety classifier

# Custom layers for Importing Model Weights

In [16]:
# Custom layers needed for the models
class Patches(layers.Layer):
    """Keras layer that cuts images into non-overlapping square patches.

    Every patch is ``patch_size`` x ``patch_size`` pixels. The patches of one
    image are flattened so the output is ``[batch, patches_per_image, patch_dims]``.
    """

    def __init__(self, patch_size, **kwargs):
        super().__init__(**kwargs)
        self.patch_size = patch_size

    def call(self, images):
        # Window and stride are identical, so neighbouring patches never overlap
        window = [1, self.patch_size, self.patch_size, 1]
        extracted = tf.image.extract_patches(
            images=images,
            sizes=window,
            strides=window,
            rates=[1, 1, 1, 1],
            padding="VALID",
        )
        # Batch size is read dynamically; the flattened patch length is static
        n_images = tf.shape(images)[0]
        flat_patch_len = extracted.shape[-1]
        return tf.reshape(extracted, [n_images, -1, flat_patch_len])

    def get_config(self):
        """Return the layer config including ``patch_size`` for serialization."""
        return {**super().get_config(), "patch_size": self.patch_size}

    @classmethod
    def from_config(cls, config):
        """Rebuild the layer from a config produced by ``get_config``."""
        # patch_size is popped first so it is not passed twice through **config
        return cls(patch_size=config.pop("patch_size"), **config)

class PatchEncoder(layers.Layer):
    """Keras layer that projects patches and adds a learned position embedding.

    Each flattened patch goes through a Dense projection to ``projection_dim``
    and the embedding of its position index (0 .. num_patches - 1) is added.
    """

    def __init__(self, num_patches, projection_dim, **kwargs):
        super().__init__(**kwargs)
        self.num_patches = num_patches
        self.projection_dim = projection_dim
        # Sub-layers: linear projection first, then the position lookup table
        self.projection = layers.Dense(units=projection_dim)
        self.position_embedding = layers.Embedding(
            input_dim=num_patches,
            output_dim=projection_dim,
        )

    def call(self, patch):
        position_ids = tf.range(self.num_patches)
        projected = self.projection(patch)
        position_offsets = self.position_embedding(position_ids)
        return projected + position_offsets

    def get_config(self):
        """Return the layer config including both constructor arguments."""
        return {
            **super().get_config(),
            "num_patches": self.num_patches,
            "projection_dim": self.projection_dim,
        }

    @classmethod
    def from_config(cls, config):
        """Rebuild the layer from a config produced by ``get_config``."""
        # Both keys are popped before **config is expanded, so neither is passed twice
        return cls(
            num_patches=config.pop("num_patches"),
            projection_dim=config.pop("projection_dim"),
            **config,
        )

# Define Model Architecture for Importing Weights

In [17]:
# Helper function for the ViT model
def mlp(x, hidden_units, dropout_rate):
    """Stack a GELU Dense layer followed by Dropout for each entry of ``hidden_units``.

    Args:
        x: Input tensor.
        hidden_units: Iterable of Dense layer widths, applied in order.
        dropout_rate: Dropout rate used after every Dense layer.

    Returns:
        The output tensor of the last Dropout layer (``x`` itself if
        ``hidden_units`` is empty).
    """
    hidden = x
    for width in hidden_units:
        hidden = layers.Dense(width, activation=tf.nn.gelu)(hidden)
        hidden = layers.Dropout(dropout_rate)(hidden)
    return hidden

## ViT Model for Disease Classification (Task 1)

In [18]:
def create_vit_disease_classifier():
    """Build the Vision Transformer used for disease classification.

    The layers are created in a fixed order; keep that order unchanged so the
    architecture lines up with previously saved weights.

    Returns:
        An uncompiled ``keras.Model`` mapping ``input_shape`` images to
        ``num_classes`` softmax probabilities (output layer ``disease_output``).
    """
    # Input scaling layer, kept outside the augmentation pipeline
    input_scaler = layers.Normalization()

    # Resize to the ViT working resolution, then apply random augmentations
    augmenter = keras.Sequential(
        name="data_augmentation",
        layers=[
            layers.Resizing(image_size, image_size),
            layers.RandomFlip("horizontal"),
            layers.RandomRotation(factor=0.02),
            layers.RandomZoom(height_factor=0.2, width_factor=0.2),
        ],
    )

    image_input = layers.Input(shape=input_shape)
    scaled = input_scaler(image_input)
    resized = augmenter(scaled)

    # Split into patches, then project them and add position embeddings
    raw_patches = Patches(patch_size)(resized)
    tokens = PatchEncoder(num_patches, projection_dim)(raw_patches)

    # Transformer encoder: pre-norm attention and MLP, each with a residual link
    for _ in range(transformer_layers):
        normed_tokens = layers.LayerNormalization(epsilon=1e-6)(tokens)
        attended = layers.MultiHeadAttention(
            num_heads=num_heads, key_dim=projection_dim, dropout=0.1
        )(normed_tokens, normed_tokens)
        after_attention = layers.Add()([attended, tokens])
        block_hidden = layers.LayerNormalization(epsilon=1e-6)(after_attention)
        block_hidden = mlp(block_hidden, hidden_units=transformer_units, dropout_rate=0.1)
        tokens = layers.Add()([block_hidden, after_attention])

    # Flatten all patch tokens into one feature vector per image
    head = layers.LayerNormalization(epsilon=1e-6)(tokens)
    head = layers.Flatten()(head)
    head = layers.Dropout(0.5)(head)
    head = mlp(head, hidden_units=mlp_head_units, dropout_rate=0.5)

    # Softmax output: these are class probabilities, not raw logits
    probabilities = layers.Dense(num_classes, activation='softmax', name='disease_output')(head)
    return keras.Model(inputs=image_input, outputs=probabilities)

## ViT Model for Variety Classification (Task 2)

In [19]:
def create_vit_variety_classifier():
    """Build the Vision Transformer used for variety classification.

    The layers are created in a fixed order; keep that order unchanged so the
    architecture lines up with previously saved weights.

    Returns:
        An uncompiled ``keras.Model`` mapping ``input_shape`` images to
        ``num_classes`` softmax probabilities (output layer ``variety_output``).
    """
    # Input scaling layer, kept outside the augmentation pipeline
    pixel_scaler = layers.Normalization()

    # Resize to the ViT working resolution, then apply random augmentations
    augmentation_pipeline = keras.Sequential(
        name="data_augmentation",
        layers=[
            layers.Resizing(image_size, image_size),
            layers.RandomFlip("horizontal"),
            layers.RandomRotation(factor=0.02),
            layers.RandomZoom(height_factor=0.2, width_factor=0.2),
        ],
    )

    model_input = layers.Input(shape=input_shape)
    scaled_images = pixel_scaler(model_input)
    prepared_images = augmentation_pipeline(scaled_images)

    # Split into patches, then project them and add position embeddings
    image_patches = Patches(patch_size)(prepared_images)
    sequence = PatchEncoder(num_patches, projection_dim)(image_patches)

    # Transformer encoder: pre-norm attention and MLP, each with a residual link
    for _ in range(transformer_layers):
        pre_attention = layers.LayerNormalization(epsilon=1e-6)(sequence)
        attention_out = layers.MultiHeadAttention(
            num_heads=num_heads, key_dim=projection_dim, dropout=0.1
        )(pre_attention, pre_attention)
        attention_residual = layers.Add()([attention_out, sequence])
        feed_forward = layers.LayerNormalization(epsilon=1e-6)(attention_residual)
        feed_forward = mlp(feed_forward, hidden_units=transformer_units, dropout_rate=0.1)
        sequence = layers.Add()([feed_forward, attention_residual])

    # Flatten all patch tokens into one feature vector per image
    pooled = layers.LayerNormalization(epsilon=1e-6)(sequence)
    pooled = layers.Flatten()(pooled)
    pooled = layers.Dropout(0.5)(pooled)
    pooled = mlp(pooled, hidden_units=mlp_head_units, dropout_rate=0.5)

    # Softmax output: these are class probabilities, not raw logits
    class_probabilities = layers.Dense(num_classes, activation='softmax', name='variety_output')(pooled)
    return keras.Model(inputs=model_input, outputs=class_probabilities)

## Function to create the ViT model for Age Regression (Task 3)

In [20]:
def create_vit_regressor():
    """Build the Vision Transformer used for age regression.

    Unlike the two classifiers, the ``Normalization`` layer is the first layer
    of the ``data_augmentation`` pipeline here. The layers are created in a
    fixed order; keep that order unchanged so the architecture lines up with
    previously saved weights.

    Returns:
        An uncompiled ``keras.Model`` mapping ``input_shape`` images to a
        single linear output value.
    """
    # Normalise, resize to the ViT working resolution, then augment
    preprocessing = keras.Sequential(
        name="data_augmentation",
        layers=[
            layers.Normalization(),
            layers.Resizing(image_size, image_size),
            layers.RandomFlip("horizontal"),
            layers.RandomRotation(factor=0.02),
            layers.RandomZoom(height_factor=0.2, width_factor=0.2),
        ],
    )

    image_input = layers.Input(shape=input_shape)
    prepared = preprocessing(image_input)

    # Split into patches, then project them and add position embeddings
    patch_vectors = Patches(patch_size)(prepared)
    tokens = PatchEncoder(num_patches, projection_dim)(patch_vectors)

    # Transformer encoder: pre-norm attention and MLP, each with a residual link
    for _ in range(transformer_layers):
        normed = layers.LayerNormalization(epsilon=1e-6)(tokens)
        attention = layers.MultiHeadAttention(
            num_heads=num_heads, key_dim=projection_dim, dropout=0.1
        )(normed, normed)
        skip_one = layers.Add()([attention, tokens])
        mlp_branch = layers.LayerNormalization(epsilon=1e-6)(skip_one)
        mlp_branch = mlp(mlp_branch, hidden_units=transformer_units, dropout_rate=0.1)
        tokens = layers.Add()([mlp_branch, skip_one])

    # Flatten all patch tokens into one feature vector per image
    summary = layers.LayerNormalization(epsilon=1e-6)(tokens)
    summary = layers.Flatten()(summary)
    summary = layers.Dropout(0.5)(summary)
    summary = mlp(summary, hidden_units=mlp_head_units, dropout_rate=0.5)

    # Regression head: one unit with a linear activation
    prediction = layers.Dense(1, activation='linear')(summary)
    return keras.Model(inputs=image_input, outputs=prediction)

## Functions to preprocess images for the disease/variety models and the age model

In [21]:
 def preprocess_image(image_path):
    """Load an image and turn it into a model-ready batch of one.

    The image is converted to RGB, resized to ``img_width`` x ``img_height``
    and its pixel values are divided by 255.0.

    Args:
        image_path: Path of the image file to read.

    Returns:
        An array of shape ``(1, img_height, img_width, 3)``, or ``None`` if the
        image could not be read or processed (the error is printed).
    """
    try:
        # The context manager closes the underlying file as soon as the pixel
        # data has been copied into the converted image.
        with Image.open(image_path) as source_image:
            img = source_image.convert('RGB').resize((img_width, img_height))
        img_array = np.array(img) / 255.0
        # Add the leading batch dimension expected by model.predict
        return np.expand_dims(img_array, axis=0)
    except Exception as e:
        print(f"Error preprocessing image: {e}")
        return None

# Function to preprocess image for age model
def preprocess_age_image(image_path):
    """Load an image for the age model as a batch of one, without rescaling.

    The image is converted to RGB and resized to ``img_width`` x ``img_height``;
    pixel values are left as read (no division by 255).

    Args:
        image_path: Path of the image file to read.

    Returns:
        An array of shape ``(1, img_height, img_width, 3)``, or ``None`` if the
        image could not be read or processed (the error is printed).
    """
    try:
        # The context manager closes the underlying file as soon as the pixel
        # data has been copied into the converted image.
        with Image.open(image_path) as source_image:
            img = source_image.convert('RGB').resize((img_width, img_height))
        # Add the leading batch dimension expected by model.predict
        return np.expand_dims(np.array(img), axis=0)
    except Exception as e:
        print(f"Error preprocessing image: {e}")
        return None

# ModelHandler class to manage model loading and predictions

In [22]:
class PaddyModelHandler:
    """Loads label encoders, age statistics and the ViT models, and serves predictions.

    On construction the handler creates the ``paddy_models`` directories under
    the current working directory, loads (or falls back to defaults for) the
    label encoders and age statistics, and builds the three model groups. Each
    ``predict_*`` method returns a fixed default label when its model could not
    be loaded or the image could not be preprocessed.
    """

    # Fallback variety labels used when no saved variety encoder is available
    DEFAULT_VARIETY_CLASSES = (
        'ADT45', 'Ariete', 'B40', 'BRS10', 'BRS30', 'BRS43',
        'Cirad141', 'Csl3', 'IET1444', 'Khazar', 'MTL119',
        'MTU1010', 'Pusa44', 'Spandana', 'TeqingMarshal', 'Varalu',
    )
    # Fallback disease labels, listed in sorted order so that index i means the
    # same label here as in LabelEncoder.classes_ (which is always sorted)
    DEFAULT_DISEASE_CLASSES = (
        'bacterial_leaf_blight', 'bacterial_leaf_streak',
        'bacterial_panicle_blight', 'blast', 'brown_spot',
        'dead_heart', 'downy_mildew', 'hispa', 'normal', 'tungro',
    )
    # Fallback mean/std used to de-normalise the age model output
    DEFAULT_AGE_STATS = {'mean': 64.0436244835207, 'std': 8.9582253420494}
    # Number of k-fold age models that make up the ensemble
    AGE_ENSEMBLE_FOLDS = 3
    # Predictions returned when a model or an image is unavailable
    DEFAULT_DISEASE = "normal"
    DEFAULT_VARIETY = "Ariete"
    DEFAULT_AGE = "65"

    def __init__(self):
        self.home_path = os.getcwd()
        self.models_path = os.path.join(self.home_path, 'paddy_models')
        os.makedirs(self.models_path, exist_ok=True)

        # Define paths for ensemble model storage
        self.kfold_models_path = os.path.join(self.models_path, 'kfold_models')
        os.makedirs(self.kfold_models_path, exist_ok=True)

        print("Loading encoders and models...")
        # Load label encoders
        self.load_encoders()

        # Load models
        self.load_models()

    # ------------------------------------------------------------------
    # Encoders and statistics
    # ------------------------------------------------------------------
    def load_encoders(self):
        """Load label encoders for predictions

        The variety encoder, the disease encoder and the age statistics are
        loaded independently: a failure in one (for example an unwritable
        statistics file) only replaces that one with its fallback and leaves
        the others untouched.
        """
        steps = (
            (self._load_variety_encoder, self._set_fallback_variety_encoder),
            (self._load_disease_encoder, self._set_fallback_disease_encoder),
            (self._load_age_stats, self._set_fallback_age_stats),
        )
        for load_step, fallback_step in steps:
            try:
                load_step()
            except Exception as e:
                print(f"Error loading encoders: {e}")
                fallback_step()

    def _set_fallback_variety_encoder(self):
        """Install a LabelEncoder carrying the default variety classes."""
        from sklearn.preprocessing import LabelEncoder
        self.variety_encoder = LabelEncoder()
        self.variety_encoder.classes_ = np.array(self.DEFAULT_VARIETY_CLASSES)
        self.num_varieties = len(self.variety_encoder.classes_)

    def _load_variety_encoder(self):
        """Load the saved variety encoder, or fall back to the default classes."""
        variety_encoder_path = 'variety_label_encoder.joblib'
        if os.path.exists(variety_encoder_path):
            # joblib files are pickle-based: only load encoder files you trust
            self.variety_encoder = joblib.load(variety_encoder_path)
            print(f"Loaded variety encoder with {len(self.variety_encoder.classes_)} classes")
            self.num_varieties = len(self.variety_encoder.classes_)
        else:
            self._set_fallback_variety_encoder()
            print(f"Created fallback variety encoder with {self.num_varieties} classes")

    def _set_fallback_disease_encoder(self, class_names=None):
        """Fit a fresh disease LabelEncoder on ``class_names`` (default labels if None)."""
        from sklearn.preprocessing import LabelEncoder
        if class_names is None:
            class_names = self.DEFAULT_DISEASE_CLASSES
        self.disease_encoder = LabelEncoder()
        self.disease_encoder.fit(list(class_names))
        # Take the class list from the encoder so index -> label lookups in
        # predict_diseases use the same (sorted) order as the encoder itself.
        self.disease_classes = self.disease_encoder.classes_
        self.num_diseases = len(self.disease_classes)

    def _load_disease_encoder(self):
        """Load the saved disease encoder, else infer classes from folders or defaults."""
        disease_encoder_path = 'disease_label_encoder.joblib'
        if os.path.exists(disease_encoder_path):
            # joblib files are pickle-based: only load encoder files you trust
            self.disease_encoder = joblib.load(disease_encoder_path)
            self.disease_classes = self.disease_encoder.classes_
            self.num_diseases = len(self.disease_classes)
            print(f"Loaded disease encoder with {self.num_diseases} classes")
            return

        # Try to infer the classes from the sub-folders of train_images
        disease_folders = []
        if os.path.isdir('train_images'):
            disease_folders = [d for d in os.listdir('train_images')
                               if os.path.isdir(os.path.join('train_images', d))]
        if disease_folders:
            self._set_fallback_disease_encoder(disease_folders)
            print(f"Found {self.num_diseases} disease classes from folders")
        else:
            self._set_fallback_disease_encoder()
            print(f"Using default {self.num_diseases} disease classes")

    def _set_fallback_age_stats(self):
        """Install the default age mean/std."""
        self.age_stats = dict(self.DEFAULT_AGE_STATS)

    def _load_age_stats(self):
        """Load the age mean/std JSON, or create it from the default values."""
        age_stats_path = os.path.join(self.models_path, 'age_stats_kfold.json')
        if os.path.exists(age_stats_path):
            with open(age_stats_path, 'r') as f:
                self.age_stats = json.load(f)
            print(f"Loaded age statistics: mean={self.age_stats['mean']}, std={self.age_stats['std']}")
        else:
            self._set_fallback_age_stats()
            # Save the fallback stats
            with open(age_stats_path, 'w') as f:
                json.dump(self.age_stats, f)
            print(f"Created fallback age statistics: mean={self.age_stats['mean']}, std={self.age_stats['std']}")

    # ------------------------------------------------------------------
    # Models
    # ------------------------------------------------------------------
    def load_models(self):
        """Load trained models or create fallbacks

        Sets ``disease_model``/``variety_model`` and their ``*_loaded`` flags,
        plus ``age_ensemble_models`` (a list of ``(model, mean, std)`` tuples)
        and ``age_model_loaded``. Any unexpected error marks all models as not
        loaded, so the ``predict_*`` methods return their defaults.
        """
        # Safe defaults so the predict_* methods work even if loading aborts midway
        self.disease_model_loaded = False
        self.variety_model_loaded = False
        self.age_model_loaded = False
        self.age_ensemble_models = []

        try:
            # -------- DISEASE MODEL (Task 1) --------
            print("Creating disease classification model...")
            self.disease_model, self.disease_model_loaded = self._load_classifier(
                create_vit_disease_classifier, 'vit_label_weights.weights.h5', 'disease'
            )

            # -------- VARIETY MODEL (Task 2) --------
            print("Creating variety classification model...")
            self.variety_model, self.variety_model_loaded = self._load_classifier(
                create_vit_variety_classifier, 'vit_variety_weights.weights.h5', 'variety'
            )

            # -------- AGE MODELS (ENSEMBLE) (Task 3) --------
            print("Loading age regression ensemble models...")
            self._load_age_ensemble()

            print("Models initialized successfully")

        except Exception as e:
            print(f"Error loading models: {e}")
            # Set flags to indicate models couldn't be loaded
            self.variety_model_loaded = False
            self.disease_model_loaded = False
            self.age_model_loaded = False

    def _load_classifier(self, build_model, weights_filename, label):
        """Build and compile a classifier, then try to load its weights.

        Args:
            build_model: Zero-argument callable returning the Keras model.
            weights_filename: Weights file name inside ``self.models_path``.
            label: Lower-case task name used in the progress messages.

        Returns:
            ``(model, loaded)`` where ``loaded`` is True only if the weights
            file existed and was loaded without error.
        """
        model = build_model()
        model.compile(
            optimizer='adam',
            loss='sparse_categorical_crossentropy',
            metrics=['accuracy']
        )

        weights_path = os.path.join(self.models_path, weights_filename)
        if not os.path.exists(weights_path):
            print(f"{label.capitalize()} model weights not found. Using uninitialized model.")
            return model, False

        print(f"Loading {label} model weights from {weights_path}")
        try:
            model.load_weights(weights_path)
        except Exception as e:
            print(f"Error loading {label} model weights: {e}")
            return model, False
        print(f"Successfully loaded {label} model weights")
        return model, True

    def _load_age_ensemble(self):
        """Load every available k-fold age model, or install one unweighted fallback."""
        self.age_ensemble_models = []

        for fold in range(1, self.AGE_ENSEMBLE_FOLDS + 1):
            # Create a fresh model
            age_model = create_vit_regressor()
            age_model.compile(
                optimizer=keras.optimizers.Adam(learning_rate=0.001),
                loss='mean_absolute_error',
                metrics=['mae', 'mse']
            )

            age_weights_path = os.path.join(
                self.kfold_models_path, f'best_vit_age_model_fold_{fold}.weights.h5'
            )
            if not os.path.exists(age_weights_path):
                print(f"Warning: Could not find weights for age model fold {fold}")
                continue

            # A broken weights file only drops this fold, not the whole ensemble
            try:
                age_model.load_weights(age_weights_path)
            except Exception as e:
                print(f"Error loading weights for age model fold {fold}: {e}")
                continue
            print(f"Loaded weights for age model fold {fold}")

            # Add to ensemble with normalization stats
            self.age_ensemble_models.append(
                (age_model, self.age_stats['mean'], self.age_stats['std'])
            )

        # Check if at least one age model was loaded
        if self.age_ensemble_models:
            self.age_model_loaded = True
            print(f"Successfully loaded {len(self.age_ensemble_models)} age models for ensemble")
        else:
            # Create a single fallback model
            fallback_age_model = create_vit_regressor()
            fallback_age_model.compile(
                optimizer='adam',
                loss='mean_absolute_error',
                metrics=['mae']
            )
            self.age_ensemble_models = [
                (fallback_age_model, self.age_stats['mean'], self.age_stats['std'])
            ]
            self.age_model_loaded = False
            print("No age model weights found. Using uninitialized fallback model.")

    # ------------------------------------------------------------------
    # Predictions
    # ------------------------------------------------------------------
    def predict_diseases(self, image_path):
        """Predict disease for an image

        Returns the predicted disease label, or ``"normal"`` when the disease
        model is not loaded or the image cannot be preprocessed.
        """
        if not self.disease_model_loaded:
            return self.DEFAULT_DISEASE
        img_array = preprocess_image(image_path)
        if img_array is None:
            return self.DEFAULT_DISEASE

        disease_pred = self.disease_model.predict(img_array, verbose=0)
        top_disease_index = np.argmax(disease_pred[0])
        return self.disease_classes[top_disease_index]

    def predict_variety(self, image_path):
        """Predict variety for an image

        Returns the predicted variety label, or ``"Ariete"`` when the variety
        model is not loaded or the image cannot be preprocessed.
        """
        if not self.variety_model_loaded:
            return self.DEFAULT_VARIETY
        img_array = preprocess_image(image_path)
        if img_array is None:
            return self.DEFAULT_VARIETY

        variety_pred = self.variety_model.predict(img_array, verbose=0)
        top_variety_index = np.argmax(variety_pred[0])
        return self.variety_encoder.classes_[top_variety_index]

    def predict_age(self, image_path):
        """Predict age for an image

        Each ensemble model predicts a normalised age that is mapped back with
        ``prediction * std + mean``; the ensemble mean is rounded to the nearest
        integer and returned as a string. Returns ``"65"`` when no age model is
        loaded or the image cannot be preprocessed.
        """
        if not (self.age_model_loaded and self.age_ensemble_models):
            return self.DEFAULT_AGE

        age_img_array = preprocess_age_image(image_path)
        if age_img_array is None:
            # Unreadable image: fall back instead of passing None to model.predict
            return self.DEFAULT_AGE

        # Make predictions with each model in the ensemble
        all_age_predictions = []
        for model, age_mean, age_std in self.age_ensemble_models:
            predictions_norm = model.predict(age_img_array, verbose=0)
            all_age_predictions.append(predictions_norm.flatten() * age_std + age_mean)

        # Average predictions across all models in the ensemble
        ensemble_age_prediction = np.mean(all_age_predictions, axis=0)[0]
        return str(int(round(ensemble_age_prediction)))  # Round to nearest integer

# Main function to run predictions and create submission

In [23]:
def run_predictions():
    """Predict disease, variety and age for every test image and save the CSV.

    Reads ``prediction_submission.csv`` from the working directory, fills the
    ``label``, ``variety`` and ``age`` columns for each ``image_id`` found in
    the ``test_images`` directory, and writes the result back to the same file.
    Rows whose image is missing are left unchanged and a warning is printed.
    Returns ``None``; if the ``test_images`` directory does not exist, an error
    is printed and nothing is loaded or written.
    """
    # Define paths
    home_path = os.getcwd()
    test_images_path = os.path.join(home_path, 'test_images')

    # Check the cheap precondition first, before any model is built
    if not os.path.exists(test_images_path):
        print(f"Error: Test images directory not found at {test_images_path}")
        return

    # Load the template submission CSV
    submission_template = pd.read_csv('prediction_submission.csv')

    # All three predictions are strings. Cast the target columns to object so
    # writing them does not rely on implicit upcasting of a numeric column.
    for column in ('label', 'variety', 'age'):
        if column in submission_template.columns:
            submission_template[column] = submission_template[column].astype(object)

    # Initialize the model handler
    model_handler = PaddyModelHandler()

    print(f"Processing {len(submission_template)} test images...")

    # Process each image and make predictions
    for i, row in tqdm(submission_template.iterrows(), total=len(submission_template)):
        image_id = row['image_id']
        image_path = os.path.join(test_images_path, image_id)

        # Skip if image doesn't exist
        if not os.path.exists(image_path):
            print(f"Warning: Image {image_id} not found in test_images directory")
            continue

        # Update the submission template with the prediction for each task
        submission_template.at[i, 'label'] = model_handler.predict_diseases(image_path)
        submission_template.at[i, 'variety'] = model_handler.predict_variety(image_path)
        submission_template.at[i, 'age'] = model_handler.predict_age(image_path)

    # Save the submission file
    submission_template.to_csv('prediction_submission.csv', index=False)
    print("Predictions completed and saved to prediction_submission.csv")

if __name__ == "__main__":
    run_predictions()

Loading encoders and models...
Loaded variety encoder with 10 classes
Loaded disease encoder with 10 classes
Loaded age statistics: mean=64.0436244835207, std=8.9582253420494
Creating disease classification model...
Loading disease model weights from c:\Users\ThinkPad\Desktop\COSC2753_A2_MachineLearning\paddy_models\vit_label_weights.weights.h5
Successfully loaded disease model weights
Creating variety classification model...
Loading variety model weights from c:\Users\ThinkPad\Desktop\COSC2753_A2_MachineLearning\paddy_models\vit_variety_weights.weights.h5
Successfully loaded variety model weights
Loading age regression ensemble models...
Loaded weights for age model fold 1
Loaded weights for age model fold 2
Loaded weights for age model fold 3
Successfully loaded 3 age models for ensemble
Models initialized successfully
Processing 3469 test images...


100%|██████████| 3469/3469 [26:01<00:00,  2.22it/s]  

Predictions completed and saved to prediction_submission.csv



