In [1]:
# Memory-Efficient Version for Task 2: Variety Classification
import os
import random
import pandas as pd
import numpy as np
from tqdm import tqdm
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers, optimizers, losses, metrics, callbacks, Sequential, Model
from sklearn.model_selection import StratifiedKFold
from sklearn.preprocessing import LabelEncoder
import joblib
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.metrics import classification_report, confusion_matrix
import warnings

# Suppress TensorFlow warnings
warnings.filterwarnings("ignore")

# Enable eager execution
tf.config.run_functions_eagerly(True)
if not tf.executing_eagerly():
    raise RuntimeError("Eager execution is not enabled. Ensure TensorFlow 2.x is installed.")

# Set random seed for reproducibility
random.seed(45)

# Setup Configuration and Constants
batch_size = 32
img_height = 256
img_width = 256
num_epochs = 50
learning_rate = 0.001
weight_decay = 0.0001
image_size = 72
patch_size = 6
num_patches = (image_size // patch_size) ** 2
projection_dim = 64
num_heads = 4
transformer_units = [projection_dim * 2, projection_dim]
transformer_layers = 8
mlp_head_units = [2048, 1024]
n_splits = 4

# Define paths
HOME_PATH = os.getcwd() + "/"
TRAIN_IMG_PATH = HOME_PATH + 'train_images'
TEST_IMG_PATH = HOME_PATH + 'test_images'
META_TRAIN_PATH = HOME_PATH + 'meta_train.csv'
CHECKPOINT_MODEL_PATH = HOME_PATH + 'paddy_models/best_vit_variety_model_fold_{fold}.keras'

# Create models directory
os.makedirs('paddy_models', exist_ok=True)

# Load metadata
print("Loading metadata...")
meta_train = pd.read_csv(META_TRAIN_PATH)
variety_encoder = LabelEncoder()
variety_labels = variety_encoder.fit_transform(meta_train['variety'])
variety_to_idx = {variety: idx for idx, variety in enumerate(variety_encoder.classes_)}
num_varieties = len(variety_encoder.classes_)
print(f"Number of unique varieties: {num_varieties}")
print(f"Varieties: {variety_encoder.classes_}")
joblib.dump(variety_encoder, 'variety_label_encoder.joblib')

# Create DataFrame with file paths and variety labels
def create_file_df(meta_df):
    data = []
    for idx, row in tqdm(meta_df.iterrows(), total=len(meta_df)):
        image_id = row['image_id']
        variety = row['variety']
        label = row['label']
        img_path = os.path.join(TRAIN_IMG_PATH, label, image_id)
        if os.path.exists(img_path):
            data.append({
                'file_path': img_path,
                'variety_label': variety_to_idx[variety],
                'variety_name': variety,
                'image_id': image_id
            })
    return pd.DataFrame(data)

print("Creating file DataFrame...")
file_df = create_file_df(meta_train)

# Image parsing function
def parse_image(file_path, label):
    img = tf.io.read_file(file_path)
    img = tf.image.decode_jpeg(img, channels=3)
    img = tf.image.resize(img, [img_height, img_width])
    img = img / 255.0
    return img, label

# Create dataset from DataFrame
def create_dataset_from_df(df, batch_size=32, is_training=True):
    dataset = tf.data.Dataset.from_tensor_slices((df['file_path'].values, df['variety_label'].values))
    dataset = dataset.map(parse_image, num_parallel_calls=tf.data.AUTOTUNE)
    if is_training:
        dataset = dataset.shuffle(buffer_size=1000)
    dataset = dataset.batch(batch_size)
    dataset = dataset.prefetch(tf.data.AUTOTUNE)
    return dataset

# Data augmentation
data_augmentation = Sequential([
    layers.Resizing(image_size, image_size),
    layers.RandomFlip("horizontal"),
    layers.RandomRotation(factor=0.02),
    layers.RandomZoom(height_factor=0.2, width_factor=0.2),
], name="data_augmentation")

# Normalization layer
normalization = layers.Normalization()
sample_dataset = create_dataset_from_df(file_df).take(5)
normalization.adapt(sample_dataset.map(lambda x, y: x))

# Multilayer perceptron (MLP)
def mlp(x, hidden_units, dropout_rate):
    for units in hidden_units:
        x = layers.Dense(units, activation=tf.nn.gelu)(x)
        x = layers.Dropout(dropout_rate)(x)
    return x

# Patch creation layer
class Patches(layers.Layer):
    def __init__(self, patch_size):
        super(Patches, self).__init__()
        self.patch_size = patch_size

    def call(self, images):
        batch_size = tf.shape(images)[0]
        patches = tf.image.extract_patches(
            images=images,
            sizes=[1, self.patch_size, self.patch_size, 1],
            strides=[1, self.patch_size, self.patch_size, 1],
            rates=[1, 1, 1, 1],
            padding="VALID",
        )
        patch_dims = patches.shape[-1]
        patches = tf.reshape(patches, [batch_size, -1, patch_dims])
        return patches

# Patch encoding layer
class PatchEncoder(layers.Layer):
    def __init__(self, num_patches, projection_dim):
        super(PatchEncoder, self).__init__()
        self.num_patches = num_patches
        self.projection = layers.Dense(units=projection_dim)
        self.position_embedding = layers.Embedding(
            input_dim=num_patches, output_dim=projection_dim
        )

    def call(self, patch):
        positions = tf.range(start=0, limit=self.num_patches, delta=1)
        encoded = self.projection(patch) + self.position_embedding(positions)
        return encoded

# ViT model
def create_vit_variety_classifier():
    inputs = layers.Input(shape=(img_height, img_width, 3))
    normalized = normalization(inputs)
    augmented = data_augmentation(normalized)
    patches = Patches(patch_size)(augmented)
    encoded_patches = PatchEncoder(num_patches, projection_dim)(patches)

    for _ in range(transformer_layers):
        x1 = layers.LayerNormalization(epsilon=1e-6)(encoded_patches)
        attention_output = layers.MultiHeadAttention(
            num_heads=num_heads, key_dim=projection_dim, dropout=0.1
        )(x1, x1)
        x2 = layers.Add()([attention_output, encoded_patches])
        x3 = layers.LayerNormalization(epsilon=1e-6)(x2)
        x3 = mlp(x3, hidden_units=transformer_units, dropout_rate=0.1)
        encoded_patches = layers.Add()([x3, x2])

    representation = layers.LayerNormalization(epsilon=1e-6)(encoded_patches)
    representation = layers.Flatten()(representation)
    representation = layers.Dropout(0.5)(representation)
    features = mlp(representation, hidden_units=mlp_head_units, dropout_rate=0.5)
    logits = layers.Dense(num_varieties, activation='softmax', name='variety_output')(features)
    return Model(inputs=inputs, outputs=logits)

# Train one epoch
def train_one_epoch(model, train_dataset, val_dataset, optimizer, loss_fn, steps_per_epoch, validation_steps, checkpoint_path):
    model.compile(optimizer=optimizer, loss=loss_fn, metrics=['accuracy'])
    checkpoint_callback = callbacks.ModelCheckpoint(
        filepath=checkpoint_path,
        monitor='val_accuracy',
        save_best_only=True,
        mode='max',
        save_weights_only=False
    )
    history = model.fit(
        train_dataset,
        epochs=1,
        steps_per_epoch=steps_per_epoch,
        validation_data=val_dataset,
        validation_steps=validation_steps,
        callbacks=[checkpoint_callback],
        verbose=1
    )
    return float(history.history['loss'][0]), float(history.history['accuracy'][0]), \
           float(history.history['val_loss'][0]), float(history.history['val_accuracy'][0])

# Evaluate model
def evaluate(model, val_dataset, loss_fn, validation_steps):
    model.compile(loss=loss_fn, metrics=['accuracy'])
    results = model.evaluate(val_dataset, steps=validation_steps, verbose=1)
    return float(results[0]), float(results[1])

# Run Stratified K-Fold training
def run_kfold_training(file_df, n_splits=4, num_epochs=50, batch_size=32):
    skf = StratifiedKFold(n_splits=n_splits, shuffle=True, random_state=42)
    loss_fn = losses.SparseCategoricalCrossentropy()
    epoch_results = []
    fold_best_val_acc = {f'fold_{i+1}': 0.0 for i in range(n_splits)}

    for epoch in range(num_epochs):
        print(f"\nEpoch {epoch + 1}/{num_epochs}")
        fold_train_losses, fold_train_accs, fold_val_losses, fold_val_accs = [], [], [], []

        for fold, (train_idx, val_idx) in enumerate(skf.split(file_df, file_df['variety_label'])):
            print(f"  Fold {fold + 1}/{n_splits}")

            train_df = file_df.iloc[train_idx].reset_index(drop=True)
            val_df = file_df.iloc[val_idx].reset_index(drop=True)

            train_dataset = create_dataset_from_df(train_df, batch_size=batch_size, is_training=True)
            val_dataset = create_dataset_from_df(val_df, batch_size=batch_size, is_training=False)

            steps_per_epoch = len(train_df) // batch_size
            validation_steps = len(val_df) // batch_size

            model = create_vit_variety_classifier()
            optimizer = optimizers.AdamW(learning_rate=learning_rate, weight_decay=weight_decay)
            checkpoint_path = CHECKPOINT_MODEL_PATH.format(fold=fold + 1)

            train_loss, train_acc, val_loss, val_acc = train_one_epoch(
                model, train_dataset, val_dataset, optimizer, loss_fn, steps_per_epoch, validation_steps, checkpoint_path
            )
            fold_train_losses.append(train_loss)
            fold_train_accs.append(train_acc)
            fold_val_losses.append(val_loss)
            fold_val_accs.append(val_acc)

            print(f"    Train Loss: {train_loss:.4f}, Train Acc: {train_acc:.4f}")
            print(f"    Val Loss: {val_loss:.4f}, Val Acc: {val_acc:.4f}")

            # Track best validation accuracy for this fold
            if val_acc > fold_best_val_acc[f'fold_{fold + 1}']:
                fold_best_val_acc[f'fold_{fold + 1}'] = val_acc

        mean_train_loss = np.mean(fold_train_losses)
        mean_train_acc = np.mean(fold_train_accs)
        mean_val_loss = np.mean(fold_val_losses)
        mean_val_acc = np.mean(fold_val_accs)
        print(f"Epoch {epoch + 1} Summary:")
        print(f"  Mean Train Loss: {mean_train_loss:.4f}, Mean Train Acc: {mean_train_acc:.4f}")
        print(f"  Mean Val Loss: {mean_val_loss:.4f}, Mean Val Acc: {mean_val_acc:.4f}")

        epoch_results.append({
            'epoch': epoch + 1,
            'mean_train_loss': mean_train_loss,
            'mean_train_acc': mean_train_acc,
            'mean_val_loss': mean_val_loss,
            'mean_val_acc': mean_val_acc
        })

    print("\nTraining Completed!")
    print("Final Results:")
    for result in epoch_results:
        print(f"Epoch {result['epoch']}: "
              f"Mean Train Loss: {result['mean_train_loss']:.4f}, "
              f"Mean Train Acc: {result['mean_train_acc']:.4f}, "
              f"Mean Val Loss: {result['mean_val_loss']:.4f}, "
              f"Mean Val Acc: {result['mean_val_acc']:.4f}")

    return epoch_results, fold_best_val_acc

# Plot Learning Curves
def plot_training_curves(epoch_results):
    epochs = [r['epoch'] for r in epoch_results]
    train_loss = [r['mean_train_loss'] for r in epoch_results]
    val_loss = [r['mean_val_loss'] for r in epoch_results]
    train_acc = [r['mean_train_acc'] for r in epoch_results]
    val_acc = [r['mean_val_acc'] for r in epoch_results]

    plt.figure(figsize=(12, 4))
    plt.subplot(1, 2, 1)
    plt.plot(epochs, train_loss, label='Mean Train Loss')
    plt.plot(epochs, val_loss, label='Mean Val Loss')
    plt.title('Model Loss')
    plt.xlabel('Epoch')
    plt.ylabel('Loss')
    plt.legend()

    plt.subplot(1, 2, 2)
    plt.plot(epochs, train_acc, label='Mean Train Acc')
    plt.plot(epochs, val_acc, label='Mean Val Acc')
    plt.title('Model Accuracy')
    plt.xlabel('Epoch')
    plt.ylabel('Accuracy')
    plt.legend()

    plt.tight_layout()
    plt.show()

# Create test dataset
def create_test_dataset(test_path, batch_size=32):
    test_files = [os.path.join(test_path, img) for img in os.listdir(test_path) if img.endswith('.jpg')]
    test_ids = [img for img in os.listdir(test_path) if img.endswith('.jpg')]
    dataset = tf.data.Dataset.from_tensor_slices((test_files, [0] * len(test_files)))
    dataset = dataset.map(parse_image, num_parallel_calls=tf.data.AUTOTUNE)
    dataset = dataset.batch(batch_size).prefetch(tf.data.AUTOTUNE)
    return dataset, test_ids


    

Loading metadata...
Number of unique varieties: 10
Varieties: ['ADT45' 'AndraPonni' 'AtchayaPonni' 'IR20' 'KarnatakaPonni' 'Onthanel'
 'Ponni' 'RR' 'Surya' 'Zonal']
Creating file DataFrame...


100%|██████████| 10407/10407 [00:00<00:00, 23001.36it/s]


In [None]:
# Main execution
if __name__ == "__main__":
    # Run training
    epoch_results, fold_best_val_acc = run_kfold_training(file_df, n_splits=n_splits, num_epochs=num_epochs, batch_size=batch_size)

    import json
    with open('training_results.json', 'w') as f:
        json.dump({
            'epoch_results': epoch_results,
            'fold_best_val_acc': fold_best_val_acc
        }, f)
    print("Training results saved to 'training_results.json'")


Epoch 1/50
  Fold 1/4

[1m243/243[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m505s[0m 2s/step - accuracy: 0.6018 - loss: 2.2956 - val_accuracy: 0.6447 - val_loss: 1.1874
    Train Loss: 1.5593, Train Acc: 0.6420
    Val Loss: 1.1874, Val Acc: 0.6447
  Fold 2/4
[1m243/243[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m307s[0m 1s/step - accuracy: 0.6090 - loss: 2.3096 - val_accuracy: 0.6655 - val_loss: 1.2268
    Train Loss: 1.5715, Train Acc: 0.6493
    Val Loss: 1.2268, Val Acc: 0.6655
  Fold 3/4
[1m243/243[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m350s[0m 1s/step - accuracy: 0.6030 - loss: 2.2965 - val_accuracy: 0.6246 - val_loss: 1.3524
    Train Loss: 1.6009, Train Acc: 0.6448
    Val Loss: 1.3524, Val Acc: 0.6246
  Fold 4/4
[1m243/243[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m308s[0m 1s/step - accuracy: 0.6081 - loss: 2.3488 - val_accuracy: 0.6744 - val_loss: 1.4150
    Train Loss: 1.5990, Train Acc: 0.6404
    Val Loss: 1.4150, Val Acc: 0.6744
Epoch 1 Summary

In [None]:
import os
import json
import tensorflow as tf
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.metrics import classification_report, confusion_matrix
from sklearn.model_selection import StratifiedKFold

# Check if required variables are defined
if 'epoch_results' not in globals() or 'fold_best_val_acc' not in globals():
    print("Loading training results from file...")
    try:
        with open('training_results.json', 'r') as f:
            data = json.load(f)
            epoch_results = data['epoch_results']
            fold_best_val_acc = data['fold_best_val_acc']
    except FileNotFoundError:
        raise ValueError("Training results not found. Please run the training cell first.")

# Plot learning curves
print("Plotting learning curves...")
plot_training_curves(epoch_results)

# Select the best model
best_fold = max(fold_best_val_acc, key=fold_best_val_acc.get)
best_val_acc = fold_best_val_acc[best_fold]
best_fold_num = int(best_fold.split('_')[1])
best_model_path = CHECKPOINT_MODEL_PATH.format(fold=best_fold_num)
print(f"Best model from {best_fold} with Val Acc: {best_val_acc:.4f}")

# Check if model file exists
if not os.path.exists(best_model_path):
    raise FileNotFoundError(f"Best model file not found at {best_model_path}. Ensure training completed successfully.")

# Load the best model
best_model = tf.keras.models.load_model(best_model_path)

# Create test dataset
print("Creating test dataset...")
test_pred_dataset, test_image_ids = create_test_dataset(TEST_IMG_PATH)

# Generate predictions
print("Generating predictions...")
predictions = best_model.predict(test_pred_dataset)
predicted_variety_indices = np.argmax(predictions, axis=1)
predicted_varieties = variety_encoder.inverse_transform(predicted_variety_indices)

# Create submission dataframe
submission_df = pd.DataFrame({
    'image_id': test_image_ids,
    'variety': predicted_varieties
})
submission_df.to_csv('variety_predictions.csv', index=False)
print("Predictions saved to 'variety_predictions.csv'")

# Create detailed submission file
confidence_df = pd.DataFrame({
    'image_id': test_image_ids,
    'variety': predicted_varieties,
    'confidence': np.max(predictions, axis=1)
})
for i in range(3):
    top_n_indices = np.argsort(predictions, axis=1)[:, -(i+1)]
    confidence_df[f'variety_top_{i+1}'] = variety_encoder.inverse_transform(top_n_indices)
    confidence_df[f'confidence_top_{i+1}'] = np.sort(predictions, axis=1)[:, -(i+1)]
confidence_df.to_csv('variety_predictions_detailed.csv', index=False)
print("Detailed predictions saved to 'variety_predictions_detailed.csv'")

# Evaluate on validation set of the best fold
print("Evaluating best model on validation set...")
skf = StratifiedKFold(n_splits=n_splits, shuffle=True, random_state=42)
for fold, (train_idx, val_idx) in enumerate(skf.split(file_df, file_df['variety_label'])):
    if fold + 1 == best_fold_num:
        val_df = file_df.iloc[val_idx].reset_index(drop=True)
        val_dataset = create_dataset_from_df(val_df, batch_size=batch_size, is_training=False)
        break

val_predictions = []
val_true_labels = []
for batch in val_dataset:
    images, labels = batch
    preds = best_model.predict(images, verbose=0)
    val_predictions.extend(np.argmax(preds, axis=1))
    val_true_labels.extend(labels.numpy())

# Create classification report
print("\nClassification Report:")
print(classification_report(val_true_labels, val_predictions, target_names=variety_encoder.classes_))

# Create confusion matrix
cm = confusion_matrix(val_true_labels, val_predictions)
plt.figure(figsize=(12, 10))
sns.heatmap(cm, annot=True, fmt='d', cmap='Blues',
            xticklabels=variety_encoder.classes_,
            yticklabels=variety_encoder.classes_)
plt.title('Confusion Matrix for Variety Classification')
plt.ylabel('True Label')
plt.xlabel('Predicted Label')
plt.xticks(rotation=45, ha='right')
plt.yticks(rotation=0)
plt.tight_layout()
plt.show()

print("\nTraining and evaluation completed successfully!")