In [None]:
# Import necessary libraries
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Input, Conv2D, MaxPooling2D, Flatten, Dense, Dropout
from tensorflow.keras.utils import to_categorical
from tensorflow.keras.callbacks import EarlyStopping
from sklearn.metrics import confusion_matrix, classification_report
import tensorflow as tf

In [None]:
# Load the image array and the label table from the working directory.
images = np.load("images.npy")
labels = pd.read_csv("labels.csv")
# Keep the class names sorted. LabelEncoder (used later for the targets) assigns
# integer ids in sorted order, and `class_names` is used further down to label
# confusion-matrix axes and classification-report rows that are indexed by
# those ids. First-appearance order (plain `.unique()`) would mislabel them.
class_names = np.unique(labels['Label'].to_numpy())

In [None]:
# Bar chart of the number of samples per class, saved to disk and displayed.
fig, ax = plt.subplots(figsize=(12, 6))
sns.countplot(x=labels['Label'], order=class_names, ax=ax)
ax.set_title('Distribution of Plant Seedling Classes')
ax.set_xlabel('Class')
ax.set_ylabel('Count')
# Tilt the tick labels so long class names do not overlap.
plt.setp(ax.get_xticklabels(), rotation=45)
fig.savefig('class_distribution.png')
plt.show()

In [None]:
# Visualize one sample image per class.
# The grid is sized from the number of classes instead of a fixed 4x3 layout,
# so the cell keeps working if the label file has more (or fewer) than 12 classes.
n_cols = 3
n_rows = max(1, -(-len(class_names) // n_cols))  # ceiling division, at least one row
label_values = labels['Label'].to_numpy()

plt.figure(figsize=(16, 3 * n_rows))  # 3 inches per row -> (16, 12) for 12 classes
for i, label in enumerate(class_names):
    # Positional index of the first row carrying this label. `images` is indexed
    # by position, so this stays correct even if `labels` has a non-default index.
    idx = np.flatnonzero(label_values == label)[0]
    plt.subplot(n_rows, n_cols, i + 1)
    plt.imshow(images[idx])
    plt.title(label)
    plt.axis('off')
plt.savefig('sample_images.png')
plt.show()

In [None]:
# Downsample images to 64x64, scale, and store as float16 to save memory.
images_resized = tf.image.resize(images, [64, 64]).numpy()
# Divide while the data is still in the resize output precision and only then
# cast down; casting to float16 first would round the interpolated pixel values
# before scaling. Assumes 8-bit pixel values (0-255) -- TODO confirm for images.npy.
images_resized = (images_resized / 255.0).astype('float16')

# Split data: 70% train, 15% validation, 15% test.
# Stratify both splits so every class keeps the same proportion in each subset
# (the splits would otherwise be purely random and could under-represent rare classes).
X_train, X_temp, y_train, y_temp = train_test_split(
    images_resized, labels['Label'],
    test_size=0.3, random_state=42, stratify=labels['Label'])
X_val, X_test, y_val, y_test = train_test_split(
    X_temp, y_temp,
    test_size=0.5, random_state=42, stratify=y_temp)

# Encode labels.
# Fit on the full label column so the class vocabulary does not depend on which
# rows landed in the training split, and pass num_classes explicitly so all
# three one-hot matrices have the same width even if a split lacks the
# highest-numbered class.
label_encoder = LabelEncoder()
label_encoder.fit(labels['Label'])
num_classes = len(label_encoder.classes_)
y_train_encoded = to_categorical(label_encoder.transform(y_train), num_classes=num_classes)
y_val_encoded = to_categorical(label_encoder.transform(y_val), num_classes=num_classes)
y_test_encoded = to_categorical(label_encoder.transform(y_test), num_classes=num_classes)

In [None]:
# Create tf.data.Dataset pipelines for training and validation.
# Keras does not shuffle a tf.data.Dataset passed to fit(), so the training
# pipeline shuffles explicitly (full-size buffer, reshuffled every epoch).
# Without this every epoch would see exactly the same batches in the same order.
train_dataset = (
    tf.data.Dataset.from_tensor_slices((X_train, y_train_encoded))
    .shuffle(buffer_size=len(X_train), seed=42, reshuffle_each_iteration=True)
    .batch(32)
    .prefetch(tf.data.AUTOTUNE)
)
# Validation data is only evaluated, so its order is left unchanged.
val_dataset = (
    tf.data.Dataset.from_tensor_slices((X_val, y_val_encoded))
    .batch(32)
    .prefetch(tf.data.AUTOTUNE)
)

In [None]:
# Persist every split (features and one-hot targets) as .npy files.
arrays_to_save = (
    ('X_train.npy', X_train),
    ('y_train_encoded.npy', y_train_encoded),
    ('X_val.npy', X_val),
    ('y_val_encoded.npy', y_val_encoded),
    ('X_test.npy', X_test),
    ('y_test_encoded.npy', y_test_encoded),
)
for out_path, array in arrays_to_save:
    np.save(out_path, array)

In [None]:
# Build and compile model.
# Seed Python, NumPy and TensorFlow so weight initialisation and dropout are
# repeatable across Restart & Run All.
tf.keras.utils.set_random_seed(42)

# Size the output layer from the encoded targets instead of hard-coding 12,
# so the network always matches the one-hot width it is trained against.
num_classes = y_train_encoded.shape[1]

model = Sequential([
    Input(shape=(64, 64, 3)),
    # Three conv/pool stages with increasing filter counts.
    Conv2D(32, (3, 3), activation='relu'),
    MaxPooling2D((2, 2)),
    Conv2D(64, (3, 3), activation='relu'),
    MaxPooling2D((2, 2)),
    Conv2D(128, (3, 3), activation='relu'),
    MaxPooling2D((2, 2)),
    # Classifier head.
    Flatten(),
    Dense(128, activation='relu'),
    Dropout(0.5),
    Dense(num_classes, activation='softmax')
])
# categorical_crossentropy matches the one-hot encoded targets.
model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])

In [None]:
# Show the architecture in the notebook, then write the same text to a file.
model.summary()

summary_lines = []
model.summary(print_fn=summary_lines.append)
with open('model_summary.txt', 'w', encoding='utf-8') as summary_file:
    summary_file.writelines(line + '\n' for line in summary_lines)
# model_summary.png is produced manually: screenshot the model.summary() output above.

In [None]:
# Stop once validation loss has not improved for 5 epochs and roll the model
# back to the weights of its best epoch.
early_stopping = EarlyStopping(
    monitor='val_loss',
    patience=5,
    restore_best_weights=True,
)

# Fit for at most 50 epochs; `history` keeps the per-epoch metrics.
fit_options = {
    'validation_data': val_dataset,
    'epochs': 50,
    'callbacks': [early_stopping],
}
history = model.fit(train_dataset, **fit_options)

In [None]:
# Score the trained model on the held-out test split.
# evaluate() returns the loss followed by the compiled metrics (accuracy here).
test_metrics = model.evaluate(x=X_test, y=y_test_encoded)
test_loss, test_accuracy = test_metrics
print(f"Test accuracy: {test_accuracy}")

In [None]:
# Generate and save confusion matrix.
y_pred = model.predict(X_test)
# Collapse softmax outputs / one-hot targets to integer class ids.
y_pred_classes = np.argmax(y_pred, axis=1)
y_true_classes = np.argmax(y_test_encoded, axis=1)

# The integer ids come from `label_encoder`, so the axis labels must come from
# `label_encoder.classes_` (sorted order). Using the first-appearance order of
# the CSV would attach the wrong names to rows/columns.
encoded_class_names = label_encoder.classes_
# Pass `labels` explicitly so the matrix always has one row/column per class,
# even if some class is absent from both the test targets and the predictions.
cm = confusion_matrix(
    y_true_classes,
    y_pred_classes,
    labels=np.arange(len(encoded_class_names)),
)

plt.figure(figsize=(12, 10))
sns.heatmap(cm, annot=True, fmt='d',
            xticklabels=encoded_class_names, yticklabels=encoded_class_names)
plt.title('Confusion Matrix')
plt.xlabel('Predicted')
plt.ylabel('True')
plt.savefig('confusion_matrix.png')
plt.show()

In [None]:
# Generate and save classification report.
# Row names must follow the encoder's id order (`label_encoder.classes_`), not
# the first-appearance order of the CSV, otherwise metrics are attributed to
# the wrong classes. `labels` is given explicitly so the number of rows always
# matches the number of names.
report_class_names = list(label_encoder.classes_)
report = classification_report(
    y_true_classes,
    y_pred_classes,
    labels=np.arange(len(report_class_names)),
    target_names=report_class_names,
)
with open('classification_report.txt', 'w', encoding='utf-8') as f:
    f.write(report)
print(report)  # Screenshot this output for classification_report.png

In [None]:
# Plot the per-epoch training and validation loss recorded by fit().
fig, ax = plt.subplots(figsize=(10, 6))
loss_curves = (
    ('loss', 'Training Loss'),
    ('val_loss', 'Validation Loss'),
)
for history_key, legend_label in loss_curves:
    ax.plot(history.history[history_key], label=legend_label)
ax.set_title('Training vs. Validation Loss')
ax.set_xlabel('Epoch')
ax.set_ylabel('Loss')
ax.legend()
fig.savefig('loss_plot.png')
plt.show()

In [None]:
# Save the trained model to 'plant_seedling_cnn.keras' (path is relative to the
# notebook's working directory) so it can be reloaded without retraining.
model.save('plant_seedling_cnn.keras')