In [None]:
import os
import numpy as np
import tensorflow as tf
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Flatten, Dense, Dropout
from tensorflow.keras.optimizers import Adam
from sklearn.model_selection import train_test_split
from shutil import copyfile
import matplotlib.pyplot as plt

# Colab-only step: mount Google Drive at /content/drive so files stored there
# can be read through the regular file system.
from google.colab import drive
drive.mount('/content/drive')

In [None]:
# Dataset root on the mounted Drive.
base_dir = '/content/drive/MyDrive/FireRisk'
# The 'train' sub-folder is the directory that gets split into train/val/test.
train_dir = os.path.join(base_dir, 'train')

In [None]:
def stratified_split(directory, test_size=0.2, val_size=0.2, seed=42):
    """Split the images under ``directory`` into stratified train/val/test sets.

    ``directory`` is expected to contain one sub-directory per class. Every
    ``.jpg``/``.jpeg``/``.png`` file (extension matched case-insensitively)
    inside a class sub-directory becomes one sample.

    Args:
        directory: Path of the folder holding the per-class sub-directories.
        test_size: Fraction of all samples assigned to the test set.
        val_size: Fraction of all samples assigned to the validation set.
        seed: Random state forwarded to both ``train_test_split`` calls.

    Returns:
        ``(X_train, y_train), (X_val, y_val), (X_test, y_test)`` where each
        ``X_*`` is a NumPy array of file paths and each ``y_*`` a NumPy array
        of integer labels. A label is the position of the class entry in
        ``sorted(os.listdir(directory))``.

    Raises:
        ValueError: If the fractions are not in (0, 1), if they do not leave
            room for a training set, or if no image file is found.
    """
    if not (0 < test_size < 1 and 0 < val_size < 1 and test_size + val_size < 1):
        raise ValueError(
            "test_size and val_size must be fractions in (0, 1) whose sum is "
            f"below 1; got test_size={test_size}, val_size={val_size}")

    image_exts = ('.jpg', '.jpeg', '.png')
    class_names = sorted(os.listdir(directory))

    file_paths = []
    labels = []
    for label_idx, class_name in enumerate(class_names):
        class_dir = os.path.join(directory, class_name)
        # Stray files next to the class folders (e.g. .DS_Store) are skipped
        # instead of crashing os.listdir below. The label index still follows
        # the full sorted listing so it stays aligned with callers that build
        # their class-name list as sorted(os.listdir(directory)).
        if not os.path.isdir(class_dir):
            continue
        # os.listdir order is arbitrary; sorting makes the split reproducible
        # for a given seed.
        class_files = sorted(
            os.path.join(class_dir, f)
            for f in os.listdir(class_dir)
            if f.lower().endswith(image_exts))
        file_paths.extend(class_files)
        labels.extend([label_idx] * len(class_files))

    if not file_paths:
        raise ValueError(f"No image files found under {directory!r}")

    X = np.array(file_paths)
    y = np.array(labels)

    X_train_val, X_test, y_train_val, y_test = train_test_split(
        X, y, test_size=test_size, stratify=y, random_state=seed)

    # val_size is a fraction of the full data set; rescale it to a fraction
    # of what remains after the test set has been removed.
    val_size_adjusted = val_size / (1 - test_size)
    X_train, X_val, y_train, y_val = train_test_split(
        X_train_val, y_train_val, test_size=val_size_adjusted,
        stratify=y_train_val, random_state=seed)

    return (X_train, y_train), (X_val, y_val), (X_test, y_test)

In [None]:
# Split the labelled images once; each split is a (file_paths, labels) pair.
train_split, val_split, test_split = stratified_split(train_dir)
X_train, y_train = train_split
X_val, y_val = val_split
X_test, y_test = test_split

In [None]:
def create_dataset(file_paths, labels, class_names, img_size=(150, 150), batch_size=32, shuffle=True):
    """Build a batched ``tf.data`` pipeline of (image, one-hot label) pairs.

    Args:
        file_paths: Image file paths, one per sample.
        labels: Integer class indices matching ``file_paths``.
        class_names: Sequence whose length sets the one-hot width.
        img_size: Target size passed to ``tf.image.resize``.
        batch_size: Number of samples per batch.
        shuffle: When true, shuffle the samples with a buffer covering all of
            ``file_paths``, before any image is read.

    Returns:
        A ``tf.data.Dataset`` yielding batches of images (resized and divided
        by 255) together with their one-hot labels.
    """
    one_hot = tf.keras.utils.to_categorical(labels, num_classes=len(class_names))

    def _read_image(path, target):
        # Read, decode to 3 channels, resize, then scale by 1/255.
        raw_bytes = tf.io.read_file(path)
        pixels = tf.image.decode_jpeg(raw_bytes, channels=3)
        pixels = tf.image.resize(pixels, img_size)
        return pixels / 255.0, target

    dataset = tf.data.Dataset.from_tensor_slices((file_paths, one_hot))
    if shuffle:
        dataset = dataset.shuffle(buffer_size=len(file_paths))

    return (
        dataset
        .map(_read_image, num_parallel_calls=tf.data.AUTOTUNE)
        .batch(batch_size)
        .prefetch(tf.data.AUTOTUNE)
    )

In [None]:
# Input-pipeline settings shared by the datasets and the model input layer.
batch_size = 32
img_size = (150, 150)

# Entries of train_dir in sorted order; an entry's position is its class index.
class_names = os.listdir(train_dir)
class_names.sort()

In [None]:
# Build one pipeline per split; only the training pipeline is shuffled.
train_ds, val_ds, test_ds = (
    create_dataset(paths, targets, class_names, img_size, batch_size, shuffle=do_shuffle)
    for paths, targets, do_shuffle in (
        (X_train, y_train, True),
        (X_val, y_val, False),
        (X_test, y_test, False),
    )
)

In [None]:
# Random augmentations, applied in this order: horizontal flip, rotation, zoom.
augmentation_layers = [
    tf.keras.layers.RandomFlip("horizontal"),
    tf.keras.layers.RandomRotation(0.1),
    tf.keras.layers.RandomZoom(0.1),
]
data_augmentation = tf.keras.Sequential(augmentation_layers)

In [None]:
def _augment_batch(images, targets):
    """Apply the random augmentations to the images; targets pass through."""
    return data_augmentation(images, training=True), targets


# NOTE: train_ds is rebound to its augmented version, so running this cell
# again stacks another augmentation pass on top of the previous one.
train_ds = train_ds.map(_augment_batch, num_parallel_calls=tf.data.AUTOTUNE)

In [None]:
# Small CNN: two convolution + max-pooling stages, then a dense head whose
# softmax output has one unit per entry in class_names.
cnn_input_shape = (*img_size, 3)

model = tf.keras.Sequential([
    Conv2D(32, (3, 3), activation='relu', input_shape=cnn_input_shape),
    MaxPooling2D(pool_size=2, strides=2),

    Conv2D(64, (3, 3), activation='relu'),
    MaxPooling2D(pool_size=2, strides=2),

    Flatten(),
    Dense(128, activation='relu'),
    Dense(len(class_names), activation='softmax'),
])

In [None]:
# Categorical cross-entropy matches the one-hot labels and softmax output.
optimizer = Adam(learning_rate=1e-3)
model.compile(optimizer=optimizer, loss='categorical_crossentropy', metrics=['accuracy'])

In [None]:
# Keep only the best weights seen so far, judged by validation loss, so the
# checkpoint is only written when fit() is given validation data.
checkpoint_cb = tf.keras.callbacks.ModelCheckpoint(
    'best_model.h5', monitor='val_loss', save_best_only=True)
# Abort training as soon as the loss becomes NaN.
nan_guard_cb = tf.keras.callbacks.TerminateOnNaN()

callbacks = [checkpoint_cb, nan_guard_cb]

In [None]:
# Train with the validation split scored after every epoch. Validation data is
# required here: the ModelCheckpoint callback uses save_best_only=True, which
# compares val_loss between epochs; without it no checkpoint is ever saved and
# the history holds no validation curves.
history = model.fit(
    train_ds,
    validation_data=val_ds,
    epochs=10,
    callbacks=callbacks,
    verbose=1)

In [None]:
# Score the in-memory model on the test split; evaluate() returns the loss
# followed by the compiled metric (accuracy).
evaluation = model.evaluate(test_ds)
test_loss, test_acc = evaluation
print(f'Test accuracy: {test_acc:.4f}')

In [None]:
import math
import random

# Upper bound on the number of preview tiles and the grid width.
MAX_PREVIEW = 25
PREVIEW_COLS = 5

# Grab the first batch of the test pipeline for a visual sanity check.
test_images = []
test_labels = []
for images, labels in test_ds.take(1):  # Take one batch
    test_images = images.numpy()
    test_labels = labels.numpy()

if len(test_images) == 0:
    # An empty test set would otherwise fail obscurely inside predict().
    print("No test images available to preview.")
else:
    predictions = model.predict(test_images)
    predicted_classes = np.argmax(predictions, axis=1)
    true_classes = np.argmax(test_labels, axis=1)

    # The batch may hold fewer than MAX_PREVIEW images (small test set or
    # small batch size); random.sample raises if asked for more than exist.
    n_show = min(MAX_PREVIEW, len(test_images))
    random_indices = random.sample(range(len(test_images)), n_show)

    n_rows = math.ceil(n_show / PREVIEW_COLS)
    plt.figure(figsize=(20, 4 * n_rows))
    for i, idx in enumerate(random_indices):
        plt.subplot(n_rows, PREVIEW_COLS, i + 1)
        plt.imshow(test_images[idx])
        plt.title(f"True: {class_names[true_classes[idx]]}\nPred: {class_names[predicted_classes[idx]]}")
        plt.axis('off')
    plt.tight_layout()
    plt.show()

In [None]:
from sklearn.metrics import confusion_matrix, classification_report
import seaborn as sns
from tqdm import tqdm

# --- Collect ground truth and predictions over the whole test set ---------
all_true = []
all_pred = []

for images, labels in tqdm(test_ds):
    label_array = labels.numpy()
    # One-hot targets are rank 2; otherwise treat them as integer labels.
    if label_array.ndim > 1:
        true_labels = np.argmax(label_array, axis=1)
    else:
        true_labels = label_array

    # predict_on_batch avoids the per-call overhead of predict() in a loop.
    preds = model.predict_on_batch(images)
    pred_classes = np.argmax(preds, axis=1)

    all_true.extend(true_labels)
    all_pred.extend(pred_classes)

all_true = np.array(all_true, dtype=int)
all_pred = np.array(all_pred, dtype=int)

if all_true.size == 0:
    raise ValueError("test_ds yielded no samples; nothing to evaluate.")

# Classes that occur as a true label or as a prediction.
present_classes = np.unique(np.concatenate([all_true, all_pred]))
print(f"Classes present in evaluation: {present_classes}")

filtered_class_names = [class_names[i] for i in present_classes]

print("\nClassification Report:")
print(classification_report(
    all_true,
    all_pred,
    labels=present_classes,
    target_names=filtered_class_names,
    digits=4
))

# --- Per-class accuracy (recall) -------------------------------------------
conf_mat = confusion_matrix(all_true, all_pred, labels=present_classes)
support = conf_mat.sum(axis=1)
has_support = support > 0
# A class that was only ever predicted has no true samples (row sum 0); its
# accuracy is undefined, so leave it as NaN rather than dividing by zero.
class_accuracies = np.divide(
    conf_mat.diagonal(),
    support,
    out=np.full(len(support), np.nan),
    where=has_support)

print("\nPer-class Accuracy:")
for i, class_idx in enumerate(present_classes):
    acc_text = f"{class_accuracies[i]:.2%}" if has_support[i] else "n/a"
    print(f"{class_names[class_idx]:<15}: {acc_text} ({conf_mat[i,i]}/{support[i]})")

# --- Confusion-matrix heatmap ----------------------------------------------
plt.figure(figsize=(12, 10))
sns.heatmap(
    conf_mat,
    annot=True,
    fmt='d',
    xticklabels=filtered_class_names,
    yticklabels=filtered_class_names,
    cmap='Blues',
    annot_kws={'size': 8}
)
plt.title('Confusion Matrix', fontsize=14)
plt.xlabel('Predicted', fontsize=12)
plt.ylabel('True', fontsize=12)
plt.xticks(rotation=45, ha='right')
plt.yticks(rotation=0)
plt.tight_layout()
plt.show()

# --- Accuracy-per-class bar chart ------------------------------------------
# Undefined accuracies are drawn as zero-height bars labelled "n/a".
bar_heights = np.nan_to_num(class_accuracies, nan=0.0)

plt.figure(figsize=(12, 6))
bars = plt.bar(filtered_class_names, bar_heights, color='skyblue')

for bar, acc, supported in zip(bars, class_accuracies, has_support):
    height = bar.get_height()
    plt.text(bar.get_x() + bar.get_width()/2., height,
             f'{acc:.2%}' if supported else 'n/a',
             ha='center', va='bottom')

plt.title('Accuracy per Class', fontsize=14)
plt.xlabel('Class', fontsize=12)
plt.ylabel('Accuracy', fontsize=12)
plt.ylim(0, 1.1)
plt.xticks(rotation=45)
plt.grid(axis='y', linestyle='--', alpha=0.7)
plt.tight_layout()
plt.show()

# --- Summary metrics -------------------------------------------------------
print("\nAdditional Metrics:")
print(f"Overall Accuracy: {np.mean(all_true == all_pred):.2%}")
# Balanced accuracy averages recall over the classes that have true samples.
print(f"Balanced Accuracy: {np.mean(class_accuracies[has_support]):.2%}")