In [1]:
import os
import pandas as pd
import torch
import torch.nn as nn
from torchvision import transforms
from PIL import Image
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, roc_auc_score, confusion_matrix


In [2]:
# Run on the GPU when CUDA is usable, otherwise fall back to the CPU.
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")

# Preprocessing applied to every image before inference: resize to a fixed
# 256x256 and convert to a tensor. The networks defined in this notebook size
# their first Linear layer for exactly this resolution.
transform = transforms.Compose(
    [transforms.Resize((256, 256)), transforms.ToTensor()]
)

In [8]:
class CustomCNN(nn.Module):
    """Three-stage convolutional classifier that outputs two class logits.

    Every stage is ``Conv2d(3x3, padding=1) -> ReLU -> MaxPool2d(2)`` with the
    channel progression 3 -> 16 -> 32 -> 64. The first fully connected layer
    takes ``64 * 32 * 32`` features, which corresponds to a 256x256 input
    halved by the three pooling layers.

    The submodules stay in two flat ``nn.Sequential`` containers named
    ``conv_layers`` and ``fc_layers`` so that ``state_dict`` keys such as
    ``conv_layers.0.weight`` are unchanged.
    """

    def __init__(self):
        super().__init__()

        # (in_channels, out_channels) for each convolutional stage
        channel_plan = [(3, 16), (16, 32), (32, 64)]
        stages = []
        for in_ch, out_ch in channel_plan:
            stages += [
                nn.Conv2d(in_ch, out_ch, 3, padding=1),
                nn.ReLU(),
                nn.MaxPool2d(2),
            ]
        self.conv_layers = nn.Sequential(*stages)

        self.fc_layers = nn.Sequential(
            nn.Flatten(),
            nn.Linear(64 * 32 * 32, 128),
            nn.ReLU(),
            nn.Dropout(0.3),
            nn.Linear(128, 2),  # Binary classification
        )

    def forward(self, x):
        """Return the logits produced by the conv stack followed by the head."""
        return self.fc_layers(self.conv_layers(x))


In [9]:
# Load trained model
model = CustomCNN().to(device)
# SECURITY: torch.load unpickles the file. weights_only=True limits unpickling
# to tensors and plain containers, which is all a state_dict needs, so a
# tampered checkpoint cannot run arbitrary code while being loaded.
model.load_state_dict(
    torch.load("custom_cnn_model.pth", map_location=device, weights_only=True)
)
# Evaluation mode: disables the Dropout layer for the inference below.
model.eval()

# Evaluation function
def evaluate_model_on_split(split_name, eval_model=None):
    """Score a classifier on one dataset split and return its metrics.

    Reads ``../artifact_folder/<split_name>/labels.csv`` (columns ``filename``
    and ``label``), pushes every listed image from the sibling ``images``
    directory through the network one at a time, and compares the predictions
    with the ground truth. A label equal to ``"cracked"`` (case-insensitive)
    is the positive class (1); every other label is class 0.

    Args:
        split_name: Sub-directory of ``../artifact_folder`` to evaluate.
        eval_model: Network to score. When omitted, the notebook-level
            ``model`` is used, so existing one-argument calls keep working.

    Returns:
        dict with the keys "Accuracy", "Precision", "Recall", "F1-Score",
        "AUC-ROC" (computed from the softmax probability of class 1) and
        "Confusion Matrix" (nested list from ``confusion_matrix``).
    """
    net = model if eval_model is None else eval_model
    # Dropout must be inactive while scoring, whatever mode the caller left it in.
    net.eval()

    base_path = f"../artifact_folder/{split_name}"
    labels_df = pd.read_csv(os.path.join(base_path, "labels.csv"))
    images_dir = os.path.join(base_path, "images")

    y_true, y_pred, y_score = [], [], []

    with torch.no_grad():
        for filename, raw_label in zip(labels_df["filename"], labels_df["label"]):
            # The context manager releases the file handle as soon as the
            # pixels have been converted.
            with Image.open(os.path.join(images_dir, filename)) as img:
                input_tensor = transform(img.convert("RGB")).unsqueeze(0).to(device)

            output = net(input_tensor)
            y_pred.append(output.argmax(dim=1).item())
            # Probability of the positive class, used as the ranking score for AUC.
            y_score.append(torch.softmax(output, dim=1)[0][1].item())
            y_true.append(1 if raw_label.lower() == "cracked" else 0)

    # zero_division=0 keeps the 0.0 result when the positive class is never
    # predicted but suppresses the UndefinedMetricWarning.
    return {
        "Accuracy": accuracy_score(y_true, y_pred),
        "Precision": precision_score(y_true, y_pred, zero_division=0),
        "Recall": recall_score(y_true, y_pred, zero_division=0),
        "F1-Score": f1_score(y_true, y_pred, zero_division=0),
        "AUC-ROC": roc_auc_score(y_true, y_score),
        "Confusion Matrix": confusion_matrix(y_true, y_pred).tolist()
    }

# Evaluate the train, validation and test splits in turn and print each report
for split in ("train", "val", "test"):
    print(f"📊 Evaluation for {split.upper()}")
    metrics = evaluate_model_on_split(split)
    print("\n".join(f"{name}: {value}" for name, value in metrics.items()))
    print()

📊 Evaluation for TRAIN
Accuracy: 0.9359464140179299
Precision: 0.9973852411388727
Recall: 0.5780434416568446
F1-Score: 0.7319049141882529
AUC-ROC: 0.9528855416413458
Confusion Matrix: [[33316, 9], [2506, 3433]]

📊 Evaluation for VAL
Accuracy: 0.8527279210745276
Precision: 0.5662100456621004
Recall: 0.19121048573631458
F1-Score: 0.2858789625360231
AUC-ROC: 0.6920706668174882
Confusion Matrix: [[6926, 190], [1049, 248]]

📊 Evaluation for TEST
Accuracy: 0.8576351752822341
Precision: 0.5634517766497462
Recall: 0.1778846153846154
F1-Score: 0.27040194884287455
AUC-ROC: 0.6854948942446326
Confusion Matrix: [[6995, 172], [1026, 222]]



In [11]:
class DeeperCNN(nn.Module):
    """Four-stage convolutional classifier that outputs two class logits.

    Every stage is ``Conv2d(3x3, padding=1) -> ReLU -> MaxPool2d(2) ->
    Dropout(0.25)`` with the channel progression 3 -> 32 -> 64 -> 128 -> 256.
    The first fully connected layer takes ``256 * 16 * 16`` features, which
    corresponds to a 256x256 input halved by the four pooling layers.

    The submodules stay in two flat ``nn.Sequential`` containers named
    ``conv_layers`` and ``fc_layers`` so that ``state_dict`` keys such as
    ``conv_layers.4.weight`` are unchanged.
    """

    def __init__(self):
        super().__init__()

        # Consecutive pairs give (in_channels, out_channels) for each stage
        widths = (3, 32, 64, 128, 256)
        stages = []
        for in_ch, out_ch in zip(widths[:-1], widths[1:]):
            stages.extend(
                (
                    nn.Conv2d(in_ch, out_ch, kernel_size=3, padding=1),
                    nn.ReLU(),
                    nn.MaxPool2d(2),
                    nn.Dropout(0.25),
                )
            )
        self.conv_layers = nn.Sequential(*stages)

        self.fc_layers = nn.Sequential(
            nn.Flatten(),
            nn.Linear(256 * 16 * 16, 256),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(256, 2),  # 2 classes
        )

    def forward(self, x):
        """Return the logits produced by the conv stack followed by the head."""
        return self.fc_layers(self.conv_layers(x))

In [12]:
# Load trained model
model = DeeperCNN().to(device)
# SECURITY: torch.load unpickles the file. weights_only=True limits unpickling
# to tensors and plain containers, which is all a state_dict needs, so a
# tampered checkpoint cannot run arbitrary code while being loaded.
model.load_state_dict(
    torch.load("deeper_cnn_model.pth", map_location=device, weights_only=True)
)
# Evaluation mode: disables the Dropout layers for the inference below.
model.eval()

# Evaluation function
def evaluate_model_on_split(split_name, eval_model=None):
    """Score a classifier on one dataset split and return its metrics.

    Reads ``../artifact_folder/<split_name>/labels.csv`` (columns ``filename``
    and ``label``), pushes every listed image from the sibling ``images``
    directory through the network one at a time, and compares the predictions
    with the ground truth. A label equal to ``"cracked"`` (case-insensitive)
    is the positive class (1); every other label is class 0.

    NOTE(review): a function with this name is already defined in an earlier
    cell of this notebook; this definition shadows it once the cell runs.

    Args:
        split_name: Sub-directory of ``../artifact_folder`` to evaluate.
        eval_model: Network to score. When omitted, the notebook-level
            ``model`` is used, so existing one-argument calls keep working.

    Returns:
        dict with the keys "Accuracy", "Precision", "Recall", "F1-Score",
        "AUC-ROC" (computed from the softmax probability of class 1) and
        "Confusion Matrix" (nested list from ``confusion_matrix``).
    """
    net = model if eval_model is None else eval_model
    # Dropout must be inactive while scoring, whatever mode the caller left it in.
    net.eval()

    base_path = f"../artifact_folder/{split_name}"
    labels_df = pd.read_csv(os.path.join(base_path, "labels.csv"))
    images_dir = os.path.join(base_path, "images")

    y_true, y_pred, y_score = [], [], []

    with torch.no_grad():
        for filename, raw_label in zip(labels_df["filename"], labels_df["label"]):
            # The context manager releases the file handle as soon as the
            # pixels have been converted.
            with Image.open(os.path.join(images_dir, filename)) as img:
                input_tensor = transform(img.convert("RGB")).unsqueeze(0).to(device)

            output = net(input_tensor)
            y_pred.append(output.argmax(dim=1).item())
            # Probability of the positive class, used as the ranking score for AUC.
            y_score.append(torch.softmax(output, dim=1)[0][1].item())
            y_true.append(1 if raw_label.lower() == "cracked" else 0)

    # zero_division=0 keeps the 0.0 result when the positive class is never
    # predicted but suppresses the UndefinedMetricWarning.
    return {
        "Accuracy": accuracy_score(y_true, y_pred),
        "Precision": precision_score(y_true, y_pred, zero_division=0),
        "Recall": recall_score(y_true, y_pred, zero_division=0),
        "F1-Score": f1_score(y_true, y_pred, zero_division=0),
        "AUC-ROC": roc_auc_score(y_true, y_score),
        "Confusion Matrix": confusion_matrix(y_true, y_pred).tolist()
    }

# Evaluate the train, validation and test splits in turn and print each report
for split in ("train", "val", "test"):
    print(f"📊 Evaluation for {split.upper()}")
    metrics = evaluate_model_on_split(split)
    print("\n".join(f"{name}: {value}" for name, value in metrics.items()))
    print()

📊 Evaluation for TRAIN


  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


Accuracy: 0.8487418500407498
Precision: 0.0
Recall: 0.0
F1-Score: 0.0
AUC-ROC: 0.487823583274165
Confusion Matrix: [[33325, 0], [5939, 0]]

📊 Evaluation for VAL


  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


Accuracy: 0.8458338285985975
Precision: 0.0
Recall: 0.0
F1-Score: 0.0
AUC-ROC: 0.48868984854138686
Confusion Matrix: [[7116, 0], [1297, 0]]

📊 Evaluation for TEST
Accuracy: 0.8516934046345811
Precision: 0.0
Recall: 0.0
F1-Score: 0.0
AUC-ROC: 0.49176782475233705
Confusion Matrix: [[7167, 0], [1248, 0]]



  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


In [13]:
# Load trained model
model = CustomCNN().to(device)
# SECURITY: torch.load unpickles the file. weights_only=True limits unpickling
# to tensors and plain containers, which is all a state_dict needs, so a
# tampered checkpoint cannot run arbitrary code while being loaded.
model.load_state_dict(
    torch.load("custom_cnn_model_updated.pth", map_location=device, weights_only=True)
)
# Evaluation mode: disables the Dropout layer for the inference below.
model.eval()

# Evaluation function
def evaluate_model_on_split(split_name, eval_model=None):
    """Score a classifier on one dataset split and return its metrics.

    Reads ``../artifact_folder/<split_name>/labels.csv`` (columns ``filename``
    and ``label``), pushes every listed image from the sibling ``images``
    directory through the network one at a time, and compares the predictions
    with the ground truth. A label equal to ``"cracked"`` (case-insensitive)
    is the positive class (1); every other label is class 0.

    NOTE(review): a function with this name is already defined in earlier
    cells of this notebook; this definition shadows them once the cell runs.

    Args:
        split_name: Sub-directory of ``../artifact_folder`` to evaluate.
        eval_model: Network to score. When omitted, the notebook-level
            ``model`` is used, so existing one-argument calls keep working.

    Returns:
        dict with the keys "Accuracy", "Precision", "Recall", "F1-Score",
        "AUC-ROC" (computed from the softmax probability of class 1) and
        "Confusion Matrix" (nested list from ``confusion_matrix``).
    """
    net = model if eval_model is None else eval_model
    # Dropout must be inactive while scoring, whatever mode the caller left it in.
    net.eval()

    base_path = f"../artifact_folder/{split_name}"
    labels_df = pd.read_csv(os.path.join(base_path, "labels.csv"))
    images_dir = os.path.join(base_path, "images")

    y_true, y_pred, y_score = [], [], []

    with torch.no_grad():
        for filename, raw_label in zip(labels_df["filename"], labels_df["label"]):
            # The context manager releases the file handle as soon as the
            # pixels have been converted.
            with Image.open(os.path.join(images_dir, filename)) as img:
                input_tensor = transform(img.convert("RGB")).unsqueeze(0).to(device)

            output = net(input_tensor)
            y_pred.append(output.argmax(dim=1).item())
            # Probability of the positive class, used as the ranking score for AUC.
            y_score.append(torch.softmax(output, dim=1)[0][1].item())
            y_true.append(1 if raw_label.lower() == "cracked" else 0)

    # zero_division=0 keeps the 0.0 result when the positive class is never
    # predicted but suppresses the UndefinedMetricWarning.
    return {
        "Accuracy": accuracy_score(y_true, y_pred),
        "Precision": precision_score(y_true, y_pred, zero_division=0),
        "Recall": recall_score(y_true, y_pred, zero_division=0),
        "F1-Score": f1_score(y_true, y_pred, zero_division=0),
        "AUC-ROC": roc_auc_score(y_true, y_score),
        "Confusion Matrix": confusion_matrix(y_true, y_pred).tolist()
    }

# Evaluate the train, validation and test splits in turn and print each report
for split in ("train", "val", "test"):
    print(f"📊 Evaluation for {split.upper()}")
    metrics = evaluate_model_on_split(split)
    print("\n".join(f"{name}: {value}" for name, value in metrics.items()))
    print()

📊 Evaluation for TRAIN


  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


Accuracy: 0.8487418500407498
Precision: 0.0
Recall: 0.0
F1-Score: 0.0
AUC-ROC: 0.6957914339672644
Confusion Matrix: [[33325, 0], [5939, 0]]

📊 Evaluation for VAL


  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


Accuracy: 0.8458338285985975
Precision: 0.0
Recall: 0.0
F1-Score: 0.0
AUC-ROC: 0.698609733275605
Confusion Matrix: [[7116, 0], [1297, 0]]

📊 Evaluation for TEST
Accuracy: 0.8516934046345811
Precision: 0.0
Recall: 0.0
F1-Score: 0.0
AUC-ROC: 0.6834319870632135
Confusion Matrix: [[7167, 0], [1248, 0]]



  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
