# Module 2: Comparative Analysis of Keras and PyTorch Models

**Answers**  
- `preds > 0.5` thresholds probabilities at 0.5 to produce class labels.  
- **F1 score** balances precision and recall, useful under class imbalance.

In [None]:
import os, glob, numpy as np, matplotlib.pyplot as plt
from PIL import Image, ImageDraw

# Root folder of the image dataset and its two per-class subfolders.
DATASET_DIR = "./images_dataSAT"
DIR_NON_AGRI, DIR_AGRI = (
    os.path.join(DATASET_DIR, class_folder)
    for class_folder in ("class_0_non_agri", "class_1_agri")
)

def _ensure_dataset():
    """Create a small synthetic two-class image dataset if none is present.

    Both class directories (DIR_NON_AGRI and DIR_AGRI) are created when
    missing. If each of them already contains at least one entry, nothing
    is written. Otherwise 12 PNG images of size 64x64 are generated per
    class from a fixed-seed random generator: a random background colour
    with a white rectangle outline for DIR_NON_AGRI and white horizontal
    lines for DIR_AGRI.
    """
    os.makedirs(DIR_NON_AGRI, exist_ok=True)
    os.makedirs(DIR_AGRI, exist_ok=True)
    if os.listdir(DIR_NON_AGRI) and os.listdir(DIR_AGRI):
        return

    # Fixed seed keeps the generated background colours reproducible.
    rng = np.random.default_rng(0)
    for cls_dir, pattern in [(DIR_NON_AGRI, 'rect'), (DIR_AGRI, 'lines')]:
        for i in range(12):
            # Three separate scalar draws (R, G, B), converted to plain ints
            # so PIL receives native Python integers.
            background = tuple(int(rng.integers(20, 235)) for _ in range(3))
            img = Image.new("RGB", (64, 64), background)
            d = ImageDraw.Draw(img)
            if pattern == 'rect':
                d.rectangle([10, 10, 54, 54], outline=(255, 255, 255), width=2)
            else:
                for y in range(5, 64, 10):
                    d.line([0, y, 64, y], fill=(255, 255, 255), width=1)
            # Single braces so the index is interpolated (img_000.png ...
            # img_011.png). Doubled braces would emit the literal text
            # "{i:03d}" and every image would overwrite the same file.
            img.save(os.path.join(cls_dir, f"img_{i:03d}.png"))

# Seed the working directory from a pre-staged copy under /mnt/data when one
# exists and no local dataset directory is present yet.
if os.path.exists('/mnt/data/images_dataSAT'):
    import shutil

    if not os.path.exists(DATASET_DIR):
        shutil.copytree(src='/mnt/data/images_dataSAT', dst=DATASET_DIR)

# Generates synthetic images unless both class folders already have content.
_ensure_dataset()
dataset_location = os.path.abspath(DATASET_DIR)
print("Dataset ready at", dataset_location)

In [None]:
import numpy as np
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, confusion_matrix
import tensorflow as tf
from tensorflow.keras import layers, models
import torch
from torch import nn
from torch.utils.data import DataLoader
from torchvision import datasets, transforms

def print_metrics(y_true, y_prob, threshold=0.5):
    """Print binary-classification metrics for predicted probabilities.

    Args:
        y_true: Ground-truth class labels.
        y_prob: Predicted scores, one per entry of ``y_true``. Any
            array-like is accepted; it is converted with ``np.asarray``.
        threshold: Scores strictly greater than this value are labelled 1,
            all others 0. Defaults to 0.5.

    Prints accuracy, precision, recall, F1 and the confusion matrix.
    Precision, recall and F1 report 0 instead of warning when their
    denominator is zero (``zero_division=0``).
    """
    y_pred = (np.asarray(y_prob) > threshold).astype(int)
    print("Accuracy:", accuracy_score(y_true, y_pred))
    print("Precision:", precision_score(y_true, y_pred, zero_division=0))
    print("Recall:", recall_score(y_true, y_pred, zero_division=0))
    print("F1:", f1_score(y_true, y_pred, zero_division=0))
    print("Confusion Matrix:\n", confusion_matrix(y_true, y_pred))

# Keras model + metrics
# Both subsets use the same seed and split fraction so that the training and
# validation partitions come from one consistent 70/30 split.
keras_ds_train = tf.keras.utils.image_dataset_from_directory(
    DATASET_DIR,
    validation_split=0.3,
    subset='training',
    seed=123,
    image_size=(64, 64),
    batch_size=16,
)
keras_ds_val = tf.keras.utils.image_dataset_from_directory(
    DATASET_DIR,
    validation_split=0.3,
    subset='validation',
    seed=123,
    image_size=(64, 64),
    batch_size=16,
)

# Small CNN: rescale pixels to [0, 1], one conv + pool stage, then a single
# sigmoid unit that outputs a probability.
keras_model = models.Sequential([
    layers.Rescaling(1./255, input_shape=(64, 64, 3)),
    layers.Conv2D(16, 3, activation='relu'),
    layers.MaxPooling2D(),
    layers.Flatten(),
    layers.Dense(1, activation='sigmoid'),
])
keras_model.compile(
    optimizer='adam',
    loss='binary_crossentropy',
    metrics=['accuracy'],
)
keras_model.fit(
    keras_ds_train,
    validation_data=keras_ds_val,
    epochs=2,
    verbose=0,
)

# Collect probabilities and labels batch by batch so each prediction stays
# paired with the label from the same batch.
y_probs_k = []
y_true_k = []
for X, y in keras_ds_val:
    batch_probs = keras_model.predict(X, verbose=0)
    y_probs_k.extend(batch_probs.ravel())
    y_true_k.extend(y.numpy().ravel())

print("Keras metrics:")
print_metrics(np.array(y_true_k), np.array(y_probs_k))

# PyTorch model + metrics
# Resize every image to 64x64 and convert it to a tensor.
transform = transforms.Compose([
    transforms.Resize((64, 64)),
    transforms.ToTensor(),
])
full = datasets.ImageFolder(DATASET_DIR, transform=transform)

# Hold out 30% of the samples (rounded down) for validation.
n_val = int(0.3 * len(full))
n_train = len(full) - n_val

# A seeded generator makes the random split reproducible across runs.
train_subset, val_subset = torch.utils.data.random_split(
    full,
    [n_train, n_val],
    generator=torch.Generator().manual_seed(123),
)
train_loader = DataLoader(train_subset, batch_size=16, shuffle=True)
val_loader = DataLoader(val_subset, batch_size=16, shuffle=False)
class Tiny(nn.Module):
    """Minimal convolutional classifier that emits one logit per image.

    The network is a 3x3 convolution (3 -> 16 channels, padding 1), a ReLU,
    global average pooling to 1x1, a flatten, and a linear layer mapping the
    16 pooled features to a single output. No sigmoid is applied here.
    """

    def __init__(self):
        super().__init__()
        stages = [
            nn.Conv2d(in_channels=3, out_channels=16, kernel_size=3, padding=1),
            nn.ReLU(),
            nn.AdaptiveAvgPool2d(1),
            nn.Flatten(),
            nn.Linear(in_features=16, out_features=1),
        ]
        self.net = nn.Sequential(*stages)

    def forward(self, x):
        """Return the logits for batch ``x`` with the size-1 dim 1 removed."""
        logits = self.net(x)
        return logits.squeeze(1)
# Train the Tiny model for two epochs, then evaluate it on the validation split.
device = "cuda" if torch.cuda.is_available() else "cpu"
m = Tiny().to(device)
opt = torch.optim.Adam(m.parameters(), 1e-3)
# The model outputs raw logits, so the loss applies the sigmoid internally.
crit = nn.BCEWithLogitsLoss()

for _ in range(2):
    m.train()
    for X, y in train_loader:
        # BCEWithLogitsLoss needs float targets; the loader yields int labels.
        X, y = X.to(device), y.float().to(device)
        opt.zero_grad()
        out = m(X)
        loss = crit(out, y)
        loss.backward()
        opt.step()

m.eval()
probs = []
trues = []
with torch.no_grad():
    for X, y in val_loader:
        X = X.to(device)
        out = m(X)
        # Convert logits to probabilities before collecting them.
        probs.extend(torch.sigmoid(out).cpu().numpy().ravel())
        trues.extend(y.numpy().ravel())

print("\nPyTorch metrics:")
print_metrics(np.array(trues), np.array(probs))
# labels=[0, 1] forces a 2x2 matrix even when the validation labels and
# predictions contain only one class; without it cm[1, 0] can raise IndexError.
cm = confusion_matrix(
    np.array(trues),
    (np.array(probs) > 0.5).astype(int),
    labels=[0, 1],
)
# Row 1 / column 0: true class 1 predicted as class 0.
print("Total false negatives (PyTorch):", cm[1, 0])