In [7]:
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd

import copy
import os
import subprocess

import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torchvision import transforms

from torch.utils.data import DataLoader, Dataset

In [8]:
from utils.datasets import WildfireDataset

# Single place to change where the split CSV meta files are read from.
DATA_ROOT = "/data/amathur-23/ROB313"

# Preprocessing shared by every split: resize, centre-crop to 224, convert to a
# tensor, then normalise each channel with the mean/std given below.
# NOTE(review): after Normalize the pixel values are no longer confined to
# [0, 1], while the ConvVAE decoder in this notebook ends in a Sigmoid and is
# trained with an MSE reconstruction loss against these normalised inputs.
# Confirm that this mismatch is intended.
preprocessing_steps = [
    transforms.ToPILImage(),
    transforms.Resize(256),
    transforms.CenterCrop(224),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
]
transform = transforms.Compose(preprocessing_steps)

# Unlabeled training split (used for VAE training below).
train_dataset = WildfireDataset(
    DATA_ROOT, split="train", labeled=False, transforms=transform
)
# Labeled training split (used for the classifier below).
data_train_labeled = WildfireDataset(
    DATA_ROOT, split="train", labeled=True, transforms=transform
)
# Validation and test splits rely on the dataset's default for `labeled`.
val_dataset = WildfireDataset(DATA_ROOT, split="val", transforms=transform)
test_dataset = WildfireDataset(DATA_ROOT, split="test", transforms=transform)

Loading meta file: /data/amathur-23/ROB313/train_unlabeled.csv
Loading meta file: /data/amathur-23/ROB313/train.csv
Loading meta file: /data/amathur-23/ROB313/val.csv
Loading meta file: /data/amathur-23/ROB313/test.csv


In [9]:
# Mini-batch size shared by every loader.
batch_size = 64

# Training loaders (unlabeled for the VAE, labeled for the classifier) are
# reshuffled on every pass.
train_loader = DataLoader(dataset=train_dataset, batch_size=batch_size, shuffle=True)
train_loader_labeled = DataLoader(
    dataset=data_train_labeled, batch_size=batch_size, shuffle=True
)

# NOTE(review): the validation loader is shuffled as well; the metrics computed
# in this notebook are sums/means over the whole split, so order should not
# matter, but shuffle=False would make per-batch debugging reproducible.
val_loader = DataLoader(dataset=val_dataset, batch_size=batch_size, shuffle=True)
test_loader = DataLoader(dataset=test_dataset, batch_size=batch_size, shuffle=False)

In [14]:
class ConvVAE(nn.Module):
    """Convolutional variational autoencoder for 3x224x224 inputs.

    The encoder is five stride-2 convolutions (224 -> 112 -> 56 -> 28 -> 14 -> 7)
    followed by two linear heads producing the latent mean and log-variance.
    The decoder maps a latent vector back to a 256x7x7 feature map and
    upsamples it with five stride-2 transposed convolutions; the last layer is
    a Sigmoid, so reconstructions lie in [0, 1].

    Args:
        latent_dim: Size of the latent vector.
    """

    def __init__(self, latent_dim=128):
        super().__init__()

        # Encoder: Conv2d + ReLU pairs, built in channel order so that the
        # Sequential indices (0, 2, 4, 6, 8 for the convolutions) are stable.
        encoder_channels = [3, 32, 64, 128, 256, 256]
        encoder_layers = []
        for c_in, c_out in zip(encoder_channels[:-1], encoder_channels[1:]):
            encoder_layers.append(
                nn.Conv2d(c_in, c_out, kernel_size=4, stride=2, padding=1)
            )
            encoder_layers.append(nn.ReLU())
        self.encoder = nn.Sequential(*encoder_layers)

        # Flattened size of the 256x7x7 encoder output.
        self.encoder_output_dim = 256 * 7 * 7
        self.fc_mu = nn.Linear(self.encoder_output_dim, latent_dim)
        self.fc_var = nn.Linear(self.encoder_output_dim, latent_dim)

        # Decoder input: two stacked linear layers (no activation in between).
        self.decoder_input = nn.Sequential(
            nn.Linear(latent_dim, 256),
            nn.Linear(256, self.encoder_output_dim),
        )

        # Decoder: ConvTranspose2d + ReLU pairs, with Sigmoid after the last one.
        decoder_channels = [256, 256, 128, 64, 32, 3]
        final_stage = len(decoder_channels) - 2
        decoder_layers = []
        for stage, (c_in, c_out) in enumerate(
            zip(decoder_channels[:-1], decoder_channels[1:])
        ):
            decoder_layers.append(
                nn.ConvTranspose2d(c_in, c_out, kernel_size=4, stride=2, padding=1)
            )
            decoder_layers.append(nn.Sigmoid() if stage == final_stage else nn.ReLU())
        self.decoder = nn.Sequential(*decoder_layers)

    def encode(self, x):
        """Return ``(mu, log_var)`` for a batch of images ``x``."""
        n_samples = x.size(0)
        features = self.encoder(x).view(n_samples, -1)
        return self.fc_mu(features), self.fc_var(features)

    def reparameterize(self, mu, log_var):
        """Draw ``mu + eps * std`` with ``eps`` sampled from a standard normal."""
        sigma = torch.exp(0.5 * log_var)
        noise = torch.randn_like(sigma)
        return mu + noise * sigma

    def decode(self, z):
        """Map latent vectors ``z`` to reconstructed images."""
        hidden = self.decoder_input(z)
        feature_map = hidden.view(hidden.size(0), 256, 7, 7)
        return self.decoder(feature_map)

    def forward(self, x):
        """Encode, sample a latent vector, decode.

        Returns:
            Tuple ``(reconstruction, mu, log_var)``.
        """
        mu, log_var = self.encode(x)
        latent = self.reparameterize(mu, log_var)
        reconstruction = self.decode(latent)
        return reconstruction, mu, log_var

In [15]:
class BetaVAELoss(nn.Module):
    """Beta-VAE objective: summed MSE reconstruction error plus ``beta`` * KL.

    Both terms are summed (not averaged) over every element of the batch.

    Args:
        beta: Weight applied to the KL term.
    """

    def __init__(self, beta=1):
        super().__init__()
        self.beta = beta

    def forward(self, x, recon_x, mu, logvar):
        """Return the scalar loss for targets ``x`` and reconstructions ``recon_x``."""
        reconstruction_term = F.mse_loss(recon_x, x, reduction="sum")
        # KL divergence between N(mu, exp(logvar)) and the standard normal.
        kl_elements = 1 + logvar - mu.pow(2) - logvar.exp()
        kl_term = -0.5 * torch.sum(kl_elements)
        return reconstruction_term + self.beta * kl_term


# Loss instance used by the VAE train/validate functions.
criterion_vae = BetaVAELoss(beta=1)

In [12]:
from tqdm import tqdm


def train(model, dataloader, optimizer, device, epoch):
    """Run one optimisation pass over ``dataloader``.

    Each batch is a mapping whose ``"image"`` entry is moved to ``device`` and
    fed to ``model``; the loss comes from the module-level ``criterion_vae``.

    Args:
        model: Module returning ``(reconstruction, mu, logvar)``.
        dataloader: Iterable of batches; must expose ``.dataset``.
        optimizer: Optimizer stepping ``model``'s parameters.
        device: Device the image batch is moved to.
        epoch: Epoch index, used only in the progress-bar label.

    Returns:
        Accumulated loss divided by the number of samples in the dataset.
    """
    model.train()
    running_loss = 0
    progress = tqdm(dataloader, f"Training {epoch}")
    for batch in progress:
        images = batch["image"].to(device)
        optimizer.zero_grad()
        reconstruction, mu, logvar = model(images)
        batch_loss = criterion_vae(images, reconstruction, mu, logvar)
        batch_loss.backward()
        optimizer.step()
        running_loss += batch_loss.item()
    n_samples = len(dataloader.dataset)
    return running_loss / n_samples


# Validation counterpart of `train`: same loss, no gradient updates.
def validate(model, dataloader, device, epoch):
    """Evaluate ``model`` on ``dataloader`` without updating it.

    Args:
        model: Module returning ``(reconstruction, mu, logvar)``.
        dataloader: Iterable of batches with an ``"image"`` entry; must expose
            ``.dataset``.
        device: Device the image batch is moved to.
        epoch: Epoch index, used only in the progress-bar label.

    Returns:
        Accumulated ``criterion_vae`` loss divided by the dataset size.
    """
    model.eval()
    running_loss = 0
    progress = tqdm(dataloader, f"Validation {epoch}")
    with torch.no_grad():
        for batch in progress:
            images = batch["image"].to(device)
            reconstruction, mu, logvar = model(images)
            batch_loss = criterion_vae(images, reconstruction, mu, logvar)
            running_loss += batch_loss.item()
    n_samples = len(dataloader.dataset)
    return running_loss / n_samples

In [13]:
# Hyper-parameters for the VAE run.
latent_dim = 256
learning_rate = 1e-5
num_epochs = 50

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = ConvVAE(latent_dim=latent_dim).to(device)
optimizer = optim.Adam(model.parameters(), lr=learning_rate)

# Per-epoch loss history. Values are appended as each epoch finishes, so the
# curves recorded so far remain available even if the loop is interrupted.
train_losses = []
val_losses = []
for epoch in range(num_epochs):
    train_loss = train(model, train_loader, optimizer, device, epoch)
    train_losses.append(train_loss)
    print(f"Epoch {epoch} Train loss: {train_loss}")
    val_loss = validate(model, val_loader, device, epoch)
    val_losses.append(val_loss)
    print(f"Epoch {epoch} Validation loss: {val_loss}")

Training 0: 100%|██████████| 473/473 [02:28<00:00,  3.17it/s]


Epoch 0 Train loss: 199841.6421038712


Validation 0: 100%|██████████| 20/20 [00:05<00:00,  3.56it/s]


Epoch 0 Validation loss: 153660.54285714286


Training 1: 100%|██████████| 473/473 [02:36<00:00,  3.03it/s]


Epoch 1 Train loss: 154291.60813580613


Validation 1: 100%|██████████| 20/20 [00:06<00:00,  3.24it/s]


Epoch 1 Validation loss: 151203.84603174604


Training 2: 100%|██████████| 473/473 [02:57<00:00,  2.66it/s]


Epoch 2 Train loss: 152957.9131376244


Validation 2: 100%|██████████| 20/20 [00:06<00:00,  3.17it/s]


Epoch 2 Validation loss: 150539.5003968254


Training 3: 100%|██████████| 473/473 [02:38<00:00,  2.99it/s]


Epoch 3 Train loss: 152242.55656385334


Validation 3: 100%|██████████| 20/20 [00:05<00:00,  3.49it/s]


Epoch 3 Validation loss: 149682.90158730157


Training 4: 100%|██████████| 473/473 [02:34<00:00,  3.06it/s]


Epoch 4 Train loss: 151644.26833944925


Validation 4: 100%|██████████| 20/20 [00:05<00:00,  3.55it/s]


Epoch 4 Validation loss: 149307.65238095238


Training 5: 100%|██████████| 473/473 [02:39<00:00,  2.96it/s]


Epoch 5 Train loss: 151387.5874574366


Validation 5: 100%|██████████| 20/20 [00:06<00:00,  3.19it/s]


Epoch 5 Validation loss: 149059.70793650794


Training 6: 100%|██████████| 473/473 [02:36<00:00,  3.02it/s]


Epoch 6 Train loss: 151188.0321002347


Validation 6: 100%|██████████| 20/20 [00:05<00:00,  3.40it/s]


Epoch 6 Validation loss: 148958.84761904762


Training 7: 100%|██████████| 473/473 [02:31<00:00,  3.11it/s]


Epoch 7 Train loss: 151062.92650996728


Validation 7: 100%|██████████| 20/20 [00:06<00:00,  3.12it/s]


Epoch 7 Validation loss: 148793.41666666666


Training 8: 100%|██████████| 473/473 [02:27<00:00,  3.21it/s]


Epoch 8 Train loss: 150954.85510264803


Validation 8: 100%|██████████| 20/20 [00:05<00:00,  3.71it/s]


Epoch 8 Validation loss: 148759.80992063493


Training 9: 100%|██████████| 473/473 [02:25<00:00,  3.26it/s]


Epoch 9 Train loss: 150817.16124499982


Validation 9: 100%|██████████| 20/20 [00:05<00:00,  3.72it/s]


Epoch 9 Validation loss: 148514.39166666666


Training 10: 100%|██████████| 473/473 [02:26<00:00,  3.24it/s]


Epoch 10 Train loss: 150678.44024595854


Validation 10: 100%|██████████| 20/20 [00:05<00:00,  3.84it/s]


Epoch 10 Validation loss: 148379.70793650794


Training 11: 100%|██████████| 473/473 [02:31<00:00,  3.13it/s]


Epoch 11 Train loss: 150555.93851036398


Validation 11: 100%|██████████| 20/20 [00:06<00:00,  3.21it/s]


Epoch 11 Validation loss: 148234.16666666666


Training 12: 100%|██████████| 473/473 [02:24<00:00,  3.28it/s]


Epoch 12 Train loss: 150443.04484445768


Validation 12: 100%|██████████| 20/20 [00:05<00:00,  3.71it/s]


Epoch 12 Validation loss: 148140.58253968254


Training 13: 100%|██████████| 473/473 [02:32<00:00,  3.10it/s]


Epoch 13 Train loss: 150333.51704188567


Validation 13: 100%|██████████| 20/20 [00:06<00:00,  3.33it/s]


Epoch 13 Validation loss: 147996.75952380954


Training 14: 100%|██████████| 473/473 [02:27<00:00,  3.20it/s]


Epoch 14 Train loss: 150205.47771827166


Validation 14: 100%|██████████| 20/20 [00:05<00:00,  3.70it/s]


Epoch 14 Validation loss: 147903.85119047618


Training 15: 100%|██████████| 473/473 [02:30<00:00,  3.13it/s]


Epoch 15 Train loss: 150094.91133591192


Validation 15: 100%|██████████| 20/20 [00:06<00:00,  3.31it/s]


Epoch 15 Validation loss: 147768.42817460318


Training 16: 100%|██████████| 473/473 [02:25<00:00,  3.24it/s]


Epoch 16 Train loss: 149969.88313663262


Validation 16: 100%|██████████| 20/20 [00:05<00:00,  3.57it/s]


Epoch 16 Validation loss: 147619.53492063493


Training 17: 100%|██████████| 473/473 [02:41<00:00,  2.93it/s]


Epoch 17 Train loss: 149840.20149426427


Validation 17: 100%|██████████| 20/20 [00:05<00:00,  3.42it/s]


Epoch 17 Validation loss: 147499.76785714287


Training 18: 100%|██████████| 473/473 [02:26<00:00,  3.23it/s]


Epoch 18 Train loss: 149699.33006049786


Validation 18: 100%|██████████| 20/20 [00:05<00:00,  3.61it/s]


Epoch 18 Validation loss: 147362.8123015873


Training 19: 100%|██████████| 473/473 [02:34<00:00,  3.06it/s]


Epoch 19 Train loss: 149549.47411484676


Validation 19: 100%|██████████| 20/20 [00:05<00:00,  3.72it/s]


Epoch 19 Validation loss: 147223.00793650793


Training 20: 100%|██████████| 473/473 [02:17<00:00,  3.43it/s]


Epoch 20 Train loss: 149302.38459122615


Validation 20: 100%|██████████| 20/20 [00:05<00:00,  3.76it/s]


Epoch 20 Validation loss: 146918.6634920635


Training 21: 100%|██████████| 473/473 [02:17<00:00,  3.43it/s]


Epoch 21 Train loss: 148964.05542331978


Validation 21: 100%|██████████| 20/20 [00:05<00:00,  3.50it/s]


Epoch 21 Validation loss: 146779.73015873015


Training 22:  87%|████████▋ | 412/473 [02:12<00:19,  3.11it/s]


KeyboardInterrupt: 

In [None]:
from sklearn.cluster import KMeans, DBSCAN
from sklearn.mixture import GaussianMixture


def perform_clustering(features, method="kmeans", num_clusters=5):
    """Cluster ``features`` and return one integer label per sample.

    Args:
        features: Array-like of shape (n_samples, n_features).
        method: ``"kmeans"``, ``"gmm"`` or ``"dbscan"``.
        num_clusters: Number of clusters/components. Ignored for ``"dbscan"``,
            which uses the fixed ``eps=0.5, min_samples=5`` settings below.

    Returns:
        Array of cluster labels, one per row of ``features``.

    Raises:
        ValueError: If ``method`` is not one of the supported names.
    """
    if method == "kmeans":
        return KMeans(n_clusters=num_clusters, random_state=42).fit(features).labels_
    if method == "gmm":
        # GaussianMixture does not expose `labels_`; hard assignments have to
        # be obtained from predict() on the fitted model.
        mixture = GaussianMixture(n_components=num_clusters, random_state=42)
        return mixture.fit(features).predict(features)
    if method == "dbscan":
        return DBSCAN(eps=0.5, min_samples=5).fit(features).labels_
    raise ValueError("Unsupported clustering method")


# NOTE(review): `labelled_features` is not defined in any cell visible in this
# notebook, and this cell has no execution count; it will raise NameError on a
# fresh kernel unless the features are computed first. TODO confirm source.
labels = perform_clustering(labelled_features, method="kmeans", num_clusters=2)

In [16]:
class Classifier(nn.Module):
    """Binary classifier head on top of a fixed VAE encoder.

    The VAE's latent mean is computed under ``torch.no_grad()`` and passed to
    a small MLP that ends in a Sigmoid, so the output is a probability in
    [0, 1] of shape (batch, 1).

    Args:
        vae: Module exposing ``encode(x) -> (mu, log_var)``.
        device: Device the VAE, the head and the input images are moved to.
        input_dim: Size of the latent mean produced by ``vae.encode``.
        dropout: Dropout probability used between the hidden layers.
    """

    def __init__(self, vae, device, input_dim=128, dropout=0.3):
        super(Classifier, self).__init__()
        self.vae = vae.to(device)
        self.vae.eval()
        self.device = device
        self.fc = nn.Sequential(
            nn.Linear(input_dim, 256),
            nn.ReLU(),
            nn.Dropout(dropout),
            nn.Linear(256, 128),
            nn.ReLU(),
            nn.Dropout(dropout),
            nn.Linear(128, 1),
            nn.Sigmoid(),
        ).to(device)

    def train(self, mode=True):
        """Switch the head's mode while keeping the VAE encoder in eval mode.

        ``nn.Module.train`` recurses into every submodule, which would undo the
        ``self.vae.eval()`` call made in ``__init__``. The encoder is only used
        as a fixed feature extractor, so it is put back into eval mode here.
        """
        super().train(mode)
        self.vae.eval()
        return self

    def forward(self, x):
        """Return probabilities for a batch mapping with an ``"image"`` entry."""
        # No gradients flow into the VAE: only the MLP head is trained.
        with torch.no_grad():
            x = x["image"].to(self.device)
            mu, _ = self.vae.encode(x)
        return self.fc(mu)

In [17]:
from sklearn.metrics import f1_score


def train_classifier(model, train_loader, optimizer, criterion, device):
    """Train ``model`` for one pass over ``train_loader``.

    Args:
        model: Callable taking the whole batch mapping and returning one
            probability per sample.
        train_loader: Sized iterable of batch mappings with a ``"label"`` entry.
        optimizer: Optimizer stepping the trainable parameters.
        criterion: Loss taking ``(output, target)``.
        device: Device the labels are moved to.

    Returns:
        Tuple ``(mean loss per batch, accuracy in percent)``.
    """
    model.train()
    total_loss = 0
    correct = 0
    total = 0

    for batch in train_loader:
        target = batch["label"].float().to(device)

        optimizer.zero_grad()
        # Flatten to 1-D explicitly. A bare squeeze() would collapse a batch of
        # size one to a 0-dim tensor, which no longer matches the (1,) target
        # and makes the loss raise on the final partial batch.
        output = model(batch).reshape(-1)

        loss = criterion(output, target)
        loss.backward()
        optimizer.step()

        total_loss += loss.item()
        # Threshold the probabilities at 0.5 to get hard predictions.
        predicted = (output > 0.5).float()
        correct += (predicted == target).sum().item()
        total += target.size(0)

    accuracy = 100.0 * correct / total
    return total_loss / len(train_loader), accuracy


def validate_classifier(model, val_loader, criterion, device):
    """Evaluate ``model`` on ``val_loader`` and print loss, accuracy and F1.

    Args:
        model: Callable taking the whole batch mapping and returning one
            probability per sample.
        val_loader: Sized iterable of batch mappings with a ``"label"`` entry.
        criterion: Loss taking ``(output, target)``.
        device: Device the labels are moved to.

    Returns:
        Mean loss per batch.
    """
    model.eval()
    total_loss = 0
    correct = 0
    total = 0
    all_preds = []
    all_targets = []
    with torch.no_grad():
        for batch in val_loader:
            target = batch["label"].float().to(device)
            # Flatten to 1-D explicitly. A bare squeeze() turns a batch of size
            # one into a 0-dim tensor, which mismatches the (1,) target and
            # also cannot be passed to np.concatenate below.
            output = model(batch).reshape(-1)
            loss = criterion(output, target)

            total_loss += loss.item()
            # Threshold the probabilities at 0.5 to get hard predictions.
            predicted = (output > 0.5).float()
            correct += (predicted == target).sum().item()
            total += target.size(0)
            all_preds.append(predicted.cpu().numpy())
            all_targets.append(target.cpu().numpy())

        f1 = f1_score(np.concatenate(all_targets), np.concatenate(all_preds))
        print(f"Validation Loss: {total_loss / len(val_loader)}")
        print(f"Validation Accuracy: {100. * correct / total}")
        print(f"Validation F1 Score: {f1}")
        return total_loss / len(val_loader)

In [18]:
# Rebuild the VAE and restore weights from a saved checkpoint.
# NOTE(review): no visible cell writes this checkpoint; confirm it corresponds
# to the ConvVAE(latent_dim=256) architecture defined above.
# NOTE(review): torch.load unpickles the file; only load trusted checkpoints.
model = ConvVAE(latent_dim=256).to(device)
model.load_state_dict(
    torch.load(
        "/data/iivanova-23/ROB313/models/vae_trial/vae_model.pth",
        # Remap stored tensors onto the active device so a checkpoint saved on
        # GPU can still be loaded on a CPU-only machine.
        map_location=device,
    )
)

# Classifier head on top of the restored encoder; input_dim must match the
# VAE's latent size.
classifier = Classifier(model, device, input_dim=256)
optimizer_classifier = optim.Adam(classifier.parameters(), lr=1e-4)
criterion_classifier = nn.BCELoss()
num_epochs = 30
for epoch in range(num_epochs):
    train_loss, train_accuracy = train_classifier(
        classifier,
        train_loader_labeled,
        optimizer_classifier,
        criterion_classifier,
        device,
    )
    print(f"Epoch {epoch} Train loss: {train_loss}, Train accuracy: {train_accuracy}")

Epoch 0 Train loss: 0.5778568349307096, Train accuracy: 75.91269841269842
Epoch 1 Train loss: 0.40844977807395066, Train accuracy: 83.84920634920636
Epoch 2 Train loss: 0.37917295815069463, Train accuracy: 84.04761904761905
Epoch 3 Train loss: 0.37467862684515457, Train accuracy: 84.22619047619048
Epoch 4 Train loss: 0.3694338585379757, Train accuracy: 84.5436507936508
Epoch 5 Train loss: 0.36611312175098853, Train accuracy: 84.3452380952381
Epoch 6 Train loss: 0.3631080530489547, Train accuracy: 84.94047619047619
Epoch 7 Train loss: 0.35766323401203637, Train accuracy: 85.15873015873017
Epoch 8 Train loss: 0.35432506135747405, Train accuracy: 85.53571428571429
Epoch 9 Train loss: 0.35469776316534113, Train accuracy: 85.43650793650794
Epoch 10 Train loss: 0.35036925160432164, Train accuracy: 85.91269841269842
Epoch 11 Train loss: 0.35057720655127417, Train accuracy: 85.63492063492063
Epoch 12 Train loss: 0.34689384491383274, Train accuracy: 86.03174603174604
Epoch 13 Train loss: 0.3432

In [20]:
# Evaluate the trained classifier on the validation loader. The call prints
# loss, accuracy and F1, and its return value (mean batch loss) is displayed
# as the cell output.
validate_classifier(classifier, val_loader, nn.BCELoss(), device)

Validation Loss: 0.3031256183981895
Validation Accuracy: 88.41269841269842
Validation F1 Score: 0.896011396011396


0.3031256183981895

In [19]:
# Evaluate the trained classifier on the test loader. The printed metrics are
# still prefixed with "Validation" because validate_classifier hard-codes that
# label; the numbers below refer to the test split.
validate_classifier(classifier, test_loader, nn.BCELoss(), device)

Validation Loss: 0.33543640615964176
Validation Accuracy: 86.01365296078743
Validation F1 Score: 0.8778932778932779


0.33543640615964176