In [1]:
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd

import copy
import os
import subprocess

import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torchvision import transforms

from torch.utils.data import DataLoader, Dataset

In [2]:
from utils.datasets import WildfireDataset

transform = transforms.Compose([
    transforms.ToPILImage(),
    transforms.Resize(256),
    transforms.CenterCrop(224),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
])

train_dataset = WildfireDataset('/data/amathur-23/ROB313', split='train', labeled=False, transforms=transform)
data_train_labeled = WildfireDataset('/data/amathur-23/ROB313', split='train', labeled=True, transforms=transform)
val_dataset = WildfireDataset('/data/amathur-23/ROB313', split='val', transforms=transform)
test_dataset = WildfireDataset('/data/amathur-23/ROB313', split='test', transforms=transform)

Loading meta file: /data/amathur-23/ROB313/train_unlabeled.csv
Loading meta file: /data/amathur-23/ROB313/train.csv
Loading meta file: /data/amathur-23/ROB313/val.csv
Loading meta file: /data/amathur-23/ROB313/test.csv


In [3]:

batch_size = 32
train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True, num_workers=4)
train_loader_labeled = DataLoader(data_train_labeled, batch_size=batch_size, shuffle=True, num_workers=4)
val_loader = DataLoader(val_dataset, batch_size=batch_size, shuffle=True, num_workers=4)
test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False, num_workers=4)

In [None]:
from torchvision import models

class ResNet_VAE(nn.Module):
    def __init__(self, fc_hidden = [1024, 768, 512], drop_p=0.3, CNN_embed_dim=256):
        super(ResNet_VAE, self).__init__()

        self.fc_hidden_dims = fc_hidden
        # CNN architechtures
        self.ch1, self.ch2, self.ch3, self.ch4 = 16, 32, 64, 128
        self.k1, self.k2, self.k3, self.k4 = (5, 5), (3, 3), (3, 3), (3, 3)      # 2d kernal size
        self.s1, self.s2, self.s3, self.s4 = (2, 2), (2, 2), (2, 2), (2, 2)      # 2d strides
        self.pd1, self.pd2, self.pd3, self.pd4 = (0, 0), (0, 0), (0, 0), (0, 0)  # 2d padding

        # encoding components
        resnet = models.resnet152(pretrained=True)
        modules = list(resnet.children())[:-1]      # delete the last fc layer.
        self.resnet = nn.Sequential(*modules)
        self.encoder = nn.Sequential(
            nn.Linear(resnet.fc.in_features, self.fc_hidden1),
            nn.BatchNorm1d(self.fc_hidden1, momentum=0.01),
            nn.ReLU(inplace=True),
            nn.Linear(self.fc_hidden1, self.fc_hidden2),
            nn.BatchNorm1d(self.fc_hidden2, momentum=0.01),
            nn.ReLU(inplace=True),
            
        )
        self.fc1 = nn.Linear(resnet.fc.in_features, self.fc_hidden1)
        self.bn1 = nn.BatchNorm1d(self.fc_hidden1, momentum=0.01)
        self.fc2 = nn.Linear(self.fc_hidden1, self.fc_hidden2)
        self.bn2 = nn.BatchNorm1d(self.fc_hidden2, momentum=0.01)
        # Latent vectors mu and sigma
        self.fc3_mu = nn.Linear(self.fc_hidden2, self.CNN_embed_dim)      # output = CNN embedding latent variables
        self.fc3_logvar = nn.Linear(self.fc_hidden2, self.CNN_embed_dim)  # output = CNN embedding latent variables

        # Sampling vector
        self.fc4 = nn.Linear(self.CNN_embed_dim, self.fc_hidden2)
        self.fc_bn4 = nn.BatchNorm1d(self.fc_hidden2)
        self.fc5 = nn.Linear(self.fc_hidden2, 64 * 4 * 4)
        self.fc_bn5 = nn.BatchNorm1d(64 * 4 * 4)
        self.relu = nn.ReLU(inplace=True)

        # Decoder
        self.convTrans6 = nn.Sequential(
            nn.ConvTranspose2d(in_channels=64, out_channels=32, kernel_size=self.k4, stride=self.s4,
                               padding=self.pd4),
            nn.BatchNorm2d(32, momentum=0.01),
            nn.ReLU(inplace=True),
        )
        self.convTrans7 = nn.Sequential(
            nn.ConvTranspose2d(in_channels=32, out_channels=8, kernel_size=self.k3, stride=self.s3,
                               padding=self.pd3),
            nn.BatchNorm2d(8, momentum=0.01),
            nn.ReLU(inplace=True),
        )

        self.convTrans8 = nn.Sequential(
            nn.ConvTranspose2d(in_channels=8, out_channels=3, kernel_size=self.k2, stride=self.s2,
                               padding=self.pd2),
            nn.BatchNorm2d(3, momentum=0.01),
            nn.Sigmoid()    # y = (y1, y2, y3) \in [0 ,1]^3
        )


    def encode(self, x):
        x = self.resnet(x)  # ResNet
        x = x.view(x.size(0), -1)  # flatten output of conv

        # FC layers
        x = self.bn1(self.fc1(x))
        x = self.relu(x)
        x = self.bn2(self.fc2(x))
        x = self.relu(x)
        # x = F.dropout(x, p=self.drop_p, training=self.training)
        mu, logvar = self.fc3_mu(x), self.fc3_logvar(x)
        return mu, logvar

    def reparameterize(self, mu, log_var):
        std = torch.exp(0.5 * log_var)
        eps = torch.randn_like(std)
        return mu + eps * std

    def decode(self, z):
        x = self.relu(self.fc_bn4(self.fc4(z)))
        x = self.relu(self.fc_bn5(self.fc5(x))).view(-1, 64, 4, 4)
        x = self.convTrans6(x)
        x = self.convTrans7(x)
        x = self.convTrans8(x)
        x = F.interpolate(x, size=(224, 224), mode='bilinear')
        return x

    def forward(self, x):
        mu, logvar = self.encode(x)
        z = self.reparameterize(mu, logvar)
        x_reconst = self.decode(z)

        return x_reconst, mu, logvar

In [23]:
class VectorQuantizer(nn.Module):
    """
    Reference:
    https://github.com/AntixK/PyTorch-VAE/blob/master/models/vq_vae.py
    """
    def __init__(self, num_embeddings: int, embedding_dim: int, beta: float = 0.25):
        super(VectorQuantizer, self).__init__()
        self.K = num_embeddings
        self.D = embedding_dim
        self.beta = beta
        
        self.embedding = nn.Embedding(self.K, self.D)
        self.embedding.weight.data.uniform_(-1 / self.K, 1 / self.K)
        
    def forward(self, latents: torch.Tensor) -> torch.Tensor:
        latents = latents.permute(0, 2, 3, 1).contiguous()  # [B x D x H x W] -> [B x H x W x D]
        latents_shape = latents.shape
        flat_latents = latents.view(-1, self.D)  # [BHW x D]

        # Compute L2 distance between latents and embedding weights
        dist = torch.sum(flat_latents ** 2, dim=1, keepdim=True) + \
               torch.sum(self.embedding.weight ** 2, dim=1) - \
               2 * torch.matmul(flat_latents, self.embedding.weight.t())  # [BHW x K]

        # Get the encoding that has the min distance
        encoding_inds = torch.argmin(dist, dim=1).unsqueeze(1)  # [BHW, 1]

        # Convert to one-hot encodings
        device = latents.device
        encoding_one_hot = torch.zeros(encoding_inds.size(0), self.K, device=device)
        encoding_one_hot.scatter_(1, encoding_inds, 1)  # [BHW x K]

        # Quantize the latents
        quantized_latents = torch.matmul(encoding_one_hot, self.embedding.weight)  # [BHW, D]
        quantized_latents = quantized_latents.view(latents_shape)  # [B x H x W x D]

        # Compute the VQ Losses
        commitment_loss = F.mse_loss(quantized_latents.detach(), latents)
        embedding_loss = F.mse_loss(quantized_latents, latents.detach())

        vq_loss = commitment_loss * self.beta + embedding_loss

        # Add the residue back to the latents
        quantized_latents = latents + (quantized_latents - latents).detach()

        return quantized_latents.permute(0, 3, 1, 2).contiguous(), vq_loss  # [B x D x H x W]

In [25]:
class ResNet_VQVAE(nn.Module):
    def __init__(self, embedding_dim=256, num_embeddings=128, beta=0.25):
        super(ResNet_VQVAE, self).__init__()

        self.embedding_dim, self.num_embeddings, self.beta = embedding_dim, num_embeddings, beta

        # CNN architechtures
        self.ch1, self.ch2, self.ch3, self.ch4 = 16, 32, 64, 128
        self.k1, self.k2, self.k3, self.k4 = (5, 5), (3, 3), (3, 3), (3, 3)      # 2d kernal size
        self.s1, self.s2, self.s3, self.s4 = (2, 2), (2, 2), (2, 2), (2, 2)      # 2d strides
        self.pd1, self.pd2, self.pd3, self.pd4 = (0, 0), (0, 0), (0, 0), (0, 0)  # 2d padding

        # encoding components
        resnet = models.resnet101(pretrained=True)
        modules = list(resnet.children())[:-2]      # delete the last fc and avgpool layers.
        self.resnet = nn.Sequential(*modules)
        
        self.conv1 = nn.Conv2d(resnet.fc.in_features, self.embedding_dim, kernel_size=1, stride=1)
        #self.fc1 = nn.Linear(resnet.fc.in_features, self.fc_hidden1)
        #self.bn1 = nn.BatchNorm1d(self.fc_hidden1, momentum=0.01)
        #self.fc2 = nn.Linear(self.fc_hidden1, self.fc_hidden2)
        #self.bn2 = nn.BatchNorm1d(self.fc_hidden2, momentum=0.01)
        # Latent vectors mu and sigma
        #self.fc3_mu = nn.Linear(self.fc_hidden2, self.CNN_embed_dim)      # output = CNN embedding latent variables
        #self.fc3_logvar = nn.Linear(self.fc_hidden2, self.CNN_embed_dim)  # output = CNN embedding latent variables
        
        self.vq_layer = VectorQuantizer(self.num_embeddings, self.embedding_dim, self.beta)

        # Sampling vector
        #self.fc4 = nn.Linear(self.CNN_embed_dim, self.fc_hidden2)
        #self.fc_bn4 = nn.BatchNorm1d(self.fc_hidden2)
        #self.fc5 = nn.Linear(self.fc_hidden2, 64 * 4 * 4)
        #self.fc_bn5 = nn.BatchNorm1d(64 * 4 * 4)
        #self.relu = nn.ReLU(inplace=True)

        # Decoder
        self.conv2 = nn.Conv2d(self.embedding_dim, 64, kernel_size=3, stride=1, padding=1)
        
        self.convTrans6 = nn.Sequential(
            nn.ConvTranspose2d(in_channels=64, out_channels=32, kernel_size=self.k4, stride=self.s4,
                               padding=self.pd4),
            nn.BatchNorm2d(32, momentum=0.01),
            nn.ReLU(inplace=True),
        )
        self.convTrans7 = nn.Sequential(
            nn.ConvTranspose2d(in_channels=32, out_channels=8, kernel_size=self.k3, stride=self.s3,
                               padding=self.pd3),
            nn.BatchNorm2d(8, momentum=0.01),
            nn.ReLU(inplace=True),
        )

        self.convTrans8 = nn.Sequential(
            nn.ConvTranspose2d(in_channels=8, out_channels=3, kernel_size=self.k2, stride=self.s2,
                               padding=self.pd2),
            nn.BatchNorm2d(3, momentum=0.01),
            nn.Sigmoid()    # y = (y1, y2, y3) \in [0 ,1]^3
        )


    def encode(self, x):
        x = self.resnet(x)  # ResNet
        x = self.conv1(x)
        x = F.relu(x)
        #x = x.view(x.size(0), -1)  # flatten output of conv

        # FC layers
        #x = self.bn1(self.fc1(x))
        #x = self.relu(x)
        #x = self.bn2(self.fc2(x))
        #x = self.relu(x)
        # x = F.dropout(x, p=self.drop_p, training=self.training)
        #mu, logvar = self.fc3_mu(x), self.fc3_logvar(x)
        return x


    def decode(self, z):
        x = self.conv2(z)
        #x = self.relu(self.fc_bn4(self.fc4(z)))
        #x = self.relu(self.fc_bn5(self.fc5(x))).view(-1, 64, 4, 4)
        x = self.convTrans6(x)
        x = self.convTrans7(x)
        x = self.convTrans8(x)
        x = F.interpolate(x, size=(224, 224), mode='bilinear')
        return x

    def forward(self, x):
        latents = self.encode(x)
        quantized_inputs, vq_loss = self.vq_layer(latents)
        #z = self.reparameterize(mu, logvar)
        x_reconst = self.decode(quantized_inputs)

        return x_reconst, quantized_inputs, vq_loss
    
    def loss_function(self, *args, **kwargs) -> dict:
        reconst = args[0]
        original = args[1]
        vq_loss = args[2]
        
        recon_loss = F.mse_loss(reconst, original)
        loss = recon_loss + vq_loss
        
        return loss
    
    def generate(self, x: torch.Tensor, **kwargs) -> torch.Tensor:
        """
        Given an input image x, returns the reconstructed image
        :param x: (Tensor) [B x C x H x W]
        :return: (Tensor) [B x C x H x W]
        """

        return self.forward(x)[0]

In [None]:
class ConvVAE(nn.Module):
    def __init__(self, latent_dim=128):
        super(ConvVAE, self).__init__()
        
        # Encoder
        # 3x224x224
        self.encoder = nn.Sequential(
            nn.Conv2d(3, 32, 4, stride=2, padding=1), # 224 -> 112
            nn.ReLU(),
            nn.Conv2d(32, 64, 4, stride=2, padding=1), # 112 -> 56
            nn.ReLU(),
            nn.Conv2d(64, 128, 4, stride=2, padding=1), # 56 -> 28
            nn.ReLU(),
            nn.Conv2d(128, 256, 4, stride=2, padding=1),  # 28 -> 14
            nn.ReLU(),
            nn.Conv2d(256, 256, 4, stride=2, padding=1),  # 14 -> 7
            nn.ReLU()
        )
        
        self.encoder_output_dim = (256 * 7 * 7)
        self.fc_mu = nn.Linear(self.encoder_output_dim, latent_dim)
        self.fc_var = nn.Linear(self.encoder_output_dim, latent_dim)
        
        # Decoder
        self.decoder_input = nn.Sequential(
            nn.Linear(latent_dim, 256),
            nn.Linear(256, self.encoder_output_dim)
        ) 
        
        self.decoder = nn.Sequential(
            nn.ConvTranspose2d(256, 256, 4, stride=2, padding=1),
            nn.ReLU(),
            nn.ConvTranspose2d(256, 128, 4, stride=2, padding=1),
            nn.ReLU(),
            nn.ConvTranspose2d(128, 64, 4, stride=2, padding=1),
            nn.ReLU(),
            nn.ConvTranspose2d(64, 32, 4, stride=2, padding=1),
            nn.ReLU(),
            nn.ConvTranspose2d(32, 3, 4, stride=2, padding=1),
            nn.Sigmoid()
        )
        
    def encode(self, x):
        batch_size = x.size(0)
        x = self.encoder(x)
        x = x.view(batch_size,-1)
        mu = self.fc_mu(x)
        log_var = self.fc_var(x)
        return mu, log_var
    
    def reparameterize(self, mu, log_var):
        std = torch.exp(0.5 * log_var)
        eps = torch.randn_like(std)
        return mu + eps * std
    
    def decode(self, z):
        x = self.decoder_input(z)
        x = x.view(x.size(0), 256, 7, 7) 
        x = self.decoder(x)
        return x
    
    def forward(self, x):
        mu, log_var = self.encode(x)
        z = self.reparameterize(mu, log_var)
        z = self.decode(z)
        return z, mu, log_var

In [9]:
class BetaVAELoss(nn.Module):
    def __init__(self, beta=1):
        super(BetaVAELoss, self).__init__()
        self.beta = beta
        
    def forward(self, x, recon_x, mu, logvar):
        recon_loss = F.mse_loss(recon_x, x, reduction='sum')
        kl_loss = -0.5 * torch.sum(1 + logvar - mu.pow(2) - logvar.exp())
        return recon_loss + self.beta * kl_loss
    
criterion_vae = BetaVAELoss(beta=1)

In [33]:
from tqdm import tqdm
from torch.amp import autocast
def train(model, dataloader, optimizer, device, epoch):
    model.train()
    total_loss = 0
    for batch in tqdm(dataloader, f"Training {epoch}"):
        data = batch['image'].to(device)
        optimizer.zero_grad()
        with autocast(
            device_type="cuda", dtype=torch.float16
        ):
            x_reconst, quantized_inputs, vq_loss = model(data)
            loss = model.loss_function(x_reconst, data, vq_loss)
        # loss = criterion_vae(data, recon_batch, mu, logvar)
        loss.backward()
        optimizer.step()
        total_loss += loss.item()
    return total_loss / len(dataloader.dataset)

# Validation Function
def validate(model, dataloader, device, epoch):
    model.eval()
    total_loss = 0
    with torch.no_grad():
        for batch in tqdm(dataloader, f"Validation {epoch}"):
            data = batch['image'].to(device)
            # recon_batch, mu, logvar = model(data)
            # loss = criterion_vae(data, recon_batch, mu, logvar)
            with autocast(
                device_type="cuda", dtype=torch.float16
            ):
                x_reconst, quantized_inputs, vq_loss = model(data)
                loss = model.loss_function(x_reconst, data, vq_loss)
            total_loss += loss.item()
    return total_loss / len(dataloader.dataset)

In [3]:
import os 
os.environ['CUDA_VISIBLE_DEVICES'] = '2'

In [35]:
latent_dim = 256
learning_rate = 1e-5
num_epochs = 30

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model = ResNet_VQVAE().to(device)
optimizer = optim.Adam(model.parameters(), lr=learning_rate)
train_losses = []
val_losses = []
for epoch in range(num_epochs):
    train_loss = train(model, train_loader, optimizer, device, epoch)
    print(f"Epoch {epoch} Train loss: {train_loss}")
    # val_loss = validate(model, val_loader, device, epoch)
    # print(f"Epoch {epoch} Validation loss: {val_loss}")

Training 0: 100%|██████████| 946/946 [01:31<00:00, 10.39it/s]


Epoch 0 Train loss: 0.8072221898028443


Training 1: 100%|██████████| 946/946 [01:28<00:00, 10.74it/s]


Epoch 1 Train loss: 1.8476987839564831


Training 2: 100%|██████████| 946/946 [01:26<00:00, 10.94it/s]


Epoch 2 Train loss: 3.6075933715513613


Training 3: 100%|██████████| 946/946 [01:25<00:00, 11.10it/s]


Epoch 3 Train loss: 3.387631249193704


Training 4: 100%|██████████| 946/946 [01:26<00:00, 10.96it/s]


Epoch 4 Train loss: 5.583571244131069


Training 5: 100%|██████████| 946/946 [01:26<00:00, 10.93it/s]


Epoch 5 Train loss: 9.04528380375097


Training 6: 100%|██████████| 946/946 [01:20<00:00, 11.68it/s]


Epoch 6 Train loss: 8.505252191317275


Training 7: 100%|██████████| 946/946 [01:25<00:00, 11.08it/s]


Epoch 7 Train loss: 15.600209312409959


Training 8: 100%|██████████| 946/946 [01:25<00:00, 11.01it/s]


Epoch 8 Train loss: 22.887083659483707


Training 9: 100%|██████████| 946/946 [01:25<00:00, 11.09it/s]


Epoch 9 Train loss: 43.67478837998131


Training 10: 100%|██████████| 946/946 [01:25<00:00, 11.02it/s]


Epoch 10 Train loss: 90.22162342030548


Training 11: 100%|██████████| 946/946 [01:21<00:00, 11.54it/s]


Epoch 11 Train loss: 22.726670110051096


Training 12: 100%|██████████| 946/946 [01:25<00:00, 11.12it/s]


Epoch 12 Train loss: 17.413088233708397


Training 13: 100%|██████████| 946/946 [01:24<00:00, 11.23it/s]


Epoch 13 Train loss: 21.77140636848274


Training 14: 100%|██████████| 946/946 [01:25<00:00, 11.05it/s]


Epoch 14 Train loss: 28.544941283812456


Training 15: 100%|██████████| 946/946 [01:26<00:00, 10.91it/s]


Epoch 15 Train loss: 38.09007130869125


Training 16: 100%|██████████| 946/946 [01:25<00:00, 11.05it/s]


Epoch 16 Train loss: 39.05210080711604


Training 17: 100%|██████████| 946/946 [01:24<00:00, 11.21it/s]


Epoch 17 Train loss: 36.66661428769146


Training 18: 100%|██████████| 946/946 [01:26<00:00, 10.88it/s]


Epoch 18 Train loss: 42.94013925143961


Training 19: 100%|██████████| 946/946 [01:27<00:00, 10.86it/s]


Epoch 19 Train loss: 45.71577218897603


Training 20: 100%|██████████| 946/946 [01:23<00:00, 11.31it/s]


Epoch 20 Train loss: 33.4307962740633


Training 21: 100%|██████████| 946/946 [01:23<00:00, 11.28it/s]


Epoch 21 Train loss: 31.440670141681466


Training 22: 100%|██████████| 946/946 [01:25<00:00, 11.12it/s]


Epoch 22 Train loss: 27.19023306910714


Training 23: 100%|██████████| 946/946 [01:26<00:00, 10.93it/s]


Epoch 23 Train loss: 70.691565291689


Training 24: 100%|██████████| 946/946 [01:27<00:00, 10.79it/s]


Epoch 24 Train loss: 42.75973370054024


Training 25: 100%|██████████| 946/946 [01:21<00:00, 11.60it/s]


Epoch 25 Train loss: 7.412367809925826


Training 26: 100%|██████████| 946/946 [01:20<00:00, 11.69it/s]


Epoch 26 Train loss: 7.638884422077519


Training 27: 100%|██████████| 946/946 [01:20<00:00, 11.74it/s]


Epoch 27 Train loss: 15.202482966108674


Training 28: 100%|██████████| 946/946 [01:20<00:00, 11.77it/s]


Epoch 28 Train loss: 35.23372449505691


Training 29: 100%|██████████| 946/946 [01:20<00:00, 11.71it/s]

Epoch 29 Train loss: 46.960548379440226





In [36]:
torch.save(model.state_dict(), '/data/iivanova-23/ROB313/models/ResNet_VQVAE.pt')

In [8]:
from sklearn.cluster import KMeans, DBSCAN
from sklearn.mixture import GaussianMixture
def perform_clustering(features, method="kmeans", num_clusters=2):
    if method == "kmeans":
        clustering = KMeans(n_clusters=num_clusters, random_state=42).fit(features)
    elif method == "gmm":
        clustering = GaussianMixture(n_components=num_clusters, random_state=42).fit(features)
    elif method == "dbscan":
        clustering = DBSCAN(eps=0.5, min_samples=5).fit(features)
    else:
        raise ValueError("Unsupported clustering method")
    return clustering.labels_


# labels = perform_clustering(labelled_features, method="kmeans", num_clusters=2)

In [42]:
class ClassifierFeatures(nn.Module):
    def __init__(self, vae, device, input_dim=256, dropout=0.1):
        super(ClassifierFeatures, self).__init__()
        self.vae = vae.to(device)  
        self.vae.eval()  
        self.device = device
        self.fc = nn.Sequential(
            nn.Linear(input_dim, 128),
            nn.ReLU(),
            nn.Dropout(dropout),
            nn.Linear(128, 64),
            nn.ReLU(),
            nn.Dropout(dropout),
            nn.Linear(64, 1),
            nn.Sigmoid()
        ).to(device)  

    def forward(self, x):
        with torch.no_grad():
            # mu, logvar = self.vae.encode(x)
            # x = self.vae.reparameterize(mu, logvar)
            latents = self.vae.encode(x)
            quantized_inputs, _ = self.vae.vq_layer(latents)
        return self.fc(quantized_inputs)

In [40]:
class ClassifierFeatures_Coords(nn.Module):
    def __init__(self, vae, device, input_dim=256, dropout=0.1):
        super(ClassifierFeatures_Coords, self).__init__()
        self.vae = vae
        self.vae.eval()  
        self.fc = nn.Sequential(
            nn.Linear(input_dim+2, 128),
            nn.ReLU(),
            nn.Dropout(dropout),
            nn.Linear(128, 64),
            nn.ReLU(),
            nn.Dropout(dropout),
            nn.Linear(64, 1),
            nn.Sigmoid()
        )
        
    def forward(self, x, coords):
        with torch.no_grad():
            # mu, logvar = self.vae.encode(x)
            # x = self.vae.reparameterize(mu, logvar)
            latents = self.vae.encode(x)
            x, _ = self.vae.vq_layer(latents)
        x = torch.cat((x, coords), dim=1)
        return self.fc(x)

In [44]:
from sklearn.metrics import f1_score
def train_classifier(model, train_loader, optimizer, criterion, device):
    model.train()
    total_loss = 0
    correct = 0
    total = 0

    for batch in tqdm(train_loader):
        target = batch['label'].float().to(device)  
        image = batch['image'].to(device)
        coords = batch['coords'].to(device)
        optimizer.zero_grad()
        output = model(image).squeeze()  
        
        loss = criterion(output, target)
        loss.backward()
        optimizer.step()

        total_loss += loss.item()
        predicted = (output > 0.5).float() 
        correct += (predicted == target).sum().item()
        total += target.size(0)

    accuracy = 100. * correct / total
    return total_loss / len(train_loader), accuracy

def validate_classifier(model, val_loader, criterion, device):
    model.eval()
    total_loss = 0
    correct = 0
    total = 0
    all_preds = []
    all_targets = []
    with torch.no_grad():
        for batch in tqdm(val_loader):
            target = batch['label'].float().to(device)
            image = batch['image'].to(device)
            coords = batch['coords'].to(device)
            output = model(image).squeeze()
            loss = criterion(output, target)

            total_loss += loss.item()
            predicted = (output > 0.5).float()  
            correct += (predicted == target).sum().item()
            total += target.size(0)
            all_preds.append(predicted.cpu().numpy())
            all_targets.append(target.cpu().numpy())
            
        f1 = f1_score(np.concatenate(all_targets), np.concatenate(all_preds))
        print(f'Validation Loss: {total_loss / len(val_loader)}')
        print(f'Validation Accuracy: {100. * correct / total}')
        print(f'Validation F1 Score: {f1}')
        return total_loss / len(val_loader)

In [46]:
def extract_features(model, dataloader, device, labels = True):
    model.eval()
    all_features = []
    all_targets = []
    with torch.no_grad():
        for batch in dataloader:
            data = batch['image'].to(device)
            target = batch['label'].float().to(device) if labels else None
            mu, _ = model.encode(data)
            all_features.append(mu.cpu().numpy())
            all_targets.append(target.cpu().numpy()) if labels else None
    if labels:
        return np.concatenate(all_features), np.concatenate(all_targets)
    return np.concatenate(all_features)

In [45]:
latent_dim = 256
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
# model = ResNet_VAE().to(device)
# model.load_state_dict(torch.load('/data/iivanova-23/ROB313/models/resnet_vae_1.pt'))
classifier = ClassifierFeatures(model, device, input_dim=latent_dim).to(device) 
optimizer = optim.Adam(classifier.parameters(), lr=1e-4)
criterion = nn.BCELoss()
num_epochs = 30
for epoch in range(num_epochs):
    train_loss = train_classifier(classifier, train_loader_labeled, optimizer,criterion, device)
    print(f"Epoch {epoch} Train loss: {train_loss}")
    val_loss = validate_classifier(classifier, val_loader, criterion, device)
    print(f"Epoch {epoch} Validation loss: {val_loss}")

# features = extract_features(model, train_loader, device, labels=False)
# labels = perform_clustering(features, method="kmeans", num_clusters=2)

  0%|          | 0/158 [00:00<?, ?it/s]

  0%|          | 0/158 [00:00<?, ?it/s]


RuntimeError: mat1 and mat2 shapes cannot be multiplied (57344x7 and 256x128)

In [None]:
validate_classifier(classifier, test_loader, nn.BCELoss(), device)

100%|██████████| 197/197 [00:11<00:00, 17.00it/s]

Validation Loss: 0.1585715621388352
Validation Accuracy: 93.47515478647405
Validation F1 Score: 0.9428928720300125





0.1585715621388352

## Cluster features from resnet
idea: cluster features from resnet50?

In [41]:
from utils.datasets import WildfireDataset

transform = transforms.Compose([
    transforms.ToPILImage(),
    transforms.RandomResizedCrop(350),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
])

train_dataset = WildfireDataset('/data/amathur-23/ROB313', split='train', labeled=False, transforms=transform)
data_train_labeled = WildfireDataset('/data/amathur-23/ROB313', split='train', labeled=True, transforms=transform)
val_dataset = WildfireDataset('/data/amathur-23/ROB313', split='val', transforms=transform)
test_dataset = WildfireDataset('/data/amathur-23/ROB313', split='test', transforms=transform)

train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True, num_workers=4)
train_loader_labeled = DataLoader(data_train_labeled, batch_size=batch_size, shuffle=True, num_workers=4)
val_loader = DataLoader(val_dataset, batch_size=batch_size, shuffle=True, num_workers=4)
test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False, num_workers=4)

Loading meta file: /data/amathur-23/ROB313/train_unlabeled.csv
Loading meta file: /data/amathur-23/ROB313/train.csv
Loading meta file: /data/amathur-23/ROB313/val.csv
Loading meta file: /data/amathur-23/ROB313/test.csv


In [None]:
from models import ResNetEncoder
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model = ResNetEncoder(out_features=128, pretrained=True, train_backbone=True).to(device)
optimizer = optim.Adam(model.parameters(), lr=1e-4)   
model = model.to(device)

from sklearn.cluster import KMeans, DBSCAN
from sklearn.mixture import GaussianMixture
def perform_clustering(features, method="kmeans", num_clusters=2):
    if method == "kmeans":
        clustering = KMeans(n_clusters=num_clusters, random_state=42).fit(features)
    elif method == "gmm":
        clustering = GaussianMixture(n_components=num_clusters, random_state=42).fit(features)
    elif method == "dbscan":
        clustering = DBSCAN(eps=0.5, min_samples=5).fit(features)
    else:
        raise ValueError("Unsupported clustering method")
    return clustering.labels_