In [1]:
import os
import torch
from torch.utils.data import Dataset, DataLoader
from torchvision import transforms
from PIL import Image
import random
from baselineCNN import *

In [2]:
class WildFire_Dataset(Dataset):
    def __init__(self, root_dir, mode='train', transform=None):
        """
        Args:
            root_dir (string): Directory with all the images.
            mode (string): One of 'train', 'valid', or 'test'.
            transform (callable, optional): Optional transform to be applied on a sample.
        """
        self.root_dir = root_dir
        self.mode = mode
        self.transform = transform
        
        # Paths for fire and no_fire images
        self.fire_path = os.path.join(root_dir, mode, 'wildfire')
        self.no_fire_path = os.path.join(root_dir, mode, 'nowildfire')
        
        # Get list of images
        self.fire_images = [os.path.join(self.fire_path, img) for img in os.listdir(self.fire_path)]
        self.no_fire_images = [os.path.join(self.no_fire_path, img) for img in os.listdir(self.no_fire_path)]
        
        # Combine all images
        self.all_images = self.fire_images + self.no_fire_images
        
        # If mode is not 'train', we need labels
        if self.mode != 'train':
            self.labels = [1] * len(self.fire_images) + [0] * len(self.no_fire_images)
        else:
            # For training, we don't have labels
            self.labels = None

    def __len__(self):
        return len(self.all_images)

    def __getitem__(self, idx):
        img_path = self.all_images[idx]
        image = Image.open(img_path).convert('RGB')
        
        if self.transform:
            image = self.transform(image)
        
        if self.mode == 'train':
            # For training, return only the image (no label)
            return image
        else:
            # For validation and testing, return the image and its label
            label = self.labels[idx]
            return image, label


In [5]:
# Define transformations (you can customize these)
transform = transforms.Compose([
    transforms.Resize((128, 128)),
    transforms.ToTensor(),
])

from pathlib import Path

dataset_path = '/home/ids/ihamdaoui-21/wildfire-prediction-dataset'
# Create datasets
train_dataset = WildFire_Dataset(root_dir=dataset_path, mode='train', transform=transform)
valid_dataset = WildFire_Dataset(root_dir=dataset_path, mode='valid', transform=transform)
test_dataset = WildFire_Dataset(root_dir=dataset_path, mode='test', transform=transform)

# Create DataLoaders
train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True, num_workers=4)
valid_loader = DataLoader(valid_dataset, batch_size=32, shuffle=False, num_workers=4)
test_loader = DataLoader(test_dataset, batch_size=32, shuffle=True, num_workers=4)

In [6]:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = baseline.to(device)
optimizer = torch.optim.Adam(baseline.parameters(), lr=1e-4)
criterion = nn.CrossEntropyLoss()

In [7]:
def train_model(model, train_loader, valid_loader, criterion, optimizer, num_epochs=10):
    best_val_accuracy = 0.0
    best_model_path = "best_model.pth"

    for epoch in range(num_epochs):
        model.train()
        running_loss = 0.0
        for images, labels in train_loader:
            images, labels = images.to(device), labels.to(device)
            
            optimizer.zero_grad()
            outputs = model(images)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()
            
            running_loss += loss.item()
        
        # Validation
        model.eval()
        correct = 0
        total = 0
        with torch.no_grad():
            for images, labels in valid_loader:
                images, labels = images.to(device), labels.to(device)
                outputs = model(images)
                _, predicted = torch.max(outputs.data, 1)
                total += labels.size(0)
                correct += (predicted == labels).sum().item()
        
        val_accuracy = 100 * correct / total
        print(f"Epoch [{epoch+1}/{num_epochs}], Loss: {running_loss/len(train_loader):.4f}, Val Accuracy: {val_accuracy:.2f}%")
        
        # Save the best model
        if val_accuracy > best_val_accuracy:
            best_val_accuracy = val_accuracy
            torch.save(model.state_dict(), best_model_path)
            print(f"Best model saved with val accuracy: {best_val_accuracy:.2f}%")

    print("Training complete.")
    return best_model_path

In [8]:
def predict_pseudo_labels(model, train_loader, threshold=0.95):
    model.eval()
    pseudo_labeled_data = []
    
    with torch.no_grad():
        for images, _ in train_loader:
            images = images.to(device)
            outputs = model(images)
            probabilities = torch.softmax(outputs, dim=1)
            max_probs, predicted_labels = torch.max(probabilities, dim=1)
            
            # Filter high-confidence predictions
            high_confidence_indices = max_probs > threshold
            pseudo_labeled_data.extend(
                [(images[i].cpu(), predicted_labels[i].item()) for i in range(len(images)) if high_confidence_indices[i]]
            )
    
    return pseudo_labeled_data

In [9]:
def fine_tune_model(model, pseudo_labeled_data, valid_loader, criterion, optimizer, num_epochs=5):
    # Create a DataLoader for pseudo-labeled data
    pseudo_images = torch.stack([x[0] for x in pseudo_labeled_data])
    pseudo_labels = torch.tensor([x[1] for x in pseudo_labeled_data])
    pseudo_dataset = torch.utils.data.TensorDataset(pseudo_images, pseudo_labels)
    pseudo_loader = DataLoader(pseudo_dataset, batch_size=32, shuffle=True)
    
    # Fine-tune the model
    for epoch in range(num_epochs):
        model.train()
        running_loss = 0.0
        for images, labels in pseudo_loader:
            images, labels = images.to(device), labels.to(device)
            
            optimizer.zero_grad()
            outputs = model(images)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()
            
            running_loss += loss.item()
        
        # Validation
        model.eval()
        correct = 0
        total = 0
        with torch.no_grad():
            for images, labels in valid_loader:
                images, labels = images.to(device), labels.to(device)
                outputs = model(images)
                _, predicted = torch.max(outputs.data, 1)
                total += labels.size(0)
                correct += (predicted == labels).sum().item()
        
        val_accuracy = 100 * correct / total
        print(f"Fine-tuning Epoch [{epoch+1}/{num_epochs}], Loss: {running_loss/len(pseudo_loader):.4f}, Val Accuracy: {val_accuracy:.2f}%")

In [10]:
from PIL import ImageFile
ImageFile.LOAD_TRUNCATED_IMAGES = True


In [None]:
num_epochs = 10
threshold = 0.95

best_model_path = train_model(model, test_loader, valid_loader, criterion, optimizer, num_epochs)

model.load_state_dict(torch.load(best_model_path))


# fine_tune_model(model, pseudo_labeled_data, valid_loader, criterion, optimizer, num_epochs=5)

Epoch [1/10], Loss: 0.9411, Val Accuracy: 44.76%
Best model saved with val accuracy: 44.76%
Epoch [2/10], Loss: 0.6951, Val Accuracy: 44.76%
Epoch [3/10], Loss: 0.6944, Val Accuracy: 44.76%
Epoch [4/10], Loss: 0.6940, Val Accuracy: 55.24%
Best model saved with val accuracy: 55.24%
Epoch [5/10], Loss: 0.6938, Val Accuracy: 55.24%
Epoch [6/10], Loss: 0.6935, Val Accuracy: 55.24%
Epoch [7/10], Loss: 0.6933, Val Accuracy: 55.24%
Epoch [8/10], Loss: 0.6931, Val Accuracy: 55.24%
Epoch [9/10], Loss: 0.6929, Val Accuracy: 55.24%
Epoch [10/10], Loss: 0.6928, Val Accuracy: 55.24%
Training complete.


  model.load_state_dict(torch.load(best_model_path))


ValueError: too many values to unpack (expected 2)

In [None]:
pseudo_labeled_data = predict_pseudo_labels(model, train_loader, threshold)