<a href="https://colab.research.google.com/github/c-labropoulos/NN-for-lithological-segmentation/blob/main/UNET.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
# Mount Google Drive inside the Colab runtime so files under it can be read
# through ordinary filesystem paths. force_remount=True requests a fresh mount
# even when Drive is already mounted (useful when the cell is re-run).
from google.colab import drive

DRIVE_MOUNT_POINT = '/content/drive'
drive.mount(DRIVE_MOUNT_POINT, force_remount=True)


Mounted at /content/drive


In [None]:
!pip install rasterio
import os
import numpy as np
import rasterio
from sklearn.model_selection import train_test_split
import tensorflow as tf
from tensorflow.keras.layers import Conv2D, MaxPooling2D, UpSampling2D, Concatenate
import torch.optim as optim
from torchvision import transforms
from sklearn.model_selection import train_test_split
import glob
from PIL import Image
import numpy as np
from torch import nn
from torch import optim
from torch.utils.data import Dataset, DataLoader
from torchvision.transforms import ToTensor
import torchvision.transforms as transforms
import torch


Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/


In [None]:
import torch

# Prefer the GPU when a usable CUDA runtime is present; otherwise run on the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")

# Report the selected device so the notebook output records where the run executed.
print("Using device:", device)


Using device: cuda


In [None]:
def read_raster_image(file_path):
    """Return band 1 of the raster stored at ``file_path``.

    The dataset is opened with ``rasterio.open`` inside a ``with`` block, so it
    is closed again as soon as the band has been read.

    Args:
        file_path: Location of the raster; passed unchanged to ``rasterio.open``.

    Returns:
        The result of ``read(1)`` on the opened dataset, i.e. its first band.
    """
    with rasterio.open(file_path) as dataset:
        return dataset.read(1)


In [None]:
def get_file_paths(folder_path, file_extension=".tif"):
    """Recursively collect the files under ``folder_path`` with a given suffix.

    Args:
        folder_path: Directory that is walked (including all sub-directories).
        file_extension: Suffix a file name must end with to be kept. The
            comparison is a plain, case-sensitive ``str.endswith``.

    Returns:
        A list of paths (directory joined with file name) in the order in which
        ``os.walk`` yields them; empty when nothing matches.
    """
    return [
        os.path.join(directory, name)
        for directory, _subdirs, names in os.walk(folder_path)
        for name in names
        if name.endswith(file_extension)
    ]


In [None]:


class RasterDataset(Dataset):
    """Dataset of square patches cut from the ``.tif`` images of one folder.

    The ``.tif`` files directly inside ``root_dir`` are sorted by path and split
    by position: the first ``int(len(files) * train_ratio)`` files form the
    training subset, the remaining files the evaluation subset. Every selected
    image is tiled with a sliding window and all patches are kept in memory.

    Each item is a pair ``(image, mask)``:

    * ``image`` - float tensor of the patch divided by 255 with a leading
      channel dimension added.
    * ``mask`` - long tensor without the channel dimension, 1 where the patch
      is non-zero and 0 elsewhere.

    NOTE(review): the mask is derived from the input patch itself, so it only
    ever contains the labels 0 and 1. Confirm that this is the intended target.

    Args:
        root_dir: Folder that is listed (non-recursively) for ``.tif`` files.
        patch_size: Side length of the square patches, in pixels. Must be > 0.
        stride: Step of the sliding window, in pixels. Must be > 0.
        train_ratio: Fraction of the sorted files assigned to the training subset.
        is_train: Select the training subset (True) or the remainder (False).

    Raises:
        ValueError: If ``patch_size`` or ``stride`` is not positive.
    """

    def __init__(self, root_dir, patch_size, stride, train_ratio, is_train=True):
        # Fail fast: a non-positive stride would otherwise either produce an
        # empty dataset silently or crash later inside range().
        if patch_size <= 0:
            raise ValueError(f"patch_size must be positive, got {patch_size!r}")
        if stride <= 0:
            raise ValueError(f"stride must be positive, got {stride!r}")

        self.root_dir = root_dir
        self.patch_size = patch_size
        self.stride = stride
        self.train_ratio = train_ratio
        self.is_train = is_train

        self.image_paths = self.get_image_paths(root_dir)

        # Split on whole files so that no image contributes to both subsets.
        num_train = int(len(self.image_paths) * train_ratio)
        if is_train:
            self.image_paths = self.image_paths[:num_train]
        else:
            self.image_paths = self.image_paths[num_train:]

        self.image_patches = self.create_patches()

    def get_image_paths(self, root_dir):
        """Return the sorted paths of the ``.tif`` files directly inside ``root_dir``."""
        return sorted(
            os.path.join(root_dir, name)
            for name in os.listdir(root_dir)
            if name.endswith('.tif')
        )

    def create_patches(self):
        """Tile every selected image and return the patches as a list of PIL images.

        Windows that would extend past the right or bottom edge are skipped, so
        an image smaller than ``patch_size`` contributes no patches.
        """
        image_patches = []

        for image_path in self.image_paths:
            # The context manager releases the file handle even when the image
            # is too small to yield a patch (and is therefore never loaded).
            with Image.open(image_path) as image:
                width, height = image.size

                for y in range(0, height - self.patch_size + 1, self.stride):
                    for x in range(0, width - self.patch_size + 1, self.stride):
                        box = (x, y, x + self.patch_size, y + self.patch_size)
                        image_patches.append(image.crop(box))

        return image_patches

    def preprocess(self, image):
        """Convert a patch to a float tensor with a leading channel dimension.

        The values are divided by 255, which maps them to [0, 1] only when the
        source data is 8-bit.
        NOTE(review): confirm the bit depth of the rasters.
        """
        pixels = np.array(image) / 255.0
        return torch.from_numpy(pixels).float().unsqueeze(0)

    def __len__(self):
        """Number of patches held by this subset."""
        return len(self.image_patches)

    def __getitem__(self, index):
        """Return ``(image, mask)`` for the patch at ``index``."""
        image_patch = self.preprocess(self.image_patches[index])
        # Binary target: non-zero pixels -> 1. Long dtype is what
        # nn.CrossEntropyLoss expects for class-index targets.
        mask = (image_patch != 0).long().squeeze(0)
        return image_patch, mask

In [None]:
# Build the train / evaluation datasets and their data loaders.
folder_path = "/content/drive/MyDrive/raster_to_be_used"

# Patch extraction and split settings shared by both subsets.
patch_size = 128
stride = 32
train_ratio = 0.8

dataset_args = (folder_path, patch_size, stride, train_ratio)
loader_options = dict(batch_size=4, num_workers=2)

# Both subsets read the same folder; is_train selects which part of the file list is used.
train_dataset = RasterDataset(*dataset_args, is_train=True)
test_dataset = RasterDataset(*dataset_args, is_train=False)

# Only the training loader shuffles; evaluation keeps the dataset order.
train_data_loader = DataLoader(train_dataset, shuffle=True, **loader_options)
test_data_loader = DataLoader(test_dataset, shuffle=False, **loader_options)


In [None]:
class UNet(nn.Module):
    """U-Net with four down-sampling stages, a bottleneck and four up-sampling stages.

    Every stage is a pair of 3x3 convolutions (padding 1) each followed by ReLU.
    Down-sampling uses 2x2 max pooling, up-sampling uses 2x2 transposed
    convolutions with stride 2, and each decoder stage receives the matching
    encoder output through a skip connection (channel concatenation).

    Args:
        in_channels: Number of channels of the input tensor.
        out_channels: Number of channels of the output produced by the final
            1x1 convolution (raw scores, no activation is applied).
        base_channels: Width of the first encoder stage; it doubles at every
            deeper stage. The default of 64 gives 64-128-256-512 with a
            1024-channel bottleneck.

    Input is expected as ``(N, in_channels, H, W)``. The output has the same
    spatial size as the input. H and W do not have to be multiples of 16: when
    pooling rounded a size down, the up-sampled map is zero-padded to the size
    of its skip connection before concatenation. Both H and W must be at least
    16 so that four pooling steps leave a non-empty map.
    """

    def __init__(self, in_channels=1, out_channels=1, base_channels=64):
        super().__init__()

        # Channel widths of the four encoder stages and the bottleneck.
        c1, c2, c3, c4, c5 = (base_channels * 2 ** level for level in range(5))

        # Contracting path
        self.enc1 = self.conv_block(in_channels, c1)
        self.enc2 = self.conv_block(c1, c2)
        self.enc3 = self.conv_block(c2, c3)
        self.enc4 = self.conv_block(c3, c4)

        self.pool = nn.MaxPool2d(2)

        # Bottleneck
        self.bottleneck = self.conv_block(c4, c5)

        # Expanding path: each decoder block sees skip + up-sampled channels,
        # i.e. twice the width it outputs.
        self.upconv4 = nn.ConvTranspose2d(c5, c4, 2, stride=2)
        self.dec4 = self.conv_block(c5, c4)
        self.upconv3 = nn.ConvTranspose2d(c4, c3, 2, stride=2)
        self.dec3 = self.conv_block(c4, c3)
        self.upconv2 = nn.ConvTranspose2d(c3, c2, 2, stride=2)
        self.dec2 = self.conv_block(c3, c2)
        self.upconv1 = nn.ConvTranspose2d(c2, c1, 2, stride=2)
        self.dec1 = self.conv_block(c2, c1)

        self.final_conv = nn.Conv2d(c1, out_channels, kernel_size=1)

    def forward(self, x):
        """Run the network on ``x`` and return the per-pixel output scores."""
        enc1 = self.enc1(x)
        enc2 = self.enc2(self.pool(enc1))
        enc3 = self.enc3(self.pool(enc2))
        enc4 = self.enc4(self.pool(enc3))

        bottleneck = self.bottleneck(self.pool(enc4))

        dec4 = self.dec4(self._merge(enc4, self.upconv4(bottleneck)))
        dec3 = self.dec3(self._merge(enc3, self.upconv3(dec4)))
        dec2 = self.dec2(self._merge(enc2, self.upconv2(dec3)))
        dec1 = self.dec1(self._merge(enc1, self.upconv1(dec2)))

        return self.final_conv(dec1)

    @staticmethod
    def _merge(skip, upsampled):
        """Concatenate a skip connection with its up-sampled partner along channels.

        Max pooling floors odd sizes, so the up-sampled map can be one pixel
        smaller than the skip map per dimension; in that case it is zero-padded
        to match. For equal sizes this is a plain concatenation.
        """
        pad_h = skip.size(2) - upsampled.size(2)
        pad_w = skip.size(3) - upsampled.size(3)
        if pad_h or pad_w:
            # F.pad order for the last two dims: (left, right, top, bottom).
            upsampled = nn.functional.pad(
                upsampled,
                (pad_w // 2, pad_w - pad_w // 2, pad_h // 2, pad_h - pad_h // 2),
            )
        return torch.cat((skip, upsampled), dim=1)

    def conv_block(self, in_channels, out_channels):
        """Return two 3x3 convolutions (padding 1), each followed by an in-place ReLU."""
        return nn.Sequential(
            nn.Conv2d(in_channels, out_channels, kernel_size=3, padding=1),
            nn.ReLU(inplace=True),
            nn.Conv2d(out_channels, out_channels, kernel_size=3, padding=1),
            nn.ReLU(inplace=True)
        )




In [None]:
# Channel configuration - adjust both values to the data being segmented.
in_channels = 1    # input channels (1 for single-band images)
out_channels = 18  # output channels (one per class for multi-class segmentation)

# Build the network and place its parameters on the selected device in one step.
model = UNet(in_channels=in_channels, out_channels=out_channels).to(device)


In [None]:

# Loss function. With its default reduction, CrossEntropyLoss returns the mean
# over the batch, so every loss.item() collected below is a per-batch average.
criterion = nn.CrossEntropyLoss()

# Make sure the model lives on the target device before the optimizer is built.
model = model.to(device)

# Optimizer
optimizer = optim.Adam(model.parameters(), lr=0.001)

# Training loop
num_epochs = 10

for epoch in range(num_epochs):
    print(f"Epoch {epoch + 1}/{num_epochs}")
    print("-" * 10)

    # ---- Training pass ----
    model.train()
    train_loss = 0.0
    for images, masks in train_data_loader:
        # Move images and masks to the same device as the model
        images, masks = images.to(device), masks.to(device)

        optimizer.zero_grad()

        outputs = model(images)
        loss = criterion(outputs, masks)
        loss.backward()
        optimizer.step()

        train_loss += loss.item()

    # Average of the per-batch means -> divide by the number of batches.
    train_loss /= len(train_data_loader)
    print(f"Train Loss: {train_loss:.4f}")

    # ---- Evaluation pass (no gradient tracking) ----
    model.eval()
    eval_loss = 0.0
    with torch.no_grad():
        for images, masks in test_data_loader:
            images, masks = images.to(device), masks.to(device)

            outputs = model(images)
            loss = criterion(outputs, masks)

            eval_loss += loss.item()

    # eval_loss is a sum of per-batch means, so it must be divided by the number
    # of batches (as for train_loss), not by the number of samples. Dividing by
    # len(test_dataset) under-reported it by roughly the batch size.
    num_eval_batches = len(test_data_loader)
    # An empty evaluation set has no defined loss; report nan instead of crashing.
    eval_loss = eval_loss / num_eval_batches if num_eval_batches else float("nan")
    print(f"Evaluation loss: {eval_loss:.4f}")

Epoch 1/10
----------
Train Loss: 13.9123
Evaluation loss: 0.0038
Epoch 2/10
----------
Train Loss: 0.0027
Evaluation loss: 0.0011
Epoch 3/10
----------
Train Loss: 0.0005
Evaluation loss: 0.0001
Epoch 4/10
----------
Train Loss: 0.0003
Evaluation loss: 0.0003
Epoch 5/10
----------
Train Loss: 0.0145
Evaluation loss: 0.0033
Epoch 6/10
----------
Train Loss: 0.0018
Evaluation loss: 0.0006
Epoch 7/10
----------
Train Loss: 0.0023
Evaluation loss: 0.0041
Epoch 8/10
----------
Train Loss: 0.0003
Evaluation loss: 0.0001
Epoch 9/10
----------
Train Loss: 0.0002
Evaluation loss: 0.0000
Epoch 10/10
----------
Train Loss: 0.0001
Evaluation loss: 0.0000
