# Agriculture Detection with PyTorch
This notebook prepares PyTorch datasets and data loaders for the satellite imagery classification task, then builds, trains, and evaluates a convolutional classifier on them.

In [None]:
# Import Required Libraries
import os
import importlib
import torch.nn as nn

# Local imports
import utils.main_utils as main_utils
importlib.reload(main_utils)
from utils.main_utils import (
    check_skillnetwork_extraction, 
    shuffle_data,
    get_random_sample_image
)
import utils.pytorch_ai_utils as pytorch_ai_utils
importlib.reload(pytorch_ai_utils)
from utils.pytorch_ai_utils import (
    create_pytorch_custom_dataset, 
    create_pytorch_dataset, 
    display_pytorch_batch, 
    set_pytorch_seed, 
    set_pytorch_processing_env,
    worker_init_fn, 
    create_pytorch_loaders,
    build_pytorch_model,
    display_pytorch_history,
    pytorch_training_loop,
    evaluate_pytorch_model,
    visualize_satellite_agriculture,
)

## Download and Extract Data
Ensure the satellite imagery dataset is available locally before constructing PyTorch datasets.

In [None]:
# Download and extract data
url = 'https://cf-courses-data.s3.us.cloud-object-storage.appdomain.cloud/4Z1fwRR295-1O3PMQBH6Dg/images-dataSAT.tar'
extract_dir = './data/pytorch_data/'
model_dir = './models/'
os.makedirs(extract_dir, exist_ok=True)
os.makedirs(model_dir, exist_ok=True)
dataset_path = os.path.join(extract_dir, 'images_dataSAT')
await check_skillnetwork_extraction(extract_dir, url)

## Prepare Image Paths and Labels
Collect class-specific file paths and labels for use with PyTorch datasets.

In [None]:
# Gather image paths and labels
# Derive the dataset root from `dataset_path` (defined in the download cell) rather
# than repeating the literal path, so the two locations cannot drift apart.
# Joining with '' appends the trailing separator the previous literal carried.
base_dir = os.path.join(dataset_path, '')

# One sub-folder per class.
dir_non_agri_name = os.path.join(base_dir, 'class_0_non_agri')
dir_agri_name = os.path.join(base_dir, 'class_1_agri')

# NOTE(review): presumably returns the combined, shuffled file paths and their
# matching labels for both class folders — confirm in utils.main_utils.
all_image_paths, all_labels = shuffle_data(dir_non_agri_name, dir_agri_name)

## Build PyTorch Datasets and Data Loaders
Instantiate both custom and `ImageFolder`-based datasets, then create loaders for inspection.

In [None]:
# Create PyTorch datasets and loaders and display batches
# The preview uses its own variable name: the training hyperparameter `batch_size`
# is defined in a later cell, and sharing the name would let a re-run of this cell
# silently overwrite the value used to build the training loaders.
preview_batch_size = 8

# Loader built from the explicit class directories.
custom_loader = create_pytorch_custom_dataset(
    base_dir,
    dir_non_agri_name,
    dir_agri_name,
    batch_size=preview_batch_size,
)

# Loader built from the dataset root only.
imagefolder_loader = create_pytorch_dataset(base_dir, batch_size=preview_batch_size)

# Display a batch from each loader under its own title for side-by-side inspection.
display_pytorch_batch(custom_loader, preview_batch_size, title='Custom Loader')
display_pytorch_batch(imagefolder_loader, preview_batch_size, title='ImageFolder Loader')

## Set Seed

In [None]:
# Single seed value for the run.
# NOTE(review): set_pytorch_seed presumably seeds the RNGs PyTorch relies on — confirm
# in utils.pytorch_ai_utils. shuffle_data and the preview loaders run in earlier cells,
# i.e. before this seed is applied; confirm whether they should be seeded as well.
SEED = 42
set_pytorch_seed(SEED)

## Initialize PyTorch Processing Environment

In [None]:
# `device` is passed to the model-building, training, history-display, visualization
# and evaluation calls in the cells below.
# NOTE(review): presumably picks an accelerator when available, else CPU — confirm in utils.pytorch_ai_utils.
device = set_pytorch_processing_env()

## Hyperparameters

In [None]:
# Data and Training Hyperparameters

# Width and height of the input images
img_size = 64

# Samples per gradient update
batch_size = 128

# Optimizer learning rate
lr = 1e-3

# Full passes over the dataset
epochs = 3

# Convolution padding ('same' or an integer such as 0, 1, 2, ...)
padding = 'same'

# Loss for the binary task (CrossEntropyLoss or BCEWithLogitsLoss)
loss_function = nn.CrossEntropyLoss()

# Output classes: 2 with CrossEntropyLoss, 1 with BCEWithLogitsLoss
num_classes = 2

# Worker processes used for data loading
num_workers = 0

# Share of the data used for training
train_split = 0.8

# Whether the data is shuffled
shuffle = True

# File the model is saved to
model_name = os.path.join(model_dir, 'pytorch_model.pth')

# Architectural Hyperparameters

# Input image channels (3 for RGB)
n_channels = 3

# Number of convolutional blocks, number of dense blocks
conv_block_num, dense_block_num = 4, 2

# Base filter count for convolutional layers, base unit count for dense layers
filter_base, unit_base = 32, 128

# Convolution kernel size, MaxPooling pool size
kernel_size, pool_size = 5, 2

# Dropout rate used for regularization
dropout = 0.4

## Create Loaders

In [None]:
# Build the training and validation loaders.
# `dataset_path` comes from the download cell, `worker_init_fn` is imported from
# utils.pytorch_ai_utils, and the remaining values come from the hyperparameter cell.
train_loader, val_loader = create_pytorch_loaders(
    dataset_path=dataset_path,
    img_size=img_size,
    train_split=train_split,
    batch_size=batch_size,
    shuffle=shuffle,
    num_workers=num_workers,
    worker=worker_init_fn,
)

## Build PyTorch Model

In [None]:
# Build the network from the hyperparameter cell's settings, passing the target `device`.
model = build_pytorch_model(
    # input / output
    n_channels=n_channels,
    num_classes=num_classes,
    # convolutional part
    conv_block_num=conv_block_num,
    filter_base=filter_base,
    kernel_size=kernel_size,
    padding=padding,
    pool_size=pool_size,
    # dense part
    dense_block_num=dense_block_num,
    unit_base=unit_base,
    dropout=dropout,
    # placement
    device=device,
)

## Train PyTorch Model

In [None]:
# Run the training loop; its result is unpacked as (model, loss_history, acc_history).
# NOTE(review): `model_name` is the save-file path from the hyperparameter cell;
# presumably the loop writes the model there — confirm in utils.pytorch_ai_utils.
model, loss_history, acc_history = pytorch_training_loop(
    model=model,
    train_loader=train_loader,
    val_loader=val_loader,
    loss_function=loss_function,
    lr=lr,
    epochs=epochs,
    device=device,
    model_name=model_name,
)

## Display Results

In [None]:
# Show the recorded training histories for the trained model.
# NOTE(review): accuracy history is passed before loss history here, whereas the
# training call unpacks (loss_history, acc_history) — confirm this positional order
# matches display_pytorch_history's signature.
display_pytorch_history(acc_history, loss_history, model, val_loader, device)

In [None]:
# Where the visualization image is written.
result_dir = 'results/'
result_path = os.path.join(result_dir, 'tile_grid_visualization.png')
os.makedirs(result_dir, exist_ok=True)  # safe to re-run

# get one random image path
# NOTE(review): `sample_img_path` is not passed to the call below — confirm whether
# it should be used there or whether this lookup can be dropped.
sample_img_path = get_random_sample_image(dir_non_agri_name, dir_agri_name)

# NOTE(review): tile_size=64 currently equals the `img_size` hyperparameter; confirm
# whether the two must stay in sync and, if so, pass `img_size` instead.
visualize_satellite_agriculture(
    model=model,
    device=device,
    dir_agri=dir_agri_name,
    dir_non_agri=dir_non_agri_name,
    tile_size=64,
    grid_size=4,
    save_path=result_path,
)

## Display Metrics

In [None]:
# Evaluate the trained model on the validation loader.
# NOTE(review): `model_name` is the save-file path from the hyperparameter cell;
# presumably used to load the saved weights before scoring — confirm in utils.pytorch_ai_utils.
evaluate_pytorch_model(model, val_loader, device, model_name)