 ## Things to try to increase LB score

 1. Change the way in which the buffer region is constructed (Cross, Square, Ball)

 1. Change submission csv method to transpose rows and cols

 ### Submitting first 200 000, we have

 * 0.025533852 accuracy with only zeros.

 * 0.00043453 accuracy with only ones.

 * i.e., 0.00043453 / (0.00043453 + 0.025533852) = 0.01673304097 ~= **1.67 %** rate of ones



 We should probably take this rate of ones into account when setting the threshold.

 ### Estimated time for getting **25m rows**:

 * One forward pass gives 40 000 rows

 * One forward pass takes approx. 0.5 sec

 * 25m / 40 000 = 625

 * 625 * 0.5 = 312.5 sec ~= **5 min**

 # Inference Notebook



 This notebook performs inference on test AOIs using the trained model. It reads the AOIs from shapefiles, processes each AOI through the model, and aggregates the results into final prediction tensors. The predictions are then converted into submission CSV files.



 The methods from `inference_utils.py` and `train_utils.py` are imported and used.

 ## Imports and Setup

In [3]:
# %%
import os
import sys
import geopandas as gpd
import pandas as pd
import numpy as np
import pyproj
import torch
from shapely.geometry import Polygon, Point
import matplotlib.pyplot as plt
import random
import re 
from tqdm.notebook import tqdm

# Add the src directory to the sys.path
sys.path.append(os.path.abspath('..'))

# Import functions and constants from inference_utils and train_utils
from secret_runway_detection.inference_utils import (
    aoi_to_tiles,
    aoi_to_input_areas,
    pad_output_tensor,
    run_inference_on_aoi,
    tensor_to_submission_csv,
    fetch_and_stitch_aoi_quarters,
)

from secret_runway_detection.train_utils import (
    input_area_to_input_image,
    make_input_image_tensor
)

from secret_runway_detection.model import CombinedModel

# Seed NumPy, PyTorch and the stdlib RNG with one shared value for reproducibility
RANDOM_SEED = 42
for _seed_fn in (np.random.seed, torch.manual_seed, random.seed):
    _seed_fn(RANDOM_SEED)

# Pick the compute device: CUDA when available, otherwise fall back to the CPU
_device_name = "cuda" if torch.cuda.is_available() else "cpu"
device = torch.device(_device_name)
print(f"Using device: {device}")


Using device: cpu


 ## Configuration Parameters

In [4]:
# Debug switch for the notebook
DEBUG = True

# --- Tile and AOI geometry ---
TILE_SIDE_LEN = 10.0  # in meters
AOI_HEIGHT = 15270.0  # in meters
AOI_WIDTH = 15410.0   # in meters

ROWS_COUNT = 1527     # Number of tile rows
COLUMNS_COUNT = 1541  # Number of tile columns

# Sanity check: the AOI extent divided by the tile counts must give the tile side length
assert TILE_SIDE_LEN == AOI_HEIGHT / ROWS_COUNT
assert TILE_SIDE_LEN == AOI_WIDTH / COLUMNS_COUNT

# --- Model input dimensions (pixels) ---
INPUT_IMAGE_HEIGHT = 224
INPUT_IMAGE_WIDTH = 224

# Number of tiles per side in one input area
TILES_PER_AREA_LEN = 200

# Number of input areas to cover the AOI
INPUT_AREAS_VERTICALLY = 10
INPUT_AREAS_HORIZONTALLY = 10

# Cut-off used to turn model outputs into binary predictions
THRESHOLD = 0.5  # Adjust based on validation performance

# Location of the trained model checkpoint
MODEL_CHECKPOINT_PATH = 'checkpoints/model_checkpoint.pth'  # Update this path

# Output directory for the submission CSVs; created on the spot if it is missing
SUBMISSION_CSV_DIR = 'submission_csvs'
os.makedirs(SUBMISSION_CSV_DIR, exist_ok=True)


 ## Load the Trained Model

In [5]:
# Load the pickled model object from MODEL_CHECKPOINT_PATH (a relative path,
# resolved against the notebook's current working directory).
# map_location remaps the stored tensors onto the active device, so a
# checkpoint saved on a GPU machine can still be loaded on a CPU-only one.
# NOTE(review): torch.load unpickles arbitrary Python objects - only load
# checkpoints from a trusted source. Recent PyTorch releases default to
# weights_only=True, which rejects whole-model pickles; if that applies here,
# pass weights_only=False or switch to saving/loading a state_dict.
model = torch.load(MODEL_CHECKPOINT_PATH, map_location=device)

# Fail early with a clear message if the checkpoint is not a full model
# (e.g. a state_dict), instead of an AttributeError on .to()/.eval() below.
if not isinstance(model, torch.nn.Module):
    raise TypeError(
        f"Expected {MODEL_CHECKPOINT_PATH!r} to contain a pickled torch.nn.Module, "
        f"got {type(model).__name__}. If the checkpoint is a state_dict, construct "
        "the model first and call load_state_dict() on it."
    )

model.to(device)
model.eval()

TypeError: CombinedModel.__init__() missing 2 required positional arguments: 'backbone' and 'segmentation_head'

## Load images of AOIs

In [7]:
# %% [markdown]
# ## Load AOI Mosaics Based on Shapefile Names and Check for Missing Mosaics

# %%
import os
import rasterio
import numpy as np

# Directory containing AOI shapefiles
AOI_SHAPEFILES_DIR = '../shp_test_AOIs/shp'  # Adjust the path as necessary

# Directory where the mosaic images are saved
mosaic_images_dir = '../aoi_mosaic_images'  # Adjust the path as necessary

# Maps AOI id -> array returned by rasterio's read() for that AOI's mosaic
aoi_images_dict = {}

# List all shapefiles in the directory. os.listdir returns entries in
# arbitrary order, so sort them: this keeps the processing order - and the
# AOI picked by the DEBUG subset below - reproducible across runs and machines.
aoi_shapefiles = sorted(f for f in os.listdir(AOI_SHAPEFILES_DIR) if f.endswith('.shp'))

# Extract AOI IDs from the shapefile names (file name without extension)
aoi_ids = [os.path.splitext(f)[0] for f in aoi_shapefiles]

if DEBUG:
    aoi_ids = aoi_ids[:1]  # Limit the number of AOIs to load for debugging

# Iterate over each AOI ID and load the corresponding mosaic
for aoi_id in aoi_ids:
    mosaic_filename = f'{aoi_id}_mosaic.tif'
    mosaic_file_path = os.path.join(mosaic_images_dir, mosaic_filename)
    print(f"Processing AOI: {aoi_id}")

    # A missing mosaic is a hard error: stop instead of silently skipping the AOI
    if not os.path.exists(mosaic_file_path):
        raise FileNotFoundError(f"Mosaic file not found for AOI {aoi_id}: {mosaic_file_path}")

    # Open the image using rasterio and read all of its bands
    with rasterio.open(mosaic_file_path) as src:
        img_data = src.read()

    # Store the image data in the dictionary
    aoi_images_dict[aoi_id] = img_data
    print(f"Mosaic image loaded for AOI: {aoi_id}")

print(f"\nLoaded {len(aoi_images_dict)} AOI images into the dictionary.")


Processing AOI: aoi_2021_04
Mosaic image loaded for AOI: aoi_2021_04

Loaded 1 AOI images into the dictionary.


 ## Run Inference on Each AOI

In [None]:
# %%
def run_inference_on_aoi(aoi_gdf: gpd.GeoDataFrame, model: torch.nn.Module, threshold: float) -> torch.Tensor:
    """
    Runs inference on the given AOI using the provided model.

    The AOI is split into input areas with `aoi_to_input_areas`. Each area whose
    image is not entirely zero is passed through the model, the model output is
    padded with `pad_output_tensor`, and the padded outputs are merged with an
    element-wise maximum before the threshold is applied.

    Relies on the module-level names `device`, `INPUT_IMAGE_WIDTH` and
    `INPUT_IMAGE_HEIGHT`.

    Parameters:
    - aoi_gdf (gpd.GeoDataFrame): GeoDataFrame containing the AOI geometry.
      Only the first geometry is used; its CRS is passed on to the helpers.
    - model (torch.nn.Module): The trained model.
    - threshold (float): Threshold for converting model outputs to binary predictions.

    Returns:
    - final_prediction_tensor (torch.Tensor): Float tensor holding 1.0 where the
      merged confidence is strictly greater than `threshold` and 0.0 elsewhere.

    Raises:
    - ValueError: If every input area was skipped (or there were none).
    """
    aoi = aoi_gdf.geometry.iloc[0]
    crs = aoi_gdf.crs

    input_areas = aoi_to_input_areas(aoi, crs)

    # Running element-wise maximum over all padded outputs. Keeping a single
    # accumulator instead of stacking every padded tensor gives the same result
    # while holding only one extra AOI-sized tensor in memory at a time.
    aoi_confidence = None
    for _, input_area_row in input_areas.iterrows():
        input_area = input_area_row['geometry']
        idxs = input_area_row['idxs']

        # Fetch the input image for the input area
        input_image = input_area_to_input_image(
            input_area=input_area,
            input_area_crs=crs,
            input_image_width=INPUT_IMAGE_WIDTH,
            input_image_height=INPUT_IMAGE_HEIGHT
        )

        # An all-zero image is treated as empty: skip this area
        if np.all(input_image == 0):
            continue

        # Convert the input image to a tensor on the active device
        input_tensor = make_input_image_tensor(input_image)
        input_tensor = input_tensor.to(device)

        with torch.no_grad():
            output_tensor = model(input_tensor)
            # Drop the two leading dimensions when they have size 1
            # (presumably batch and channel, leaving (H, W) - TODO confirm)
            output_tensor = output_tensor.squeeze(0).squeeze(0)

        # Pad the output tensor (presumably to the size of the whole AOI - TODO confirm)
        output_tensor_padded = pad_output_tensor(output_tensor, idxs)

        # Keep the highest confidence wherever areas overlap
        if aoi_confidence is None:
            aoi_confidence = output_tensor_padded
        else:
            aoi_confidence = torch.maximum(aoi_confidence, output_tensor_padded)

    if aoi_confidence is None:
        raise ValueError("No valid input areas were processed. Check the input data.")

    final_prediction_tensor = (aoi_confidence > threshold).float()

    return final_prediction_tensor

# Process every AOI shapefile: read it, reproject it, predict, and write one CSV per AOI
for shapefile in aoi_shapefiles:
    aoi_shapefile_path = os.path.join(AOI_SHAPEFILES_DIR, shapefile)
    aoi_id = os.path.splitext(shapefile)[0]  # AOI ID = file name without its extension
    print(f"Processing AOI: {aoi_id}")

    # Load the AOI geometry
    aoi_gdf = gpd.read_file(aoi_shapefile_path)
    if aoi_gdf.crs is None:
        # No CRS stored in the file: assign the default one before reprojecting
        aoi_gdf = aoi_gdf.set_crs(epsg=4326)
    # Reproject to the working CRS (adjust to your working CRS)
    aoi_gdf = aoi_gdf.to_crs('EPSG:32633')

    # Predict on this AOI
    final_prediction_tensor = run_inference_on_aoi(aoi_gdf, model, THRESHOLD)
    print(f"Inference completed on AOI: {aoi_id}")

    # Turn the prediction tensor into a submission table and save it as CSV
    submission_df = tensor_to_submission_csv(
        final_prediction_tensor,
        indexes='from-top-left',
        csvs_dir=SUBMISSION_CSV_DIR,
    )
    submission_csv_path = os.path.join(SUBMISSION_CSV_DIR, f'submission_{aoi_id}.csv')
    submission_df.to_csv(submission_csv_path, index=False)
    print(f"Submission CSV saved to {submission_csv_path}")


 ## Notes

In [None]:
# %%
# - Ensure that the coordinate reference systems (CRS) are consistent throughout the process.
# - Verify the indexing of rows and columns in the `tensor_to_submission_csv` function to match the competition requirements.
# - The code assumes that the helper functions are correctly defined in `inference_utils.py` and `train_utils.py`.
# - Adjust paths and constants as necessary based on your project structure.


 ## Conclusion

In [None]:
# %%
# Final status message. It is printed unconditionally when this cell runs and
# does not itself verify that the earlier cells succeeded.
print("Inference process completed for all AOIs. Submission files are ready.")

