In [58]:
from pathlib import Path
import geopandas as gpd
import torch
import numpy as np
import pandas as pd
from tqdm import tqdm

In [59]:
# Machine-specific configuration: edit these values to match your setup.
# Folder holding the test data; the metadata file is looked up inside it.
PATH_TO_DATA_TEST = "/Users/33783/Desktop/capgemini/hackathon-mines-invent-2024/DATA/TEST/"
METADATA_TEST = Path(PATH_TO_DATA_TEST, "metadata.geojson")

# Name of the saved model; also reused later to name the submission CSV.
MODEL_NAME = "simplesegmentation_1"
MODEL_PATH = f"models/{MODEL_NAME}.pth"

In [60]:
# Read the test-set metadata file (METADATA_TEST, a GeoJSON) with geopandas.
mtd_test = gpd.read_file(METADATA_TEST)
# Bare last expression so the notebook renders the table as the cell output.
mtd_test

Unnamed: 0,ID,TILE,N_Parcel,Parcel_Cover,dates-S2,geometry
0,20000,t30uxv,88,0.667172,"{'0': 20180924, '1': 20180929, '2': 20181004, ...","MULTIPOLYGON (((364583.845 6890803.633, 365860..."
1,20001,t30uxv,93,0.752556,"{'0': 20180924, '1': 20180929, '2': 20181004, ...","MULTIPOLYGON (((367529.513 6912409.414, 368806..."
2,20002,t30uxv,64,0.970282,"{'0': 20180924, '1': 20180929, '2': 20181004, ...","MULTIPOLYGON (((452774.570 6918664.386, 454051..."
3,20003,t30uxv,92,0.820805,"{'0': 20180924, '1': 20180929, '2': 20181004, ...","MULTIPOLYGON (((369002.797 6914864.797, 370279..."
4,20004,t30uxv,75,0.835466,"{'0': 20180924, '1': 20180929, '2': 20181004, ...","MULTIPOLYGON (((460420.865 6901378.432, 461697..."
...,...,...,...,...,...,...
469,20469,t32ulu,18,0.168979,"{'0': 20180917, '1': 20180922, '2': 20180927, ...","MULTIPOLYGON (((1007901.660 6760101.793, 10091..."
470,20470,t32ulu,13,0.284865,"{'0': 20180917, '1': 20180922, '2': 20180927, ...","MULTIPOLYGON (((1004372.112 6772662.360, 10056..."
471,20471,t32ulu,36,0.399832,"{'0': 20180917, '1': 20180922, '2': 20180927, ...","MULTIPOLYGON (((1034994.449 6791692.947, 10362..."
472,20472,t32ulu,7,0.255358,"{'0': 20180917, '1': 20180922, '2': 20180927, ...","MULTIPOLYGON (((1007714.487 6812700.875, 10089..."


In [35]:
from baseline.dataset import BaselineDatatest
from baseline.collate import pad_collate

# Test dataset built from the same folder as the metadata file.
dt_test = BaselineDatatest(Path(PATH_TO_DATA_TEST))

# shuffle must stay False at inference time: the submission assigns IDs by
# row position (20000, 20001, ...), so predictions have to come out in the
# dataset's own order. Shuffling would pair masks with the wrong IDs.
dl_test = torch.utils.data.DataLoader(
    dt_test, batch_size=1, collate_fn=pad_collate, shuffle=False
)

Reading patch metadata ...
Done.
Dataset ready.


In [36]:
# Load the fully pickled model straight onto the device that inference will
# use (CUDA when available, otherwise CPU). Without map_location, a checkpoint
# saved on a GPU cannot be loaded on a CPU-only machine, and a CPU checkpoint
# would stay on the CPU while the inputs are moved to CUDA.
# SECURITY: torch.load unpickles arbitrary Python objects; only load
# checkpoints from a trusted source.
# NOTE(review): on PyTorch >= 2.6 torch.load defaults to weights_only=True and
# refuses fully pickled models; pass weights_only=False there if the file is trusted.
load_device = "cuda" if torch.cuda.is_available() else "cpu"
model = torch.load(MODEL_PATH, map_location=load_device)
# Don't forget to put the model in evaluation mode if necessary
model.eval()

SimpleSegmentationModel(
  (encoder): Sequential(
    (0): Conv2d(10, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (1): ReLU()
    (2): Conv2d(32, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (3): ReLU()
    (4): Conv2d(64, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (5): ReLU()
  )
  (decoder): Sequential(
    (0): Conv2d(128, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (1): ReLU()
    (2): Conv2d(64, 20, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  )
)

In [37]:
# Run the model over every test batch and collect one flat vector of
# predicted class indices per batch.
all_preds = []
device = "cuda" if torch.cuda.is_available() else "cpu"

# The model must live on the same device as its inputs, otherwise the forward
# pass raises a device-mismatch error. eval() is repeated here so this cell
# does not depend on how the model was prepared earlier.
model = model.to(device)
model.eval()

# Inference only: disable autograd so no computation graph is built.
with torch.no_grad():
    # Iterating the loader directly lets tqdm read its length and show a total.
    for inputs in tqdm(dl_test):
        # Keep only index 10 along the second axis of the 5-D "S2" tensor.
        # NOTE(review): presumably axis 1 is the acquisition date, leaving a
        # (batch, channels, H, W) tensor for the Conv2d encoder; confirm.
        inputs_preprocessed = inputs["S2"][:, 10, :, :, :]

        inputs_preprocessed = inputs_preprocessed.to(device)  # Satellite data
        outputs = model(inputs_preprocessed)
        # Most likely class per position: argmax over dim 1 of the model output.
        preds = torch.argmax(outputs, dim=1)
        preds = preds.cpu().numpy().flatten()
        all_preds.append(preds)

0it [00:00, ?it/s]

474it [00:32, 14.56it/s]


In [45]:
# Give every per-batch prediction vector a leading axis of size 1 so the
# vectors can be joined row-wise into a single 2-D array.
all_preds_2d = [np.reshape(flat_pred, (1, -1)) for flat_pred in all_preds]
n_batches = len(all_preds)
print(all_preds_2d[0].shape, n_batches)

# Join along the first axis (np.concatenate's default): one row per batch.
Y = np.concatenate(all_preds_2d)
print(Y.shape)

(1, 16384) 474
(474, 16384)


In [46]:
from baseline.submission_tools import masks_to_str

# Encode each row of Y with the project helper.
masks = masks_to_str(Y)
print(Y.shape)

# IDs are assigned by row position, starting at 20000: row i of Y gets ID
# 20000 + i. The rows of Y must therefore already be in ascending ID order.
first_id = 20000
submission = pd.DataFrame(
    {"ID": range(first_id, first_id + len(Y)), "MASKS": masks}
)

(474, 16384)


In [57]:
# Save the submission under a name derived from the model. index=False is
# important: it keeps the DataFrame's row index out of the CSV.
submission_csv = f"submission_{MODEL_NAME}.csv"
submission.to_csv(submission_csv, index=False)