In [1]:
import warnings

warnings.filterwarnings("ignore")

import json
import os
import shutil
from datetime import date
from typing import Literal

import cv2
import joblib
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import shapely.wkt
import torch
import torch.nn as nn
from PIL import Image
from shapely import wkt
from torch import tensor
from torch.autograd import Variable
from torch.nn import functional as F
from torch.utils.data import DataLoader
from torcheval.metrics.functional import (
    multiclass_accuracy,
    multiclass_f1_score,
    multiclass_precision,
    multiclass_recall,
)
from torchvision import datasets, transforms
from tqdm.autonotebook import tqdm

In [2]:
# os.chdir('../..')

In [3]:
ROOT_DIR: str = os.getcwd()
INFERENCE_ROOT: str = os.path.join(ROOT_DIR, "data", "xview_building_damage", "inference")
UPLOADS_IMG: str = os.path.join(INFERENCE_ROOT, "upload", "img")
UPLOADS_JSON: str = os.path.join(INFERENCE_ROOT, "upload", "json")
POST_PROCESSED: str = os.path.join(INFERENCE_ROOT, "postprocesssed")
PRE_PROCESSED: str = os.path.join(INFERENCE_ROOT, "preprocesssed")

print(f"{ROOT_DIR=}")
print(f"{INFERENCE_ROOT=}")
print(f"{UPLOADS_IMG=}")
print(f"{UPLOADS_JSON=}")
print(f"{POST_PROCESSED=}")
print(f"{PRE_PROCESSED=}")

ROOT_DIR='/workspaces/alivio-app'
INFERENCE_ROOT='/workspaces/alivio-app/data/xview_building_damage/inference'
UPLOADS_IMG='/workspaces/alivio-app/data/xview_building_damage/inference/upload/img'
UPLOADS_JSON='/workspaces/alivio-app/data/xview_building_damage/inference/upload/json'
POST_PROCESSED='/workspaces/alivio-app/data/xview_building_damage/inference/postprocesssed'
PRE_PROCESSED='/workspaces/alivio-app/data/xview_building_damage/inference/preprocesssed'


In [4]:
os.makedirs(UPLOADS_IMG, exist_ok=True)
os.makedirs(UPLOADS_JSON, exist_ok=True)
os.makedirs(POST_PROCESSED, exist_ok=True)
os.makedirs(PRE_PROCESSED, exist_ok=True)

In [5]:
uploaded_imgs = (
    [f"{UPLOADS_IMG}/{f}" for f in os.listdir(UPLOADS_IMG)]
)
uploaded_jsons = (
    [f"{UPLOADS_JSON}/{f}" for f in os.listdir(UPLOADS_JSON)]
)

print(len(uploaded_imgs))
print(len(uploaded_jsons))

1
1


### Convert the Uploaded JSON to CSV

In [6]:
label_json_data: list[dict] = []


def read_and_store_label_json(label_json_path: str):
    """A thread-safe function that reads a json as a dictionary and writes to a global list"""
    with open(label_json_path) as f:
        label_json_data.append(json.load(f))

In [7]:
read_and_store_label_json(uploaded_jsons[0])

In [8]:
label_df_original: pd.DataFrame = pd.json_normalize(pd.Series(label_json_data))
print(label_df_original.shape)
label_df_original.head()

(1, 20)


Unnamed: 0,features.lng_lat,features.xy,metadata.sensor,metadata.provider_asset_type,metadata.gsd,metadata.capture_date,metadata.off_nadir_angle,metadata.pan_resolution,metadata.sun_azimuth,metadata.sun_elevation,metadata.target_azimuth,metadata.disaster,metadata.disaster_type,metadata.catalog_id,metadata.original_width,metadata.original_height,metadata.width,metadata.height,metadata.id,metadata.img_name
0,"[{'properties': {'feature_type': 'building', '...","[{'properties': {'feature_type': 'building', '...",WORLDVIEW03_VNIR,WORLDVIEW03_VNIR,1.909097,2016-10-08T15:33:12.799Z,38.06583,0.477265,144.004517,60.312649,279.450775,hurricane-matthew,wind,10400100233B1C00,1024,1024,1024,1024,MjU4NjM5Ng.hxGJlAVQ8JPvon1EV6UJp8YuyW4,hurricane-matthew_00000302_post_disaster.png


In [9]:
lbl_df: pd.DataFrame = label_df_original.copy()
CHALLENGE_TYPE: Literal["train", "test", "hold"] = "test"


def json_df_to_csv(label_df: pd.DataFrame):
    label_df_lng_lat: pd.DataFrame = (
        label_df.drop(columns=["features.xy", "features.lng_lat"])
        .join(label_df["features.lng_lat"].explode())
        .reset_index(drop=True)
    )

    label_df_features: pd.DataFrame = (
        label_df.drop(columns=["features.xy", "features.lng_lat"])
        .join(label_df["features.xy"].explode())
        .reset_index(drop=True)
    )

    lng_lat_normalized: pd.DataFrame = pd.json_normalize(
        label_df_lng_lat["features.lng_lat"]
    ).rename(
        columns={
            "wkt": "map_polygon",
            "properties.feature_type": "map_feature_type",
            "properties.subtype": "map_damage",
            "properties.uid": "building_id",
        }
    )

    features_normalized: pd.DataFrame = pd.json_normalize(
        label_df_features["features.xy"]
    ).rename(
        columns={
            "wkt": "image_polygon",
            "properties.feature_type": "image_feature_type",
            "properties.subtype": "image_damage",
            "properties.uid": "building_id",
        }
    )

    label_df_lng_lat_normalized = label_df_lng_lat.drop(
        columns=["features.lng_lat"]
    ).join(lng_lat_normalized)

    label_df_features_normalized = label_df_features.drop(columns=["features.xy"]).join(
        features_normalized
    )

    label_df_final: pd.DataFrame = label_df_lng_lat_normalized.merge(
        label_df_features_normalized[
            [
                "metadata.id",
                "image_polygon",
                "image_feature_type",
                "image_damage",
                "building_id",
            ]
        ],
        "left",
        ["metadata.id", "building_id"],
    )

    label_df_final = (
        label_df_final.rename(
            columns={
                c: c.replace("metadata.", "")
                for c in label_df_final.columns
                if c.startswith("metadata.")
            }
        )
        .drop(
            columns=[
                "map_feature_type",
                "map_damage",
            ]
        )
        .rename(
            columns={
                "image_feature_type": "feature_type",
                "image_damage": "damage",
            }
        )
    )

    label_df_final["dataset"] = CHALLENGE_TYPE
    label_df_final["capture_date"] = pd.to_datetime(label_df_final["capture_date"])

    label_df_final["image_id"] = (
        label_df_final["img_name"]
        .dropna()
        .apply(lambda cell: "_".join(cell.split("_")[0:2]))
    )
    label_df_final["is_pre_image"] = (
        label_df_final["img_name"].dropna().apply(lambda cell: "_pre_disaster" in cell)
    )
    label_df_final["is_post_image"] = (
        label_df_final["img_name"].dropna().apply(lambda cell: "_post_disaster" in cell)
    )

    label_df_final.to_parquet(f"{CHALLENGE_TYPE}.parquet")

    concat_list: list[pd.DataFrame] = [
        pd.read_parquet(pq_file)
        for pq_file in os.listdir()
        if pq_file.endswith(".parquet")
    ]

    df = pd.concat(concat_list).reset_index(drop=True)
    df.to_parquet(os.path.join(POST_PROCESSED, "inference_data.parquet"))

    df.to_csv(os.path.join(POST_PROCESSED, "inference_data.csv"), index=False)

In [10]:
json_df_to_csv(lbl_df)

### Step 2 : Begin Preprocess

In [11]:
def get_df_with_class_numeric_labels(df_name):
    # df_name['damage'].fillna('pre', inplace=True)
    df_name["damage_class"] = df_name["damage"]
    keys = list(df_name["damage_class"].value_counts().keys())
    df_name["damage_class"] = df_name["damage_class"].apply(keys.index)
    df_name["damage_class"].value_counts()
    return df_name

In [12]:
def get_metadata():
    infer_csv = pd.read_csv(os.path.join(POST_PROCESSED, "inference_data.csv"))
    data = infer_csv[infer_csv["image_polygon"].notna()]
    df_disaster = data[data["damage"] != "un-classified"]
    df_disaster["mask_file_names"] = (
        df_disaster["img_name"].str.replace(".png", "_")
        + df_disaster["building_id"]
        + ".png"
    )
    df_disaster_class_labels = get_df_with_class_numeric_labels(df_disaster)
    return df_disaster_class_labels

In [13]:
df = get_metadata()
print(df.shape)
df.head()

(216, 29)


Unnamed: 0,sensor,provider_asset_type,gsd,capture_date,off_nadir_angle,pan_resolution,sun_azimuth,sun_elevation,target_azimuth,disaster,...,building_id,image_polygon,feature_type,damage,dataset,image_id,is_pre_image,is_post_image,mask_file_names,damage_class
0,WORLDVIEW03_VNIR,WORLDVIEW03_VNIR,1.909097,2016-10-08 15:33:12.799000+00:00,38.06583,0.477265,144.004517,60.312649,279.450775,hurricane-matthew,...,f7348131-6c54-4c5a-a582-485850212e74,"POLYGON ((1.542371701700936 168.1241877196607,...",building,no-damage,test,hurricane-matthew_00000302,False,True,hurricane-matthew_00000302_post_disaster_f7348...,0
1,WORLDVIEW03_VNIR,WORLDVIEW03_VNIR,1.909097,2016-10-08 15:33:12.799000+00:00,38.06583,0.477265,144.004517,60.312649,279.450775,hurricane-matthew,...,fd75da06-838b-414d-87e0-5f66c1e12f3f,"POLYGON ((77.28999989817102 159.7900000240562,...",building,minor-damage,test,hurricane-matthew_00000302,False,True,hurricane-matthew_00000302_post_disaster_fd75d...,1
2,WORLDVIEW03_VNIR,WORLDVIEW03_VNIR,1.909097,2016-10-08 15:33:12.799000+00:00,38.06583,0.477265,144.004517,60.312649,279.450775,hurricane-matthew,...,48013051-1cda-4b82-b1b4-204edc0195e0,"POLYGON ((89.05825064169461 153.1524478280697,...",building,minor-damage,test,hurricane-matthew_00000302,False,True,hurricane-matthew_00000302_post_disaster_48013...,1
3,WORLDVIEW03_VNIR,WORLDVIEW03_VNIR,1.909097,2016-10-08 15:33:12.799000+00:00,38.06583,0.477265,144.004517,60.312649,279.450775,hurricane-matthew,...,9f718982-7f14-4ee8-8d80-dc18f7165649,"POLYGON ((57.70342892842147 125.7329949102509,...",building,no-damage,test,hurricane-matthew_00000302,False,True,hurricane-matthew_00000302_post_disaster_9f718...,0
4,WORLDVIEW03_VNIR,WORLDVIEW03_VNIR,1.909097,2016-10-08 15:33:12.799000+00:00,38.06583,0.477265,144.004517,60.312649,279.450775,hurricane-matthew,...,d9c66f30-f4b9-4d85-b396-0c59c434eac8,POLYGON ((0.1386328123365646 74.29142875935086...,building,no-damage,test,hurricane-matthew_00000302,False,True,hurricane-matthew_00000302_post_disaster_d9c66...,0


In [14]:
def polygons_mask(polygons, im_size: tuple = (1024, 1024)) -> np.ndarray:
    """Create a mask from polygons."""
    img_mask = np.zeros(im_size, np.uint8)

    if not polygons:
        return img_mask
    
    int_coords = lambda x: np.array(x).round().astype(np.int32)

    exteriors = [int_coords(poly.exterior.coords) for poly in polygons]
    interiors = [int_coords(pi.coords) for poly in polygons for pi in poly.interiors]

    cv2.fillPoly(img_mask, exteriors, 1)
    cv2.fillPoly(img_mask, interiors, 0)

    return img_mask


def create_image_mask_overall(
    root_dir: str, meta_df: pd.DataFrame, im_size: tuple = (1024, 1024)
):
    input_dir = os.path.join(root_dir, "upload")
    dest_dir = os.path.join(root_dir, str(date.today()))
    img_input = os.path.join(input_dir, "img")

    if os.path.exists(dest_dir):
        print("Removing the dir with name: ", dest_dir)
        os.system("rm -rf " + dest_dir)

    print("creating empty dir with name ", dest_dir)
    os.makedirs(dest_dir)

    img_overlay = os.path.join(dest_dir, "img_mask_overlay")
    if os.path.exists(img_overlay):
        print("Removing the dir with name: ", img_overlay)
        os.system("rm -rf " + img_overlay)

    print("creating empty dir with name ", img_overlay)
    os.makedirs(img_overlay)

    df = meta_df[meta_df["is_post_image"] == True]

    print("Starting : Mask overlay")
    for idx, file_name in tqdm(enumerate(df["mask_file_names"]), total=len(df)):
        image = cv2.imread(os.path.join(img_input, df.iloc[idx]["img_name"]))
        mask = np.zeros(image.shape[:2], dtype="uint8")
        _mask = polygons_mask([shapely.wkt.loads(df.iloc[idx]["image_polygon"])])
        masked = cv2.bitwise_and(image, image, mask=_mask)
        plt.imsave(os.path.join(img_overlay, file_name), masked)

    print("Ending : Mask overlay")

In [15]:
create_image_mask_overall(INFERENCE_ROOT, df)

Removing the dir with name:  /workspaces/alivio-app/data/xview_building_damage/inference/2024-04-15
creating empty dir with name  /workspaces/alivio-app/data/xview_building_damage/inference/2024-04-15
creating empty dir with name  /workspaces/alivio-app/data/xview_building_damage/inference/2024-04-15/img_mask_overlay
Starting : Mask overlay


 10%|█         | 22/216 [00:01<00:12, 15.52it/s]

100%|██████████| 216/216 [00:13<00:00, 15.64it/s]

Ending : Mask overlay





In [16]:
def get_bounds_tp(image_wkt: str) -> tuple[float]:
    bounds = wkt.loads(image_wkt).bounds
    return (bounds[0], bounds[1], bounds[2], bounds[3])  # type: ignore


def crop_save_masked_images(
    root_dir: str,
    meta_df: pd.DataFrame,
    crop_output_dir_name: str = "img_mask_overlay_crops",
):
    input_dir = os.path.join(root_dir, str(date.today()))
    img_crop_overlay = os.path.join(input_dir, crop_output_dir_name)

    if os.path.exists(img_crop_overlay):
        print("Removing the dir with name: ", img_crop_overlay)

    os.system("rm -rf " + img_crop_overlay)

    print("creating empty dir with name ", img_crop_overlay)
    os.makedirs(img_crop_overlay)

    print("Starting Cropping the images")

    for idx, file_name in tqdm(enumerate(meta_df["mask_file_names"]), total=len(df)):
        img = Image.open(os.path.join(input_dir, "img_mask_overlay", file_name))
        minx, miny, maxx, maxy = get_bounds_tp(meta_df.iloc[idx]["image_polygon"])
        cropped_img = img.crop((minx - 5, miny - 5, maxx + 5, maxy + 5))
        cropped_img.save(os.path.join(img_crop_overlay, file_name))

    print("Finished Cropping the images")

In [17]:
crop_save_masked_images(
    root_dir=INFERENCE_ROOT,
    meta_df=df,
    crop_output_dir_name="img_mask_overlay_crops",
)

creating empty dir with name  /workspaces/alivio-app/data/xview_building_damage/inference/2024-04-15/img_mask_overlay_crops
Starting Cropping the images


  4%|▎         | 8/216 [00:00<00:02, 73.83it/s]

100%|██████████| 216/216 [00:02<00:00, 73.40it/s]

Finished Cropping the images





In [18]:
def sort_masks_by_class(
    top_dir: str, meta_df: pd.DataFrame, cls_path="img_mask_ov_crop_class"
) -> None:
    input_dir = os.path.join(top_dir, str(date.today()))
    disas_post_mask = os.path.join(input_dir, "img_mask_overlay_crops")  # source

    print("Source root : ", disas_post_mask)
    disas_class_path = os.path.join(input_dir, "img_mask_ov_crop_class")

    if os.path.exists(disas_class_path):
        print("Removing the dir with name: ", disas_class_path)

    os.system("rm -rf " + disas_class_path)

    print("creating empty dir with name ", disas_class_path)
    os.makedirs(disas_class_path)

    print("Destination root : ", disas_class_path)

    df = meta_df[meta_df["is_post_image"] == True]

    print("Started moving the mask files to class folder ")

    for idx, file_name in tqdm(enumerate(df["mask_file_names"]), total=len(df)):
        source = os.path.join(disas_post_mask, df.iloc[idx]["mask_file_names"])
        destination = os.path.join(disas_class_path, df.iloc[idx]["damage"])
        if os.path.exists(destination):
            pass
        else:
            print("Creating dir for ", df.iloc[idx]["damage"])
            os.makedirs(destination)

        if os.path.exists(source):
            shutil.copy(source, destination)
    print("Finshed moving the mask files to class folder ")

In [19]:
sort_masks_by_class(INFERENCE_ROOT , df)

Source root :  /workspaces/alivio-app/data/xview_building_damage/inference/2024-04-15/img_mask_overlay_crops
creating empty dir with name  /workspaces/alivio-app/data/xview_building_damage/inference/2024-04-15/img_mask_ov_crop_class
Destination root :  /workspaces/alivio-app/data/xview_building_damage/inference/2024-04-15/img_mask_ov_crop_class
Started moving the mask files to class folder 


  0%|          | 0/216 [00:00<?, ?it/s]

Creating dir for  no-damage
Creating dir for  minor-damage
Creating dir for  major-damage
Creating dir for  destroyed


100%|██████████| 216/216 [00:00<00:00, 2776.63it/s]

Finshed moving the mask files to class folder 





In [20]:
def calculate_weight_decay(batch, train_data_len, nepoches, lambda_norm):
    return lambda_norm * math.sqrt((batch / (train_data_len * nepoches)))

### Test Loader

In [21]:
test_transform = transforms.Compose([
    # Resize the images to 64x64
    transforms.Resize(size=(224, 224)),    
    # Flip the images randomly on the horizontal
    transforms.RandomHorizontalFlip(p=0.5), 
    # Turn the image into a torch.Tensor
    transforms.ToTensor() # this also converts all pixel values from 0 to 255 to be between 0.0 and 1.0 
])

In [22]:
batch_size = 128
inference_all_dataset = datasets.ImageFolder(
    os.path.join(INFERENCE_ROOT, str(date.today()), "img_mask_ov_crop_class"),
    transform=test_transform,
)
inference_loader = DataLoader(
    inference_all_dataset, batch_size=batch_size
)

In [23]:
class_names = inference_all_dataset.classes
print("class_names ",class_names)

class_dict = inference_all_dataset.class_to_idx
print("class_dict ",class_dict)

class_names  ['destroyed', 'major-damage', 'minor-damage', 'no-damage']
class_dict  {'destroyed': 0, 'major-damage': 1, 'minor-damage': 2, 'no-damage': 3}


### Load the pickle file

In [24]:
path = "vit2024-04-11.pkl"
vit_model = joblib.load(os.path.join(path))

In [25]:
%%capture

print("=============================== Model Layers ==========================")
for layer_name, p in vit_model.named_parameters():
    print("Layer Name: {}, Frozen: {}".format(layer_name, not p.requires_grad))
    print()

In [26]:
def get_class_weights(labels):
    class_counts = np.bincount(labels)
    num_classes = len(class_counts)
    total_samples = len(labels)

    class_weights = []
    for count in class_counts:
        weight = 1 / (count / total_samples)
        class_weights.append(weight)

    return class_weights

In [27]:
def get_metrics(preds_list, target_list, num_classes=4) -> tuple:
    pred_ts = tensor(preds_list)
    target_ts = tensor(target_list)

    accuracy = multiclass_accuracy(pred_ts, target_ts, num_classes=4)

    f1_score = multiclass_f1_score(
        pred_ts, target_ts, num_classes=4, average="weighted"
    )

    precision = multiclass_precision(
        pred_ts, target_ts, num_classes=4, average="weighted"
    )
    recall = multiclass_recall(pred_ts, target_ts, num_classes=4, average="weighted")
    f1_score_class_wise = multiclass_f1_score(
        pred_ts, target_ts, num_classes=4, average=None
    )

    print("Accuracy :", accuracy)
    print("F1-score : ", f1_score)
    print("F1-score Classwise : ", f1_score_class_wise)
    print("Precision : ", precision)
    print("Recall : ", recall)
    return accuracy, f1_score, precision, recall

In [28]:
def accuracy_per_class(class_correct, class_total, class_names, accuracy):
    n_class = len(class_names)

    class_accuracy = class_correct / class_total

    print("Test Accuracy of Classes")
    print()

    for c in range(n_class):
        print(
            "{}\t: {}% \t ({}/{})".format(
                class_names[c],
                int(class_accuracy[c] * 100),
                int(class_correct[c]),
                int(class_total[c]),
            )
        )

    print()
    print(
        "Test Accuracy of Dataset: \t {}% \t ({}/{})".format(
            int(accuracy), int(np.sum(class_correct)), int(np.sum(class_total))
        )
    )

In [29]:
def model_eval(model, test_loader, criterion, class_names):
    model.eval()

    preds_list = []
    target_list = []
    output_list = []

    test_loss = 0.0
    # test_focal_loss = 0.0
    accuracy = 0

    # number of classes
    n_class = len(class_names)

    class_correct = np.zeros(n_class)
    class_total = np.zeros(n_class)

    # move model back to cpu
    model = model.to("cpu")

    # test model
    for images, targets in test_loader:

        # get outputs
        outputs = model(images)

        # calculate loss
        loss = criterion(outputs, targets)
        # fl = get_focal_loss(loss)

        # track loss
        test_loss += loss.item()
        # test_focal_loss += fl.item()

        # get predictions from probabilities
        preds = torch.argmax(F.softmax(outputs, dim=1), dim=1)

        target_list.extend(targets)
        output_list.extend(torch.argmax(F.softmax(outputs, dim=1), dim=1))
        preds_list.extend(preds)

        # get correct predictions
        correct_preds = (preds == targets).type(torch.FloatTensor)

        # calculate and accumulate accuracy
        accuracy += torch.mean(correct_preds).item() * 100

        # calculate test accuracy for each class
        for c in range(n_class):

            targets = targets.to("cpu")

            class_total[c] += (targets == c).sum()
            class_correct[c] += ((correct_preds) * (targets == c)).sum()

    # get average accuracy
    accuracy = accuracy / len(test_loader)

    # get average loss
    test_loss = test_loss / len(test_loader)

    # test_focal_loss = test_focal_loss/ len(test_loader)

    # output test loss statistics
    print("Test Loss: {:.6f}".format(test_loss))
    # print('Test Focal Loss: {:.6f}'.format(test_focal_loss))

    accuracy_per_class(class_correct, class_total, class_names, accuracy)

    metrics = get_metrics(preds_list, target_list)
    class_data = {class_names[c]: {"correct": class_correct[c], "total": class_total[c]} for c in range(n_class)}

    return metrics, class_data

In [30]:
criterion = nn.CrossEntropyLoss()

x, y = model_eval(vit_model, inference_loader, criterion, class_names)

Test Loss: 1.581329
Test Accuracy of Classes

destroyed	: 71% 	 (5/7)
major-damage	: 41% 	 (21/51)
minor-damage	: 33% 	 (21/63)
no-damage	: 16% 	 (16/95)

Test Accuracy of Dataset: 	 27% 	 (63/216)
Accuracy : tensor(0.2917)
F1-score :  tensor(0.3091)
F1-score Classwise :  tensor([0.1316, 0.3750, 0.3360, 0.2689])
Precision :  tensor(0.4756)
Recall :  tensor(0.2917)


In [33]:
import pandas as pd
import plotly.express as px

# Creating DataFrame using comprehensions
plot_df = pd.DataFrame(
    {
        "Class": [k for k in y.keys()],
        "Correct": [v["correct"] for v in y.values()],
        "Incorrect": [v["total"] - v["correct"] for v in y.values()],
    }
)

# Plotting the stacked bar chart
fig = px.bar(
    plot_df,
    x="Class",
    y=["Correct", "Incorrect"],
    labels={"value": "Number of Predictions", "variable": "Prediction Type"},
    color_discrete_map={"Correct": "green", "Incorrect": "red"},
    title="Correct vs Incorrect Predictions by Class",
    template='plotly_white'
)

fig.show()