In [116]:
import os
import json
import pandas as pd
from typing import Literal
from datetime import date
from torch.utils.data import DataLoader
import torch.nn as nn
import torch

import cv2
import argparse
import numpy as np
import shutil

from torch import tensor

from tqdm import tqdm
from os import path, walk, makedirs
from skimage.io import imread
from shapely import wkt
from shapely.geometry import mapping, MultiPolygon, Polygon
from cv2 import fillPoly, imwrite
from PIL import Image
from shapely.geometry import MultiPolygon, Polygon
import shapely.wkt
import matplotlib.pyplot as plt
import joblib
from torchvision import datasets, models, transforms, utils
from torcheval.metrics.functional import multiclass_precision, multiclass_f1_score, multiclass_recall, multiclass_accuracy


In [3]:
os.chdir('../..')

In [10]:
ROOT_DIR = os.getcwd()
INFERENCE_ROOT = os.path.join(ROOT_DIR, 'data','xview_building_damage','inference')
UPLOADS_IMG = os.path.join(INFERENCE_ROOT, 'upload', 'img')
UPLOADS_JSON = os.path.join(INFERENCE_ROOT, 'upload', 'json')
POST_PROCESSED =os.path.join(INFERENCE_ROOT, 'postprocesssed')
PRE_PROCESSED =os.path.join(INFERENCE_ROOT, 'preprocesssed')

In [16]:
uploaded_imgs = (
    [f"{UPLOADS_IMG}/{f}" for f in os.listdir(UPLOADS_IMG)]
)
uploaded_jsons = (
    [f"{UPLOADS_JSON}/{f}" for f in os.listdir(UPLOADS_JSON)]
)


### Convert the Uploaded JSON to CSV

In [13]:
label_json_data: list[dict] = []

def read_and_store_label_json(label_json_path: str):
    """A thread-safe function that reads a json as a dictionary and writes to a global list"""
    with open(label_json_path) as f:
        label_json_data.append(json.load(f))

In [22]:
read_and_store_label_json(uploaded_jsons[0])

In [21]:
label_json_series: pd.Series = pd.Series(label_json_data)
label_df_original: pd.DataFrame = pd.json_normalize(label_json_series)


In [37]:

lbl_df: pd.DataFrame = label_df_original.copy()
CHALLENGE_TYPE: Literal["train", "test", "hold"] = "test"

def json_df_to_csv(label_df):
    
    label_df_lng_lat: pd.DataFrame = (
        label_df.drop(columns=["features.xy", "features.lng_lat"])
        .join(label_df["features.lng_lat"].explode())
        .reset_index(drop=True)
    )
    
    label_df_features: pd.DataFrame = (
        label_df.drop(columns=["features.xy", "features.lng_lat"])
        .join(label_df["features.xy"].explode())
        .reset_index(drop=True)
    )
    
    lng_lat_normalized: pd.DataFrame = pd.json_normalize(label_df_lng_lat["features.lng_lat"]).rename(
        columns={
            "wkt": "map_polygon",
            "properties.feature_type": "map_feature_type",
            "properties.subtype": "map_damage",
            "properties.uid": "building_id",
        }
    )
    
    features_normalized: pd.DataFrame = pd.json_normalize(
        label_df_features["features.xy"]
    ).rename(
        columns={
            "wkt": "image_polygon",
            "properties.feature_type": "image_feature_type",
            "properties.subtype": "image_damage",
            "properties.uid": "building_id",
        }
    )
    
    label_df_lng_lat_normalized = label_df_lng_lat.drop(columns=["features.lng_lat"]).join(
        lng_lat_normalized
    )
    
    label_df_features_normalized = label_df_features.drop(columns=["features.xy"]).join(
        features_normalized
    )
    
    label_df_final: pd.DataFrame = label_df_lng_lat_normalized.merge(
        label_df_features_normalized[
            [
                "metadata.id",
                "image_polygon",
                "image_feature_type",
                "image_damage",
                "building_id",
            ]
        ],
        "left",
        ["metadata.id", "building_id"],
    )
    
    label_df_final = (
        label_df_final.rename(
            columns={
                c: c.replace("metadata.", "")
                for c in label_df_final.columns
                if c.startswith("metadata.")
            }
        )
        .drop(
            columns=[
                "map_feature_type",
                "map_damage",
            ]
        )
        .rename(
            columns={
                "image_feature_type": "feature_type",
                "image_damage": "damage",
            }
        )
    )
    
    label_df_final["dataset"] = CHALLENGE_TYPE
    label_df_final["capture_date"] = pd.to_datetime(label_df_final["capture_date"])
    
    label_df_final["image_id"] = label_df_final["img_name"].dropna().apply(lambda cell: "_".join(cell.split("_")[0:2]))
    label_df_final["is_pre_image"] = label_df_final["img_name"].dropna().apply(lambda cell: "_pre_disaster" in cell)
    label_df_final["is_post_image"] = (
        label_df_final["img_name"].dropna().apply(lambda cell: "_post_disaster" in cell)
    )
    
    label_df_final.to_parquet(f"{CHALLENGE_TYPE}.parquet")
    
    concat_list: list[pd.DataFrame] = [
        pd.read_parquet(pq_file) for pq_file in os.listdir() if pq_file.endswith(".parquet")
    ]
    
    df = pd.concat(concat_list).reset_index(drop=True)
    df.to_parquet(os.path.join(POST_PROCESSED, 'inference_data.parquet'))
    
    df.to_csv(
       os.path.join(POST_PROCESSED, 'inference_data.csv'), index=False
    )

In [38]:
json_df_to_csv(lbl_df)

### Step 2 : Begin Preprocess

In [42]:
def get_df_with_class_numeric_labels(df_name):
    # df_name['damage'].fillna('pre', inplace=True)
    df_name['damage_class']=df_name['damage']
    keys=list(df_name['damage_class'].value_counts().keys())
    df_name['damage_class']=df_name['damage_class'].apply(keys.index)
    df_name['damage_class'].value_counts()
    return df_name

In [47]:
def get_metadata():
    infer_csv = pd.read_csv(os.path.join(POST_PROCESSED,'inference_data.csv'))
    data = infer_csv[infer_csv['image_polygon'].notna()]
    df_disaster = data[data['damage'] != 'un-classified']
    df_disaster['mask_file_names'] = df_disaster['img_name'].str.replace('.png', '_')+df_disaster['building_id']+'.png'
    df_disaster_class_labels = get_df_with_class_numeric_labels(df_disaster)
    return df_disaster_class_labels

In [48]:
df=get_metadata()

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_disaster['mask_file_names'] = df_disaster['img_name'].str.replace('.png', '_')+df_disaster['building_id']+'.png'
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_name['damage_class']=df_name['damage']
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_name['damage_class']=df_name['damage_class']

In [61]:
def polygons_mask(polygons):
    img_mask = np.zeros(im_size, np.uint8)
    if not polygons:
        return img_mask
    int_coords = lambda x: np.array(x).round().astype(np.int32)
    exteriors = [int_coords(poly.exterior.coords) for poly in polygons]
    interiors = [int_coords(pi.coords) for poly in polygons
                 for pi in poly.interiors]
    cv2.fillPoly(img_mask, exteriors, 1)
    cv2.fillPoly(img_mask, interiors, 0)
    return img_mask

def create_image_mask_overall(root_dir, im_size, meta_df):
    input_dir =  os.path.join(root_dir, 'upload') 
    dest_dir = os.path.join(root_dir, str(date.today()))
    img_input = os.path.join(input_dir, 'img')
    
    if os.path.exists(dest_dir):
      print("Removing the dir with name: ", dest_dir) 
      os.system("rm -rf "+dest_dir)
        
    print("creating empty dir with name " , dest_dir)
    os.makedirs(dest_dir)
    
    img_overlay = os.path.join(dest_dir, 'img_mask_overlay')
    if os.path.exists(img_overlay):
        print("Removing the dir with name: ", img_overlay) 
        os.system("rm -rf "+img_overlay)
    
    print("creating empty dir with name " , img_overlay)
    os.makedirs(img_overlay)
   
   
   #output_dir =os.path.join(root_dir, 'challenge', dataSplit, 'disaster','hurricanes-all', 'img_mask_overlay', hurricane_name )

    df = meta_df[meta_df['is_post_image'] == True]
    
    print("Starting : Mask overlay")
    for idx, file_name in enumerate(df['mask_file_names']):
       image = cv2.imread(os.path.join(img_input, df.iloc[idx]['img_name']))
       mask = np.zeros(image.shape[:2], dtype="uint8")
       _mask = polygons_mask([shapely.wkt.loads(df.iloc[idx]['image_polygon'])])
       masked = cv2.bitwise_and(image, image, mask=_mask)
       plt.imsave(os.path.join(img_overlay, file_name), masked)
    print("Ending : Mask overlay")

In [64]:
def get_bounds_tp(image_wkt):
    bounds = wkt.loads(image_wkt).bounds
    return (bounds[0], bounds[1], bounds[2], bounds[3]) ## 

def crop_save_masked_images(root_dir, meta_df, crop_output_dir_name = 'img_mask_overlay_crops'):
    input_dir =  os.path.join(root_dir, str(date.today())) 
    img_crop_overlay = os.path.join(input_dir, crop_output_dir_name)
    if os.path.exists(img_crop_overlay):
        print("Removing the dir with name: ", img_crop_overlay) 
    os.system("rm -rf "+img_crop_overlay)
    
    print("creating empty dir with name " , img_crop_overlay)
    os.makedirs(img_crop_overlay)
    
    print("Starting Cropping the images")
    for idx, file_name in enumerate(meta_df['mask_file_names']):
        img = Image.open(os.path.join(input_dir,'img_mask_overlay', file_name))
        minx, miny, maxx, maxy = get_bounds_tp(meta_df.iloc[idx]['image_polygon'])
        cropped_img=img.crop((minx-5, miny-5, maxx+5, maxy+5))
        cropped_img.save(os.path.join(img_crop_overlay, file_name))
    print("Finished Cropping the images")

In [77]:
def sort_masks_by_class(top_dir, meta_df, cls_path='img_mask_ov_crop_class'):
    input_dir =  os.path.join(top_dir, str(date.today())) 
    disas_post_mask= os.path.join(input_dir, 'img_mask_overlay_crops') #source
    print("Source root : ", disas_post_mask)
    disas_class_path=os.path.join(input_dir, 'img_mask_ov_crop_class' )
    if os.path.exists(disas_class_path):
        print("Removing the dir with name: ", disas_class_path) 
    os.system("rm -rf "+disas_class_path)
    
    print("creating empty dir with name " , disas_class_path)
    os.makedirs(disas_class_path)
    
    print("Destination root : ", disas_class_path)
    
    df = meta_df[meta_df['is_post_image'] == True]
    
    print("Started moving the mask files to class folder ")
    for idx, file_name in enumerate(df['mask_file_names']):
        source = os.path.join(disas_post_mask, df.iloc[idx]['mask_file_names'])
        destination = os.path.join(disas_class_path, df.iloc[idx]['damage'])
        if os.path.exists(destination):
            pass
        else:
            print( "Creating dir for " , df.iloc[idx]['damage'])
            os.makedirs(destination)
        
        if os.path.exists(source):
            shutil.copy(source, destination)
    print("Finshed moving the mask files to class folder ")

In [63]:
im_size =(1024, 1024)
create_image_mask_overall(INFERENCE_ROOT, im_size, df)


Removing the dir with name:  /Users/yaminigotimukul/DataScience/Berekley/Semesters/Spring_2024/cleaned_repo/alivio/data/xview_building_damage/inference/2024-04-02
creating empty dir with name  /Users/yaminigotimukul/DataScience/Berekley/Semesters/Spring_2024/cleaned_repo/alivio/data/xview_building_damage/inference/2024-04-02
creating empty dir with name  /Users/yaminigotimukul/DataScience/Berekley/Semesters/Spring_2024/cleaned_repo/alivio/data/xview_building_damage/inference/2024-04-02/img_mask_overlay
Starting : Mask overlay
Ending : Mask overlay


In [72]:
crop_save_masked_images(INFERENCE_ROOT, df, crop_output_dir_name = 'img_mask_overlay_crops')

Removing the dir with name:  /Users/yaminigotimukul/DataScience/Berekley/Semesters/Spring_2024/cleaned_repo/alivio/data/xview_building_damage/inference/2024-04-02/img_mask_overlay_crops
creating empty dir with name  /Users/yaminigotimukul/DataScience/Berekley/Semesters/Spring_2024/cleaned_repo/alivio/data/xview_building_damage/inference/2024-04-02/img_mask_overlay_crops
Starting Cropping the images
Finished Cropping the images


In [78]:
sort_masks_by_class(INFERENCE_ROOT , df)

Source root :  /Users/yaminigotimukul/DataScience/Berekley/Semesters/Spring_2024/cleaned_repo/alivio/data/xview_building_damage/inference/2024-04-02/img_mask_overlay_crops
creating empty dir with name  /Users/yaminigotimukul/DataScience/Berekley/Semesters/Spring_2024/cleaned_repo/alivio/data/xview_building_damage/inference/2024-04-02/img_mask_ov_crop_class
Destination root :  /Users/yaminigotimukul/DataScience/Berekley/Semesters/Spring_2024/cleaned_repo/alivio/data/xview_building_damage/inference/2024-04-02/img_mask_ov_crop_class
Started moving the mask files to class folder 
Creating dir for  no-damage
Creating dir for  minor-damage
Creating dir for  major-damage
Creating dir for  destroyed
Finshed moving the mask files to class folder 


In [79]:
def calculate_weight_decay(batch, train_data_len, nepoches, lambda_norm):
    return lambda_norm * math.sqrt((batch/(train_data_len * nepoches)))

### Test Loader

In [92]:
test_transform = transforms.Compose([
    # Resize the images to 64x64
    transforms.Resize(size=(224, 224)),    
    # Flip the images randomly on the horizontal
    transforms.RandomHorizontalFlip(p=0.5), 
    # Turn the image into a torch.Tensor
    transforms.ToTensor() # this also converts all pixel values from 0 to 255 to be between 0.0 and 1.0 
])

In [100]:
batch_size = 128
inference_all_dataset = datasets.ImageFolder(os.path.join( INFERENCE_ROOT,'2024-04-02', 'img_mask_ov_crop_class'), transform=test_transform)
inference_loader = DataLoader(inference_all_dataset, batch_size=batch_size, num_workers= 2)

In [107]:
class_names = inference_all_dataset.classes
print("class_names ",class_names)

class_dict = inference_all_dataset.class_to_idx
print("class_dict ",class_dict)

class_names  ['destroyed', 'major-damage', 'minor-damage', 'no-damage']
class_dict  {'destroyed': 0, 'major-damage': 1, 'minor-damage': 2, 'no-damage': 3}


### Load the pickle file

In [85]:
path = "/Users/yaminigotimukul/DataScience/Berekley/Semesters/Spring_2024/cleaned_repo/alivio/train_models"
vit_model=joblib.load(os.path.join("/Users/yaminigotimukul/DataScience/Berekley/Semesters/Spring_2024/cleaned_repo/alivio/train_models", "vit2024-04-04-f155.pkl"))
print("=============================== Model Layers ==========================")
for layer_name, p in vit_model.named_parameters():
    print('Layer Name: {}, Frozen: {}'.format(layer_name, not p.requires_grad))
    print()

Layer Name: class_token, Frozen: True

Layer Name: conv_proj.weight, Frozen: True

Layer Name: conv_proj.bias, Frozen: True

Layer Name: encoder.pos_embedding, Frozen: True

Layer Name: encoder.layers.encoder_layer_0.ln_1.weight, Frozen: True

Layer Name: encoder.layers.encoder_layer_0.ln_1.bias, Frozen: True

Layer Name: encoder.layers.encoder_layer_0.self_attention.in_proj_weight, Frozen: True

Layer Name: encoder.layers.encoder_layer_0.self_attention.in_proj_bias, Frozen: True

Layer Name: encoder.layers.encoder_layer_0.self_attention.out_proj.weight, Frozen: True

Layer Name: encoder.layers.encoder_layer_0.self_attention.out_proj.bias, Frozen: True

Layer Name: encoder.layers.encoder_layer_0.ln_2.weight, Frozen: True

Layer Name: encoder.layers.encoder_layer_0.ln_2.bias, Frozen: True

Layer Name: encoder.layers.encoder_layer_0.mlp.0.weight, Frozen: True

Layer Name: encoder.layers.encoder_layer_0.mlp.0.bias, Frozen: True

Layer Name: encoder.layers.encoder_layer_0.mlp.3.weight, Fro

In [94]:
def get_class_weights(labels):
    class_counts = np.bincount(labels)
    num_classes = len(class_counts)
    total_samples = len(labels)
    
    class_weights = []
    for count in class_counts:
        weight = 1 / (count / total_samples)
        class_weights.append(weight)
    
    return class_weights

In [95]:
def get_metrics(preds_list, target_list, num_classes = 4 ):
    pred_ts=tensor(preds_list)
    target_ts = tensor(target_list)
    
    accuracy = multiclass_accuracy(pred_ts, target_ts, num_classes=4)
    
    f1_score = multiclass_f1_score(pred_ts, target_ts, num_classes=4, average="weighted")
    
    precision = multiclass_precision(pred_ts, target_ts, num_classes=4, average="weighted")
    recall = multiclass_recall(pred_ts, target_ts, num_classes=4, average="weighted")
    
    print("Accuracy :", accuracy)
    print("F1-score : ", f1_score)
    print("Precision : ", precision)
    print("Recall : ", recall)
    return accuracy, f1_score, precision, recall

In [96]:
def accuracy_per_class(class_correct, class_total, class_names, accuracy ):
    n_class = len(class_names)

    class_accuracy = class_correct / class_total

    print('Test Accuracy of Classes')
    print()
    
    for c in range(n_class):
        print('{}\t: {}% \t ({}/{})'.format(class_names[c],
                                    int(class_accuracy[c] * 100), int(class_correct[c]), int(class_total[c])) )
    
    print()
    print('Test Accuracy of Dataset: \t {}% \t ({}/{})'.format(int(accuracy),
                                                               int(np.sum(class_correct)), int(np.sum(class_total)) ))
    

In [118]:
def model_eval(model, test_loader, criterion, class_names ):
    model.eval()
    
    preds_list = []
    target_list = []
    output_list = []
    
    test_loss = 0.0
    # test_focal_loss = 0.0
    accuracy = 0
    
    # number of classes
    n_class = len(class_names)
    
    class_correct = np.zeros(n_class)
    class_total = np.zeros(n_class)
    
    # move model back to cpu
    model = model.to('cpu')
    
    # test model
    for images, targets in test_loader:
    
        # get outputs
        outputs = model(images)
    
        # calculate loss
        loss = criterion(outputs, targets)
        # fl = get_focal_loss(loss)
    
        # track loss
        test_loss += loss.item()
        #test_focal_loss += fl.item()
        
        # get predictions from probabilities
        preds = torch.argmax(F.softmax(outputs, dim=1), dim=1)
    
        target_list.extend(targets)
        output_list.extend(torch.argmax(F.softmax(outputs, dim=1), dim=1 ))
        preds_list.extend(preds)
    
        # get correct predictions
        correct_preds = (preds == targets).type(torch.FloatTensor)
    
        # calculate and accumulate accuracy
        accuracy += torch.mean(correct_preds).item() * 100
    
        # calculate test accuracy for each class
        for c in range(n_class):
    
            targets = targets.to('cpu')
    
            class_total[c] += (targets == c).sum()
            class_correct[c] += ((correct_preds) * (targets == c)).sum()
    
    # get average accuracy
    accuracy = accuracy / len(test_loader)
    
    # get average loss
    test_loss = test_loss / len(test_loader)

    # test_focal_loss = test_focal_loss/ len(test_loader)
    
    # output test loss statistics
    print('Test Loss: {:.6f}'.format(test_loss))
    # print('Test Focal Loss: {:.6f}'.format(test_focal_loss))
    
    accuracy_per_class(class_correct, class_total, class_names, accuracy)
    
    get_metrics(preds_list, target_list)
    today = date.today()
    
    final_pred_list = preds_list
    final_target_list = target_list

    
    return preds_list, target_list

In [119]:
criterion = nn.CrossEntropyLoss()

model_eval(vit_model,inference_loader, criterion , class_names)

Test Loss: 1.504613
Test Accuracy of Classes

destroyed	: 85% 	 (6/7)
major-damage	: 21% 	 (11/51)
minor-damage	: 44% 	 (28/63)
no-damage	: 26% 	 (25/95)

Test Accuracy of Dataset: 	 31% 	 (70/216)
Accuracy : tensor(0.3241)
F1-score :  tensor(0.3501)
Precision :  tensor(0.4904)
Recall :  tensor(0.3241)


([tensor(0),
  tensor(0),
  tensor(0),
  tensor(0),
  tensor(0),
  tensor(0),
  tensor(2),
  tensor(1),
  tensor(1),
  tensor(3),
  tensor(1),
  tensor(0),
  tensor(0),
  tensor(0),
  tensor(2),
  tensor(1),
  tensor(2),
  tensor(3),
  tensor(3),
  tensor(2),
  tensor(0),
  tensor(2),
  tensor(0),
  tensor(2),
  tensor(2),
  tensor(2),
  tensor(0),
  tensor(2),
  tensor(2),
  tensor(2),
  tensor(0),
  tensor(0),
  tensor(0),
  tensor(2),
  tensor(0),
  tensor(0),
  tensor(2),
  tensor(1),
  tensor(2),
  tensor(0),
  tensor(1),
  tensor(1),
  tensor(0),
  tensor(0),
  tensor(1),
  tensor(1),
  tensor(0),
  tensor(1),
  tensor(2),
  tensor(1),
  tensor(0),
  tensor(0),
  tensor(0),
  tensor(2),
  tensor(3),
  tensor(0),
  tensor(2),
  tensor(0),
  tensor(2),
  tensor(2),
  tensor(0),
  tensor(2),
  tensor(2),
  tensor(2),
  tensor(2),
  tensor(3),
  tensor(0),
  tensor(0),
  tensor(2),
  tensor(0),
  tensor(1),
  tensor(2),
  tensor(2),
  tensor(3),
  tensor(1),
  tensor(3),
  tensor(0),