# Data Visualization

In [None]:
import os
from pathlib import Path

import pandas as pd

# Root of the xBD building data and the per-split sub-folders beneath it.
xbd_path = 'datasets/xbd'
subsets = ('/train_bldgs/', '/hold_bldgs/', '/test_bldgs/', '/tier3_bldgs/')

# Which split and which disaster folder (by position in the listing) to inspect.
i_subset = 0
i_disaster = 5

# List the disaster folders of the *selected* split so the index below always
# refers to a folder that exists under the path we build from it.
# NOTE(review): os.listdir returns entries in arbitrary order, so a numeric
# index may select a different folder on another machine.
disaster_folders = os.listdir(xbd_path + subsets[i_subset])

# Locate the label CSV once and fail with a readable message when the folder
# holds none, instead of an opaque IndexError.
disaster_dir = Path(xbd_path + subsets[i_subset] + disaster_folders[i_disaster])
csv_matches = list(disaster_dir.glob('*.csv*'))
if not csv_matches:
    raise FileNotFoundError(f'No label CSV found in {disaster_dir}')
labels_csv = csv_matches[0]
print(labels_csv)

labels = pd.read_csv(labels_csv)
labels.columns = ['name', 'xcoords', 'ycoords', 'long', 'lat', 'class']
# A zone id is the first two underscore-separated tokens of the `name` column.
labels['zone'] = labels['name'].str.split('_', n=2).str[:2].str.join('_')
labels['zone'].value_counts()
# Uncomment to list the zones that occur exactly once:
#labels['zone'].value_counts()[labels['zone'].value_counts()==1].index.tolist()

In [None]:
from visualization import plot_on_image

# Draw the labelled buildings of one image from the currently selected split.
selected_subset = subsets[i_subset]
image_id = 'mexico-earthquake_00000192'
plot_on_image(labels, selected_subset, image_id)

In [None]:
from visualization import plot_on_map

# Plot the loaded labels on a map with the Mapbox option switched off.
use_mapbox = False
plot_on_map(labels, mapbox=use_mapbox)

In [None]:
import matplotlib
import matplotlib.pyplot as plt
import numpy as np
from visualization import CmapString

# Map every zone string in the labels to a colour from the viridis palette.
cmap = CmapString(palette='viridis', domain=labels['zone'].values)

# One scatter call per label row, coloured by the zone the row belongs to.
fig, ax = plt.subplots(figsize=(12, 8))
for x_coord, y_coord, zone_id in zip(labels['xcoords'], labels['ycoords'], labels['zone']):
    ax.scatter(x_coord, y_coord, label=zone_id, color=cmap.color(zone_id))
ax.axis('off')
plt.show()

---

# Graph Generation

In [None]:
import os
from pathlib import Path
import json
import pandas as pd

In [None]:
path = 'datasets/xbd/hold_bldgs/'
disaster = 'mexico-earthquake'

# Fail early with a clear message when the requested disaster folder is absent.
disaster_folders = os.listdir(path)
if disaster not in disaster_folders:
    raise FileNotFoundError(f'{disaster!r} not found under {path}')

csv_matches = list(Path(path + disaster).glob('*.csv*'))
if not csv_matches:
    raise FileNotFoundError(f'No label CSV found in {path + disaster}')

# The first CSV column becomes the index; long/lat are not needed here.
labels = pd.read_csv(csv_matches[0], index_col=0).drop(columns=['long', 'lat'])
# A zone id is the first two underscore-separated tokens of the index label.
labels['zone'] = ['_'.join(name.split('_', 2)[:2]) for name in labels.index]

# Per-zone row count and per-zone number of rows whose class is anything other
# than 'un-classified', each computed in a single pass over the frame.
zone_sizes = labels['zone'].value_counts()
classified_per_zone = (labels['class'] != 'un-classified').groupby(labels['zone']).sum()

# Keep zones with more than one row that are neither entirely 'un-classified'
# nor left with exactly one classified row, i.e. at least two classified rows.
zones = zone_sizes[zone_sizes > 1].index.tolist()
processed_files = [
    f'{zone_id}.pt'
    for zone_id in zones
    if classified_per_zone[zone_id] >= 2
]

len(processed_files)

---

# Training

In [1]:
import json
import numpy as np
import matplotlib.pyplot as plt
import torch
import torch.nn.functional as F
from torch.optim.lr_scheduler import ReduceLROnPlateau
from torch_geometric.data import GraphSAINTNodeSampler
from tqdm import tqdm
from dataset import xBD
from model import DeeperGCN
from metrics import xview2_f1_score

# Read the experiment configuration from disk.
with open('exp_settings.json', 'r') as settings_file:
    settings_dict = json.load(settings_file)

seed = settings_dict['seed']

# Data-loading parameters.
data_cfg = settings_dict['data']
batch_size = data_cfg['batch_size']
num_steps = data_cfg['saint_num_steps']

model_cfg = settings_dict['model']
name = model_cfg['name']

# Choose the train/test roots from the configured training set: a single
# 'mexico-earthquake' entry, any other single entry, or several entries.
train_set = settings_dict['train_set']
if len(train_set) != 1:
    train_key, test_key = 'palu_matthew_rosa_train_root', 'palu_matthew_rosa_test_root'
elif train_set[0] == 'mexico-earthquake':
    train_key, test_key = 'mexico_train_root', 'mexico_test_root'
else:
    train_key, test_key = 'palu_train_root', 'palu_test_root'
train_root = data_cfg[train_key]
test_root = data_cfg[test_key]
# The hold-out root is always the Mexico one, whatever the training set is.
hold_root = data_cfg['mexico_hold_root']

# Model and optimisation hyper-parameters.
hidden_units = model_cfg['hidden_units']
num_layers = model_cfg['num_layers']
dropout_rate = model_cfg['dropout_rate']
lr = model_cfg['lr']
n_epochs = settings_dict['epochs']
starting_epoch = settings_dict['starting_epoch']
path = model_cfg['path']
save_best_only = settings_dict['save_best_only']

# Use the first GPU when one is available, otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda:0")
else:
    device = torch.device("cpu")

# Seed torch and request deterministic cuDNN behaviour (auto-tuner off).
torch.manual_seed(seed)
torch.backends.cudnn.deterministic = True
torch.backends.cudnn.benchmark = False

In [2]:
# Only the hold-out split is built in this notebook; the train/test datasets
# are left disabled below.
#train_dataset = xBD(train_root, 'train', train_set)
#test_dataset = xBD(train_root, 'test', train_set)
hold_disasters = ['mexico-earthquake']
hold_dataset = xBD(hold_root, 'hold', hold_disasters)

In [4]:
# Build the network with dimensions taken from the hold-out dataset and the
# hyper-parameters read from the settings file.
model = DeeperGCN(hold_dataset.num_node_features,
                  hold_dataset.num_edge_features,
                  hidden_units,
                  hold_dataset.num_classes,
                  num_layers,
                  dropout_rate)
model_path = path + '/' + name + '_best.pt'
# map_location remaps the stored tensors onto the active device, so a
# checkpoint saved on a GPU still loads when only the CPU is available.
state_dict = torch.load(model_path, map_location=device)
model.load_state_dict(state_dict)
model = model.to(device)

In [14]:
# Take the graph at index 1 of the hold-out dataset and wrap it in a
# GraphSAINT node sampler configured from the settings file.
hold_graph_index = 1
data = hold_dataset[hold_graph_index]
sampler = GraphSAINTNodeSampler(
    data,
    batch_size=batch_size,
    num_steps=num_steps,
    num_workers=2,
)

In [15]:
# Display the selected graph object to inspect its tensors' shapes.
data

Data(edge_attr=[243951, 2], edge_index=[2, 243951], x=[699, 131072], y=[699, 4])

In [None]:
# Smoke test: push one sampled sub-graph through the model and print its loss.
# NOTE(review): `class_weights` is not defined in any cell shown here; it must
# already be in scope before this cell is run.
# The weight tensor is created on the same device as the model output so the
# loss does not fail with a device mismatch when running on CUDA.
loss_weight = torch.as_tensor(class_weights, dtype=torch.float, device=device)
for subdata in sampler:
    subdata = subdata.to(device)
    out = model(subdata.x, subdata.edge_index, subdata.edge_attr)
    loss = F.binary_cross_entropy(input=out, target=subdata.y.float(), weight=loss_weight)
    print(loss)
    break  # a single batch is enough for this check

In [16]:
# Score the loaded model on every sub-graph the sampler yields, with the model
# in eval mode and gradient tracking disabled.
model.eval()
batch_outs, batch_ys = [], []
with torch.no_grad():
    for batch in sampler:
        batch = batch.to(device)
        prediction = model(batch.x, batch.edge_index, batch.edge_attr)
        # Move results back to the CPU before accumulating them.
        batch_outs.append(prediction.cpu())
        batch_ys.append(batch.y.cpu())
    outs = torch.cat(batch_outs)
    ys = torch.cat(batch_ys)
    f1 = xview2_f1_score(ys, outs)

In [27]:
from metrics import parse_ordinal_output
# Count how often each parsed target class occurs among the sampled nodes.
parsed_targets = parse_ordinal_output(ys)
unique, counts = torch.unique(parsed_targets, return_counts=True)
for summary in (unique, counts):
    print(summary)

tensor([0, 2])
tensor([1012,    1])


In [29]:
# Count how often each class is the arg-max of the model output.
predicted_classes = outs.argmax(dim=1)
unique, counts = torch.unique(predicted_classes, return_counts=True)
print(unique, counts, sep='\n')

tensor([0])
tensor([1013])


In [19]:
# Display the score returned by xview2_f1_score in the evaluation cell.
f1

1.999997999015862e-06

In [31]:
from sklearn.metrics import f1_score
# Cross-check with scikit-learn's per-class F1 (average=None).
# Ground truth comes from the parsed targets and predictions from the arg-max
# of the model output. Per-class F1 is symmetric in its two arguments, so the
# scores are the same whichever way round the two are passed.
y_true = parse_ordinal_output(ys)
y_pred = outs.argmax(dim=1)
f1_classes = f1_score(y_true, y_pred, average=None)
print(f1_classes)

[0.99950617 0.        ]


In [33]:
# Harmonic mean of the per-class F1 scores; epsilon keeps a zero score from
# causing a division by zero.
epsilon = 1e-6
inverse_total = 0
for class_f1 in f1_classes:
    inverse_total += (class_f1 + epsilon) ** -1
len(f1_classes) / inverse_total

1.999997999015862e-06

In [36]:
# Macro-averaged F1 over the same labels, shown next to the harmonic mean
# computed in the previous cell.
f1_score(y_true, y_pred, average='macro')

0.4997530864197531

---

In [None]:
import torch
from metrics import to_onehot

# Try to_onehot on three hand-written 4-element rows.
sample_rows = [
    [1, 0, 0, 0],
    [1, 1, 0, 0],
    [1, 1, 1, 1],
]
y = torch.Tensor(sample_rows)

to_onehot(y)