# Data Visualization

In [None]:
import pandas as pd
from visualization import plot_on_map

In [None]:
# Read the Joplin tornado labels CSV (its first column becomes the index)
# and hand the frame to plot_on_map with color='zone'.
joplin_labels_csv = "datasets/xbd/tier3_bldgs/joplin-tornado/joplin-tornado_tier3_labels.csv"
labels = pd.read_csv(joplin_labels_csv, index_col=0)
plot_on_map(labels, color='zone')

In [None]:
# Read the Lower Puna volcano labels CSV (its first column becomes the index)
# and hand the frame to plot_on_map with that function's default arguments.
lower_puna_labels_csv = "datasets/xbd/tier3_bldgs/lower-puna-volcano/lower-puna-volcano_tier3_labels.csv"
labels = pd.read_csv(lower_puna_labels_csv, index_col=0)
plot_on_map(labels)

## Polygon Visualization

In [None]:
from visualization import plot_image

# Pass the post-disaster label JSON of Joplin tile 00000002 to plot_image.
post_disaster_label_json = "datasets/xbd/tier3/labels/joplin-tornado_00000002_post_disaster.json"
plot_image(post_disaster_label_json)

## Graph Visualization

In [None]:
from visualization import plot_graph

# Pass the saved .pt file and the matching post-disaster PNG of Joplin tile
# 00000002 to plot_graph (first argument: .pt path, second: image path).
graph_file = 'datasets/joplin-tornado_00000002.pt'
image_file = 'datasets/joplin-tornado_00000002_post_disaster.png'
plot_graph(graph_file, image_file)

## Beirut Graph Visualization

In [None]:
from dataset import BeirutFullGraph
import networkx as nx
from torch_geometric.utils import to_networkx
import matplotlib.pyplot as plt
from matplotlib.patches import Circle

# Build the Beirut dataset and take its first item (index 0).
dataset = BeirutFullGraph(
    'datasets/beirut_bldgs/beirut_graph_meta',
    'datasets/beirut_bldgs',
    1366,
    meta_features=True,
)
data = dataset[0]

# Convert to a networkx graph; node i is drawn at row i of data.pos.
datax = to_networkx(data)
pos = {node_id: coords for node_id, coords in enumerate(data.pos.numpy())}

# RGB color for each label value 0..3 found in data.y.
color_dict = {
    0: (0, 1, 0),
    1: (0, 0, 1),
    2: (1, 0.27, 0),
    3: (1, 0, 0),
}
colors = [color_dict[label] for label in data.y.numpy()]

fig = plt.figure(figsize=(35, 15))
nx.draw_networkx(
    datax,
    pos=pos,
    arrows=False,
    with_labels=False,
    node_size=100,
    node_color=colors,
)

# Legend proxies: one circle per label value, colored from color_dict so the
# legend and the node colors come from the same mapping.
custom_circles = [
    Circle((0, 0), radius=0.2, color=color_dict[label])
    for label in (0, 1, 2, 3)
]
legend_names = ['minor-damage', 'moderate-damage', 'major-damage', 'severe-damage']
plt.legend(custom_circles, legend_names, prop={'size': 15})

plt.axis('off')
# Uncomment to write the figure to disk:
#plt.savefig('beirut_graph.png', dpi=100)
plt.show()

---

## Clustering Confusion Matrix

In [None]:
import numpy as np
from train_autoencoder import _make_cost_m
from scipy.optimize import linear_sum_assignment

# 5x5 confusion matrix, written one row per line for readability.
_cm_rows = [
    [46, 26, 475, 26, 11],
    [5, 445, 31, 108, 9],
    [4, 506, 32, 54, 1],
    [164, 31, 43, 158, 197],
    [408, 12, 6, 15, 123],
]
cm = np.array(_cm_rows)
cm

In [None]:
# Turn the confusion matrix into a cost matrix with _make_cost_m, then solve
# the assignment problem; the result is a (row indices, column indices) pair.
cost_matrix = _make_cost_m(cm)
indexes = linear_sum_assignment(cost_matrix)
indexes

In [None]:
# Reorder the columns of cm by the column indices returned from the
# assignment step (second element of `indexes`).
column_order = indexes[1]
cm2 = cm[:, column_order]
cm2

In [None]:
# FP: per-column total of cm2 minus its diagonal entry.
column_totals = cm2.sum(axis=0)
matched = np.diag(cm2)
column_totals - matched

In [None]:
# FN: per-row total of cm2 minus its diagonal entry.
row_totals = cm2.sum(axis=1)
matched = np.diag(cm2)
row_totals - matched

---

# Beirut Meta Features

In [None]:
import numpy as np
import pandas as pd

# Columns of the raw CSV that are discarded right after loading.
cols = ['OID_', 'damage', 'Type', 'MIN_BLDG_N', 'MAX_BLDG_N', 'SUM_N_KWH', 'Shape_Leng', 'ORIG_FID', 'BUFF_DIST', 'ORIG_FID_1', 'Shape_Length', 'Shape_Area', 'Floors_fin', 'NbreEtages', 'Era_all', 'era_usj', 'Era_fin', 'era_usj_1']


def _coalesce(primary, fallback):
    """Return `primary` with its missing entries taken from `fallback`.

    An entry counts as missing when it is NaN/None, 0 or the empty string.
    The previous row-wise ``x if x else y`` test treated NaN as present
    (NaN is truthy), so missing values never fell back to the second column.

    Parameters
    ----------
    primary, fallback : pandas.Series
        Series sharing the same index.

    Returns
    -------
    pandas.Series
        `primary` where it is present, `fallback` elsewhere.
    """
    missing = primary.isna() | (primary == 0) | (primary == '')
    return primary.where(~missing, fallback)


df = pd.read_csv('datasets/beirut_bldgs/buffered_masks.csv').drop(columns=cols)

# Turn whitespace-matching strings into NaN *before* coalescing so that blank
# cells also fall back to the secondary column.
# NOTE(review): with a non-string replacement value, r'\s+' matches any string
# that contains whitespace anywhere, not only blank cells. If only blank cells
# should be nulled the pattern would need to be r'^\s*$' -- confirm against
# the data before changing it.
df = df.replace(r'\s+', np.nan, regex=True)

df['built_year_final'] = _coalesce(df['built_year'], df['Annee'])
df['Floors_final'] = _coalesce(df['Floors'], df['Estim_Etag'])
df = df.drop(columns=['built_year', 'Annee', 'Estim_Etag', 'Floors'])

# Assign the filled columns back instead of calling fillna(inplace=True) on a
# column selection, which is deprecated chained assignment in pandas 2.x and
# has no effect under Copy-on-Write.
df['Const_Year'] = df['Const_Year'].fillna(0)
df['Fonction'] = df['Fonction'].fillna('Autre')
#df = pd.get_dummies(df, drop_first=True)
#num_cols = ['NbreAppts', 'MEAN_DSM_O', 'MEAN_Blg_H', 'Area', 'perimeter', 'era_final', 'built_year_final']
#df[num_cols] = df[num_cols]/df[num_cols].max()
df.head(10)

In [None]:
# Summary statistics of df without the heritage/Fonction/coordinate columns,
# with zeros treated as missing; describe() applies its default column
# selection to what remains.
described = df.drop(columns=['heritage', 'Fonction', 'Longitude', 'Latitude']).replace(0, np.nan)
stats = described.describe().drop(index=['25%', '50%', '75%'])

# Nulls per column = total rows - non-null count. The row count is taken from
# the frame itself rather than the previously hard-coded 1366, and the row is
# added by label because DataFrame.append no longer exists in pandas >= 2.0.
stats.loc['number of nulls'] = len(df) - stats.loc['count']

# Display with the null count directly under the non-null count.
stats.reindex(['count', 'number of nulls', 'mean', 'std', 'min', 'max'])

In [None]:
import matplotlib.pyplot as plt
# Two side-by-side bar charts of value counts: 'heritage' on the left axis,
# 'Fonction' (with its values mapped to English names) on the right axis.
f, (ax1, ax2) = plt.subplots(nrows=1, ncols=2)

heritage_counts = df['heritage'].value_counts()
heritage_counts.plot(kind='bar', ylabel='Heritage', ax=ax1)

function_names = {
    'Sante': 'Healthcare',
    'Industrie': 'Industry',
    'Religieux': 'Religion',
    'Mixte': 'Mixed',
    'Residentiel': 'Residential',
    'Autre': 'Other',
}
function_counts = df['Fonction'].replace(function_names).value_counts()
function_counts.plot(kind='bar', ylabel='Building Function', ax=ax2)

plt.tight_layout()

In [None]:
from dataset import BeirutFullGraph

# Build the Beirut dataset and keep its first item (index 0).
# NOTE(review): the fourth positional argument is True; presumably this is
# meta_features, as in the keyword call in the graph-visualization cell --
# confirm against BeirutFullGraph's signature.
dataset = BeirutFullGraph(
    'datasets/beirut_bldgs/beirut_graph_meta',
    'datasets/beirut_bldgs',
    1366,
    True,
)
data = dataset[0]

In [None]:
import pandas as pd

# Load the processed metadata CSV (first column used as the index) and show
# its first rows.
processed_metadata_csv = 'datasets/beirut_bldgs/beirut_graph_meta/processed/beirut_metadata.csv'
df = pd.read_csv(processed_metadata_csv, index_col=0)
df.head()

---

# Shannon Equitability

In [None]:
import numpy as np
from utils import shannon_equitability

# Counts passed to shannon_equitability for each dataset, as
# (original, reduced) pairs.
counts_by_dataset = {
    'Joplin': ([8225, 2192, 1005, 3274], [649, 363, 243, 111]),
    'Pinery': ([5027, 82, 99, 229], [1100, 149, 64, 53]),
    'Nepal': ([31225, 5134, 4721, 502], [986, 189, 173, 18]),
}

for position, (dataset_name, (original, reduced)) in enumerate(counts_by_dataset.items()):
    # Blank line between datasets, but not before the first one.
    if position > 0:
        print()
    print(f'{dataset_name} original: {shannon_equitability(np.array(original)):.3f}')
    print(f'{dataset_name} reduced: {shannon_equitability(np.array(reduced)):.3f}')


---
# Results Visualization

In [None]:
import matplotlib.pyplot as plt
import numpy as np

# One x slot per metric (named by the tick labels below); each list holds the
# five per-metric differences for one dataset.
x = np.arange(5)
y1 = [0.7145-0.4773, 0.4517-0.3439, 0.4941-0.3090, 0.7743-0.6651, 0.4646-0.3044]  # Pinery
y2 = [0.7540-0.6179, 0.6987-0.5382, 0.6992-0.4967, 0.8778-0.7837, 0.6914-0.4967]  # Joplin
y3 = [0.7086-0.5930, 0.5589-0.4056, 0.5199-0.4121, 0.7613-0.7059, 0.5328-0.4082]  # Nepal (third legend entry)

# Grouped bars: three 0.2-wide bars per metric, drawn left to right.
bar_width = 0.2
bar_groups = (
    (x - 0.2, y1, '#f28e2b'),
    (x, y2, '#4e79a7'),
    (x + 0.2, y3, '#e15759'),
)
for positions, heights, bar_color in bar_groups:
    plt.bar(positions, heights, bar_width, color=bar_color)

metric_names = ['Accuracy', 'Precision', 'Recall', 'Specificity', 'F1']
plt.xticks(x, metric_names)
plt.ylabel("Hold Score Differences")
plt.legend(["Pinery", "Joplin", "Nepal"])
plt.tight_layout()
plt.show()

In [None]:
import matplotlib.pyplot as plt
import numpy as np

# Per-metric score differences, one entry for each of the four x positions
# labelled '1'..'4' below.
x = np.arange(4)
acc = [0.911-0.887, 0.5092-0.6075, 0.7042-0.8542, 0.7094-0.8552]
macro = [0.5642-0.5315, 0.3421-0.3275, 0.2824-0.3072, 0.2873-0.3131]
weight = [0.9205-0.8902, 0.5487-0.6727, 0.587-0.7873, 0.5951-0.7899]
auc = [0.9347-0.8374, 0.5314-0.4999, 0.5199-0.6445, 0.5394-0.6665]

bar_width = 0.2
bar_series = [
    ("Accuracy", acc, '#f28e2b'),
    ("Macro F1", macro, '#4e79a7'),
    ("Weighted F1", weight, '#e15759'),
    ("AUC", auc, '#59a14f'),
]
# Symmetric offsets (-0.3, -0.1, 0.1, 0.3) so every group of four bars is
# centered on its tick; the previous -0.4..+0.2 offsets left each group
# shifted 0.1 to the left of its tick label.
bar_offsets = (np.arange(len(bar_series)) - (len(bar_series) - 1) / 2) * bar_width

for bar_offset, (series_label, heights, bar_color) in zip(bar_offsets, bar_series):
    plt.bar(x + bar_offset, heights, bar_width, color=bar_color, label=series_label)

plt.xticks(x, ['1', '2', '3', '4'])
#plt.ylabel("Differences between Train and Hold Scores")
plt.legend()
# Zero line: some differences are negative.
plt.axhline(color='black')
plt.title('Graph SAGE')
plt.tight_layout()
plt.show()

In [None]:
import matplotlib.pyplot as plt
import numpy as np

# Per-metric score differences, one entry for each of the four x positions
# labelled '1'..'4' below.
x = np.arange(4)
acc = [0.8746-0.8531, 0.6921-0.4356, 0.9283-0.6082, 0.9666-0.6668]
macro = [0.5994-0.5449, 0.6342-0.315, 0.9008-0.3913, 0.9522-0.4308]
weight = [0.9036-0.8787, 0.7275-0.5708, 0.9306-0.6966, 0.9671-0.7405]
auc = [0.9648-0.8609, 0.8764-0.6601, 0.9877-0.6597, 0.9963-0.6853]

bar_width = 0.2
bar_series = [
    ("Accuracy", acc, '#f28e2b'),
    ("Macro F1", macro, '#4e79a7'),
    ("Weighted F1", weight, '#e15759'),
    ("AUC", auc, '#59a14f'),
]
# Symmetric offsets (-0.3, -0.1, 0.1, 0.3) so every group of four bars is
# centered on its tick; the previous -0.4..+0.2 offsets left each group
# shifted 0.1 to the left of its tick label.
bar_offsets = (np.arange(len(bar_series)) - (len(bar_series) - 1) / 2) * bar_width

for bar_offset, (series_label, heights, bar_color) in zip(bar_offsets, bar_series):
    plt.bar(x + bar_offset, heights, bar_width, color=bar_color, label=series_label)

plt.xticks(x, ['1', '2', '3', '4'])
plt.ylabel("Differences between Train and Hold Scores")
# Legend intentionally left disabled in this cell; labels are attached to the
# bars, so re-enabling it only needs the call below.
#plt.legend()
plt.axhline(color='black')
plt.title('Siamese CNN')
plt.tight_layout()
plt.show()