In [None]:
import pandas as pd
import os
import glob

In [None]:
from sklearn.metrics import confusion_matrix
import seaborn as sns
import numpy as np

from matplotlib import pyplot as plt
import rasterio

In [None]:
import pprint

In [None]:
# Show which experiment run directories exist under the log root.
pprint.pprint(os.listdir('../logs/experiments/runs/'))

In [None]:
# TODO: experiments to process (candidate values for `expname`):
#   - sentinel2_rgb_vit
#   - sentinel2_ndvi_vit
#   - topview_swin
#
# --> majority vote

In [None]:
# Experiment to analyse; used below for the log directory and the submission file name.
expname = 'sentinel2_ndvi_vit'

In [None]:
# Directory holding the runs of the selected experiment; display its entries.
expdir = f'../logs/experiments/runs/{expname}/'
os.listdir(expdir)

In [None]:
# Path of every entry found in the experiment directory.
run_names = os.listdir(expdir)
all_logdirs = [os.path.join(expdir, run_name) for run_name in run_names]

In [None]:
# Collect exactly one log directory per cross-validation fold so that
# `logdirs[i]` always holds the predictions of fold `i`. The loading cell
# pairs folds and directories with `enumerate(logdirs)`, so a duplicate or a
# gap here would silently shift every later fold onto the wrong directory.
logdirs = []
missing_folds = []

for fold in range(5):
    test_csv = f'test_predictions_fold_{fold}.csv'
    # Sorted so the selection is deterministic (os.listdir order is arbitrary).
    candidates = sorted(
        candidate for candidate in all_logdirs
        if os.path.exists(os.path.join(candidate, test_csv))
    )

    if not candidates:
        # Trailing missing folds are tolerated (partial experiments still work).
        missing_folds.append(fold)
        continue

    if missing_folds:
        # A gap followed by an existing fold would misalign index and fold.
        raise FileNotFoundError(
            f'no {expdir!r} run contains predictions for fold(s) {missing_folds}, '
            f'but fold {fold} exists; logdirs would no longer line up with fold numbers'
        )

    # NOTE(review): when several runs contain the same fold, the
    # lexicographically last directory is kept -- confirm this is the wanted run.
    logdir = candidates[-1]
    for ignored in candidates[:-1]:
        print(f'ignoring additional run for fold {fold}: {ignored}')

    print(fold, logdir)
    logdirs.append(logdir)

In [None]:
# Per-fold prediction tables, gathered here and concatenated in later cells.
all_test_df = []
all_valid_df = []

# Sorted test pids of each fold.
all_pids = []

for fold, logdir in enumerate(logdirs):
    # Rename the plural column name, where present, to the singular form used below.
    column_fix = {'predicted_labels':'predicted_label'}

    test_csv = os.path.join(logdir, f'test_predictions_fold_{fold}.csv')
    valid_csv = os.path.join(logdir, f'valid_predictions_fold_{fold}.csv')
    split_csv = f'../data/AI4EO-MapYourCity/splits/split_valid_{fold}.csv'

    # Tag every prediction row with the fold it came from.
    test_preds = pd.read_csv(test_csv).rename(columns=column_fix).assign(fold=fold)
    valid_preds = pd.read_csv(valid_csv).rename(columns=column_fix).assign(fold=fold)
    valid_split = pd.read_csv(split_csv)

    all_pids.append(test_preds['pid'].sort_values().values)

    print(len(test_preds['pid']))
    all_test_df.append(test_preds)
    # Join the validation predictions with the columns of the split file via pid.
    all_valid_df.append(valid_preds.merge(valid_split, on='pid'))

In [None]:
# Stack the test predictions of all folds and store the labels as integers.
test_df = pd.concat(all_test_df).astype({'predicted_label': int})
# Overview aggregated (pivot_table default) per (pid, fold) pair.
test_df.pivot_table(index=['pid', 'fold'])

In [None]:
def get_best_class(sdf):
    '''
    Return the majority-vote label per pid.

    Parameters
    ----------
    sdf : pandas.DataFrame
        Must contain the columns ``pid`` and ``predicted_label``; each row is
        one prediction for one pid.

    Returns
    -------
    pandas.DataFrame
        Columns ``pid`` and ``predicted_label``, one row per pid, sorted by pid.

    Notes
    -----
    If several labels are tied for the most votes, the tied label closest to
    the mean of that pid's predictions is chosen; if that is still a tie, the
    smallest of those labels wins (``Series.mode`` returns sorted values and
    ``argmin`` keeps the first minimum).
    '''

    pids = []
    labels = []
    # Iterate over the groups of the frame that was passed in (not a global),
    # which also avoids re-scanning the whole table once per pid.
    # sort=False keeps the groups in order of first appearance.
    for pid, preds in sdf.groupby('pid', sort=False)['predicted_label']:
        modes = preds.mode().to_numpy()

        if len(modes) > 1:
            # Tie between several labels: take the one nearest to the mean.
            best = modes[np.argmin(np.abs(modes - preds.mean()))]
        else:
            best = modes[0]

        pids.append(pid)
        labels.append(best)

    return pd.DataFrame(dict(pid=pids, predicted_label=labels)).sort_values('pid')

In [None]:
# Combine the per-fold test predictions into one label per pid.
submission_df = get_best_class(test_df)

In [None]:
# Disabled alternative: average the fold predictions per pid and truncate to int.
#submission_df = test_df.groupby('pid')['predicted_label'].mean().to_frame().reset_index()
#submission_df['predicted_label'] = submission_df['predicted_label'].astype(int)
# Write the submission under the experiment name (without the index column) and preview it.
submission_df.to_csv(f'../submissions/{expname}.csv', index=False)
submission_df.head()

In [None]:
# Number of pids assigned to each predicted label in this submission.
sns.barplot(submission_df['predicted_label'].value_counts())

In [None]:
# Label counts of every submission file, tagged with the file they came from.
# Only *.csv entries are read (anything else in the folder would make
# read_csv fail) and the names are sorted so the result order is reproducible.
submission_files = sorted(
    f for f in os.listdir('../submissions/')
    if f.endswith('.csv') and f != 'dummy.csv'
)

all_submissions = pd.concat([
    pd.read_csv(os.path.join('../submissions/', f))['predicted_label']
    .value_counts()
    .to_frame()
    .assign(source=f)
    for f in submission_files
])

In [None]:
# Compare the label counts of the submissions side by side, one hue per source file.
sns.barplot(all_submissions, x='predicted_label', y='count', hue='source')

In [None]:
# Number of test prediction rows contributed by each fold.
test_df['fold'].value_counts()

In [None]:
# Spread of the number of rows per pid; 0 means every pid occurs equally often.
test_df['pid'].value_counts().std()

In [None]:
# Stack the validation predictions (merged with their split files) of all folds.
valid_df = pd.concat(all_valid_df)
valid_df

In [None]:
# True and predicted labels pooled over all validation folds.
y_true = valid_df['label']
y_pred = valid_df['predicted_label']

# C holds the raw counts; Cn is normalised over the true labels (rows).
C = confusion_matrix(y_true, y_pred)
Cn = confusion_matrix(y_true, y_pred, normalize='true')

In [None]:
# Heatmap of the raw confusion counts, annotated with integer-formatted values.
sns.heatmap(C, annot=True, fmt='.0f')

In [None]:
# Validation rows where both the true and the predicted label equal 6.
valid_df.query('label==6 and predicted_label==6')

In [None]:
# Define paths to data

# input_path = "directory with MapYourCity image files"
input_path = "../data/AI4EO-MapYourCity/v1/building-age-dataset/"
# Per-building sub-folders (named by pid) are read from these two directories.
train_path = input_path + "train/data/"
test_path = input_path + "test/data/"

In [None]:
# Choose a building by pid:
pid = 'xas3mgbj2b'

street = plt.imread(f"{train_path}{pid}/street.jpg")
orthophoto = plt.imread(f"{train_path}{pid}/orthophoto.tif")

# Read all Sentinel-2 bands and release the file handle again right away.
with rasterio.open(f"{train_path}{pid}/s2_l2a.tif") as s2_src:
    s2 = s2_src.read()
s2 = np.transpose(s2,[1,2,0])  # move the band axis to the end

# Do the band arithmetic on a float copy: if the raster is stored as an
# unsigned integer type (NOTE(review): likely for this product, confirm),
# differences such as `a - b` with b > a would wrap around instead of
# becoming negative. `s2` itself is left untouched for the plots below.
s2_float = s2.astype(np.float32)
# Band positions follow the index comments of this cell (7 = Band 8 / NIR,
# 10 = Band 11 / SWIR, 2 = Band 3); index 3 is used as the red band.
red = s2_float[:,:,3]
green = s2_float[:,:,2]
nir = s2_float[:,:,7]
swir = s2_float[:,:,10]

# calculate normalized indices
ndvi = (nir - red) / (nir + red)

# Default rendering is Normalized Difference Built-Up Index computed as SWIR(Band11)-NIR(Band8)/ SWIR(Band11)+NIR(Band8)
ndbi = (swir - nir) / (swir + nir)

# water index
# Sentinel-2 NDWI = (B03 - B08) / (B03 + B08)
ndwi = (green - nir) / (green + nir)

print("Image dimensions:")
print(f"street view: {street.shape}" )
print(f"orthophoto: {orthophoto.shape}" )
print(f"Sentinel 2: {s2.shape}" )
print()
# `train_df` is never created in this notebook, so the label is looked up in
# `valid_df`, the labelled table built above from the validation splits.
# NOTE(review): prints an empty frame if this pid is not part of those splits.
print(f"Label:\n {valid_df.loc[valid_df['pid']==pid][['pid', 'label']]}")

In [None]:
# Show the 3 modalities - street view, orthophoto and Sentinel-2 - plus the three indices

fig, axs = plt.subplots(figsize=(15, 15), nrows=2, ncols = 3)
axs = axs.flatten()

# One entry per panel: (title, image, extra imshow arguments).
panels = [
    ("Street", street, {}),
    ("Orthophoto", orthophoto, {}),
    ("Sentinel-2-L2A", s2[...,[3,2,1]]*3e-4, {}),
    ("Sentinel NDVI", ndvi, dict(cmap="PiYG", vmin=-1, vmax=+1)),
    ("Sentinel NDBI", ndbi, dict(cmap="BuGn", vmin=-1, vmax=+1)),
    ("Sentinel NDWI", ndwi, dict(cmap="Blues", vmin=-1, vmax=+1)),
]

for ax, (title, image, imshow_kwargs) in zip(axs, panels):
    ax.imshow(image, **imshow_kwargs)
    ax.set_title(title)

plt.show()

In [None]:
# Relative position and size of orthophoto and Sentinel-2 images, with building location (blue marker)

plt.figure(figsize=(7,7))
# Sentinel-2 composite drawn over a 640 x 640 extent, orthophoto over the central 256 x 256.
plt.imshow(s2[...,[3,2,1]]*3e-4, extent=(-320, 320, -320, 320))
plt.imshow(orthophoto, extent=(-128, 128, -128, 128))
# NOTE(review): the marker is an "x", while the title below calls it a dot.
plt.plot(0,0, "b", marker = "x", markersize = 8 ) # building location
plt.title(" Orthophoto and Sentinel-2 images, with building location in the centre (blue dot)")
plt.xlabel("distance (m)")
plt.ylabel("distance (m)")
# NOTE(review): plt.plot() without arguments draws nothing; plt.show() was probably intended.
plt.plot();

## Accuracy

In [None]:
# Mean of the diagonal of the row-normalised confusion matrix, i.e. the per-class recall averaged over classes.
np.diag(Cn).mean()

In [None]:
from torchmetrics import Accuracy
import torch

In [None]:
# Multiclass accuracy metric configured for 7 classes.
acc = Accuracy(task='multiclass', num_classes=7)

In [None]:
# Accuracy over the pooled validation predictions.
# The labels are passed as integer tensors: torch.Tensor(...) would cast them
# to float32, whereas the multiclass metric documents integer class indices
# for inputs that have the same shape as the target.
val_preds = torch.as_tensor(valid_df['predicted_label'].to_numpy(), dtype=torch.long)
val_labels = torch.as_tensor(valid_df['label'].to_numpy(), dtype=torch.long)
acc(val_preds, val_labels)

In [None]:
# Shape of a single index image.
ndvi.shape

In [None]:
# Shape after stacking the three index images along a new last axis.
np.dstack([ndvi, ndwi, ndbi]).shape

In [None]:
# Shape of the three-band selection used for the colour composite above.
s2[...,[3,2,1]].shape