In [None]:
import pandas as pd
import os
import glob

In [None]:
from sklearn.metrics import confusion_matrix, accuracy_score
import seaborn as sns
import numpy as np

from matplotlib import pyplot as plt
import rasterio

In [None]:
# Name of the evaluation run to analyse; it selects the log directory and names the exported submission files.
expname = 'multimodal_street_swin_05-13_B'

In [None]:
# Directory holding one sub-directory per evaluated run of this experiment.
expdir = f'../logs/evaluations/runs/{expname}/'
# Show the entries in sorted (string) order; the position in this order is later used as the fold index.
np.sort(os.listdir(expdir))

In [None]:
# Full path of every entry in the experiment directory, in sorted name order.
all_logdirs = []
for run_name in np.sort(os.listdir(expdir)):
    all_logdirs.append(os.path.join(expdir, run_name))

In [None]:
# Collect the run directories that contain test predictions, together with
# their fold index and the image file named in their config log.
logdirs = []
folds = []
img_files = []

fold_key = None

# Bound up front so the inspection cells that follow still run when no run
# directory contains predictions (otherwise they fail with a NameError).
config_tree = []
img_file = 'None'

for i, logdir in enumerate(all_logdirs):
    if not os.path.exists(os.path.join(logdir, 'test_predictions.csv')):
        continue

    logdirs.append(logdir)
    # The fold index is the position of the directory within `all_logdirs`.
    folds.append(i)
    print(i)

    with open(os.path.join(logdir, 'config_tree.log')) as f:
        config_tree = f.readlines()

    # Keep the last whitespace-separated token of the last line mentioning 'jpg';
    # the string 'None' marks runs whose config mentions no jpg file.
    img_file = 'None'
    for line in config_tree:
        if 'jpg' in line:
            img_file = line.strip().split(' ')[-1]
    img_files.append(img_file)

In [None]:
# Raw lines of the most recently read config_tree.log, shown for inspection.
config_tree

In [None]:
# Pull the fold-splitting key and the image file name out of the config lines.
# For each, the last whitespace-separated token of the last matching line wins.
for line in config_tree:
    last_token = line.strip().split(' ')[-1]
    if 'fold_key' in line:
        fold_key = last_token
    if 'jpg' in line:
        img_file = last_token

print(fold_key)
print(img_file)

In [None]:
# Test-set metadata; the display shows how many test samples each country has.
test_set_csv = '../data/AI4EO-MapYourCity/v1/building-age-dataset/test/test-set.csv'
test_set = pd.read_csv(test_set_csv)
test_set['country_id'].value_counts()

In [None]:
# Country code for each fold index (as a string key): every block of five
# consecutive fold indices maps to the same country.
countries_fold = {
    str(fold): country
    for block, country in enumerate(('QCD', 'PNN', 'HUN', 'FMW'))
    for fold in range(5 * block, 5 * block + 5)
}

In [None]:
# Load the test and dev predictions of every collected run and tag them with
# their fold index. Dev predictions are joined with the dev-set metadata.
all_test_df = []
all_valid_df = []

all_pids = []

# Some runs name the prediction column 'predicted_labels'; harmonise it.
rnd = {'predicted_labels': 'predicted_label'}

# The dev-set metadata does not depend on the fold, so it is read only once.
dd = pd.read_csv('../data/AI4EO-MapYourCity/splits/dev-set.csv')

for fold, img_file, logdir in zip(folds, img_files, logdirs):

    tt = pd.read_csv(os.path.join(logdir, 'test_predictions.csv')).rename(columns=rnd)
    vv = pd.read_csv(os.path.join(logdir, 'dev_predictions.csv')).rename(columns=rnd)

    # restrict to single country if trained like this
    if fold_key == 'use_only_one_country_5-fold':
        country = countries_fold[str(fold)]
        print('Use only test set from country', country)
        country_pids = test_set.loc[test_set['country_id'] == country, 'pid'].values
        print(len(country_pids), ' samples')
        # Vectorised membership test instead of a Python loop over every pid.
        tt = tt[tt['pid'].isin(country_pids)]

    # `assign` returns a new frame, so no column is written into a filtered slice.
    tt = tt.assign(fold=fold)
    vv = vv.assign(fold=fold)

    all_pids.append(tt['pid'].sort_values().values)

    print(len(tt['pid']), 'samples attached to test set')
    all_test_df.append(tt)
    all_valid_df.append(pd.merge(vv, dd, on='pid'))

In [None]:
# Stack the per-fold test predictions and store the label as an integer.
test_df = pd.concat(all_test_df).astype({'predicted_label': int})
# One row per (pid, fold) pair, for a quick look at the per-fold predictions.
test_df.pivot_table(index=['pid', 'fold'])

In [None]:
# Export the predictions of every fold, labelled with the experiment name.
all_folds_csv = f'../submissions/all_folds/{expname}.csv'
test_df.assign(experiment=expname).to_csv(all_folds_csv, index=False)

In [None]:
def get_best_class(sdf):
    '''
    Combine per-fold predictions into one label per sample by majority vote.
    If several classes tie for the most votes, the tied class closest to the
    mean of all predicted labels for that sample is chosen (the smaller class
    wins if two are equally close).

    Parameters
    ----------
    sdf : pandas.DataFrame
        Needs the columns 'pid' and 'predicted_label'. If a 'fold' column is
        present, its number of distinct values sets how many count buckets
        are pre-registered in the two returned dictionaries.

    Returns
    -------
    votes : pandas.DataFrame
        Columns 'pid' and 'predicted_label', one row per pid, sorted by pid.
    modes : dict
        Maps str(number of tied modes) to the list of pids with that many modes.
    unique_classes : dict
        Maps str(number of distinct predicted classes) to the list of pids.

    Raises
    ------
    ValueError
        If a pid has no valid (non-missing) prediction at all.
    '''

    # Size the buckets from the frame that was passed in, not from a notebook global.
    n_folds = sdf['fold'].nunique() if 'fold' in sdf.columns else 0
    unique_classes = {str(k): [] for k in range(1, n_folds + 1)}
    modes = {str(k): [] for k in range(1, n_folds + 1)}

    spids = []
    slbls = []

    # One pass over the groups; sort=False keeps the pids in order of first appearance.
    for pid, votes in sdf.groupby('pid', sort=False)['predicted_label']:
        candidates = votes.mode()
        if len(candidates) == 0:
            raise ValueError(f'No valid prediction available for pid {pid!r}')

        # setdefault keeps this working when a pid has more rows than folds.
        unique_classes.setdefault(str(votes.nunique(dropna=False)), []).append(pid)
        modes.setdefault(str(len(candidates)), []).append(pid)

        if len(candidates) > 1:
            # Tie: take the candidate closest to the mean predicted label.
            distance = (candidates - votes.mean()).abs().to_numpy()
            best = candidates.iloc[int(distance.argmin())]
        else:
            best = candidates.values[0]

        spids.append(pid)
        slbls.append(best)

    result = pd.DataFrame(dict(pid=spids, predicted_label=slbls)).sort_values('pid')
    return result, modes, unique_classes

Restrict the folds used for the submission here: folds 0, 1, 2, 3, 4 use all modalities; folds 5, 6, 7, 8, 9 have the street view dropped.

In [None]:
# Vote only over the folds selected for the submission.
submission_folds_df = test_df.query('fold in [0, 1, 2, 3, 4,]')
submission_df, submission_modes, submission_classes = get_best_class(submission_folds_df)

In [None]:
# Write the voted predictions as the submission file and preview the first rows.
submission_csv = f'../submissions/{expname}.csv'
submission_df.to_csv(submission_csv, index=False)
submission_df.head(4)

In [None]:
# how many different modes were predicted by the folds
# 1 - the majority of models agreed on one class
# 2 - tie
# 5 - every fold predicted a different class
# Number of samples per count of tied modes among the fold predictions.
for n_modes, mode_pids in submission_modes.items():
    print(n_modes, len(mode_pids))

In [None]:
# how many different classes were predicted by the folds
# Number of samples per count of distinct classes predicted by the folds.
for n_classes, class_pids in submission_classes.items():
    print(n_classes, len(class_pids))

In [None]:
# Bar chart of how often each class appears in the voted submission.
label_counts = submission_df['predicted_label'].value_counts().to_frame().reset_index()
sns.barplot(label_counts, x='predicted_label', y='count');

In [None]:
# Submission files (inside ../submissions/) whose class distributions are compared;
# the last entry is the file written for the current experiment.
reference_submissions = ['merged_topstreet_swin_04-19_A.csv',
                         'multimodal_swin_05-03_C.csv',
                         f'{expname}.csv',
                        ]

In [None]:
# Per-class counts of every reference submission, stacked into one long frame
# with a 'source' column holding the file name.
submission_counts = []
for fname in reference_submissions:
    ref_labels = pd.read_csv(os.path.join('../submissions/', fname))['predicted_label']
    submission_counts.append(ref_labels.value_counts().to_frame().assign(source=fname))

all_submissions = pd.concat(submission_counts).reset_index()

In [None]:
# Class distribution of each reference submission, side by side.
sns.barplot(data=all_submissions, x='predicted_label', y='count', hue='source');

In [None]:
# Stack the dev-set predictions of all folds (each already joined with the dev-set metadata).
valid_df = pd.concat(all_valid_df)
valid_df.head()

In [None]:
# Split the per-fold dev predictions into the two fold groups, selected by fold index.
valid_df_M = valid_df.query('fold in [0, 1, 2, 3, 4]') # includes all modalities
valid_df_X = valid_df.query('fold in [5, 6, 7, 8, 9]') # excludes streetview

In [None]:
# Majority vote over the folds of each group; only the voted frame is kept,
# the mode and class statistics are discarded.
dev_df_M,_,_ = get_best_class(valid_df_M)
dev_df_X,_,_ = get_best_class(valid_df_X)

In [None]:
# Attach the per-sample columns (first row per pid, minus the per-fold prediction)
# to the voted predictions. Running this cell again merges the same columns a second time.
meta_M = valid_df_M.drop_duplicates('pid').drop(columns='predicted_label')
dev_df_M = dev_df_M.merge(meta_M, on="pid", how="left")

In [None]:
# Attach the per-sample columns (first row per pid, minus the per-fold prediction)
# to the voted predictions. Running this cell again merges the same columns a second time.
meta_X = valid_df_X.drop_duplicates('pid').drop(columns='predicted_label')
dev_df_X = dev_df_X.merge(meta_X, on="pid", how="left")

## Accuracy

In [None]:
# Confusion matrices of the voted dev predictions.
# Cn_* are normalised over the true class (rows); C_M holds raw counts.
y_true_M, y_pred_M = dev_df_M['label'], dev_df_M['predicted_label']
y_true_X, y_pred_X = dev_df_X['label'], dev_df_X['predicted_label']

Cn_M = confusion_matrix(y_true_M, y_pred_M, normalize='true')
Cn_X = confusion_matrix(y_true_X, y_pred_X, normalize='true')
C_M = confusion_matrix(y_true_M, y_pred_M)

In [None]:
# Raw-count confusion matrix of the all-modality vote.
ax = sns.heatmap(C_M, annot=True, fmt='.0f')
ax.set_ylabel('True class')
ax.set_xlabel('Predicted class');

In [None]:
# Report accuracy and the mean of the row-normalised confusion-matrix diagonal
# for both fold groups.
# NOTE(review): accuracy is computed on the per-fold rows (valid_df_*), while the
# 'MAP' value comes from the voted predictions (Cn_*) - confirm this mix is intended.
score_inputs = (
    ('--- All Modalities', valid_df_M, Cn_M),
    ('--- Only Topview Modalities', valid_df_X, Cn_X),
)

for position, (title, fold_rows, normalised_cm) in enumerate(score_inputs):
    if position:
        print()
    acc = accuracy_score(fold_rows['label'].values, fold_rows['predicted_label'].values)
    print(title)
    print(f'Accuracy score: {acc:.4f}')
    print(f'MAP:            {np.diag(normalised_cm).mean():.4f}')

In [None]:
# Per-country accuracy and per-class recall on the all-modality dev predictions.
country_ids = valid_df_M['country_id'].unique()

# Class ids reported per country (the notebook works with seven classes, 0..6).
classes = list(range(7))

plot_dfs = []

for country in country_ids:
    country_df = valid_df_M.loc[valid_df_M['country_id'] == country]
    cacc = accuracy_score(country_df['label'].values, country_df['predicted_label'].values)

    # Passing `labels` fixes the matrix to one row per class, so the diagonal always
    # matches `classes` in length even when a class does not occur in this country.
    cmat = confusion_matrix(country_df['label'], country_df['predicted_label'],
                            labels=classes, normalize='true').diagonal()

    plot_dfs.append(pd.DataFrame(dict(country=country, classes=classes, cmat=cmat)))

    # NOTE(review): the value printed as 'MCA' is the plain accuracy score - confirm the label.
    print(f'Country ID: {country}, MCA = {cacc:.4f}, count = {len(country_df)}')

In [None]:
# Accuracy of every all-modality fold on the dev set.
# A separate name is used so the notebook-level `folds` list, which the
# prediction-loading loop zips over, is not overwritten by this cell.
eval_folds = valid_df_M['fold'].unique()
for fold in eval_folds:
    fold_df = valid_df_M.loc[valid_df_M['fold'] == fold]
    cacc = accuracy_score(fold_df['label'].values, fold_df['predicted_label'].values)

    # NOTE(review): the value printed as 'MCA' is the plain accuracy score - confirm the label.
    print(f'Fold: {fold}, MCA = {cacc:.4f}, count = {len(fold_df)}')

## Inspect mis-classified samples

In [None]:
def inspect_misclassified(tclass, pclass, country_id=None, dev_df=None):
    '''
    Pick a random misclassified dev sample and display its images.

    Parameters
    ----------
    tclass : true class of the samples to choose from
    pclass : (wrong) predicted class of the samples to choose from
    country_id : optional country code; if given, only samples of that
        country are considered
    dev_df : optional frame of voted dev predictions with the columns 'pid',
        'label', 'predicted_label' and 'country_id'. Defaults to the
        notebook-level `dev_df_M`.

    Prints the number of matching samples, the voted row and the per-fold rows
    of the chosen sample, then shows its four images. Returns None; if no
    sample matches, nothing is plotted.
    '''

    # The notebook defines no plain `dev_df`; fall back to the all-modality vote.
    if dev_df is None:
        dev_df = dev_df_M

    input_path = "../data/AI4EO-MapYourCity/v1/building-age-dataset/"
    train_path = input_path + "train/data/"

    # Choose a building by pid:
    if country_id is None:
        mismatched_df = dev_df.query('label==@tclass and predicted_label==@pclass')
    else:
        mismatched_df = dev_df.query('label==@tclass and predicted_label==@pclass and country_id==@country_id')
    print(f'{len(mismatched_df)} samples were classified as {pclass} but are {tclass}')

    # np.random.randint(0) raises, so stop here when nothing matches.
    if mismatched_df.empty:
        return

    pid = mismatched_df['pid'].iloc[np.random.randint(len(mismatched_df))]

    print(dev_df.loc[dev_df['pid'] == pid])
    print('*'*5)
    print(valid_df.loc[valid_df['pid'] == pid])

    sample_dir = os.path.join(train_path, str(pid))
    street = plt.imread(os.path.join(sample_dir, "street.jpg"))
    streetb = plt.imread(os.path.join(sample_dir, "street_patch_tiny.jpg"))
    orthophoto = plt.imread(os.path.join(sample_dir, "orthophoto.tif"))
    # The context manager closes the raster file once it has been read.
    with rasterio.open(os.path.join(sample_dir, "s2_l2a.tif")) as src:
        s2 = src.read()
    # Move the band axis last so three bands can be shown as an image.
    s2 = np.transpose(s2, [1, 2, 0])

    # Show the modalities - street view (full and tiny patch), orthophoto and Sentinel-2

    fig, axs = plt.subplots(figsize=(15, 15), nrows=1, ncols=4)
    axs = axs.flatten()
    axs[0].imshow(street)
    axs[1].imshow(streetb)
    axs[2].imshow(orthophoto)
    # Bands 3, 2, 1 scaled by 3e-4 for display (presumably red, green, blue - confirm);
    # clipping to [0, 1] matches what imshow does to float data, without the warning.
    axs[3].imshow(np.clip(s2[..., [3, 2, 1]] * 3e-4, 0, 1))

    axs[0].set_title("Street")
    axs[1].set_title("Street - Tiny")
    axs[2].set_title("Orthophoto")
    axs[3].set_title("Sentinel-2-L2A")

    plt.show()
    

In [None]:
def inspect_sample(pid, split='train', dev_df=None):
    '''
    Display the images and the prediction rows of the sample with the given pid.

    Parameters
    ----------
    pid : id of the sample to show
    split : sub-directory of the dataset holding the sample ('train' or 'test')
    dev_df : optional frame of voted dev predictions with a 'pid' column.
        Defaults to the notebook-level `dev_df_M`.

    Prints the matching rows of the dev predictions, the test predictions and
    the test-set metadata, then shows the available images. Returns None.
    '''

    # The notebook defines no plain `dev_df`; fall back to the all-modality vote.
    if dev_df is None:
        dev_df = dev_df_M

    input_path = "../data/AI4EO-MapYourCity/v1/building-age-dataset/"
    sample_dir = os.path.join(input_path, split, 'data', str(pid))

    print(dev_df.loc[dev_df['pid'] == pid])
    print(test_df.loc[test_df['pid'] == pid])
    print(test_set.loc[test_set['pid'] == pid])

    # Street-view images are optional; each file is checked on its own.
    street_file = os.path.join(sample_dir, "street.jpg")
    street_tiny_file = os.path.join(sample_dir, "street_patch_tiny.jpg")
    street = plt.imread(street_file) if os.path.exists(street_file) else None
    streetb = None
    if street is not None and os.path.exists(street_tiny_file):
        streetb = plt.imread(street_tiny_file)

    orthophoto = plt.imread(os.path.join(sample_dir, "orthophoto.tif"))
    # The context manager closes the raster file once it has been read.
    with rasterio.open(os.path.join(sample_dir, "s2_l2a.tif")) as src:
        s2 = src.read()
    # Move the band axis last so three bands can be shown as an image.
    s2 = np.transpose(s2, [1, 2, 0])

    # Show the modalities - street view (full and tiny patch), orthophoto and Sentinel-2

    fig, axs = plt.subplots(figsize=(15, 5), nrows=1, ncols=4)
    axs = axs.flatten()
    if street is not None:
        axs[0].imshow(street)
    if streetb is not None:
        axs[1].imshow(streetb)
    axs[2].imshow(orthophoto)
    # Bands 3, 2, 1 scaled by 3e-4 for display (presumably red, green, blue - confirm);
    # clipping to [0, 1] matches what imshow does to float data, without the warning.
    axs[3].imshow(np.clip(s2[..., [3, 2, 1]] * 3e-4, 0, 1))

    axs[0].set_title("Street")
    axs[1].set_title("Street - Tiny")
    axs[2].set_title("Orthophoto")
    axs[3].set_title("Sentinel-2-L2A")

    plt.show()
    

In [None]:
# Show a random dev sample whose true class is 0 but which was predicted as class 6 (all countries).
# The pick is random and unseeded, so every run shows a different sample.
inspect_misclassified(0,6,)

In [None]:
# Show one test sample on which the folds produced a single mode;
# 360 is a position in that pid list and must be smaller than its length.
inspect_sample(submission_modes['1'][360], 'test')

## Merge HUN samples

In [None]:
# pids of all test-set samples whose country_id is "HUN".
hun_pids = test_set.query('country_id=="HUN"')['pid'].values

In [None]:
# replace all test set images with HUN with the prediction by the specialized model
hun_df = pd.read_csv('../submissions/multimodal_swin_HUN_05-09_C.csv')
hun_submission_df = submission_df.copy()

for i in range(len(hun_submission_df)):
    if hun_submission_df['pid'].iloc[i] in hun_pids:
        print(i, hun_submission_df['predicted_label'].iloc[i], hun_df['predicted_label'][i])
        hun_submission_df['predicted_label'].iloc[i] = hun_df['predicted_label'][i]
        
hun_submission_df.to_csv(f'../submissions/{expname}_xHUN.csv', index=False)


## Investigate mismatched samples

In [None]:
from sklearn.manifold import TSNE

In [None]:
# Split the dev predictions of the first collected run into wrong and right ones.
first_valid_df = all_valid_df[0]
mismatch = first_valid_df['predicted_label'].ne(first_valid_df['label'])
mis_df = first_valid_df[mismatch]
rig_df = first_valid_df[~mismatch]

Which countries were misclassified more often?

In [None]:
# Share of misclassified samples per country.
# fill_value / reindex keep countries that occur in only one of the two groups:
# they get a rate of 0 or 1 instead of NaN.
mis_counts = mis_df['country_id'].value_counts()
rig_counts = rig_df['country_id'].value_counts()
total_counts = mis_counts.add(rig_counts, fill_value=0)
mis_counts.reindex(total_counts.index, fill_value=0) / total_counts

In [None]:
# True class whose street-view embeddings are compared below.
cl = 6

In [None]:
# Load the stored street-view embedding of every dev sample of class `cl`,
# separately for wrongly and correctly classified samples.
embeddings = {'mismatched':[], 'correct':[], 'all':[]}

# The predictions in mis_df / rig_df come from the first run that has predictions,
# i.e. logdirs[0]. all_logdirs[0] is a different directory whenever the first
# entry of the experiment folder contains no predictions.
embedding_dir = logdirs[0]

for group, group_df in (('mismatched', mis_df), ('correct', rig_df)):
    for pid in group_df.query('label == @cl').pid.values:
        emb = np.load(os.path.join(embedding_dir, f'{pid}_streetview.npy'))
        embeddings[group].append(emb)

In [None]:
# Mismatched embeddings first, correct ones after; the plot colouring relies on this order.
embeddings['all'] = [*embeddings['mismatched'], *embeddings['correct']]

In [None]:
# Stack into one array and drop all axes of length one.
embeddings['all'] = np.squeeze(np.asarray(embeddings['all']))

In [None]:
# Fixed seed so the 2-D embedding is the same on every run of the notebook.
tsne = TSNE(random_state=42)

In [None]:
# Project the embeddings to 2-D and flag which rows belong to correctly
# classified samples (they follow the mismatched ones in embeddings['all']).
comps = tsne.fit_transform(embeddings['all'])
n_mismatched = len(embeddings['mismatched'])
indicator = np.arange(len(comps)) >= n_mismatched

In [None]:
# Scatter of the 2-D projection, coloured by the correct/mismatched flag.
points = plt.scatter(comps[:, 0], comps[:, 1], c=indicator, cmap='RdYlGn')
plt.colorbar(points)

plt.show()