In [63]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
sns.set()
from tqdm import tqdm
import pickle
import time
import warnings
warnings.filterwarnings('ignore')

In [66]:
# Load the layer mappings from disk.
# NOTE(review): pickle can execute arbitrary code while unpickling; only load
# a layer_mappings.pcl file that comes from a trusted source.
with open("layer_mappings.pcl", "rb") as mapping_file:  # close the handle deterministically
    C = pickle.load(mapping_file)

In [50]:
# Display the keys of the loaded mapping object C.
C.keys()

dict_keys([2, 4, 8, 16])

**Evaluation on the small dataset (`en.test1`)**

In [67]:
from itertools import islice

from sklearn.metrics import confusion_matrix, accuracy_score, recall_score, precision_score, f1_score

# Read ground-truth and predicted labels line by line (in lockstep) and build
# the confusion matrix from them.
path = "/mnt/nas/mnt/erebor1/tweetlocator_snam/"
limit = 10**8  # restrict to N datapoints
y_true = []
y_pred = []

with open(path + "en.test1") as ground_truth_file, \
        open(path + "en.test1.predict") as predictions_file:
    # islice stops after `limit` ground-truth lines or at end of file.
    for line_no, true in enumerate(islice(ground_truth_file, limit), start=1):
        pred = predictions_file.readline()
        if not pred:
            # Fail with a clear message instead of an opaque int('') error.
            raise ValueError(
                "en.test1.predict ended before en.test1 (at line {})".format(line_no)
            )

        # The label is the first space-separated token; the numeric id follows the last "__".
        true_label = int(true.split(" ")[0].split("__")[-1])
        pred_label = int(pred.split("__")[-1])

        # Append only after both values parsed so the two lists stay aligned.
        y_true.append(true_label)
        y_pred.append(pred_label)

# NOTE(review): this rebinds the imported function name `confusion_matrix` to the
# resulting matrix (kept for compatibility with the original cell); the function
# must be re-imported before it can be called again.
confusion_matrix = confusion_matrix(y_true, y_pred)

In [55]:
# Model quality on the highest layer (l1).

# Build the polygon -> class lookup from C[4] (four classes): each polygon id
# is assigned the index of the C[4] entry that lists it.
mapping_l1 = {
    polygon_id: layer_id
    for layer_id in range(len(C[4]))
    for polygon_id in C[4][layer_id]
}

# Translate the true/predicted labels through the layer mapping.
y_true_l1 = list(map(mapping_l1.__getitem__, y_true))
y_pred_l1 = list(map(mapping_l1.__getitem__, y_pred))

# Report the micro-averaged scores, one line per metric.
reports = (
    ("Precision: {}", precision_score),
    ("Recall: {}", recall_score),
    ("F1-Score: {}", f1_score),
)
for template, scorer in reports:
    print(template.format(scorer(y_true_l1, y_pred_l1, average="micro")))

Precision: 0.7074856970510324
Recall: 0.7074856970510324
F1-Score: 0.7074856970510324


In [56]:
# Model quality on the intermediate layer (l2).

# Build the polygon -> class lookup from C[16] (16 classes): each polygon id
# is assigned the index of the C[16] entry that lists it.
mapping_l2 = {
    polygon_id: layer_id
    for layer_id in range(len(C[16]))
    for polygon_id in C[16][layer_id]
}

# Translate the true/predicted labels through the layer mapping.
y_true_l2 = list(map(mapping_l2.__getitem__, y_true))
y_pred_l2 = list(map(mapping_l2.__getitem__, y_pred))

# Report the micro-averaged scores, one line per metric.
reports = (
    ("Precision: {}", precision_score),
    ("Recall: {}", recall_score),
    ("F1-Score: {}", f1_score),
)
for template, scorer in reports:
    print(template.format(scorer(y_true_l2, y_pred_l2, average="micro")))

Precision: 0.6051513217319876
Recall: 0.6051513217319876
F1-Score: 0.6051513217319876


In [57]:
# Model quality on the lowest layer (l3): the labels are used as read, without any layer mapping.
reports = (
    ("Precision: {}", precision_score),
    ("Recall: {}", recall_score),
    ("F1-Score: {}", f1_score),
)
for template, scorer in reports:
    print(template.format(scorer(y_true, y_pred, average="micro")))

Precision: 0.5974065700433394
Recall: 0.5974065700433394
F1-Score: 0.5974065700433394


**Evaluation on the large dataset (`en.test2`)**

In [59]:
from itertools import islice

from sklearn.metrics import confusion_matrix, accuracy_score, recall_score, precision_score, f1_score

# Read ground-truth and predicted labels line by line (in lockstep) and build
# the confusion matrix from them.
path = "/mnt/nas/mnt/erebor1/tweetlocator_snam/"
limit = 10**9  # restrict to N datapoints
y_true = []
y_pred = []

with open(path + "en.test2") as ground_truth_file, \
        open(path + "en.test2.predict") as predictions_file:
    # islice stops after `limit` ground-truth lines or at end of file.
    for line_no, true in enumerate(islice(ground_truth_file, limit), start=1):
        pred = predictions_file.readline()
        if not pred:
            # Fail with a clear message instead of an opaque int('') error.
            raise ValueError(
                "en.test2.predict ended before en.test2 (at line {})".format(line_no)
            )

        # The label is the first space-separated token; the numeric id follows the last "__".
        true_label = int(true.split(" ")[0].split("__")[-1])
        pred_label = int(pred.split("__")[-1])

        # Append only after both values parsed so the two lists stay aligned.
        y_true.append(true_label)
        y_pred.append(pred_label)

# NOTE(review): this rebinds the imported function name `confusion_matrix` to the
# resulting matrix (kept for compatibility with the original cell); the function
# must be re-imported before it can be called again.
confusion_matrix = confusion_matrix(y_true, y_pred)

In [60]:
# Model quality on the highest layer (l1).

# Build the polygon -> class lookup from C[4] (four classes): each polygon id
# is assigned the index of the C[4] entry that lists it.
mapping_l1 = {
    polygon_id: layer_id
    for layer_id in range(len(C[4]))
    for polygon_id in C[4][layer_id]
}

# Translate the true/predicted labels through the layer mapping.
y_true_l1 = list(map(mapping_l1.__getitem__, y_true))
y_pred_l1 = list(map(mapping_l1.__getitem__, y_pred))

# Report the micro-averaged scores, one line per metric.
reports = (
    ("Precision: {}", precision_score),
    ("Recall: {}", recall_score),
    ("F1-Score: {}", f1_score),
)
for template, scorer in reports:
    print(template.format(scorer(y_true_l1, y_pred_l1, average="micro")))

Precision: 0.7135346282670418
Recall: 0.7135346282670418
F1-Score: 0.713534628267042


In [61]:
# Model quality on the intermediate layer (l2).

# Build the polygon -> class lookup from C[16] (16 classes): each polygon id
# is assigned the index of the C[16] entry that lists it.
mapping_l2 = {
    polygon_id: layer_id
    for layer_id in range(len(C[16]))
    for polygon_id in C[16][layer_id]
}

# Translate the true/predicted labels through the layer mapping.
y_true_l2 = list(map(mapping_l2.__getitem__, y_true))
y_pred_l2 = list(map(mapping_l2.__getitem__, y_pred))

# Report the micro-averaged scores, one line per metric.
reports = (
    ("Precision: {}", precision_score),
    ("Recall: {}", recall_score),
    ("F1-Score: {}", f1_score),
)
for template, scorer in reports:
    print(template.format(scorer(y_true_l2, y_pred_l2, average="micro")))

Precision: 0.6169727602183437
Recall: 0.6169727602183437
F1-Score: 0.6169727602183437


In [62]:
# Model quality on the lowest layer (l3): the labels are used as read, without any layer mapping.
reports = (
    ("Precision: {}", precision_score),
    ("Recall: {}", recall_score),
    ("F1-Score: {}", f1_score),
)
for template, scorer in reports:
    print(template.format(scorer(y_true, y_pred, average="micro")))

Precision: 0.6096328078016952
Recall: 0.6096328078016952
F1-Score: 0.6096328078016952
