In [None]:
%config InlineBackend.figure_formats = ['retina']

from collections import Counter
from itertools import combinations

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns

# Load the two result tables: one for categories and streets, one for subcategories.
tech_cat_st_results = pd.read_csv('data/results_categories_and_streets.csv')
tech_subcat_results = pd.read_csv('data/results_subcategories.csv')

# NOTE(review): this name shadows the builtin `dict`. It is kept because the
# following cells read the frames through it (dict["CATEGORY"], dict["SUBCATEGORY"]);
# rename it together with those cells.
dict = {"CATEGORY": tech_cat_st_results, "SUBCATEGORY": tech_subcat_results}

# Accuracy per level, in the same order as the keys of `dict`.
values = []

for key, data in dict.items():
    is_correct = data[f'IS_{key}_CORRECT']
    # Accuracy is the share of True values. Taking the mean of the flag column
    # avoids indexing value_counts() by 0/1: that result is sorted by frequency,
    # so integer lookups pick "most frequent" rather than "correct", and they
    # fail outright when the column contains a single distinct value.
    acc = is_correct.mean()
    print(f"{key}: {acc}")
    values.append(acc)

sns.set(rc={"figure.dpi":100, 'savefig.dpi':100})
sns.set_context('notebook')
sns.set_style("ticks")

level_names = list(dict.keys())

plt.figure(figsize=(10, 6))
sns.barplot(x=level_names, y=values, hue=level_names)
plt.title("Accuracy")
plt.ylabel("Accuracy Value")
# Ticks every 0.2, extended one step past the highest bar.
plt.yticks(np.arange(0.0, max(values) + 0.2, 0.2))
sns.despine(left=False, bottom=False)

In [None]:
subcat_data = dict["SUBCATEGORY"]
sub_categories = subcat_data.TITLE.unique()

# Every unordered pair of distinct subcategories, in order of first appearance.
# The ordered tuples drive the bar order and labels; `sets_list` keeps the same
# pairs as sets and is never mutated, so this cell can be re-run safely.
pair_list = list(combinations(sub_categories, 2))
sets_list = [set(pair) for pair in pair_list]

# Rows where the subcategory answer was wrong.
sub_false = subcat_data[~subcat_data.IS_SUBCATEGORY_CORRECT].reset_index(drop=True)

# Count each {expected title, given answer} pair once, regardless of direction.
confusion_counts = Counter(
    frozenset(pair) for pair in zip(sub_false.TITLE, sub_false.SUBCATEGORY_ANSWER)
)
array_false_sub = [confusion_counts[frozenset(pair)] for pair in pair_list]

# Answers that are not one of the known titles cannot be placed on the chart;
# report them instead of failing in the middle of the loop.
known_pairs = {frozenset(pair) for pair in pair_list}
unmatched_rows = sum(
    count for pair, count in confusion_counts.items() if pair not in known_pairs
)
if unmatched_rows:
    print(f"Warning: {unmatched_rows} incorrect rows have an answer outside the known subcategories and are not plotted")


def _short_title(title):
    """Return the text after the first ':' of a title, or the whole title if it has no ':'."""
    parts = str(title).split(':')
    return (parts[1] if len(parts) > 1 else parts[0]).strip()


# Labels follow the pair order, so they are identical on every run.
sets_strings = [f"{_short_title(first)} - {_short_title(second)}" for first, second in pair_list]

plt.figure(figsize=(10, 6))
sns.barplot(x=sets_strings, y=array_false_sub, hue=sets_strings)
plt.title("Subcategories pair")
plt.ylabel("Count")
plt.xticks(size=8)
sns.despine(left=False, bottom=False)
        

In [None]:
cat_data = dict["CATEGORY"]

# A street counts as correct when its score is strictly above this value.
STREET_SCORE_THRESHOLD = 50

is_street_correct = cat_data.STREET_CORRECTNESS > STREET_SCORE_THRESHOLD
count_is_street_true = int(is_street_correct.sum())
# Everything else, including scores exactly at the threshold and any missing
# scores (a comparison with NaN is False).
count_is_street_false = len(cat_data) - count_is_street_true

print(f"Street true: {count_is_street_true} Street false: {count_is_street_false}")

# The second bar holds scores at or below the threshold, so it is labelled "<=".
street_labels = [
    f'Street correct>{STREET_SCORE_THRESHOLD}%',
    f'Street correct<={STREET_SCORE_THRESHOLD}%',
]

plt.figure(figsize=(10, 6))
sns.barplot(x=street_labels, y=[count_is_street_true, count_is_street_false], hue=street_labels)
plt.title("Correct Rows")
plt.ylabel("Number of rows")
sns.despine(left=False, bottom=False)

In [None]:
# Share of rows counted as street-correct among all counted rows.
total_street_rows = count_is_street_true + count_is_street_false
street_accuracy = count_is_street_true / total_street_rows
print(f"Street Accuracy: {street_accuracy}")

In [None]:
street_scores = cat_data.STREET_CORRECTNESS

# Bin edges matching the axis labels: [0, 25), [25, 50), [50, 75), [75, 100),
# plus a separate bar for a score of exactly 100.
bin_edges = [0, 25, 50, 75, 100]
xlabels = ['0 - 24','25 - 49','50 - 74','75 - 99', '100']

# Number of rows per bin; a bin with no rows simply counts 0.
my_list = [
    int(((street_scores >= lower) & (street_scores < upper)).sum())
    for lower, upper in zip(bin_edges[:-1], bin_edges[1:])
]
# Counting with a mask yields 0 when no row scored 100, instead of a KeyError.
my_list.append(int((street_scores == 100).sum()))

plt.figure(figsize=(10, 6))
sns.barplot(x=xlabels, y=my_list, hue=xlabels)
plt.title("Street name correspondence")
plt.ylabel("Number of rows")
plt.xlabel("Score")
sns.despine(left=False, bottom=False)