## Imports

In [None]:
import csv
import json
import numpy as np
import os
import sys

from sqlalchemy import create_engine
from sqlalchemy_utils import database_exists, create_database
import psycopg2




import matplotlib.pyplot as plt
import numpy
import pickle


sys.path.append('..')
import main

sys.path.append('../models')
import similarity

## Similarity and right/wrong classification

In [None]:
# Root folder holding the submissions; each directory entry found here is
# treated as one submission.
submission_base_directory = main.SHELFY_BASE_PATH + '/static/submissions/'

# Entry names as returned by os.listdir (order is whatever the OS reports)
submissions = os.listdir(submission_base_directory)

# Path of every submission, in the same order as `submissions`
submission_directories = [submission_base_directory + entry_name for entry_name in submissions]


def get_submission_shelf_name(submission_directory):
    """Return the shelf name recorded in a submission's info.txt.

    The shelf name is the text that precedes the first '.' on the first line
    of ``<submission_directory>/info.txt`` (presumably that line holds the
    shelf image file name, e.g. ``shelf_3.jpg`` -> ``shelf_3``; verify against
    whatever writes info.txt).

    Args:
        submission_directory: path of the submission folder containing
            info.txt.

    Returns:
        The part of the first line before the first '.'; the whole line
        (without its line ending) if it contains no '.'; an empty string if
        the file is empty.

    Raises:
        OSError: if info.txt cannot be opened.
    """
    info_path = os.path.join(submission_directory, 'info.txt')
    with open(info_path, 'r') as file_handle:
        # Drop the line ending first: without this, a first line that has no
        # '.' would be returned with its trailing newline and corrupt any
        # path built from the shelf name.
        first_line = file_handle.readline().rstrip('\r\n')
    return first_line.split('.')[0]


        

# Loop over all submissions, collecting one similarity score and one
# correct/incorrect flag (1/0) for every book found in every submission.
sims = []
corrects = []
for submission_directory in submission_directories:

    # Get the book objects out (every file under books/ is unpickled)
    # NOTE(review): pickle.load can execute arbitrary code while loading;
    # this is only safe if the files under books/ were written by this
    # project itself -- confirm before running on externally supplied data.
    books_directory = submission_directory + '/books/'
    book_ids = os.listdir(books_directory)

    books = []
    for book_id in book_ids:
        with open(books_directory + book_id, 'rb') as file_handle:
            books.append(pickle.load(file_handle))

    # Get the actual list of books that are present in the image out
    shelf_name = get_submission_shelf_name(submission_directory)
    shelf_directory = main.SHELFY_BASE_PATH + '/data/shelves/' + shelf_name + '/'

    # The second CSV column is what gets compared against each book's title.
    # newline='' lets the csv module handle line endings itself, as the csv
    # documentation recommends.
    titles_amazon_products = []
    with open(shelf_directory + 'titles_amazon_products', 'r', newline='') as file_handle:
        for row in csv.reader(file_handle, delimiter=','):
            # Blank or short rows have no second column; skip them instead
            # of failing with an IndexError.
            if len(row) > 1:
                titles_amazon_products.append(row[1])

    # Set copy so the per-book membership test below is not a linear scan
    known_titles = set(titles_amazon_products)

    # For each book, calculate similarity and whether match was correct or not
    for book in books:
        sims.append(similarity.calculate_book_score(book))

        # 1 if the matched title is among the shelf's titles, else 0
        corrects.append(1 if book.book_info['title'] in known_titles else 0)

# Arrays so the scores can be selected with boolean masks on `corrects`
sims = np.array(sims)
corrects = np.array(corrects)

In [None]:
fig = plt.figure(figsize=(8, 6))

# One semi-transparent histogram per outcome, drawn in this order:
# (selection mask over `sims`, z-order, legend label)
histogram_layers = (
    (corrects == 1, 2, 'correct'),
    (corrects == 0, 1, 'incorrect'),
)
for hist_mask, hist_zorder, hist_label in histogram_layers:
    plt.hist(sims[hist_mask], alpha=0.5, bins=12, zorder=hist_zorder, label=hist_label)

# Cosmetics
plt.title('similarity scores for correct and incorrect matches')
plt.xlabel('similarity score')
plt.ylabel('counts')
plt.grid(zorder=0)  # grid is given the lowest z-order used in this figure
plt.legend(loc='best')

# Write the figure to disk, then display it
plt.savefig('sim_histo.png', dpi=300)
plt.show()