# Experiments

In [1]:
from sereia import Sereia
from sereia.evaluation import EvaluationHandler

In [2]:
import os

# MongoDB connection URI handed to Sereia below.
# It is read from the SEREIA_MONGO_URI environment variable so that real
# credentials never have to be written into (or committed with) the notebook.
# The fallback is the localhost URI this notebook originally hard-coded; the
# password part is percent-encoded ("%40" stands for "@").
database_credentials = os.environ.get(
    "SEREIA_MONGO_URI",
    "mongodb://root:root%40server@localhost:27017/admin",
)

# Single name reused below for the Sereia instance, the database, the queryset,
# the evaluation handler and the results file.
DATASET_NAME = 'yelp_expanded'

In [3]:
# Runtime limits for this experiment. print_runtime_configs() (a few cells
# below) echoes them as "Top-K QMs considered", "Maximum QM size",
# "Maximum CJN size", "Top-K CJNs considered" and "Maximum CJNs per QM".
sereia_settings = {
    'config_directory': './config/',
    'topk_qms': 9999,
    'max_qm_size': 5,
    'max_cjn_size': 3,
    'topk_cjns': 9999,
    'topk_cjns_per_qm': 1,
}

# Build the Sereia instance for the dataset, connecting with the URI above.
sereia = Sereia(DATASET_NAME, database_credentials, **sereia_settings)

In [4]:
# Make the database named DATASET_NAME the active one for this Sereia instance
# (inferred from the method name — confirm against the sereia package).
sereia.use_database(DATASET_NAME)

In [5]:
# Make the queryset named DATASET_NAME the active one for this Sereia instance
# (inferred from the method name — confirm against the sereia package).
sereia.use_queryset(DATASET_NAME)

In [6]:
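# NOTE(review): index creation is disabled here and the next cell loads the
# indexes instead — presumably a one-time setup step that has already been run;
# uncomment on a fresh setup (confirm).
# sereia.create_indexes()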

In [7]:
# Load the indexes instead of building them (create_indexes() is commented out
# in the previous cell); presumably they were created and persisted by an
# earlier run — confirm.
sereia.load_indexes()

In [8]:
# Print the active QM/CJN limits so the recorded results can be tied to the
# parameters that produced them.
sereia.print_runtime_configs()

Maximum QM size: 5
Top-K QMs considered: 9999
Maximum CJN size: 3
Top-K CJNs considered: 9999
Maximum CJNs per QM: 1


In [9]:
# Run the active queryset (per the method name). `result` is the first argument
# of evaluation_handler.evaluate_results(...) further down, so this assignment
# must execute: with it commented out, a fresh "Restart & Run All" fails there
# with a NameError.
result = sereia.run_queryset()

In [10]:
# Evaluator for the same dataset, sharing the Sereia instance's config object.
evaluation_handler = EvaluationHandler(DATASET_NAME, sereia.config)

# Load the golden standards before evaluating (presumably the expected answers
# the results are scored against — confirm against the sereia package).
evaluation_handler.load_golden_standards()

In [11]:
# Evaluate `result` (the output of sereia.run_queryset()). The call prints the
# QM and CJN evaluation metrics and the results filename it was given.
results_path = f'results/{DATASET_NAME}'
evaluated_results = evaluation_handler.evaluate_results(
    result,
    results_filename=results_path,
)

QM Evaluation {'mrr': 0.8988095238095238, 'p@1': 0.8214285714285714, 'p@2': 0.9285714285714286, 'p@3': 1.0, 'p@4': 1.0, 'p@5': 1.0, 'p@6': 1.0, 'p@7': 1.0, 'p@8': 1.0, 'p@9': 1.0, 'p@10': 1.0, 'relevant_positions': [2, 1, 1, 1, 1, 1, 2, 1, 1, 1, 1, 1, 1, 3, 3, 1, 1, 1, 1, 1, 1, 1, 1, 2, 1, 1, 1, 1]}
CJN Evaluation {'mrr': 0.8988095238095238, 'p@1': 0.8214285714285714, 'p@2': 0.9285714285714286, 'p@3': 1.0, 'p@4': 1.0, 'p@5': 1.0, 'p@6': 1.0, 'p@7': 1.0, 'p@8': 1.0, 'p@9': 1.0, 'p@10': 1.0, 'relevant_positions': [2, 1, 1, 1, 1, 1, 2, 1, 1, 1, 1, 1, 1, 3, 3, 1, 1, 1, 1, 1, 1, 1, 1, 2, 1, 1, 1, 1]}
Results filename: results/yelp_expanded


In [12]:
import json


def _mean(values):
    """Return the arithmetic mean of `values`, or NaN when `values` is empty."""
    return sum(values) / len(values) if values else float('nan')


# Presumably the file written by evaluate_results(), which was given the same
# path without the ".json" suffix — confirm. JSON is read explicitly as UTF-8
# instead of relying on the platform's default encoding.
results_file = f'results/{DATASET_NAME}.json'
with open(results_file, encoding='utf-8') as f:
    results_data = json.load(f)
quality_results = results_data['evaluation']

# One entry per keyword query, each carrying its own 'precision' and 'recall'.
retrieval_scores = quality_results['retrieval_score']
precision_data = [item['precision'] for item in retrieval_scores]
recall_data = [item['recall'] for item in retrieval_scores]

# Averages over all queries: precision first, then recall. An empty score list
# prints nan instead of raising ZeroDivisionError.
print(_mean(precision_data))
print(_mean(recall_data))

1.0
0.9623880597014925


In [13]:
from pprint import pprint as pp

# Per-query breakdown behind the averages: expected vs. retrieved document
# counts plus precision and recall for every keyword query.
retrieval_scores = quality_results['retrieval_score']
pp(retrieval_scores)

[{'keyword_query': 'businesses bricola stars 5.0',
  'num_documents_expected': 2,
  'num_documents_retrieved': 2,
  'precision': 1.0,
  'recall': 1.0},
 {'keyword_query': 'businesses review analucia',
  'num_documents_expected': 1,
  'num_documents_retrieved': 1,
  'precision': 1.0,
  'recall': 1.0},
 {'keyword_query': 'moroccan restaurants Texas',
  'num_documents_expected': 5,
  'num_documents_retrieved': 5,
  'precision': 1.0,
  'recall': 1.0},
 {'keyword_query': 'italian restaurants boston',
  'num_documents_expected': 346,
  'num_documents_retrieved': 346,
  'precision': 1.0,
  'recall': 1.0},
 {'keyword_query': 'cities subway',
  'num_documents_expected': 552,
  'num_documents_retrieved': 552,
  'precision': 1.0,
  'recall': 1.0},
 {'keyword_query': 'reviews analucia',
  'num_documents_expected': 1,
  'num_documents_retrieved': 1,
  'precision': 1.0,
  'recall': 1.0},
 {'keyword_query': '5.0 star italian restaurants',
  'num_documents_expected': 56,
  'num_documents_retrieved': 5

In [10]:
# Hand-written aggregation over the `user` collection — apparently a manual
# cross-check of the keyword query 'businesses bricola stars 5.0' (confirm).

# Users whose name matches "bricola", case-insensitively.
user_named_bricola = {
    "$match": {
        "$expr": {
            "$regexMatch": {
                "input": "$name",
                "regex": "bricola",
                "options": "i",
            }
        }
    }
}

# Attach each user's reviews (joined on user_id).
join_reviews = {
    "$lookup": {
        "from": "review",
        "foreignField": "user_id",
        "localField": "user_id",
        "as": "review",
    }
}

# Keep only the reviews rated exactly 5.0 stars.
five_star_only = {
    "$match": {
        "$expr": {
            "$eq": ["$review.stars", 5.0]
        }
    }
}

# Attach the business each remaining review refers to (joined on business_id).
join_businesses = {
    "$lookup": {
        "from": "business",
        "foreignField": "business_id",
        "localField": "review.business_id",
        "as": "business",
    }
}

# Reduce every document to the three identifiers of interest.
keep_ids = {
    "$project": {
        "user_id": 1,
        "review.review_id": 1,
        "business.business_id": 1,
    }
}

# Each $lookup is followed by an $unwind so that every joined review/business
# becomes its own output document.
pipeline = [
    user_named_bricola,
    join_reviews,
    {"$unwind": "$review"},
    five_star_only,
    join_businesses,
    {"$unwind": "$business"},
    keep_ids,
]

sereia.execute_mongo_query('user', pipeline)

Executing query...
Showing enumerated results
Document #1
{'_id': ObjectId('627c56f59d0aba0daf1feb93'),
 'business': {'business_id': 'MCpfkiDhgDZy70tOza013g'},
 'review': {'review_id': 'onvDH3NskAbCMTbrirNqsg'},
 'user_id': 'On8BSYfyk5uw2P5L-90WkA'}
Document #2
{'_id': ObjectId('627c56f59d0aba0daf1feb93'),
 'business': {'business_id': 'oWkjcfej-edChEM-b5YKYQ'},
 'review': {'review_id': 'M4ssEJTZsW5wvMTEKw7Z0A'},
 'user_id': 'On8BSYfyk5uw2P5L-90WkA'}
