# Pulling DOX

In [None]:
# !git pull git@github.com:arielantony/sereia.git
# !pip install --editable .

# Importing modules

In [None]:
from dox import Dox

# Defining MongoDB access configuration

In [None]:
import os

# MongoDB connection URI used by DOX. Format:
# mongodb://[username:password@]host1[:port1][,...hostN[:portN]][/[defaultauthdb][?options]]
#
# The URI is read from the DOX_MONGODB_URI environment variable so that real
# credentials never have to be committed together with the notebook, e.g.:
#   export DOX_MONGODB_URI="mongodb+srv://<user>:<password>@<cluster-host>/?retryWrites=true&w=majority"
#
# When the variable is unset or empty, fall back to the local development
# instance. Note the password is percent-encoded in the URI
# (`root%40server` is `root@server`).
database_credentials = (
    os.environ.get("DOX_MONGODB_URI")
    or "mongodb://root:root%40server@localhost:27017/admin"
)

# Instantiating SEREIA

- The framework instance receives as input:
  - The name of the target database in MongoDB;
  - The database credentials (connection URI) used to authenticate with MongoDB;
  - The directory where the `config.json` file can be found.

In [None]:
# Build the framework instance from, in order: the target database name,
# the MongoDB connection URI, and the directory containing `config.json`.
dox = Dox("yelp_expanded", database_credentials, config_directory="./config/")

## Creating Indexes

- Index creation queries the target database (the one provided when the SEREIA object was instantiated) and retrieves the content of the attributes previously selected for indexing. These attributes are defined in the file `indexable_dataset_attributes/{dataset_name}_attributes.json`, which selects the subset of attributes to be indexed by SEREIA.

- The indexing process iterates over all documents from all collections of a given database and generates metrics that will be used in the keyword search feature.

In [None]:
# dox.create_indexes()

## Loading Indexes

In [None]:
# Presumably loads indexes built by an earlier `create_indexes()` run (that
# call is commented out in this notebook) — confirm against the Dox API.
dox.load_indexes()

## Listing available datasets

In [None]:
# Last expression of the cell, so whatever this call returns (if anything)
# is displayed as the cell output.
dox.list_datasets()

## Selecting dataset

In [None]:
# Same database name that was passed to the `Dox(...)` constructor.
dox.use_database('yelp_expanded')

In [None]:
# dox.config.__dict__

## Selecting query set

In [None]:
# The query set is selected by name; here it shares its name with the
# selected database ('yelp_expanded').
dox.use_queryset('yelp_expanded')

In [None]:
# dox.config.__dict__

# Running entire query set over the selected dataset

In [None]:
# Alternative (disabled): run the entire query set instead of one search.
# result = dox.run_queryset()
# NOTE(review): the argument `1` presumably selects a single query from the
# selected query set — confirm against `Dox.keyword_search`.
result = dox.keyword_search(1)

In [None]:
# result.kms()

In [None]:
# result.qms()

In [None]:
# Inspect the CJNs of the search result; presumably these are the candidate
# networks also exposed under `result.data['candidate_networks']` — confirm.
result.cjns()

In [None]:
from dox.candidate_network import CandidateNetwork

# Pick one serialized candidate network out of the search result and rebuild
# a `CandidateNetwork` object from it.
# NOTE(review): index 9 is hard-coded; assuming 'candidate_networks' is a
# list, this fails with IndexError when fewer than 10 networks were generated.
cjn_data = result.data['candidate_networks'][9]
cjn = CandidateNetwork.from_json_serializable(cjn_data)

In [None]:
# Presumably produces the MongoDB aggregation pipeline for this candidate
# network (compare the hand-written pipeline passed to
# `dox.execute_mongo_query` further down) — confirm against CandidateNetwork.
cjn.generate_pipeline()

# Running Keyword Search over the selected dataset

In [None]:
# Hand-written aggregation over the `user` collection: match users by name,
# join their reviews, then join the businesses referenced by those reviews.
# The commented-out stages are kept for experimentation.
dox.execute_mongo_query(
    'user',
    [
        # Case-insensitive regex match on the user's name.
        {'$match': {'$expr': {'$regexMatch': {
            'input': '$name',
            'options': 'i',
            'regex': 'michelle',
        }}}},
        # Attach each user's reviews (joined on `user_id`) as `review`.
        {'$lookup': {
            'as': 'review',
            'foreignField': 'user_id',
            'from': 'review',
            'localField': 'user_id',
        }},
        # {'$unwind': '$review'},
        # Attach the businesses referenced by those reviews as `business`.
        {'$lookup': {
            'as': 'business',
            'foreignField': 'business_id',
            'from': 'business',
            'localField': 'review.business_id',
        }},
        # {'$unwind': '$business'},
        # {'$match': {'$expr': {'$regexMatch': {'input': '$business.categories',
        #                                       'options': 'i',
        #                                       'regex': 'restaurants'}}}},
        # {'$match': {'$expr': {'$regexMatch': {'input': '$business.categories',
        #                                       'options': 'i',
        #                                       'regex': 'italian'}}}},
        # Keep only the joined data plus `user_id`, and return one document.
        {'$project': {'business': 1, 'review': 1, 'user_id': 1}},
        {'$limit': 1},
    ],
)

In [None]:
# dox.keyword_search()

In [None]:
# result = dox.keyword_search(
#     1
# )

## KMs

In [None]:
# result.kms()

## QMs

In [None]:
# result.qms()

## CJNs

In [None]:
# result.cjns()