## IMPORTANT
1. Run Milvus Docker first

## Scheme Preparation

Imports

In [2]:
from pymilvus import connections, DataType, CollectionSchema, FieldSchema, Collection, Partition, utility
import openai
import pandas as pd
import numpy as np
import re
import json
from openai.embeddings_utils import get_embedding
import time
from tqdm import tqdm

Constants

In [45]:
OPENAI_API_KEY = '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'
embedding_model = "text-embedding-ada-002"
embedding_encoding = "cl100k_base"
max_tokens = 8000
dimensions =1536
openai.api_key = OPENAI_API_KEY

Mutable variables

In [101]:
partition_name = 'usjr_about'
bundled_schema = {'rmrj_articles': ['author', 'title', 'published_date', 'text'],
                  'facebook_posts': ['text', 'time', 'link'],
                  'usjr_about': ['text', 'content_id'],
                  'all': ['author', 'title', 'published_date', 'text', 'time', 'post', 'link', 'content_id']}
collection_names = bundled_schema[partition_name]
json_path = f'raw_jsons/{partition_name}.json'
description = 'description'

Function definitions:

In [102]:
def get_embedding(text, model=embedding_model):
   text = text.replace("\n", " ")
   return openai.Embedding.create(input = [text], model=model)['data'][0]['embedding']

Connection

In [103]:
# Check if the connection already exists
if connections.has_connection('default'):
    connections.remove_connection('default')  # Disconnect if it exists

# Now, reconnect with your new configuration
connections.connect(alias='default', host='localhost', port='19530')

Drop collection

In [67]:
for name in collection_names:
    utility.drop_collection(f"{name}_collection")
utility.list_collections()

['LangChainCollection']

Collection schema definition

In [104]:
collections = {}  # To store the created collections

for name in collection_names:
    if name not in utility.list_collections():
        fields = [
            FieldSchema(name="uuid", dtype=DataType.VARCHAR, is_primary=True, max_length=36),
            FieldSchema(name=name, dtype=DataType.VARCHAR, max_length=5000),
            FieldSchema(name="embeds", dtype=DataType.FLOAT_VECTOR, dim=dimensions)
        ]

        schema = CollectionSchema(fields=fields, description=f"Collection for {name}")

        # Create the collection and store it in the dictionary
        collections[name] = Collection(name=f"{name}_collection", schema=schema)

List collections

In [105]:
utility.list_collections()

['title_collection',
 'text_collection',
 'author_collection',
 'link_collection',
 'LangChainCollection',
 'published_date_collection',
 'time_collection',
 'content_id_collection']

Partition creation

In [106]:
for collection in collections.values():
    partition = Partition(collection, partition_name)

List partitions

In [107]:
for collection in collections.values():
    display(collection.partitions)

[{"name": "_default", "collection_name": "text_collection", "description": ""},
 {"name": "rmrj_articles", "collection_name": "text_collection", "description": ""},
 {"name": "facebook_posts", "collection_name": "text_collection", "description": ""},
 {"name": "usjr_about", "collection_name": "text_collection", "description": ""}]

[{"name": "_default", "collection_name": "content_id_collection", "description": ""},
 {"name": "usjr_about", "collection_name": "content_id_collection", "description": ""}]

Index definition

In [108]:
index_params = {
  "metric_type": "L2", # Euclidean distance
  "index_type": "FLAT", # FLAT index type
  "params": {} # No additional parameters needed for FLAT
}

Index creation

In [109]:
for collection in collections.values():
    collection.create_index("embeds", index_params)

## Data Processing

Data loading

In [110]:
with open(json_path) as f:
    data = json.load(f)

Lowercasing dictionary

In [111]:
for row in data:
    # Check if the second element of the row is a dictionary
    if isinstance(row[1], dict):
        # Create a new dictionary with keys in lowercase
        new_dict = {k.lower(): v for k, v in row[1].items()}
        
        # Check if 'published date' is a key in the new dictionary
        if 'published date' in new_dict:
            # If it is, rename it to 'published_date'
            new_dict['published_date'] = new_dict.pop('published date')
        
        # Replace the old dictionary with the new one
        row[1] = new_dict

In [117]:
data[0][2]

'RMRJ offers open access to its contents on the principle that it supports a greater global exchange of knowledge. Hence, it does not charge its readers any subscription fee to access full text of all its articles.  Permission to read, download, and print from the publisher or author is not necessary. Also, the journal accepts articles for publication at no cost on the part of the author.RMRJ is licensed under a Creative Commons Attribution-Noncommercial 4.0 International (CC BY-NC 4.0).Authors grant the publisher an exclusive publication right but retain copyright in their article. In this case, the author/s have the right to (a) share their article in the same ways permitted to third parties under the relevant user license so long as it contains the publisherâ€™s logo, and a link to the version of record on Recoletos Multidisciplinary Research Journal; (b) retain patent, trademark and other intellectual property rights (including research data); and (c) proper attribution and credit 

Time refactoring

In [34]:
from datetime import datetime

def change_date_format(date_string):
    date_object = datetime.strptime(date_string, '%Y-%m-%d')
    return date_object.strftime('%Y-%m-%d %B %d, %Y')

In [None]:
from datetime import datetime

def change_time_format(time_string):
    dt = datetime.strptime(time_string, "%Y-%m-%dT%H:%M:%S")
    formatted_time = dt.strftime("%Y-%m-%d %B %d, %Y %H:%M:%S")
    return formatted_time

for item in data:
    item[1]['time']=change_time_format(item[1]['time'])

In [38]:
for item in data:
    item[1]['published_date']= change_date_format(item[1]['published_date'])

In [39]:
data[0]

['111553fe-23fc-45e4-ad46-0c56b61aee0e',
 {'chunk': 0,
  'text': 'title: Timeless Existence and Principle of Creation: Notions Embedded in John 1:1, "In the Beginning Was the Word", keywords: John 1:1, Word, beginning, timeless existence, principle of creation, intentionality, author: Emiliano C. De Catalina, doi: https://doi.org/10.32871/rmrj2210.01.01, abstract: St. John\'s Gospel begins with a prologue, serving as an overture to the whole Gospel. This paper investigates the philosophical notions embedded in the first three lines of John 1:1. The inquiry focuses on whether or not the accepted meaning of this line as "indicating timeless existence" can be deduced from John 1:1 and whether or not John 1:1 also indicates the meaning of the "principle of creation." This paper proceeds to make this inquiry in the following order: Introduction; The questions arising in John 1:1; Word as God is eternal, outside time; "In the beginning" as predicate; "The Word was in the beginning"; Timeless

Dividing attributes to their corresponding collection (based on collection_names above)

In [118]:
data_lists = {f"{name}_obj": [] for name in collection_names}

for record in data:
    for name in collection_names:
        if name in record[1]:
            data_lists[f"{name}_obj"].append(record[1][name])
        else:
            print(f"The key '{name}' is not in the record.")

In [119]:
for name in collection_names:
    print(name, " - ", data_lists[f'{name}_obj'][0])

text  -  RMRJ offers open access to its contents on the principle that it supports a greater global exchange of knowledge. Hence, it does not charge its readers any subscription fee to access full text of all its articles.  Permission to read, download, and print from the publisher or author is not necessary. Also, the journal accepts articles for publication at no cost on the part of the author.

RMRJ is licensed under a Creative Commons Attribution-Noncommercial 4.0 International (CC BY-NC 4.0).

Authors grant the publisher an exclusive publication right but retain copyright in their article. In this case, the author/s have the right to (a) share their article in the same ways permitted to third parties under the relevant user license so long as it contains the publisherâ€™s logo, and a link to the version of record on Recoletos Multidisciplinary Research Journal; (b) retain patent, trademark and other intellectual property rights (including research data); and (c) proper attribution

Save uuids as list

In [120]:
uuid_list = []
for item in data:
    uuid_list.append(item[0])
uuid_list

['34f773a3-9cf1-4276-9965-1016d005258c',
 '9d67d88e-8fa9-40d7-b99f-d524d665825e',
 'ca294847-da13-405c-ab34-78828480d9d0',
 '1943d181-4f6f-434f-9fae-cd9fb404f0ca',
 'eaeb718b-c86a-4eb1-9f42-d9eed0d32a78',
 'b0c99c2b-03b2-42f8-9a76-5ffc483b8e45',
 '33464ac0-cfd8-468f-8164-d20f91489bee',
 '0d76a721-0772-4951-8d7e-8bec4dc6f46c',
 '768c2724-dcfb-44e2-88a4-495a96b0eb71',
 'c298635c-6e18-4149-99e0-1aea72a0eb48',
 'e4ce7220-177d-4f27-90be-caa08d1d0fe9',
 '3048aa54-88b5-43cb-80be-152a014794d0',
 '45fb753d-50cc-47d8-a5a1-ff8f398c5325',
 '5ccb78a2-632e-436e-93b4-f12d67c63a62',
 '22825fc4-caff-4e89-82fe-41396090fdb5',
 '0df153b4-b7b4-4e42-a334-7b07765bd823',
 '22579de5-8b56-4c6b-aa78-fa7f64cf5a6c',
 '1eedbb34-a099-405c-9c30-d6c4737c7ab6',
 '01569648-dbe5-4872-96d1-f069992a7a8e',
 'b39680f8-6e94-4b92-8601-fc049b81125b',
 'f7334cc5-692f-4526-bd1a-602485b674ca',
 'c55c1aa9-9880-4ac7-b82c-25032c3ae5c1',
 '35f5a888-7a70-4ea4-92bb-9c7af06d0308',
 '8849f666-7948-49fb-a4f7-20545408880b',
 '7b222372-d89d-

Accessing data_lists

Embeddings

In [121]:
import string
json_path = "json_per_collection/"
def get_data_embeds(collection_names, data_lists, uuid_list):
    data_lists_embeds = {f"{name}_obj": [] for name in collection_names}
    for name in collection_names:
        for item, id_uuid in zip(tqdm(data_lists[f'{name}_obj'], desc=f'Processing {name}'), uuid_list):
            item_lower = item.lower()
            print(item_lower)
            embedding = get_embedding(item_lower)
            data_lists_embeds[f'{name}_obj'].append(embedding)
            time.sleep(1)  # Add a time break of 1 second (adjust as needed)
    return data_lists_embeds

def create_obj_data(collection_names, data_lists, uuid_list):
    data_lists_embeds = get_data_embeds(collection_names, data_lists, uuid_list)
    obj_list = {}
    for name in collection_names:
        obj_data = [
            {
                'uuid': id_uuid,
                f'{name}': item,
                'embeds': embedding
            } 
            for item, id_uuid, embedding in zip(data_lists[f'{name}_obj'], uuid_list, data_lists_embeds[f'{name}_obj'])
        ]
        obj_list[name] = obj_data
    return obj_list

def save_obj_data_to_json(obj_list):
    for name, obj_data in obj_list.items():
        with open(f'{json_path}{partition_name}_{name}.json', 'w') as file:
            json.dump(obj_data, file)

In [122]:
obj_list = create_obj_data(collection_names, data_lists, uuid_list)
save_obj_data_to_json(obj_list)

Processing text:   0%|                                   | 0/31 [00:00<?, ?it/s]

rmrj offers open access to its contents on the principle that it supports a greater global exchange of knowledge. hence, it does not charge its readers any subscription fee to access full text of all its articles.  permission to read, download, and print from the publisher or author is not necessary. also, the journal accepts articles for publication at no cost on the part of the author.

rmrj is licensed under a creative commons attribution-noncommercial 4.0 international (cc by-nc 4.0).

authors grant the publisher an exclusive publication right but retain copyright in their article. in this case, the author/s have the right to (a) share their article in the same ways permitted to third parties under the relevant user license so long as it contains the publisherâ€™s logo, and a link to the version of record on recoletos multidisciplinary research journal; (b) retain patent, trademark and other intellectual property rights (including research data); and (c) proper attribution and cred

Processing text:   3%|▊                          | 1/31 [00:01<00:43,  1.46s/it]

the recoletos multidisciplinary research journal (rmrj) is the official bi-annual journal of the university of san jose-recoletos (usj-r) center for policy, research, and development studies (cprds).  being an internationally peer-reviewed journal, rmrj adopts the double-blind review process wherein the reviewer/s and the author/s do not know each other's identity.


Processing text:   6%|█▋                         | 2/31 [00:02<00:39,  1.37s/it]

rmrj is made up of internationally renowned scholars in the journal's subject. they offer professional opinions on significant journal policies and content. the editorial team members improve and strengthen the quality, integrity, reputation, and sustainability of our publication through their field of expertise and substantial work as scientists and researchers.
the journal adheres to the cope code of conduct for journal editors to ensure fair and unbiased appraisal, confidentiality, non-competing interest compliance, and editorial duty and accountability for all submitted articles.


Processing text:  10%|██▌                        | 3/31 [00:04<00:38,  1.39s/it]

the recoletos multidisciplinary research journal (rmrj) is committed to upholding the highest standards of publication ethics and takes all possible measures against publication malpractices. rmrj commits herself to objective and fair double-blind peer-review of the submitted for publication works and to prevent any actual or potential conflict of interests between the editorial and review personnel and the reviewed material. any departures from the stipulated guidelines should be reported directly to the editor-in-chief, who is unequivocally committed to providing swift resolutions to any such problems.


Processing text:  13%|███▍                       | 4/31 [00:05<00:36,  1.35s/it]

authors must strictly adhere to the format and style of the journal to avoid manuscript rejection. hence, authors are encouraged to carefully read the instructions for authors before submitting their manuscript.


Processing text:  16%|████▎                      | 5/31 [00:06<00:34,  1.33s/it]

rmrj is dedicated to the promotion of knowledge through high-quality research publication in various disciplines. it adheres to the policy that all articles contained therein must meet the rigors of an independent double-blind peer-reviewing system and editing to ensure that the publication possesses scientific and academic merit.


Processing text:  19%|█████▏                     | 6/31 [00:08<00:32,  1.32s/it]

rmrj welcomes submission of quality researches in any of the following academic domains:

accountancy, business and management, and finance;
communication, humanities, psychology, and religion;
education and educational management;
engineering, mathematics, statistics, and technology;
environment, health, and natural sciences;
philosophical and mathematical reviews; and
politics and governance, and social sciences.


Processing text:  23%|██████                     | 7/31 [00:09<00:33,  1.38s/it]

rmrj is dedicated to promoting knowledge through high-quality research publications in various disciplines. she is committed to editorial independence, diversity, and equity. submissions from people of different backgrounds and geographic locations are welcome. submissions are assigned to editors who will do the initial review. should the manuscript be suitable for consideration by rmrj, the paper will be sent to at least two independent peer reviewers. the peer reviewers' assessments are used to inform the associate editor's decision on whether or not to recommend publication. endorsed papers will be forwarded to the chief editor for final approval. 

we do not put up with rude behavior or letters directed at our editors, staff, or other people helping us publish. we have the right to take appropriate measures to safeguard others from it. this situation may involve, for instance, withdrawing a manuscript from consideration or objecting to offensive remarks made by peers.


Processing text:  26%|██████▉                    | 8/31 [00:10<00:31,  1.35s/it]

rmrj is dedicated to promoting knowledge through high-quality research publications in various disciplines. she is committed to editorial independence, diversity, and equity. submissions from people of different backgrounds and geographic locations are welcome. submissions are assigned to editors who will do the initial review. should the manuscript be suitable for consideration by rmrj, the paper will be sent to at least two independent peer reviewers. the peer reviewers' assessments are used to inform the associate editor's decision on whether or not to recommend publication. endorsed papers will be forwarded to the chief editor for final approval. 

we do not put up with rude behavior or letters directed at our editors, staff, or other people helping us publish. we have the right to take appropriate measures to safeguard others from it. this situation may involve, for instance, withdrawing a manuscript from consideration or objecting to offensive remarks made by peers.


Processing text:  29%|███████▊                   | 9/31 [00:12<00:29,  1.34s/it]

peer review (also known as refereeing) is the process of subjecting an author's scholarly work, research, or ideas to the scrutiny of others who are experts in the same field. it requires a community of experts in a given (and often narrowly defined) field who are qualified and able to perform impartial reviews. likewise, it also refers to the work done while screening submitted manuscripts and funding applications. this normative process encourages authors to meet the accepted standards of their discipline. it prevents disseminating unwarranted claims, unacceptable interpretations, and personal views. peer review increases the probability that weaknesses will be identified and fixed. for both grant funding and publication in a scholarly journal, it is also usually required that the subject be both novel and substantial.

reviewers and editors are responsible for providing a constructive and prompt evaluation of submitted research papers based on the significance of their contribution 

Processing text:  32%|████████▍                 | 10/31 [00:13<00:28,  1.35s/it]

peer review (also known as refereeing) is the process of subjecting an author's scholarly work, research, or ideas to the scrutiny of others who are experts in the same field. it requires a community of experts in a given (and often narrowly defined) field who are qualified and able to perform impartial reviews. likewise, it also refers to the work done while screening submitted manuscripts and funding applications. this normative process encourages authors to meet the accepted standards of their discipline. it prevents disseminating unwarranted claims, unacceptable interpretations, and personal views. peer review increases the probability that weaknesses will be identified and fixed. for both grant funding and publication in a scholarly journal, it is also usually required that the subject be both novel and substantial.

reviewers and editors are responsible for providing a constructive and prompt evaluation of submitted research papers based on the significance of their contribution 

Processing text:  35%|█████████▏                | 11/31 [00:14<00:26,  1.34s/it]

a manuscript is accepted when

it is endorsed for publication by at least two referees,
it substantially complies with the instructions of the reviewers;
it complies with the ethical standards and protocols with studies involving humans and animals;
the manuscript passed the plagiarism detection test with a score of at most 10% similarity index and a grammarly rating of 95% or more. otherwise, the manuscript is returned to the author(s).
the referees' evaluations include an explicit recommendation of what to do with the manuscript, chosen from options provided by the journal. most recommendations are along the following lines:

accept without revisions
accept with minor revisions
accept with major revisions
reject with the option to resubmit
reject
in situations where the referees disagree substantially about the quality of work, there are several strategies for reaching a decision. when the editor receives positive and negative reviews for the same manuscript, the board will solicit o

Processing text:  39%|██████████                | 12/31 [00:16<00:28,  1.48s/it]

a manuscript is accepted when

it is endorsed for publication by at least two referees,
it substantially complies with the instructions of the reviewers;
it complies with the ethical standards and protocols with studies involving humans and animals;
the manuscript passed the plagiarism detection test with a score of at most 10% similarity index and a grammarly rating of 95% or more. otherwise, the manuscript is returned to the author(s).
the referees' evaluations include an explicit recommendation of what to do with the manuscript, chosen from options provided by the journal. most recommendations are along the following lines:

accept without revisions
accept with minor revisions
accept with major revisions
reject with the option to resubmit
reject
in situations where the referees disagree substantially about the quality of work, there are several strategies for reaching a decision. when the editor receives positive and negative reviews for the same manuscript, the board will solicit o

Processing text:  42%|██████████▉               | 13/31 [00:18<00:25,  1.44s/it]

rmrj welcomes the submission of comments on previous articles. comments on articles previously published in the journal will generally be reviewed by two reviewers, usually an author of the original article (to assist the editor in evaluating whether the submitted comment represents the previous article's accuracy) and an independent reviewer. the original author will be invited to reply if a comment is accepted for publication. all other editorial requirements, as enumerated above, apply to proposed comments.


Processing text:  45%|███████████▋              | 14/31 [00:19<00:24,  1.44s/it]

rmrj welcomes the submission of comments on previous articles. comments on articles previously published in the journal will generally be reviewed by two reviewers, usually an author of the original article (to assist the editor in evaluating whether the submitted comment represents the previous article's accuracy) and an independent reviewer. the original author will be invited to reply if a comment is accepted for publication. all other editorial requirements, as enumerated above, apply to proposed comments.


Processing text:  48%|████████████▌             | 15/31 [00:20<00:23,  1.46s/it]

rmrj makes it a point of adhering to cope's principles of transparency and best practice in scholarly publishing, and we encourage our publishing partners to do the same.


Processing text:  52%|█████████████▍            | 16/31 [00:22<00:21,  1.42s/it]

rmrj makes it a point of adhering to cope's principles of transparency and best practice in scholarly publishing, and we encourage our publishing partners to do the same.


Processing text:  55%|██████████████▎           | 17/31 [00:23<00:19,  1.43s/it]

the names and email addresses entered in this journal site will be used exclusively for the stated purposes of this journal and will not be made available for any other purpose or to any other party.


Processing text:  58%|███████████████           | 18/31 [00:25<00:18,  1.40s/it]

the names and email addresses entered in this journal site will be used exclusively for the stated purposes of this journal and will not be made available for any other purpose or to any other party.


Processing text:  61%|███████████████▉          | 19/31 [00:26<00:16,  1.37s/it]

the names and email addresses entered in this journal site will be used exclusively for the stated purposes of this journal and will not be made available for any other purpose or to any other party.


Processing text:  65%|████████████████▊         | 20/31 [00:27<00:15,  1.37s/it]

the names and email addresses entered in this journal site will be used exclusively for the stated purposes of this journal and will not be made available for any other purpose or to any other party.


Processing text:  68%|█████████████████▌        | 21/31 [00:29<00:13,  1.36s/it]

jessica magallon- avenido, ph.d., university of san jose-recoletos, philippines


Processing text:  71%|██████████████████▍       | 22/31 [00:30<00:12,  1.38s/it]

dr. agnes c. sequino, university of san jose-recoletos, philippines
dr. ravindra c. joshi, cabi-sea, malaysia
dr. erwin faller, san pedro college, philippines
enrique g. oracion, ph.d., silliman university, philippines
dr. jay p. picardal, cebu normal university, philippines
dr. roger lincoln radix, st. george university, university center grenada, united states
dr. gaurang dattubhai rami, veer narmad south gujarat university, india
dr. brian a. vasquez, majmaah university, kingdom of saudi arabia
dr. diane bandow, troy university, united states of america
dr. glenn g. pajares, university of san jose-recoletos, philippines
dr. william j. heisler, troy university, united states of america
dr. lanndon a. ocampo, cebu technological university, philippines
dr. rene m. odendaal, university of south africa, south africa
dr. robert halliman, austin peay state university, united states of america
dr. mohammed seghir halimi, university of kasdi merbah ouargla, algeria
dr. djuwari djuwari, unive

Processing text:  74%|███████████████████▎      | 23/31 [00:31<00:10,  1.36s/it]

mr. jesse sagayno susada, university of san jose-recoletos, philippines
mrs milagros b. baclayon, university of san jose-recoletos, philippines
miss chanine f. sevilla, university of san jose-recoletos, philippines
mrs. ingrid s. ramos, university of san jose-recoletos, philippines


Processing text:  77%|████████████████████▏     | 24/31 [00:33<00:09,  1.36s/it]

rmrj picks its reviewers through its editorial office. when a manuscript arrives, an editor solicits reviews from scholars or other experts to referee the manuscript. the identities of the referees selected by the editorial board are kept unknown to research authors. however, the reviewer's identity can be disclosed under some particular circumstances. disclosure of peer review can be granted under the following grounds: as evidence to prove that the published paper underwent peer review as required by the university for ranking and financial incentives, for regulatory bodies such as the commission on higher education, accreditation of academic programs. requests for peer review results shall be made in writing.


Processing text:  81%|████████████████████▉     | 25/31 [00:34<00:08,  1.35s/it]

all authors submitting their works to the rmrj for publication as original articles attest that the submitted works represent their authors' contributions and have not been copied or plagiarized in whole or in part from other works.

rmrj adheres to cope's first two requirements in defining authorship:  a. making a substantial contribution to the work and b. being accountable for the work and its published form. substantial contribution includes revising the manuscript to include important intellectual content.

corresponding authors are to use their institutional email in the submission process. all contributing authors are to be declared upon submission. their names, orcid number, and affiliation are to be written on the cover page and submitted separately in the journal's portal as a separate file. only authors whose names are found on the cover page upon submission are acknowledged as authors.   individuals who do not meet the criteria for authorship may be mentioned in the acknowl

Processing text:  84%|█████████████████████▊    | 26/31 [00:35<00:06,  1.34s/it]

we reserve the right to review all contributions using proper plagiarism detection software, such as turnitin. the journal permits a similarity rating of no more than 10%. submissions that have more than the permitted rating will be rejected or given the option to resubmit. we will adhere to the procedures indicated in the retractions section of these guidelines if plagiarism is found after publication. any allegations of plagiarism should be reported to us by our readers, reviewers, and editors.


Processing text:  87%|██████████████████████▋   | 27/31 [00:37<00:05,  1.33s/it]

rmrj does not endorse significant publication overlap. when overlap is necessary, it is only permitted if the manuscript may advance the field of study, has the explicit endorsement of the original publication, and cites the original source. (cambridge university press, version 4.0, september 20, 2021).


Processing text:  90%|███████████████████████▍  | 28/31 [00:38<00:03,  1.32s/it]

the authors declare that they have revealed all actual and potential conflicts of interest and any partial advantages related to their work. additionally, rmrj demands that writers include a statement about their funding. additionally, editors and reviewers are expected to disclose any potential conflicting interests that can compromise the impartiality or integrity of a publication.


Processing text:  94%|████████████████████████▎ | 29/31 [00:39<00:02,  1.33s/it]

rmrj offers open access to its contents on the principle that it supports a greater global knowledge exchange. hence, it does not charge its readers any subscription fee to access the full text of all its articles. permission to read, download, and print from the publisher or author is unnecessary. also, the journal accepts articles for publication at no cost on the author's part.

rmrj is licensed under a creative commons attribution-noncommercial 4.0 international (cc by-nc 4.0).
authors grant the publisher an exclusive publication right but retain copyright in their article. in this case, the author/s have the right to (a) share their article in the same ways permitted to third parties under the relevant user license so long as it contains the publisher's logo and a link to the version of record on recoletos multidisciplinary research journal; (b) retain patent, trademark and other intellectual property rights (including research data); and (c) proper attribution and credit for the 

Processing text:  97%|█████████████████████████▏| 30/31 [00:41<00:01,  1.34s/it]

all appeals and complaints are to be in writing and addressed to the editor in chief of the rmrj. these concerns are to be addressed by the members of the rmrj editorial board. emails are to be sent to: recoletos_journal@usjr.edu.ph.


Processing text: 100%|██████████████████████████| 31/31 [00:42<00:00,  1.37s/it]
Processing content_id:   0%|                             | 0/31 [00:00<?, ?it/s]

eb87b395-1abc-4520-bcae-5da4d1054aeb


Processing content_id:   3%|▋                    | 1/31 [00:01<00:39,  1.33s/it]

c48bddb8-cdab-4e22-9020-138560775d68


Processing content_id:   6%|█▎                   | 2/31 [00:02<00:37,  1.31s/it]

86834f4d-7748-4bf2-92c4-88a7f4fe2e7a


Processing content_id:  10%|██                   | 3/31 [00:04<00:37,  1.34s/it]

7adc6410-bb74-4faa-9cec-9b3a4bab3718


Processing content_id:  13%|██▋                  | 4/31 [00:05<00:41,  1.53s/it]

a8df567b-ab68-4599-9bbb-33899f7aa9b7


Processing content_id:  16%|███▍                 | 5/31 [00:07<00:39,  1.50s/it]

c48bddb8-cdab-4e22-9020-138560775d68


Processing content_id:  19%|████                 | 6/31 [00:08<00:36,  1.47s/it]

c48bddb8-cdab-4e22-9020-138560775d68


Processing content_id:  23%|████▋                | 7/31 [00:10<00:34,  1.44s/it]

c48bddb8-cdab-4e22-9020-138560775d68


Processing content_id:  26%|█████▍               | 8/31 [00:11<00:32,  1.42s/it]

7adc6410-bb74-4faa-9cec-9b3a4bab3718


Processing content_id:  29%|██████               | 9/31 [00:12<00:30,  1.39s/it]

c48bddb8-cdab-4e22-9020-138560775d68


Processing content_id:  32%|██████▍             | 10/31 [00:14<00:29,  1.39s/it]

7adc6410-bb74-4faa-9cec-9b3a4bab3718


Processing content_id:  35%|███████             | 11/31 [00:15<00:27,  1.37s/it]

c48bddb8-cdab-4e22-9020-138560775d68


Processing content_id:  39%|███████▋            | 12/31 [00:16<00:25,  1.36s/it]

7adc6410-bb74-4faa-9cec-9b3a4bab3718


Processing content_id:  42%|████████▍           | 13/31 [00:18<00:24,  1.35s/it]

c48bddb8-cdab-4e22-9020-138560775d68


Processing content_id:  45%|█████████           | 14/31 [00:19<00:23,  1.35s/it]

7adc6410-bb74-4faa-9cec-9b3a4bab3718


Processing content_id:  48%|█████████▋          | 15/31 [00:22<00:28,  1.81s/it]

c48bddb8-cdab-4e22-9020-138560775d68


Processing content_id:  52%|██████████▎         | 16/31 [00:24<00:27,  1.81s/it]

7adc6410-bb74-4faa-9cec-9b3a4bab3718


Processing content_id:  55%|██████████▉         | 17/31 [00:25<00:23,  1.67s/it]

c48bddb8-cdab-4e22-9020-138560775d68


Processing content_id:  58%|███████████▌        | 18/31 [00:26<00:20,  1.55s/it]

761186d4-a6f6-45e2-ad12-1755a47a0807


Processing content_id:  61%|████████████▎       | 19/31 [00:28<00:17,  1.50s/it]

7adc6410-bb74-4faa-9cec-9b3a4bab3718


Processing content_id:  65%|████████████▉       | 20/31 [00:29<00:15,  1.45s/it]

a8df567b-ab68-4599-9bbb-33899f7aa9b7


Processing content_id:  68%|█████████████▌      | 21/31 [00:30<00:14,  1.41s/it]

86834f4d-7748-4bf2-92c4-88a7f4fe2e7a


Processing content_id:  71%|██████████████▏     | 22/31 [00:32<00:12,  1.42s/it]

86834f4d-7748-4bf2-92c4-88a7f4fe2e7a


Processing content_id:  74%|██████████████▊     | 23/31 [00:33<00:11,  1.39s/it]

86834f4d-7748-4bf2-92c4-88a7f4fe2e7a


Processing content_id:  77%|███████████████▍    | 24/31 [00:35<00:09,  1.40s/it]

7adc6410-bb74-4faa-9cec-9b3a4bab3718


Processing content_id:  81%|████████████████▏   | 25/31 [00:36<00:09,  1.54s/it]

7adc6410-bb74-4faa-9cec-9b3a4bab3718


Processing content_id:  84%|████████████████▊   | 26/31 [00:38<00:07,  1.47s/it]

7adc6410-bb74-4faa-9cec-9b3a4bab3718


Processing content_id:  87%|█████████████████▍  | 27/31 [00:39<00:05,  1.42s/it]

7adc6410-bb74-4faa-9cec-9b3a4bab3718


Processing content_id:  90%|██████████████████  | 28/31 [00:40<00:04,  1.41s/it]

7adc6410-bb74-4faa-9cec-9b3a4bab3718


Processing content_id:  94%|██████████████████▋ | 29/31 [00:42<00:03,  1.60s/it]

7adc6410-bb74-4faa-9cec-9b3a4bab3718


Processing content_id:  97%|███████████████████▎| 30/31 [00:44<00:01,  1.52s/it]

7adc6410-bb74-4faa-9cec-9b3a4bab3718


Processing content_id: 100%|████████████████████| 31/31 [00:45<00:00,  1.47s/it]


Loading

In [123]:
json_path = "json_per_collection/" 
def open_json(filename):
    with open(filename + ".json") as file:
        return json.load(file)

obj_list = {}
for name in collection_names:
    obj_list[name] = open_json(json_path + f"{partition_name}_{name}")

for name in collection_names:
    for obj in obj_list[name]:
        if len(obj[name]) > 5000:
            obj[name] = obj[name][:2480]


Upserting

In [124]:
for name in collection_names:
    collection = Collection(f"{name}_collection")
    print(collection.insert(obj_list[name], partition_name=partition_name))


(insert count: 31, delete count: 0, upsert count: 0, timestamp: 442914952394833928, success count: 31, err count: 0)
(insert count: 31, delete count: 0, upsert count: 0, timestamp: 442914952407416840, success count: 31, err count: 0)


In [125]:
print(collection.flush())

None
