In [20]:
# pip install pymongo 
# pip install python-dotenv
from pymongo import MongoClient
# The connection string contains credentials, so it cannot be pushed to GitHub; it is kept locally in an environment variable defined in the .env file.
import os
from dotenv import load_dotenv
load_dotenv()
# If this notebook is not going to be pushed to GitHub, you can just paste the connection string I sent you here.
# Otherwise, create a file named .env containing the line MONGO_CONNECTION_STRING=mongodb+srv://........mongodb.net, with the string I sent you in place of the dots.

# Read the connection string from the environment (load_dotenv() above loads the .env file).
# Fail with an actionable message instead of a bare KeyError when the variable is missing or empty.
connection_string = os.environ.get("MONGO_CONNECTION_STRING")
if not connection_string:
    raise KeyError(
        "MONGO_CONNECTION_STRING is not set; create a .env file containing "
        "MONGO_CONNECTION_STRING=<your MongoDB connection string> or export it in your environment"
    )
client = MongoClient(connection_string)

# Handles to the two databases used by the following cells.
dragonfly_database = client["dragonfly"]
telegram_database = client["telegram"]



In [8]:
# In a MongoDB database, collections (the counterpart of tables in traditional SQL) contain documents,
# which are basically dictionaries.
# List the names of all collections in a database.
print("Collections in dragonfly db:")
print(dragonfly_database.list_collection_names(), end="\n\n")

# Get all documents in a collection. The documents are not returned all at once;
# you have to iterate through the result of find() to receive them.
all_events = []
for event in dragonfly_database["events"].find():
    all_events.append(event)

# Guard against an empty collection: all_events[0] would raise IndexError.
if all_events:
    print("The events collection has: " + str(len(all_events)) + " documents, the first one is:")
    print(all_events[0])
else:
    print("The events collection has: 0 documents")

Collections in dragonfly db:
['entities', 'events', 'scraping_reddit']

The events collection has: 11 documents, the first one is:
{'_id': ObjectId('62101e9a7d3722b1d749dde9'), 'event': {'entity': 'DOT', 'type': 'opinion', 'origin': 'reddit', 'value': 'neutral'}, 'timestamp': datetime.datetime(2022, 2, 18, 22, 29, 56), 'metadata': {'scraping_id': '62101e83aa5e8c0ab00950ea', 'chunk_id': '62101e9af2eafd89306bb540', 'text': 'The biggest red flag as such for me is the fact that the projects on Cosmos don’t need ATOM or  Cosmos to survive/exsist, unlike on DOT.', 'services': {'request': {'type': 'subreddit', 'post_limit': 200}, 'scraper': {'text': {'timestamp': 1645223396.0, 'comment_id': 'hxi7pwi', 'post_id': 'svu48s'}, 'version': 'praw-mongo_0'}, 'ner': {'version': 'naivekw_0'}, 'sa': {'version': 'distilroberta-finsa_0', 'model': 'models/distilroberta-finetuned-financial-news-sentiment-analysis'}}}, 'results': {'ner': {'DOT': {'span': [132, 135], 'match': 'DOT', 'domain': 'crypto'}, 'ATOM

In [9]:
# Running a query: fetch the single document of the "entities" collection
# whose "entity" field equals "BTC" (the bitcoin entity).
dragonfly_database.entities.find_one(filter={"entity": "BTC"})

{'_id': ObjectId('6206be239e5bd118e0ce5199'),
 'entity': 'BTC',
 'synonims': ['Bitcoin', 'BTC'],
 'domain': 'crypto'}

In [17]:
# A more complex query: count how many "opinion" events are registered for each entity.
# Note: the pipeline groups by entity only; it does not split the counts by day.
# If you don't want to use an iterator to see the results, wrap the call in list() as done here.
keep_opinion_events = {"$match": {"event.type": "opinion"}}
count_per_entity = {"$group": {"_id": "$event.entity", "count": {"$sum": 1}}}
list(dragonfly_database["events"].aggregate([keep_opinion_events, count_per_entity]))

[{'_id': 'MANA', 'count': 1},
 {'_id': 'ID', 'count': 2},
 {'_id': 'TIME', 'count': 1},
 {'_id': 'ETH', 'count': 1},
 {'_id': 'CRO', 'count': 2},
 {'_id': 'BTC', 'count': 70},
 {'_id': 'SOL', 'count': 1},
 {'_id': 'DOT', 'count': 1},
 {'_id': 'FTM', 'count': 1},
 {'_id': 'ATOM', 'count': 2},
 {'_id': 'QQQ', 'count': 1},
 {'_id': 'BNB', 'count': 1},
 {'_id': 'WING', 'count': 1},
 {'_id': 'NFT', 'count': 3},
 {'_id': 'MCO', 'count': 1}]

In [16]:
# Insert one document into the "test" collection of telegram_database.
# The collection does not need to be "initialized" beforehand.
test_collection = telegram_database["test"]
test_document = {
    "test_key": "test_value",
    "test_list": [1, 2, 3],
    "test_object": {"hello": "world", "a": ["b", "c", "d"]},
}
test_collection.insert_one(test_document)

# Let's check what the collection contains now.
list(test_collection.find())

[{'_id': ObjectId('621023f308d56e8903fcca9f'),
  'test_key': 'test_value',
  'test_list': [1, 2, 3],
  'test_object': {'hello': 'world', 'a': ['b', 'c', 'd']}}]

In [None]:
# Note the ObjectId that appears in the output above. To filter by this internal _id
# (it gets generated with every insert) you need the ObjectId class from this import.
from bson.objectid import ObjectId

# Replace this placeholder with the value that appeared after _id in the last block
# (it is autogenerated each time, so it will differ from run to run).
test_document_id = "paste here the numbers that appeared after _id on last block (they are autogenerated each time)"

# ObjectId(...) raises on a string that is not a valid id, so validate first and print a
# readable hint instead of failing with a traceback when the placeholder was not replaced.
if ObjectId.is_valid(test_document_id):
    matching_document = telegram_database["test"].find_one({"_id": ObjectId(test_document_id)})
else:
    matching_document = None
    print("Not a valid ObjectId: set test_document_id to the value shown after _id in the previous output")
matching_document

In [15]:
# Delete everything in the "test" collection: the empty filter {} selects every document.
telegram_database.test.delete_many(filter={})

<pymongo.results.DeleteResult at 0x7f8f3045f980>