In [62]:
import pandas as pd
import pymongo

class MongoDBHandler:
    """Small convenience wrapper around a single MongoDB collection.

    The handler owns one ``pymongo.MongoClient`` and exposes a handful of
    insert/find helpers on the selected collection. It can be used as a
    context manager so the client is closed even if a cell raises::

        with MongoDBHandler() as handler:
            handler.insert_many_documents(docs)
    """

    # Field name -> Python type that ``validate_document`` requires.
    # NOTE(review): documents produced by ``read_csv_to_documents`` carry
    # whatever types pandas infers from the CSV, so they only pass validation
    # if the columns were converted to these types first.
    SCHEMA_MODEL = {
        'msg_type': str,
        'msg_content': str,
        'sender_name': str,
        'msg_sent_time': pd.Timestamp,
        'msg_dist_type': str,
        'time_thread_start': pd.Timestamp,
        'reply_count': int,
        'reply_users_count': int,
        'reply_users': list,
        'tm_thread_end': pd.Timestamp,
        'channel': str
    }

    def __init__(self, host="localhost", port=27017, database_name="slack_messages", collection_name="messages"):
        """Create a client for ``mongodb://{host}:{port}/`` and select the collection.

        Args:
            host: MongoDB server host name.
            port: MongoDB server port.
            database_name: Name of the database to use.
            collection_name: Name of the collection inside that database.
        """
        self.client = pymongo.MongoClient(f"mongodb://{host}:{port}/")
        self.db = self.client[database_name]
        self.collection = self.db[collection_name]

    def __enter__(self):
        """Return the handler itself so it can be bound by ``with ... as``."""
        return self

    def __exit__(self, exc_type, exc_value, traceback):
        """Close the client on leaving the ``with`` block; never swallow exceptions."""
        self.close_connection()
        return False

    def validate_document(self, document):
        """Validate a document against ``SCHEMA_MODEL``.

        Args:
            document: Mapping of field name to value.

        Raises:
            ValueError: If a schema key is missing from the document, or a
                value is not an instance of the expected type.
        """
        for key, expected_type in self.SCHEMA_MODEL.items():
            if key not in document:
                raise ValueError(f"Missing key '{key}' in the document.")

            if not isinstance(document[key], expected_type):
                raise ValueError(f"Invalid type for '{key}'. Expected {expected_type}, got {type(document[key])}.")

    def insert_many_documents(self, documents, validate=False):
        """Insert a batch of documents into the collection.

        Args:
            documents: Iterable of dict-like documents.
            validate: When true, every document is checked with
                ``validate_document`` before anything is written, so a bad
                record cannot leave the batch half-inserted. Defaults to
                ``False``, which inserts without any schema check.

        Raises:
            ValueError: If ``validate`` is true and a document fails validation.
        """
        # Materialise once so generators can be checked for emptiness and
        # validated before being handed to the driver.
        documents = list(documents)

        # PyMongo rejects an empty batch with a TypeError; an empty CSV should
        # simply be a no-op rather than a crash.
        if not documents:
            return

        if validate:
            for document in documents:
                self.validate_document(document)

        self.collection.insert_many(documents)

    def find_one_document(self, query):
        """Return the first document matching ``query``, or ``None`` if there is none."""
        return self.collection.find_one(query)

    def find(self, query=None, projection=None):
        """Return all documents matching ``query`` as a list.

        Args:
            query: Filter document; ``None`` matches every document.
            projection: Optional field selection passed through to the driver.
        """
        return list(self.collection.find(query, projection))

    def find_all_documents(self, query=None):
        """Return every document matching ``query`` (all documents when omitted)."""
        # A missing or empty filter matches everything, so no special-casing
        # is needed; delegate to ``find`` to keep one code path.
        return self.find(query)

    def close_connection(self):
        """Close the underlying MongoDB client."""
        self.client.close()

    @staticmethod
    def read_csv_to_documents(csv_file_path, **read_csv_kwargs):
        """Read a CSV file and return its rows as a list of dicts.

        Args:
            csv_file_path: Path or file-like object accepted by ``pandas.read_csv``.
            **read_csv_kwargs: Extra keyword arguments forwarded to
                ``pandas.read_csv`` (for example ``index_col=0`` to consume a
                saved index column instead of loading it as ``'Unnamed: 0'``).

        Returns:
            One ``{column: value}`` dict per CSV row; an empty list when the
            file has a header but no rows.
        """
        return pd.read_csv(csv_file_path, **read_csv_kwargs).to_dict(orient='records')

# Source CSV, given relative to the kernel's working directory.
csv_file_path = "slack-data-m.csv"

# Handler built with the constructor defaults.
mongo_handler = MongoDBHandler()

# Turn each CSV row into a dict, then write the whole batch to the collection.
documents = mongo_handler.read_csv_to_documents(csv_file_path)
mongo_handler.insert_many_documents(documents)


Find one record in MongoDB


In [63]:
# Look up a single message by sender (example query).
query_one = dict(sender_name="Leah Newton")
result_one = mongo_handler.find_one_document(query=query_one)
print(f"Found One Document: {result_one}")

Found One Document: {'_id': ObjectId('65694634b65bb69cc03f92ae'), 'Unnamed: 0': 0, 'msg_type': 'message', 'msg_content': 'Check this site. It mighy help\n<https://www.tutorialspoint.com/python_data_science/python_data_aggregation.htm|https://www.tutorialspoint.com/python_data_science/python_data_aggregation.htm>', 'sender_name': 'Leah Newton', 'msg_sent_time': 1661238355.026149, 'msg_dist_type': 'text', 'time_thread_start': 1661237526.480599, 'reply_count': 0, 'reply_users_count': 0, 'reply_users': '0', 'tm_thread_end': 0.0, 'channel': 1}


Find all records in MongoDB
where the sender name is "Leah Newton" and the channel is 1

In [64]:
# Filter on sender and channel together; a document must match both fields.
query_find = dict(sender_name="Leah Newton", channel=1)
find_results = mongo_handler.find(query=query_find)
print(find_results)

[{'_id': ObjectId('65694634b65bb69cc03f92ae'), 'Unnamed: 0': 0, 'msg_type': 'message', 'msg_content': 'Check this site. It mighy help\n<https://www.tutorialspoint.com/python_data_science/python_data_aggregation.htm|https://www.tutorialspoint.com/python_data_science/python_data_aggregation.htm>', 'sender_name': 'Leah Newton', 'msg_sent_time': 1661238355.026149, 'msg_dist_type': 'text', 'time_thread_start': 1661237526.480599, 'reply_count': 0, 'reply_users_count': 0, 'reply_users': '0', 'tm_thread_end': 0.0, 'channel': 1}, {'_id': ObjectId('65694634b65bb69cc03f92b0'), 'Unnamed: 0': 2, 'msg_type': 'message', 'msg_content': 'Important site for working with missing data in pandas. <https://www.geeksforgeeks.org/working-with-missing-data-in-pandas/|https://www.geeksforgeeks.org/working-with-missing-data-in-pandas/>', 'sender_name': 'Leah Newton', 'msg_sent_time': 1661238426.371919, 'msg_dist_type': 'text', 'time_thread_start': 0.0, 'reply_count': 0, 'reply_users_count': 0, 'reply_users': '0'

Return all records from the database where the channel is 1

In [65]:
# Restrict the result to documents whose channel is 1.
query_all = {'channel': 1}
# The filter must be passed explicitly: find_all_documents() with no argument
# returns every document in the collection, not just channel 1.
results_all = mongo_handler.find_all_documents(query_all)
print("Found All Documents:", results_all)

