In [None]:
import os
import pandas as pd
import pymongo
import certifi

MySQL connection arguments (not used in MongoDB operations)

In [None]:
# MySQL connection settings (kept for reference; the MongoDB cells below do not
# read them). The password is taken from the MYSQL_PWD environment variable so
# that no credential is stored in the notebook; it falls back to an empty
# string when the variable is not set.
mysql_args = {
    "uid": "root",
    "pwd": os.environ.get("MYSQL_PWD", ""),
    "hostname": "localhost",
    "dbname": "northwind_dw2"
}

MongoDB connection arguments

In [None]:
# Settings passed to get_mongo_client(**mongodb_args). The user/password/cluster
# fields are only read when cluster_location is "atlas"; "local" connects to
# mongodb://localhost:27017/.
mongodb_args = dict(
    user_name="",
    password="",
    cluster_name="",
    cluster_subnet="",
    cluster_location="local",  # Set to "atlas" if using MongoDB Atlas
    db_name="northwind_dw2",
)

In [None]:
def get_mongo_client(**args):
    """Create a ``pymongo.MongoClient`` for a local server or an Atlas cluster.

    Keyword arguments:
        cluster_location: ``"local"`` or ``"atlas"`` (required).
        user_name, password, cluster_name, cluster_subnet: only read when
            ``cluster_location`` is ``"atlas"``; they are combined into a
            ``mongodb+srv://`` connection string.

    Returns:
        The connected ``pymongo.MongoClient``. The caller is responsible for
        closing it.

    Raises:
        ValueError: if ``cluster_location`` is missing or is neither
            ``"atlas"`` nor ``"local"``.
        KeyError: if an Atlas-only argument is missing for an Atlas connection.
    """
    # Local import: only the Atlas branch needs it, and it keeps the cell
    # self-contained without touching the notebook's import cell.
    from urllib.parse import quote_plus

    location = args.get("cluster_location")
    if location not in ('atlas', 'local'):
        raise ValueError("Specify 'atlas' or 'local' for the cluster_location parameter.")

    if location == "local":
        return pymongo.MongoClient("mongodb://localhost:27017/")

    # Credentials must be percent-escaped: characters such as '@', ':', '/'
    # or '%' would otherwise corrupt the URI.
    user = quote_plus(str(args['user_name']))
    password = quote_plus(str(args['password']))
    connect_str = (
        f"mongodb+srv://{user}:{password}"
        f"@{args['cluster_name']}.{args['cluster_subnet']}.mongodb.net"
    )
    return pymongo.MongoClient(connect_str, tlsCAFile=certifi.where())

In [None]:
def set_mongo_collection(client, db_name, csv_path):
    """Replace the ``RetailSales`` collection with the rows of a CSV file.

    The CSV is parsed *before* the existing collection is dropped, so a missing
    or unreadable file leaves the stored data untouched.

    Args:
        client: an open MongoDB client; it is closed before this function
            returns, whether or not loading succeeded.
        db_name: name of the database that holds the collection.
        csv_path: path of the CSV file to load. It is read as UTF-8, falling
            back to ISO-8859-1 if UTF-8 decoding fails.

    Raises:
        Whatever ``pandas.read_csv`` or the MongoDB driver raises; the client
        is still closed in that case.
    """
    collection_name = "RetailSales"  # Single collection for the dataset

    try:
        try:
            df = pd.read_csv(csv_path, encoding='utf-8')
        except UnicodeDecodeError:
            df = pd.read_csv(csv_path, encoding='ISO-8859-1')
        records = df.to_dict(orient='records')

        collection = client[db_name][collection_name]
        collection.drop()  # Drop existing collection if it exists
        # PyMongo rejects an empty document list, so a header-only CSV simply
        # leaves the collection empty.
        if records:
            collection.insert_many(records)
    finally:
        client.close()

In [None]:
def get_mongo_dataframe(mongo_client, db_name, collection, query=None):
    """Run a ``find`` query and return the matching documents as a DataFrame.

    Args:
        mongo_client: an open MongoDB client; it is closed before this
            function returns, even if the query fails.
        db_name: name of the database to query.
        collection: name of the collection to query.
        query: filter document passed to ``find``. ``None`` (the default)
            means "match everything" and is sent as ``{}``.

    Returns:
        A ``pandas.DataFrame`` with one row per document and the ``_id``
        column removed when present.
    """
    # A fresh dict per call avoids sharing one mutable default between calls.
    filter_doc = {} if query is None else query
    try:
        documents = list(mongo_client[db_name][collection].find(filter_doc))
    finally:
        mongo_client.close()

    dframe = pd.DataFrame(documents)
    # errors="ignore" covers empty results, where there is no '_id' column.
    return dframe.drop(columns=['_id'], errors='ignore')

Set up MongoDB collection and insert data

In [None]:
# The dataset is looked up in the current working directory.
file_path = os.path.join(os.getcwd(), 'retail_sales_dataset.csv')

# Connect and (re)load the CSV into MongoDB. set_mongo_collection closes the
# client it is given, so later cells must open a new connection.
client = get_mongo_client(**mongodb_args)
set_mongo_collection(
    client=client,
    db_name=mongodb_args["db_name"],
    csv_path=file_path,
)

Sample Query: Total Sales by Product Category

In [None]:
# Open a new connection; `client` and `db` are reused by the cells that follow.
client = get_mongo_client(**mongodb_args)
db = client[mongodb_args["db_name"]]

# One output document per distinct "Product Category", with that category's
# "Total Amount" values summed into TotalSales.
group_by_category = {
    "$group": {
        "_id": "$Product Category",
        "TotalSales": {"$sum": "$Total Amount"},
    }
}
query_aggregation = [group_by_category]
result = list(db["RetailSales"].aggregate(query_aggregation))

print("Total Sales by Product Category:")
for category_total in result:
    print(category_total)

Sample Query: Monthly Sales Totals (reuses the `db` handle opened in the previous query cell)

In [None]:
# Stage 1: parse the string "Date" field and keep only its month number
# alongside "Total Amount".
extract_month = {
    "$project": {
        "Month": {"$month": {"$dateFromString": {"dateString": "$Date"}}},
        "Total Amount": 1,
    }
}
# Stage 2: sum "Total Amount" per month number.
# NOTE(review): grouping is by month number only, so the same month from
# different years lands in one bucket; confirm this is intended.
sum_per_month = {
    "$group": {
        "_id": "$Month",
        "MonthlySales": {"$sum": "$Total Amount"},
    }
}
# Stage 3: ascending month order.
order_by_month = {"$sort": {"_id": 1}}

query_aggregation = [extract_month, sum_per_month, order_by_month]
result = list(db["RetailSales"].aggregate(query_aggregation))

print("\nMonthly Sales Totals:")
for month_total in result:
    print(month_total)

Close MongoDB client connection

In [None]:
# Release the connection opened for the query cells; `client` and `db` must
# not be used after this point without reconnecting.
client.close()
print("MongoDB connection closed.")