In [1]:
# Standard library imports
import subprocess
import sys
from pathlib import Path
from pprint import pprint
from urllib.parse import quote_plus

# Import Dependencies
import psycopg2
from pymongo import MongoClient
from sqlalchemy import create_engine
from sqlalchemy import func
from sqlalchemy.ext.automap import automap_base
from sqlalchemy.orm import Session

# Local application imports
from utils import fetch_api_data, load_config, write_to_csv

In [2]:
# Read the project settings from config.json; later cells query the result with .get()
config = load_config("config.json")

In [3]:
# Retrieve the MongoDB credentials from the configuration.
# `or {}` also covers a 'mongodb_cluster' entry that is present but null/empty,
# which would otherwise fail on the chained .get() call.
_mongodb_settings = config.get('mongodb_cluster') or {}
mongodb_user = _mongodb_settings.get('user')
mongodb_pswd = _mongodb_settings.get('password')

# Stop early if either credential is missing or empty
if not mongodb_user or not mongodb_pswd:
    print("MongoDB credentials not found in the configuration file.")
    sys.exit(1)  # Non-zero status so a scripted run reports the failure


In [4]:
# Load data in MongoDB using the CSV file
def mongoImportFromCSV(mongo_uri, db_name, collection_name, csv_file_location):
    """Import one CSV file into a MongoDB collection by running `mongoimport`.

    Args:
        mongo_uri: Connection string passed to `--uri`.
        db_name: Target database, passed to `--db`.
        collection_name: Target collection, passed to `--collection`.
        csv_file_location: Path of the CSV file; converted with `str()` so a
            `Path` works as well as a plain string.

    Returns:
        True when `mongoimport` exits with status 0, False when it exits with
        a non-zero status (an error line is printed in that case).

    Raises:
        FileNotFoundError: propagated from `subprocess.run` when the
            `mongoimport` executable cannot be found.
    """
    # Build the mongoimport command as an argument list (no shell involved)
    command = [
        'mongoimport',
        '--uri', mongo_uri,
        '--db', db_name,
        '--collection', collection_name,
        '--type', 'csv',
        '--headerline',  # Assumes the first line of the CSV file contains column headers
        '--file', str(csv_file_location)
    ]
    # Execute the command
    try:
        subprocess.run(command, check=True)
    except subprocess.CalledProcessError as e:
        # Report only the exit status: str(e) echoes the whole command line,
        # which contains the connection URI and therefore the password.
        print(f"Error importing {csv_file_location}: mongoimport exited with status {e.returncode}")
        return False

    # Only reached when mongoimport succeeded
    print(f"Imported {csv_file_location} into collection {collection_name}.")
    return True


In [5]:
# Import one CSV file into a collection using the notebook's connection settings
def writeDataToDB(collection_name, csv_file_location, mongo_uri=None, db_name=None):
    """Import `csv_file_location` into `collection_name` via `mongoImportFromCSV`.

    Args:
        collection_name: Target collection.
        csv_file_location: Path of the CSV file to import.
        mongo_uri: Connection string. When omitted, the notebook-level
            variable `mongo_uri` is used.
        db_name: Target database. When omitted, the notebook-level variable
            `db_name` is used.

    Raises:
        NameError: when a setting is neither passed in nor defined as a
            notebook-level variable.
    """
    # The parameters shadow the notebook-level names, so look those up explicitly.
    notebook_vars = globals()
    try:
        uri = mongo_uri if mongo_uri is not None else notebook_vars['mongo_uri']
        database = db_name if db_name is not None else notebook_vars['db_name']
    except KeyError as missing:
        raise NameError(
            f"writeDataToDB needs {missing}: pass it as an argument or define it "
            "as a notebook-level variable"
        ) from None
    mongoImportFromCSV(uri, database, collection_name, csv_file_location)

In [6]:
def getDatabase(mongo, db_name):
    """Print the database names visible to `mongo`, then return `mongo[db_name]`.

    Args:
        mongo: Client object exposing `list_database_names()` and item access.
        db_name: Name of the database to select; formatted to text before use.

    Returns:
        Whatever `mongo[...]` yields for the formatted name.
    """
    available_databases = mongo.list_database_names()
    print(available_databases)
    selected_name = f"{db_name}"
    return mongo[selected_name]

def checkCollectionNames(db):
    """Return the result of `db.list_collection_names()` for the given database."""
    collection_names = db.list_collection_names()
    return collection_names

In [7]:
# assign the collections to variables
# subcategory = db['subcategory']
# contacts = db['contacts']
# campaign = db['campaign']

In [8]:
# Count the documents in the category collection and preview the first few
def getCategoryCount(db, preview_limit=9):
    """Print and return the number of documents in `db['category']`.

    After the count, up to `preview_limit` documents are pretty-printed. A
    collection holding fewer documents than the limit simply yields a shorter
    preview instead of failing.

    Args:
        db: Database object; `db['category']` must provide `count_documents`
            and `find`.
        preview_limit: Maximum number of documents to pretty-print (int).
            Zero or a negative value skips the preview.

    Returns:
        The document count reported by `count_documents({})`.
    """
    category = db['category']
    # Count once and reuse the value for both the message and the return
    total = category.count_documents({})
    print("Number of crowdfunding categories:", total)
    # Guarded because a limit of 0 means "no limit" to MongoDB
    if preview_limit > 0:
        # One bounded query instead of indexing the cursor position by position
        for document in category.find({}).limit(preview_limit):
            pprint(document)
    return total

In [9]:
def populateDatabase(db_name, mongo_uri):
    """Import the four crowdfunding CSV files into `db_name`, one collection each.

    Args:
        db_name: Target database name, forwarded to `mongoImportFromCSV`.
        mongo_uri: Connection string, forwarded to `mongoImportFromCSV`.
    """
    # (collection, CSV path) pairs, listed in the order they are imported
    import_plan = (
        ('contacts', 'Resources/contacts.csv'),
        ('subcategory', 'Resources/subcategory.csv'),
        ('category', 'Resources/category.csv'),
        ('campaign', 'Resources/campaign.csv'),
    )
    for collection_name, csv_path in import_plan:
        mongoImportFromCSV(mongo_uri, db_name, collection_name, csv_path)
    


In [10]:
def createCrowdFundingCluster():
    """Populate the `crowdfunding_db` database from CSV files and print a summary.

    Builds the connection URI from the notebook-level `mongodb_user` and
    `mongodb_pswd`, imports the CSV files via `populateDatabase`, then connects
    with `MongoClient` and prints the database names, the collection names and
    the category count.

    The credentials are expected in raw (not percent-encoded) form; they are
    encoded here before being placed in the URI.
    """
    db_name = 'crowdfunding_db'
    # Percent-encode the credentials so characters such as '@', ':' or '/'
    # cannot break the URI structure.
    user = quote_plus(str(mongodb_user))
    password = quote_plus(str(mongodb_pswd))
    mongo_uri = f"mongodb+srv://{user}:{password}@datafirst.8afvk.mongodb.net/"
    # NOTE(review): this URI (password included) is handed to mongoimport as a
    # command-line argument by mongoImportFromCSV.
    populateDatabase(db_name, mongo_uri)
    # The context manager closes the client even if an inspection call raises
    with MongoClient(mongo_uri) as mongo:
        db = getDatabase(mongo, db_name)
        print(checkCollectionNames(db))
        print(getCategoryCount(db))