In [3]:
# Standard library imports
import subprocess
import sys
from pathlib import Path
from pprint import pprint

# Import Dependencies
import psycopg2
from pymongo import MongoClient
from sqlalchemy import create_engine
from sqlalchemy import func
from sqlalchemy.engine import URL
from sqlalchemy.ext.automap import automap_base
from sqlalchemy.orm import Session

# Local application imports
from utils import fetch_api_data, load_config, write_to_csv

In [4]:
# Read the notebook settings from the JSON configuration file
config = load_config('config.json')

# Pull the Postgres user name and password out of the
# 'postgres_connection' section of the configuration
postgres_user, postgres_pswd = (
    config.get('postgres_connection', {}).get(key)
    for key in ('user', 'password')
)

# Stop here unless both the user name and the password were found
if not (postgres_user and postgres_pswd):
    print("Postgres credentials not found in the configuration file.")
    sys.exit()  # Nothing below can run without database credentials

# Name of the database this notebook creates and queries
db_name = 'crowdfunding_db'

In [None]:
def execute_sql_script(dbname, user, password, host, port, sql_file):
    """Run every statement of a SQL script file against a PostgreSQL database.

    The connection is opened in autocommit mode, so each statement takes
    effect immediately. Errors are reported with ``print`` rather than
    raised, and the cursor/connection are always closed.

    Args:
        dbname: Database to connect to.
        user: PostgreSQL user name.
        password: Password for ``user``.
        host: Server host name.
        port: Server port.
        sql_file: Path of the SQL script to execute.

    Returns:
        None. Success or failure is reported on standard output.

    Note:
        The script is split on every ``;``. A semicolon inside a string
        literal or a function body would therefore be split in the wrong
        place; keep scripts to plain, semicolon-terminated statements.
    """
    # Bind both handles before the try block: if connect() or cursor() raises,
    # the finally clause must still be able to inspect them without failing.
    conn = None
    cur = None
    try:
        # Connect to PostgreSQL server (use an administrative database for dropping/creating databases)
        conn = psycopg2.connect(dbname=dbname, user=user, password=password, host=host, port=port)
        conn.autocommit = True
        cur = conn.cursor()

        # Read the SQL script
        with open(sql_file, 'r') as file:
            sql_script = file.read()

        # Execute each non-empty statement separately
        for command in sql_script.split(';'):
            if command.strip():
                cur.execute(command)

        print(f"SQL script {sql_file} executed successfully.")

    except Exception as e:
        print(f"Error executing {sql_file}: {e}")
    finally:
        if cur is not None:
            cur.close()
        if conn is not None:
            conn.close()

In [None]:
# Connection settings shared by both script runs
# (replace host/port with your own PostgreSQL server details if they differ)
connection_args = {
    'user': postgres_user,
    'password': postgres_pswd,
    'host': 'localhost',
    'port': '5432',
}

# (database to connect to, script to run), in execution order:
#   1. from the 'postgres' maintenance database, run the script that drops and
#      recreates the crowdfunding database;
#   2. from the new database, run the script that creates the schema and data.
# NOTE(review): 'cowdfunding_db_schema.sql' looks like a typo for
# 'crowdfunding' - confirm against the actual file name before changing it.
for target_db, script_path in (
    ('postgres', 'Resources/create_crowdfunding_db.sql'),
    (db_name, 'Resources/cowdfunding_db_schema.sql'),
):
    execute_sql_script(dbname=target_db, sql_file=script_path, **connection_args)

In [7]:
# Create an engine for the crowdfunding database.
# The URL is assembled from its parts so that special characters in the
# credentials (e.g. '@', ':' or '/') are escaped instead of corrupting the
# connection string, which is what happens with plain f-string interpolation.
postgres_url = URL.create(
    drivername="postgresql+psycopg2",
    username=postgres_user,
    password=postgres_pswd,
    host="localhost",
    port=5432,
    database=db_name,
)
postgres_engine = create_engine(postgres_url)

In [10]:
# Reflect Database into ORM classes
# automap_base() creates an empty declarative base; prepare() then inspects
# the database behind postgres_engine and generates the mapped classes.
Base = automap_base()
Base.prepare(autoload_with=postgres_engine)
# Last expression of the cell: displays the names of the reflected classes
Base.classes.keys()

['contacts', 'campaign', 'category', 'subcategory']

In [12]:
# Bind each reflected crowdfunding table class to a module-level name
Category, Subcategory = Base.classes.category, Base.classes.subcategory
Contacts, Campaign = Base.classes.contacts, Base.classes.campaign

In [13]:
# Open an ORM session on the Postgres engine
postgres_session = Session(postgres_engine)

# Sanity check: display every value of the category table's 'category' column
postgres_session.query(Category.category).all()

[]

In [7]:
# Close the ORM session opened above.
# NOTE(review): a later cell calls postgres_session.query(...) again; confirm
# that re-using the session after this close is intended.
postgres_session.close()

In [3]:
# Retrieve the MongoDB cluster settings from the configuration
mongodb_user = config.get('mongodb_cluster', {}).get('user')
mongodb_pswd = config.get('mongodb_cluster', {}).get('password')
mongodb_srvr = config.get('mongodb_cluster', {}).get('server')

# All three values are interpolated into the MongoDB connection URI, so stop
# if any of them is missing. The server was previously left unchecked, which
# produced a URI ending in '@None/' when it was absent.
if not mongodb_user or not mongodb_pswd or not mongodb_srvr:
    print("MongoDB credentials or server not found in the configuration file.")
    sys.exit()  # Nothing below can run without the connection settings

In [5]:
# MongoDB connection details
# NOTE(review): the user name and password are placed in the URI unescaped;
# if they can contain characters such as '@' or ':' they need percent-encoding
# - confirm how they are stored in config.json before changing this.
mongo_uri = f"mongodb+srv://{mongodb_user}:{mongodb_pswd}@{mongodb_srvr}/"
directory = Path('Resources')

# Files whose import failed, so the final summary is accurate
failed_files = []

# Loop through CSV files in the directory
# NOTE(review): every run appends the rows again (no '--drop'), so re-running
# this cell duplicates documents - confirm whether that is intended.
for csv_file in directory.glob('*.csv'):
    # Use the file name without extension as the collection name
    collection_name = csv_file.stem

    # Build the mongoimport command as an argument list (no shell involved)
    command = [
        'mongoimport',
        '--uri', mongo_uri,
        '--db', db_name,
        '--collection', collection_name,
        '--type', 'csv',
        '--headerline',  # Assumes the first line of the CSV file contains column headers
        '--file', str(csv_file)
    ]

    # Execute the command; check=True raises when mongoimport exits non-zero
    try:
        subprocess.run(command, check=True)
    except subprocess.CalledProcessError as e:
        print(f"Error importing {csv_file}: {e}")
        failed_files.append(csv_file)
    else:
        # Only report success when the import actually succeeded
        print(f"Imported {csv_file} into collection {collection_name}.")

if failed_files:
    print(f"Finished with {len(failed_files)} failed import(s).")
else:
    print("All files imported.")

2024-09-15T16:44:42.432-0700	connected to: mongodb+srv://[**REDACTED**]@mongofreecluster.xk1c6.mongodb.net/
2024-09-15T16:44:45.166-0700	1000 document(s) imported successfully. 0 document(s) failed to import.


Imported Resources/contacts.csv into collection contacts.


2024-09-15T16:44:45.915-0700	connected to: mongodb+srv://[**REDACTED**]@mongofreecluster.xk1c6.mongodb.net/
2024-09-15T16:44:46.025-0700	24 document(s) imported successfully. 0 document(s) failed to import.


Imported Resources/subcategory.csv into collection subcategory.


2024-09-15T16:44:46.740-0700	connected to: mongodb+srv://[**REDACTED**]@mongofreecluster.xk1c6.mongodb.net/
2024-09-15T16:44:46.865-0700	9 document(s) imported successfully. 0 document(s) failed to import.


Imported Resources/category.csv into collection category.


2024-09-15T16:44:47.596-0700	connected to: mongodb+srv://[**REDACTED**]@mongofreecluster.xk1c6.mongodb.net/


Imported Resources/campaign.csv into collection campaign.
All files imported.


2024-09-15T16:44:50.578-0700	1000 document(s) imported successfully. 0 document(s) failed to import.


In [None]:
def getTableCount(postgres_engine, postgres_session, table_name):
    """Return the number of rows in the reflected table called ``table_name``.

    Args:
        postgres_engine: Engine handed to ``getBase`` to reflect the database.
        postgres_session: Session used to run the count query.
        table_name: Name under which the table's class is looked up in the
            reflected classes.

    Returns:
        The value of ``postgres_session.query(<mapped class>).count()``.
    """
    reflected = getBase(postgres_engine)
    mapped_cls = reflected.classes.get(table_name)
    return postgres_session.query(mapped_cls).count()

In [18]:
def getBase(postgres_engine):
    """Reflect the database behind ``postgres_engine`` into an automap base.

    Prints the names of the reflected classes as a side effect.

    Args:
        postgres_engine: Engine passed to ``prepare(autoload_with=...)``.

    Returns:
        The prepared automap base.
    """
    reflected_base = automap_base()
    reflected_base.prepare(autoload_with=postgres_engine)
    mapped_names = reflected_base.classes.keys()
    print(mapped_names)
    return reflected_base

# Count the rows of one reflected table through the shared helper instead of
# repeating its body here, so the two code paths cannot drift apart.
table_name = 'category'
results = getTableCount(postgres_engine, postgres_session, table_name)
print(results)

['contacts', 'campaign', 'category', 'subcategory']
9


In [6]:
# Open a client on the cluster and list the databases it exposes
mongo = MongoClient(mongo_uri)
print(mongo.list_database_names())

# Select the crowdfunding database and list its collections
db = mongo[db_name]
print(db.list_collection_names())

['crowdfunding_db', 'sample_mflix', 'admin', 'local']
['contacts', 'category', 'campaign', 'subcategory']


In [7]:
# Bind a handle for each crowdfunding collection to its own variable
category, subcategory, contacts, campaign = (
    db[collection_name]
    for collection_name in ('category', 'subcategory', 'contacts', 'campaign')
)

In [8]:
# Create a query that finds all documents in the category collection
query = {}

# Number of documents to preview below
preview_limit = 9

# Let the server cap the preview instead of indexing the cursor with a fixed
# range: indexing issues one query per index and raises IndexError when the
# collection holds fewer documents than the range assumes.
results = category.find(query).limit(preview_limit)

# Print the number of results (counted with the same filter as the query)
# NOTE(review): the recorded output shows 27 here although the import log
# reports 9 category documents per run - presumably the import cell was run
# more than once; confirm.
print("Number of crowdfunding categories:", category.count_documents(query))
for document in results:
    pprint(document)


Number of crowdfunding categories: 27
{'_id': ObjectId('66e5ccad3d92b391a401471b'),
 'category': 'food',
 'category_id': 'cat1'}
{'_id': ObjectId('66e5ccad3d92b391a401471c'),
 'category': 'music',
 'category_id': 'cat2'}
{'_id': ObjectId('66e5ccad3d92b391a401471d'),
 'category': 'technology',
 'category_id': 'cat3'}
{'_id': ObjectId('66e5ccad3d92b391a401471e'),
 'category': 'theater',
 'category_id': 'cat4'}
{'_id': ObjectId('66e5ccad3d92b391a401471f'),
 'category': 'film & video',
 'category_id': 'cat5'}
{'_id': ObjectId('66e5ccad3d92b391a4014720'),
 'category': 'publishing',
 'category_id': 'cat6'}
{'_id': ObjectId('66e5ccad3d92b391a4014721'),
 'category': 'games',
 'category_id': 'cat7'}
{'_id': ObjectId('66e5ccad3d92b391a4014722'),
 'category': 'photography',
 'category_id': 'cat8'}
{'_id': ObjectId('66e5ccad3d92b391a4014723'),
 'category': 'journalism',
 'category_id': 'cat9'}


In [9]:
# Close the MongoDB client now that the queries above are finished
mongo.close()