In [None]:
# Brief Summary: In this Jupyter Notebook, I extract data about my fake library, which consists of book, member, loan, and late fee data.
# I extract my book data from a MySQL database table, my member and loan data from MongoDB collections (managed with MongoDB Compass),
# and my late fee data from a CSV file on my local machine's file system. After extracting all of that data, I transform it as necessary.
# This includes replacing date values with their corresponding date keys (as per the 'dim_date' table in the 'northwind_dw' MySQL database).
# Finally, I load the transformed data into MySQL database tables, and then read those tables back to confirm that they were
# successfully created.

In [1]:
# MODULE IMPORTS:
import os

import pandas as pd
import pymongo
from sqlalchemy import create_engine
from sqlalchemy import text

In [19]:
# CONSTANTS:
## Local Machine:
LATE_FEES_FILE_PATH = 'Late Fees.csv'

## MySQL:
MySQL_USER = 'root'
# The password is taken from the MYSQL_PASSWORD environment variable when it is set, so the credential
# does not have to live in the notebook.
# NOTE(review): the literal fallback is kept only so existing runs keep working; rotate this password
# and remove the fallback before sharing or committing the notebook.
MySQL_PASSWORD = os.environ.get('MYSQL_PASSWORD', '311X2EjL534m0956')
MySQL_DATABASE_NAME = 'my_library'
MySQL_DATABASE_BOOKS_TABLE_NAME = 'books'

## MongoDBCompass:
MongoDBCompass_HOST_NAME = 'localhost'
MongoDBCompass_TCP_IP_PORT = '27017'
# Connection string assembled from the host and port above
MongoDBCompass_CONNECTION_STRING = f'mongodb://{MongoDBCompass_HOST_NAME}:{MongoDBCompass_TCP_IP_PORT}/'
MongoDBCompass_DATABASE_NAME = 'my_library'
MongoDBCompass_MEMBERS_COLLECTION_NAME = 'members'
MongoDBCompass_LOANS_COLLECTION_NAME = 'loans'

In [5]:
# FUNCTIONS:
## Create pandas Dataframe from MySQL database table (Assuming server is the local machine)
def get_MySQL_database_table(user, password, database_name, SQL_query):
    """Run a SQL query against a MySQL database on localhost and return the result as a DataFrame.

    Parameters:
        user: MySQL user name.
        password: Password of that user.
        database_name: Database the connection is opened against.
        SQL_query: Query passed to 'pd.read_sql'.

    Returns:
        The pandas DataFrame produced by 'pd.read_sql'.
    """
    # NOTE(review): user and password are interpolated into the URL unescaped; a password containing
    # characters such as '@' or '/' would presumably need URL-quoting first -- confirm if that ever applies.
    MySQL_connection_string = f'mysql+pymysql://{user}:{password}@localhost/{database_name}'
    MySQL_engine = create_engine(MySQL_connection_string, pool_recycle=3600)
    try:
        # The context manager closes the connection even when the query raises
        with MySQL_engine.connect() as MySQL_connection:
            return pd.read_sql(SQL_query, MySQL_connection)
    finally:
        # A new engine is created on every call, so release it (and its pool) here
        MySQL_engine.dispose()

## Create (or update) MySQL database table from pandas Dataframe (Assuming server is the local machine)
def set_MySQL_database_table(user, password, df, database_name, table_name, primary_key_name, database_operation):
    """Write a DataFrame to a MySQL table on localhost.

    Parameters:
        user: MySQL user name.
        password: Password of that user.
        df: DataFrame to write (its index is not written).
        database_name: Database the connection is opened against.
        table_name: Name of the target table.
        primary_key_name: Column that becomes the primary key; only used by 'insert'.
        database_operation: 'insert' replaces the table with the DataFrame's rows and then adds the
            primary key; 'update' appends the DataFrame's rows to the table.

    Raises:
        ValueError: If 'database_operation' is neither 'insert' nor 'update'.
    """
    # Fail loudly on an unknown operation instead of silently writing nothing
    if database_operation not in ('insert', 'update'):
        raise ValueError(f"database_operation must be 'insert' or 'update', got {database_operation!r}")

    MySQL_connection_string = f'mysql+pymysql://{user}:{password}@localhost/{database_name}'
    MySQL_engine = create_engine(MySQL_connection_string, pool_recycle=3600)
    try:
        # 'begin()' commits on success, rolls back on error, and always releases the connection
        with MySQL_engine.begin() as MySQL_connection:
            if database_operation == 'insert':
                df.to_sql(table_name, con=MySQL_connection, index=False, if_exists='replace')
                # Run the ALTER on the same connection as the write ('Engine.execute' no longer exists in
                # SQLAlchemy 2.0). Backticks keep identifiers containing spaces or reserved words valid.
                MySQL_connection.execute(text(f'ALTER TABLE `{table_name}` ADD PRIMARY KEY (`{primary_key_name}`);'))
            else:
                df.to_sql(table_name, con=MySQL_connection, index=False, if_exists='append')
    finally:
        # A new engine is created on every call, so release it (and its pool) here
        MySQL_engine.dispose()

In [9]:
# MAIN BODY OF CODE:
## Read every row of the 'books' table of the MySQL database 'my_library' into a pandas Dataframe
get_all_books = f'SELECT * FROM {MySQL_DATABASE_NAME}.{MySQL_DATABASE_BOOKS_TABLE_NAME};'
books_df = get_MySQL_database_table(
    user=MySQL_USER,
    password=MySQL_PASSWORD,
    database_name=MySQL_DATABASE_NAME,
    SQL_query=get_all_books,
)
books_df

Unnamed: 0,id,title,author,year_published
0,1,The Martian,Andy Weir,2011
1,2,How Not to Die,Michael Greger,2015
2,3,Book Lovers,Emily Henry,2022
3,4,Becoming,Michelle Obama,2018
4,5,Cosmos,Carl Sagan,1980
5,6,Naked Statistics: Stripping the Dread from the...,Charles Wheelan,2012
6,7,Harry Potter and the Chamber of Secrets,J.K. Rowling,1998
7,8,Jane Eyre,Charlotte Bronte,1847
8,9,Pro Git,Ben Straub,2009
9,10,The Burnout Society,Byung-Chul Han,2010


In [10]:
### Note(s): The MySQL database table 'books' was created by (1) creating a JSON file of a list of books and
### (2) importing that JSON file into a table of the MySQL database 'my_library'.

In [59]:
## Read the 2023 rows (date key and full date) of the 'dim_date' table of the MySQL database 'northwind_dw'
### The query names its database explicitly, so the connection itself is still opened against 'my_library'.
get_relevant_dates = 'SELECT date_key, full_date FROM northwind_dw.dim_date WHERE calendar_year = 2023;'
dates_df = get_MySQL_database_table(
    user=MySQL_USER,
    password=MySQL_PASSWORD,
    database_name=MySQL_DATABASE_NAME,
    SQL_query=get_relevant_dates,
)
dates_df

Unnamed: 0,date_key,full_date
0,20230101,2023-01-01
1,20230102,2023-01-02
2,20230103,2023-01-03
3,20230104,2023-01-04
4,20230105,2023-01-05
...,...,...
360,20231227,2023-12-27
361,20231228,2023-12-28
362,20231229,2023-12-29
363,20231230,2023-12-30


In [14]:
## Connect to the MongoDBCompass database 'my_library', from which the 'members' and 'loans' collections are extracted
MongoDBCompass_client = pymongo.MongoClient(MongoDBCompass_CONNECTION_STRING)
MongoDBCompass_database = MongoDBCompass_client.get_database(MongoDBCompass_DATABASE_NAME)
### Optional check: list the collections to confirm that 'members' and 'loans' exist
MongoDBCompass_database.list_collection_names()

['members', 'loans']

In [15]:
### Query filters and field projections for the two MongoDBCompass collections
### (an empty filter matches every document; '_id' is excluded and the listed fields are included)
#### Collection 'members'
members_conditions = {}
members_projection = {
    '_id': 0,
    **dict.fromkeys(['id', 'name', 'physical_address', 'phone_number', 'email_address'], 1),
}

#### Collection 'loans'
loans_conditions = {}
loans_projection = {
    '_id': 0,
    **dict.fromkeys(
        ['book_id', 'book_title', 'book_author', 'loaner_id', 'loaner_name', 'loan_start_date', 'loan_exp_date', 'return_date'],
        1,
    ),
}

In [16]:
### Load the matching 'members' documents into a pandas Dataframe
### (the collection is looked up through its constant instead of repeating the name here)
members_collection = MongoDBCompass_database[MongoDBCompass_MEMBERS_COLLECTION_NAME]
members_df = pd.DataFrame(list(members_collection.find(members_conditions, members_projection)))
members_df

Unnamed: 0,id,name,physical_address,phone_number,email_address
0,1,Ailey Thicknesse,Over water and under hill,(123) 234-9878,ailey.thicknesse@gmail.com
1,2,Septima Fairfax,The Forbidden Forest,,
2,3,October Lynx,Bridgemar,(575) 433-9122,octo_cat@outlook.com
3,4,Fen Galeway,Past the ravine and up the mountain,(911) 232-7363,
4,5,Robert John,"1523 Cherry Lane, Charlottesville, VA 22901",(255) 109-1293,robert.john@gmail.com


In [52]:
### Load the matching 'loans' documents into a pandas Dataframe
### (the collection is looked up through its constant instead of repeating the name here)
loans_collection = MongoDBCompass_database[MongoDBCompass_LOANS_COLLECTION_NAME]
loans_df = pd.DataFrame(list(loans_collection.find(loans_conditions, loans_projection)))
loans_df

Unnamed: 0,book_id,book_title,book_author,loaner_id,loaner_name,loan_start_date,loan_exp_date,return_date
0,1,The Martian,Andy Weir,4,Fen Galeway,10/12/2023,11/12/2023,
1,2,How Not to Die,Michael Greger,4,Fen Galeway,9/20/2023,10/20/2023,
2,7,Harry Potter and the Chamber of Secrets,J.K. Rowling,1,Ailey Thicknesse,8/07/2023,9/07/2023,9/01/2023
3,9,Pro Git,Ben Straub,2,Septima Fairfax,8/15/2023,9/15/2023,10/11/2023
4,7,Harry Potter and the Chamber of Secrets,J.K. Rowling,5,Robert John,10/21/2023,11/21/2023,


In [18]:
### Note(s): The MongoDBCompass database collections 'members' and 'loans' were created by (1) creating
### JSON files for lists of members and loans and (2) importing those JSON files into collections of
### the MongoDBCompass database 'my_library'.

In [20]:
## Read the late fee data from the CSV file at 'LATE_FEES_FILE_PATH' on the local machine
late_fees_df = pd.read_csv(filepath_or_buffer=LATE_FEES_FILE_PATH)
late_fees_df

Unnamed: 0,Book ID,Book Title,Book Author,Base Late Fee,Additional Late Fee (Cumulating Daily)
0,1,The Martian,Andy Weir,5.0,1.75
1,2,How Not to Die,Michael Greger,2.0,1.0
2,3,Book Lovers,Emily Henry,2.25,1.5
3,4,Becoming,Michelle Obama,2.0,1.0
4,5,Cosmos,Carl Sagan,1.75,0.75
5,6,Naked Statistics: Stripping the Dread from the...,Charles Wheelan,1.5,0.5
6,7,Harry Potter and the Chamber of Secrets,J.K. Rowling,7.0,3.0
7,8,Jane Eyre,Charlotte Bronte,3.0,1.5
8,9,Pro Git,Ben Straub,1.25,0.5


In [22]:
# Read the late fees CSV through the shared path constant rather than a second hard-coded copy of the path.
# NOTE(review): this reads the same file as 'late_fees_df'; nothing visible in this notebook uses
# 'df_late_fees' afterwards, so this cell may be removable -- confirm.
df_late_fees = pd.read_csv(LATE_FEES_FILE_PATH)
df_late_fees

Unnamed: 0,Book ID,Book Title,Book Author,Base Late Fee,Additional Late Fee (Cumulating Daily)
0,1,The Martian,Andy Weir,5.0,1.75
1,2,How Not to Die,Michael Greger,2.0,1.0
2,3,Book Lovers,Emily Henry,2.25,1.5
3,4,Becoming,Michelle Obama,2.0,1.0
4,5,Cosmos,Carl Sagan,1.75,0.75
5,6,Naked Statistics: Stripping the Dread from the...,Charles Wheelan,1.5,0.5
6,7,Harry Potter and the Chamber of Secrets,J.K. Rowling,7.0,3.0
7,8,Jane Eyre,Charlotte Bronte,3.0,1.5
8,9,Pro Git,Ben Straub,1.25,0.5
9,10,The Burnout Society,Byung-Chul Han,1.0,0.5


In [99]:
## Transform pandas Dataframe 'loans_df'
### Create primary key column
# 'loan_key' is the primary key of both the 'loans' and the 'facts' MySQL tables, so it must exist before
# those tables are loaded. The guard makes this cell safe to re-run: 'DataFrame.insert' raises a
# ValueError when the column already exists.
if 'loan_key' not in loans_df.columns:
    loans_df.insert(0, 'loan_key', range(1, loans_df.shape[0]+1))

### Create pandas Dataframe 'facts_df' from pandas Dataframe 'loans_df'
facts_df = loans_df.copy()
facts_df

Unnamed: 0,loan_key,book_id,book_title,book_author,loaner_id,loaner_name,loan_start_date,loan_exp_date,return_date
0,1,1,The Martian,Andy Weir,4,Fen Galeway,10/12/2023,11/12/2023,
1,2,2,How Not to Die,Michael Greger,4,Fen Galeway,9/20/2023,10/20/2023,
2,3,7,Harry Potter and the Chamber of Secrets,J.K. Rowling,1,Ailey Thicknesse,8/07/2023,9/07/2023,9/01/2023
3,4,9,Pro Git,Ben Straub,2,Septima Fairfax,8/15/2023,9/15/2023,10/11/2023
4,5,7,Harry Potter and the Chamber of Secrets,J.K. Rowling,5,Robert John,10/21/2023,11/21/2023,


In [100]:
### Add empty loaner contact information columns to 'facts_df' (every value starts out as None)
for contact_column in ('loaner_physical_address', 'loaner_phone_number', 'loaner_email_address'):
    facts_df[contact_column] = None
facts_df

Unnamed: 0,loan_key,book_id,book_title,book_author,loaner_id,loaner_name,loan_start_date,loan_exp_date,return_date,loaner_physical_address,loaner_phone_number,loaner_email_address
0,1,1,The Martian,Andy Weir,4,Fen Galeway,10/12/2023,11/12/2023,,,,
1,2,2,How Not to Die,Michael Greger,4,Fen Galeway,9/20/2023,10/20/2023,,,,
2,3,7,Harry Potter and the Chamber of Secrets,J.K. Rowling,1,Ailey Thicknesse,8/07/2023,9/07/2023,9/01/2023,,,
3,4,9,Pro Git,Ben Straub,2,Septima Fairfax,8/15/2023,9/15/2023,10/11/2023,,,
4,5,7,Harry Potter and the Chamber of Secrets,J.K. Rowling,5,Robert John,10/21/2023,11/21/2023,,,,


In [101]:
### Fill in each loaner's contact information from 'members_df'
# Members are matched on the value of 'id' rather than on row position ('loaner_id - 1'), so the lookup
# stays correct when member ids are not contiguous or the members arrive in a different order.
# 'verify_integrity=True' raises a ValueError if two members share an id.
members_by_id = members_df.set_index('id', verify_integrity=True)
contact_column_sources = {
    'loaner_physical_address': 'physical_address',
    'loaner_phone_number': 'phone_number',
    'loaner_email_address': 'email_address',
}
for facts_column, members_column in contact_column_sources.items():
    # A 'loaner_id' without a matching member yields a missing value
    facts_df[facts_column] = facts_df['loaner_id'].map(members_by_id[members_column])
facts_df

Unnamed: 0,loan_key,book_id,book_title,book_author,loaner_id,loaner_name,loan_start_date,loan_exp_date,return_date,loaner_physical_address,loaner_phone_number,loaner_email_address
0,1,1,The Martian,Andy Weir,4,Fen Galeway,10/12/2023,11/12/2023,,Past the ravine and up the mountain,(911) 232-7363,
1,2,2,How Not to Die,Michael Greger,4,Fen Galeway,9/20/2023,10/20/2023,,Past the ravine and up the mountain,(911) 232-7363,
2,3,7,Harry Potter and the Chamber of Secrets,J.K. Rowling,1,Ailey Thicknesse,8/07/2023,9/07/2023,9/01/2023,Over water and under hill,(123) 234-9878,ailey.thicknesse@gmail.com
3,4,9,Pro Git,Ben Straub,2,Septima Fairfax,8/15/2023,9/15/2023,10/11/2023,The Forbidden Forest,,
4,5,7,Harry Potter and the Chamber of Secrets,J.K. Rowling,5,Robert John,10/21/2023,11/21/2023,,"1523 Cherry Lane, Charlottesville, VA 22901",(255) 109-1293,robert.john@gmail.com


In [102]:
### Parse the 'loan_start_date', 'loan_exp_date', and 'return_date' string columns (month/day/year) into date columns
for date_column in ('loan_start_date', 'loan_exp_date'):
    facts_df[date_column] = pd.to_datetime(facts_df[date_column], format='%m/%d/%Y')
# 'return_date' can be null, so values that cannot be parsed are coerced to NaT instead of raising an error
facts_df['return_date'] = pd.to_datetime(facts_df['return_date'], format='%m/%d/%Y', errors='coerce')
facts_df

Unnamed: 0,loan_key,book_id,book_title,book_author,loaner_id,loaner_name,loan_start_date,loan_exp_date,return_date,loaner_physical_address,loaner_phone_number,loaner_email_address
0,1,1,The Martian,Andy Weir,4,Fen Galeway,2023-10-12,2023-11-12,NaT,Past the ravine and up the mountain,(911) 232-7363,
1,2,2,How Not to Die,Michael Greger,4,Fen Galeway,2023-09-20,2023-10-20,NaT,Past the ravine and up the mountain,(911) 232-7363,
2,3,7,Harry Potter and the Chamber of Secrets,J.K. Rowling,1,Ailey Thicknesse,2023-08-07,2023-09-07,2023-09-01,Over water and under hill,(123) 234-9878,ailey.thicknesse@gmail.com
3,4,9,Pro Git,Ben Straub,2,Septima Fairfax,2023-08-15,2023-09-15,2023-10-11,The Forbidden Forest,,
4,5,7,Harry Potter and the Chamber of Secrets,J.K. Rowling,5,Robert John,2023-10-21,2023-11-21,NaT,"1523 Cherry Lane, Charlottesville, VA 22901",(255) 109-1293,robert.john@gmail.com


In [108]:
### Replace 'loan_start_date', 'loan_exp_date', and 'return_date' column dates with their corresponding date keys (as per the MySQL database
### table 'dim_date')
# Lookup from calendar date to date key. 'full_date' is converted with 'pd.to_datetime' so that it is
# comparable with the Timestamp values held in the 'facts_df' date columns.
date_key_by_date = pd.Series(dates_df['date_key'].to_numpy(), index=pd.to_datetime(dates_df['full_date']))

# Map each column in one pass. Dates without a match (including NaT) become <NA>; the nullable 'Int64'
# dtype keeps the matched keys as integers instead of turning them into floats.
# NOTE(review): assumes 'date_key' holds integers, as the displayed 'dates_df' suggests -- confirm.
date_key_columns = {
    date_column: facts_df[date_column].map(date_key_by_date).astype('Int64')
    for date_column in ('loan_start_date', 'loan_exp_date', 'return_date')
}

# Only 'return_date' may be missing; a start or expiration date without a date key is an error.
# The check runs before 'facts_df' is modified, so a failure leaves the Dataframe untouched.
columns_missing_keys = [
    date_column
    for date_column in ('loan_start_date', 'loan_exp_date')
    if date_key_columns[date_column].isna().any()
]
if columns_missing_keys:
    raise ValueError(f"No 'dim_date' date key found for some values in: {columns_missing_keys}")

for date_column, date_keys in date_key_columns.items():
    facts_df[date_column] = date_keys
facts_df

Unnamed: 0,loan_key,book_id,book_title,book_author,loaner_id,loaner_name,loan_start_date,loan_exp_date,return_date,loaner_physical_address,loaner_phone_number,loaner_email_address
0,1,1,The Martian,Andy Weir,4,Fen Galeway,20231012,20231112,NaT,Past the ravine and up the mountain,(911) 232-7363,
1,2,2,How Not to Die,Michael Greger,4,Fen Galeway,20230920,20231020,NaT,Past the ravine and up the mountain,(911) 232-7363,
2,3,7,Harry Potter and the Chamber of Secrets,J.K. Rowling,1,Ailey Thicknesse,20230807,20230907,20230901,Over water and under hill,(123) 234-9878,ailey.thicknesse@gmail.com
3,4,9,Pro Git,Ben Straub,2,Septima Fairfax,20230815,20230915,20231011,The Forbidden Forest,,
4,5,7,Harry Potter and the Chamber of Secrets,J.K. Rowling,5,Robert John,20231021,20231121,,"1523 Cherry Lane, Charlottesville, VA 22901",(255) 109-1293,robert.john@gmail.com


In [56]:
## Transform pandas Dataframe 'late_fees_df': rename the 'Book ID' column to 'book_id'
column_name_map = {'Book ID': 'book_id'}
late_fees_df = late_fees_df.rename(columns=column_name_map)
late_fees_df

Unnamed: 0,book_id,Book Title,Book Author,Base Late Fee,Additional Late Fee (Cumulating Daily)
0,1,The Martian,Andy Weir,5.0,1.75
1,2,How Not to Die,Michael Greger,2.0,1.0
2,3,Book Lovers,Emily Henry,2.25,1.5
3,4,Becoming,Michelle Obama,2.0,1.0
4,5,Cosmos,Carl Sagan,1.75,0.75
5,6,Naked Statistics: Stripping the Dread from the...,Charles Wheelan,1.5,0.5
6,7,Harry Potter and the Chamber of Secrets,J.K. Rowling,7.0,3.0
7,8,Jane Eyre,Charlotte Bronte,3.0,1.5
8,9,Pro Git,Ben Straub,1.25,0.5


In [48]:
## Write a copy of pandas Dataframe 'members_df' to the MySQL database table 'members' (primary key: 'id')
df = members_df.copy()
table_name, primary_key_name, database_operation = 'members', 'id', 'insert'
set_MySQL_database_table(
    user=MySQL_USER,
    password=MySQL_PASSWORD,
    df=df,
    database_name=MySQL_DATABASE_NAME,
    table_name=table_name,
    primary_key_name=primary_key_name,
    database_operation=database_operation,
)

In [54]:
## Write a copy of pandas Dataframe 'loans_df' to the MySQL database table 'loans' (primary key: 'loan_key')
df = loans_df.copy()
table_name, primary_key_name, database_operation = 'loans', 'loan_key', 'insert'
set_MySQL_database_table(
    user=MySQL_USER,
    password=MySQL_PASSWORD,
    df=df,
    database_name=MySQL_DATABASE_NAME,
    table_name=table_name,
    primary_key_name=primary_key_name,
    database_operation=database_operation,
)

In [57]:
## Write a copy of pandas Dataframe 'late_fees_df' to the MySQL database table 'late_fees' (primary key: 'book_id')
df = late_fees_df.copy()
table_name, primary_key_name, database_operation = 'late_fees', 'book_id', 'insert'
set_MySQL_database_table(
    user=MySQL_USER,
    password=MySQL_PASSWORD,
    df=df,
    database_name=MySQL_DATABASE_NAME,
    table_name=table_name,
    primary_key_name=primary_key_name,
    database_operation=database_operation,
)

In [109]:
## Write a copy of pandas Dataframe 'facts_df' to the MySQL database table 'facts' (primary key: 'loan_key')
df = facts_df.copy()
table_name, primary_key_name, database_operation = 'facts', 'loan_key', 'insert'
set_MySQL_database_table(
    user=MySQL_USER,
    password=MySQL_PASSWORD,
    df=df,
    database_name=MySQL_DATABASE_NAME,
    table_name=table_name,
    primary_key_name=primary_key_name,
    database_operation=database_operation,
)

In [110]:
## Read the loaded MySQL database tables back to show that they were created
### Table 'members'
get_all_members = f'SELECT * FROM {MySQL_DATABASE_NAME}.members;'
MySQL_members_df = get_MySQL_database_table(
    user=MySQL_USER,
    password=MySQL_PASSWORD,
    database_name=MySQL_DATABASE_NAME,
    SQL_query=get_all_members,
)
MySQL_members_df

Unnamed: 0,id,name,physical_address,phone_number,email_address
0,1,Ailey Thicknesse,Over water and under hill,(123) 234-9878,ailey.thicknesse@gmail.com
1,2,Septima Fairfax,The Forbidden Forest,,
2,3,October Lynx,Bridgemar,(575) 433-9122,octo_cat@outlook.com
3,4,Fen Galeway,Past the ravine and up the mountain,(911) 232-7363,
4,5,Robert John,"1523 Cherry Lane, Charlottesville, VA 22901",(255) 109-1293,robert.john@gmail.com


In [111]:
### Read the MySQL database table 'loans' back into a pandas Dataframe
get_all_loans = f'SELECT * FROM {MySQL_DATABASE_NAME}.loans;'
MySQL_loans_df = get_MySQL_database_table(
    user=MySQL_USER,
    password=MySQL_PASSWORD,
    database_name=MySQL_DATABASE_NAME,
    SQL_query=get_all_loans,
)
MySQL_loans_df

Unnamed: 0,loan_key,book_id,book_title,book_author,loaner_id,loaner_name,loan_start_date,loan_exp_date,return_date
0,1,1,The Martian,Andy Weir,4,Fen Galeway,10/12/2023,11/12/2023,
1,2,2,How Not to Die,Michael Greger,4,Fen Galeway,9/20/2023,10/20/2023,
2,3,7,Harry Potter and the Chamber of Secrets,J.K. Rowling,1,Ailey Thicknesse,8/07/2023,9/07/2023,9/01/2023
3,4,9,Pro Git,Ben Straub,2,Septima Fairfax,8/15/2023,9/15/2023,10/11/2023
4,5,7,Harry Potter and the Chamber of Secrets,J.K. Rowling,5,Robert John,10/21/2023,11/21/2023,


In [112]:
### Read the MySQL database table 'late_fees' back into a pandas Dataframe
get_all_late_fees = f'SELECT * FROM {MySQL_DATABASE_NAME}.late_fees;'
MySQL_late_fees_df = get_MySQL_database_table(
    user=MySQL_USER,
    password=MySQL_PASSWORD,
    database_name=MySQL_DATABASE_NAME,
    SQL_query=get_all_late_fees,
)
MySQL_late_fees_df

Unnamed: 0,book_id,Book Title,Book Author,Base Late Fee,Additional Late Fee (Cumulating Daily)
0,1,The Martian,Andy Weir,5.0,1.75
1,2,How Not to Die,Michael Greger,2.0,1.0
2,3,Book Lovers,Emily Henry,2.25,1.5
3,4,Becoming,Michelle Obama,2.0,1.0
4,5,Cosmos,Carl Sagan,1.75,0.75
5,6,Naked Statistics: Stripping the Dread from the...,Charles Wheelan,1.5,0.5
6,7,Harry Potter and the Chamber of Secrets,J.K. Rowling,7.0,3.0
7,8,Jane Eyre,Charlotte Bronte,3.0,1.5
8,9,Pro Git,Ben Straub,1.25,0.5


In [113]:
### Read the MySQL database table 'facts' back into a pandas Dataframe
get_all_facts = f'SELECT * FROM {MySQL_DATABASE_NAME}.facts;'
MySQL_facts_df = get_MySQL_database_table(
    user=MySQL_USER,
    password=MySQL_PASSWORD,
    database_name=MySQL_DATABASE_NAME,
    SQL_query=get_all_facts,
)
MySQL_facts_df

Unnamed: 0,loan_key,book_id,book_title,book_author,loaner_id,loaner_name,loan_start_date,loan_exp_date,return_date,loaner_physical_address,loaner_phone_number,loaner_email_address
0,1,1,The Martian,Andy Weir,4,Fen Galeway,20231012,20231112,,Past the ravine and up the mountain,(911) 232-7363,
1,2,2,How Not to Die,Michael Greger,4,Fen Galeway,20230920,20231020,,Past the ravine and up the mountain,(911) 232-7363,
2,3,7,Harry Potter and the Chamber of Secrets,J.K. Rowling,1,Ailey Thicknesse,20230807,20230907,20230901.0,Over water and under hill,(123) 234-9878,ailey.thicknesse@gmail.com
3,4,9,Pro Git,Ben Straub,2,Septima Fairfax,20230815,20230915,20231011.0,The Forbidden Forest,,
4,5,7,Harry Potter and the Chamber of Secrets,J.K. Rowling,5,Robert John,20231021,20231121,,"1523 Cherry Lane, Charlottesville, VA 22901",(255) 109-1293,robert.john@gmail.com
