/
analyze_employees_in_database.py3
executable file
·120 lines (100 loc) · 3.87 KB
/
analyze_employees_in_database.py3
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
#!/usr/bin/env python3
# encoding=utf8
#
# Flattens out Employee sample database from live MariaDB databse
# illustrates how DataFrame can be created from calls to relational database
#
# blog: https://fabianlee.org/2021/12/11/python-constructing-a-dataframe-from-a-relational-database-with-pandas/
#
# Module requirement:
# pip3 install pandas mysql-client mysql-connector --user
#
import sys
import argparse
import datetime
import pandas as pd
import mysql.connector
######### MAIN ##########################
ap = argparse.ArgumentParser()
ap.add_argument('-H', '--host', default="172.17.0.2", help="IP address of MariaDB server")
ap.add_argument('-u', '--user', default="root", help="user for MariaDB server")
ap.add_argument('-p', '--password', default="thepassword", help="password for MariaDB server")
ap.add_argument('-d', '--debug', action="store_true", help="whether to show debug print")
args = ap.parse_args()
debug = args.debug
# connect to MariaDB server
db_conn = mysql.connector.connect(
host=args.host,
user=args.user,
passwd=args.password,
database="employees"
)
# simplest test of DB connection
all_tables = pd.read_sql("show tables",db_conn)
#print(all_tables)
department_name = "Finance"
# lookup finance department ID from database
#try:
# cursor = db_conn.cursor()
# statement = "SELECT dept_no from departments where dept_name=%s"
# data = (department_name,)
# cursor.execute(statement, data)
# for (thenum) in cursor:
# department_no = thenum[0]
#except mysql.connector.Error as e:
# print(f"Error finding department number from database: {e}")
#print("department_no = {}".format(department_no))
#
# APPROACH #1:
# pull DataFrame in single SQL complex query
#
print("")
print("=================================================")
print("1. DataFrame using single SQL with complex join")
print("=================================================")
employee_data = pd.read_sql("""
SELECT d.dept_no,d.dept_name,e.emp_no,e.first_name, e.last_name, t.title
FROM employees e
INNER JOIN dept_emp de ON e.emp_no=de.emp_no
INNER JOIN departments d ON d.dept_no=de.dept_no
INNER JOIN titles t ON e.emp_no=t.emp_no
WHERE d.dept_name='{}'
""".format(department_name),
db_conn)
print(employee_data)
#
# APPROACH #2:
# pull multiple DataFrame and use pandas.merge to flatten
#
print("\n\n")
print("=================================================")
print("2. Multiple DataFrame merged by pandas")
print("=================================================")
print("Select department info for each employee in {}...".format(department_name))
dept_info = pd.read_sql("""
SELECT d.dept_no,d.dept_name,de.emp_no
FROM departments d, dept_emp de
WHERE d.dept_name='{}' AND d.dept_no=de.dept_no
""".format(department_name),
db_conn)
print("Select titles just for {} group..".format(department_name))
titles = pd.read_sql("""
SELECT t.emp_no,t.title
FROM titles t
INNER JOIN dept_emp de ON de.emp_no=t.emp_no
WHERE de.dept_no = (select de.dept_no from departments de where de.dept_name='{}')
""".format(department_name),
db_conn)
print("Select employees in {}...".format(department_name))
employees_from_dept = pd.read_sql("""
SELECT de.dept_no,e.emp_no,e.last_name,e.first_name
FROM employees e
INNER JOIN dept_emp de ON de.emp_no=e.emp_no
WHERE de.dept_no = (select de.dept_no from departments de where de.dept_name='{}')
""".format(department_name),
db_conn)
# merge department and employee data
employees_merged = pd.merge(dept_info,employees_from_dept,how="left",left_on="emp_no",right_on="emp_no",suffixes=(None,"_e") )
# additional merge of titles
employees_merged = pd.merge(employees_merged,titles,how="left",left_on="emp_no",right_on="emp_no",suffixes=(None,"_e") )
print(employees_merged)