# Database Administration

## HR Database

<img src = "../Data/HR ERD.png">

## Imports & Creating Connection

In [1]:
import pandas as pd
from sqlalchemy import create_engine
from sqlalchemy_utils import database_exists, create_database
import pymysql
pymysql.install_as_MySQLdb()
from urllib.parse import quote_plus as urlquote

In [2]:
import json
with open('/Users/purvikansara/.secret/mysql.json') as f:
    login = json.load(f)
login.keys()

dict_keys(['username', 'password'])

In [3]:
connection = f"mysql+pymysql://{login['username']}:{urlquote(login['password'])}@localhost/hr"
engine = create_engine(connection)

In [4]:
## Check if database exists, if not, create it
if database_exists(connection):
    print('It exists!')
else:
    create_database(connection)
    print('Database created!')

It exists!


## Showing Tables

In [5]:
# Preview the names of all tables 
q = '''SHOW TABLES;'''
pd.read_sql(q, engine)

Unnamed: 0,Tables_in_hr
0,department
1,employee
2,job
3,job_history


## Showing Empty Tables

In [6]:
q= '''SELECT * FROM department'''
pd.read_sql(q, engine)

Unnamed: 0,DEPT_ID_DEP,DEPT_NAME,MANAGER_ID,LOC_ID,created_at,updated_at


In [7]:
q= '''SELECT * FROM employee'''
pd.read_sql(q, engine)

Unnamed: 0,EMP_ID,F_NAME,L_NAME,SSN,B_DATE,SEX,ADDRESS,SALARY,MANAGER_ID,created_at,updated_at,JOB_ID,DEP_ID


In [8]:
q= '''SELECT * FROM job'''
pd.read_sql(q, engine)

Unnamed: 0,JOB_IDENT,JOB_TITLE,MIN_SALARY,MAX_SALARY,created_at,updated_at


In [9]:
q= '''SELECT * FROM job_history'''
pd.read_sql(q, engine)

Unnamed: 0,EMPL_ID,START_DATE,created_at,updated_at,DEPT_ID,JOBS_ID


## Importing Data into Notebook

In [10]:
department = pd.read_csv('Data/Department-data.csv')
department.info()
department.head()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 3 entries, 0 to 2
Data columns (total 4 columns):
 #   Column       Non-Null Count  Dtype 
---  ------       --------------  ----- 
 0   dept_id_dep  3 non-null      int64 
 1   dep_name     3 non-null      object
 2   manager_id   3 non-null      int64 
 3   loc_id       3 non-null      object
dtypes: int64(2), object(2)
memory usage: 224.0+ bytes


Unnamed: 0,dept_id_dep,dep_name,manager_id,loc_id
0,2,Architect Group,30001,L0001
1,5,Software Group,30002,L0002
2,7,Design Team,30003,L0003


In [11]:
employees = pd.read_csv('Data/Employee-data.csv')
employees.info()
employees.head()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 8 entries, 0 to 7
Data columns (total 11 columns):
 #   Column       Non-Null Count  Dtype 
---  ------       --------------  ----- 
 0   employee_id  8 non-null      object
 1   f_name       8 non-null      object
 2   l_name       8 non-null      object
 3   ssn          8 non-null      int64 
 4   birth_date   8 non-null      object
 5   sex          8 non-null      object
 6   address      8 non-null      object
 7   job_id       8 non-null      int64 
 8   salary       8 non-null      int64 
 9   manager_id   8 non-null      int64 
 10  dep_id       8 non-null      int64 
dtypes: int64(5), object(6)
memory usage: 832.0+ bytes


Unnamed: 0,employee_id,f_name,l_name,ssn,birth_date,sex,address,job_id,salary,manager_id,dep_id
0,E1001,John,Thomas,123456,01/09/1976,M,"5631 Rice, OakPark,IL",100,100000,30001,2
1,E1002,Alice,James,123457,07/31/1972,F,"980 Berry ln, Elgin,IL",200,80000,30002,5
2,E1003,Steve,Wells,123458,08/10/1980,M,"291 Springs, Gary,IL",300,50000,30002,5
3,E1004,Santosh,Kumar,123459,07/20/1985,M,"511 Aurora Av, Aurora,IL",400,60000,30004,5
4,E1005,Ahmed,Hussain,123410,01/04/1981,M,"216 Oak Tree, Geneva,IL",500,70000,30001,2


In [12]:
job = pd.read_csv('Data/Jobs-data.csv')
job.info()
job.head()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 10 entries, 0 to 9
Data columns (total 4 columns):
 #   Column      Non-Null Count  Dtype 
---  ------      --------------  ----- 
 0   job_ident   10 non-null     int64 
 1   job_title   10 non-null     object
 2   min_salary  10 non-null     int64 
 3   max_salary  10 non-null     int64 
dtypes: int64(3), object(1)
memory usage: 448.0+ bytes


Unnamed: 0,job_ident,job_title,min_salary,max_salary
0,100,Sr. Architect,60000,100000
1,200,Sr.Software Developer,60000,80000
2,300,Jr.Software Developer,40000,60000
3,400,Jr.Software Developer,40000,60000
4,500,Jr. Architect,50000,70000


In [13]:
job_history = pd.read_csv('Data/Job_History-data.csv')
job_history.info()
job_history.head()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 10 entries, 0 to 9
Data columns (total 4 columns):
 #   Column      Non-Null Count  Dtype 
---  ------      --------------  ----- 
 0   empl_id     10 non-null     object
 1   start_date  10 non-null     object
 2   dept_id     10 non-null     int64 
 3   jobs_id     10 non-null     int64 
dtypes: int64(2), object(2)
memory usage: 448.0+ bytes


Unnamed: 0,empl_id,start_date,dept_id,jobs_id
0,E1001,08/01/2000,100,2
1,E1002,08/01/2001,200,5
2,E1003,08/16/2001,300,5
3,E1004,08/16/2000,400,5
4,E1005,05/30/2000,500,2


## Loading Data into Tables

### Department

In [14]:
q = '''DESCRIBE department;'''
describe = pd.read_sql(q, engine)
describe

Unnamed: 0,Field,Type,Null,Key,Default,Extra
0,DEPT_ID_DEP,char(9),NO,PRI,,
1,DEPT_NAME,varchar(15),YES,,,
2,MANAGER_ID,char(9),YES,,,
3,LOC_ID,char(9),YES,,,
4,created_at,datetime,YES,,,
5,updated_at,datetime,YES,,,


In [15]:
# Checking describe's Field names
describe['Field'].values

array(['DEPT_ID_DEP', 'DEPT_NAME', 'MANAGER_ID', 'LOC_ID', 'created_at',
       'updated_at'], dtype=object)

In [16]:
# Checking dataframe's columns
department.columns

Index(['dept_id_dep', 'dep_name', 'manager_id', 'loc_id'], dtype='object')

In [17]:
# replace original column names
department.columns = department.columns.str.upper()
department.head(2)

Unnamed: 0,DEPT_ID_DEP,DEP_NAME,MANAGER_ID,LOC_ID
0,2,Architect Group,30001,L0001
1,5,Software Group,30002,L0002


In [18]:
# Reviewing SQL table's data types
describe[['Field','Type']]

Unnamed: 0,Field,Type
0,DEPT_ID_DEP,char(9)
1,DEPT_NAME,varchar(15)
2,MANAGER_ID,char(9)
3,LOC_ID,char(9)
4,created_at,datetime
5,updated_at,datetime


In [19]:
# Reviewing dataframe's data types
department.dtypes

DEPT_ID_DEP     int64
DEP_NAME       object
MANAGER_ID      int64
LOC_ID         object
dtype: object

In [20]:

department.to_sql("department",engine,index=False, if_exists='append')

OperationalError: (pymysql.err.OperationalError) (1054, "Unknown column 'DEP_NAME' in 'field list'")
[SQL: INSERT INTO department (`DEPT_ID_DEP`, `DEP_NAME`, `MANAGER_ID`, `LOC_ID`) VALUES (%(DEPT_ID_DEP)s, %(DEP_NAME)s, %(MANAGER_ID)s, %(LOC_ID)s)]
[parameters: ({'DEPT_ID_DEP': 2, 'DEP_NAME': 'Architect Group', 'MANAGER_ID': 30001, 'LOC_ID': 'L0001'}, {'DEPT_ID_DEP': 5, 'DEP_NAME': 'Software Group', 'MANAGER_ID': 30002, 'LOC_ID': 'L0002'}, {'DEPT_ID_DEP': 7, 'DEP_NAME': 'Design Team', 'MANAGER_ID': 30003, 'LOC_ID': 'L0003'})]
(Background on this error at: https://sqlalche.me/e/14/e3q8)

In [21]:
# confirm the data has been added
q = """SELECT * FROM department;"""
pd.read_sql(q,engine)

Unnamed: 0,DEPT_ID_DEP,DEPT_NAME,MANAGER_ID,LOC_ID,created_at,updated_at


In [22]:
# check the describe again to confirm no changes
q = """DESCRIBE department;"""
pd.read_sql(q,engine)

Unnamed: 0,Field,Type,Null,Key,Default,Extra
0,DEPT_ID_DEP,char(9),NO,PRI,,
1,DEPT_NAME,varchar(15),YES,,,
2,MANAGER_ID,char(9),YES,,,
3,LOC_ID,char(9),YES,,,
4,created_at,datetime,YES,,,
5,updated_at,datetime,YES,,,


### Employee

In [23]:
q = '''DESCRIBE employee;'''
describe = pd.read_sql(q, engine)
describe

Unnamed: 0,Field,Type,Null,Key,Default,Extra
0,EMP_ID,varchar(9),NO,PRI,,
1,F_NAME,varchar(45),YES,,,
2,L_NAME,varchar(45),YES,,,
3,SSN,varchar(9),YES,,,
4,B_DATE,date,YES,,,
5,SEX,char(1),YES,,,
6,ADDRESS,varchar(45),YES,,,
7,SALARY,"decimal(10,2)",YES,,,
8,MANAGER_ID,char(9),YES,,,
9,created_at,datetime,YES,,,


In [24]:
# Checking describe's Field names
describe['Field'].values

array(['EMP_ID', 'F_NAME', 'L_NAME', 'SSN', 'B_DATE', 'SEX', 'ADDRESS',
       'SALARY', 'MANAGER_ID', 'created_at', 'updated_at', 'JOB_ID',
       'DEP_ID'], dtype=object)

In [25]:
# Checking dataframe's columns
employees.columns

Index(['employee_id', 'f_name', 'l_name', 'ssn', 'birth_date', 'sex',
       'address', 'job_id', 'salary', 'manager_id', 'dep_id'],
      dtype='object')

In [26]:
# replace original column names
employees.columns = employees.columns.str.upper()
employees.head(2)

Unnamed: 0,EMPLOYEE_ID,F_NAME,L_NAME,SSN,BIRTH_DATE,SEX,ADDRESS,JOB_ID,SALARY,MANAGER_ID,DEP_ID
0,E1001,John,Thomas,123456,01/09/1976,M,"5631 Rice, OakPark,IL",100,100000,30001,2
1,E1002,Alice,James,123457,07/31/1972,F,"980 Berry ln, Elgin,IL",200,80000,30002,5


In [27]:

# Rename columns to match SQL table
rename_map = {"EMPLOYEE_ID":"EMP_ID",
             "BIRTH_DATE":"B_DATE"}
employees = employees.rename(rename_map,axis=1)
employees.head(2)

Unnamed: 0,EMP_ID,F_NAME,L_NAME,SSN,B_DATE,SEX,ADDRESS,JOB_ID,SALARY,MANAGER_ID,DEP_ID
0,E1001,John,Thomas,123456,01/09/1976,M,"5631 Rice, OakPark,IL",100,100000,30001,2
1,E1002,Alice,James,123457,07/31/1972,F,"980 Berry ln, Elgin,IL",200,80000,30002,5


In [28]:
# Reviewing SQL table's data types
describe[['Field','Type']]

Unnamed: 0,Field,Type
0,EMP_ID,varchar(9)
1,F_NAME,varchar(45)
2,L_NAME,varchar(45)
3,SSN,varchar(9)
4,B_DATE,date
5,SEX,char(1)
6,ADDRESS,varchar(45)
7,SALARY,"decimal(10,2)"
8,MANAGER_ID,char(9)
9,created_at,datetime


In [29]:
# Reviewing dataframe's data types
employees.dtypes

EMP_ID        object
F_NAME        object
L_NAME        object
SSN            int64
B_DATE        object
SEX           object
ADDRESS       object
JOB_ID         int64
SALARY         int64
MANAGER_ID     int64
DEP_ID         int64
dtype: object

In [30]:
# Converting B_DATE to datetime dtype
employees['B_DATE'] = pd.to_datetime(employees['B_DATE'])
employees.dtypes

EMP_ID                object
F_NAME                object
L_NAME                object
SSN                    int64
B_DATE        datetime64[ns]
SEX                   object
ADDRESS               object
JOB_ID                 int64
SALARY                 int64
MANAGER_ID             int64
DEP_ID                 int64
dtype: object

In [31]:

employees.to_sql("employee",engine,index=False, if_exists='append')

IntegrityError: (pymysql.err.IntegrityError) (1452, 'Cannot add or update a child row: a foreign key constraint fails (`hr`.`employee`, CONSTRAINT `fk_employee_job_history` FOREIGN KEY (`EMP_ID`) REFERENCES `job_history` (`EMPL_ID`))')
[SQL: INSERT INTO employee (`EMP_ID`, `F_NAME`, `L_NAME`, `SSN`, `B_DATE`, `SEX`, `ADDRESS`, `JOB_ID`, `SALARY`, `MANAGER_ID`, `DEP_ID`) VALUES (%(EMP_ID)s, %(F_NAME)s, %(L_NAME)s, %(SSN)s, %(B_DATE)s, %(SEX)s, %(ADDRESS)s, %(JOB_ID)s, %(SALARY)s, %(MANAGER_ID)s, %(DEP_ID)s)]
[parameters: ({'EMP_ID': 'E1001', 'F_NAME': 'John', 'L_NAME': 'Thomas', 'SSN': 123456, 'B_DATE': datetime.datetime(1976, 1, 9, 0, 0), 'SEX': 'M', 'ADDRESS': '5631 Rice, OakPark,IL', 'JOB_ID': 100, 'SALARY': 100000, 'MANAGER_ID': 30001, 'DEP_ID': 2}, {'EMP_ID': 'E1002', 'F_NAME': 'Alice', 'L_NAME': 'James', 'SSN': 123457, 'B_DATE': datetime.datetime(1972, 7, 31, 0, 0), 'SEX': 'F', 'ADDRESS': '980 Berry ln, Elgin,IL', 'JOB_ID': 200, 'SALARY': 80000, 'MANAGER_ID': 30002, 'DEP_ID': 5}, {'EMP_ID': 'E1003', 'F_NAME': 'Steve', 'L_NAME': 'Wells', 'SSN': 123458, 'B_DATE': datetime.datetime(1980, 8, 10, 0, 0), 'SEX': 'M', 'ADDRESS': '291 Springs, Gary,IL', 'JOB_ID': 300, 'SALARY': 50000, 'MANAGER_ID': 30002, 'DEP_ID': 5}, {'EMP_ID': 'E1004', 'F_NAME': 'Santosh', 'L_NAME': 'Kumar', 'SSN': 123459, 'B_DATE': datetime.datetime(1985, 7, 20, 0, 0), 'SEX': 'M', 'ADDRESS': '511 Aurora Av, Aurora,IL', 'JOB_ID': 400, 'SALARY': 60000, 'MANAGER_ID': 30004, 'DEP_ID': 5}, {'EMP_ID': 'E1005', 'F_NAME': 'Ahmed', 'L_NAME': 'Hussain', 'SSN': 123410, 'B_DATE': datetime.datetime(1981, 1, 4, 0, 0), 'SEX': 'M', 'ADDRESS': '216 Oak Tree, Geneva,IL', 'JOB_ID': 500, 'SALARY': 70000, 'MANAGER_ID': 30001, 'DEP_ID': 2}, {'EMP_ID': 'E1006', 'F_NAME': 'Nancy', 'L_NAME': 'Allen', 'SSN': 123411, 'B_DATE': datetime.datetime(1978, 2, 6, 0, 0), 'SEX': 'F', 'ADDRESS': '111 Green Pl, Elgin,IL', 'JOB_ID': 600, 'SALARY': 90000, 'MANAGER_ID': 30001, 'DEP_ID': 2}, {'EMP_ID': 'E1007', 'F_NAME': 'Mary', 'L_NAME': 'Thomas', 'SSN': 123412, 'B_DATE': datetime.datetime(1975, 5, 5, 0, 0), 'SEX': 'F', 'ADDRESS': '100 Rose Pl, Gary,IL', 'JOB_ID': 650, 'SALARY': 65000, 'MANAGER_ID': 30003, 'DEP_ID': 7}, {'EMP_ID': 'E1008', 'F_NAME': 'Bharath', 'L_NAME': 'Gupta', 'SSN': 123413, 'B_DATE': datetime.datetime(1985, 5, 6, 0, 0), 'SEX': 'M', 'ADDRESS': '145 Berry Ln, Naperville,IL', 'JOB_ID': 660, 'SALARY': 65000, 'MANAGER_ID': 30003, 'DEP_ID': 7})]
(Background on this error at: https://sqlalche.me/e/14/gkpj)

#### We hit an unexpected "Integrity Error" related to foreign key constraints.

- By default, MySQL won't allow us to insert new data into 1 table if we have not yet included the connected data in a related table.
- In this case, it would not let us add to the Employee table because we included ID columns that are the foreign keys for another table.
- We can temporarily deactivate this constraint and then re-run the to_sql command.

#### Handling Foreign Key Constraints

In [32]:
# Checking the setting for FOREIGN_KEY_CHECKS
q = """SELECT @@FOREIGN_KEY_CHECKS"""
pd.read_sql(q, engine)

Unnamed: 0,@@FOREIGN_KEY_CHECKS
0,1


- Whenever we need to run a query that does not return data, we will need to use the connection object to .execute the query instead of using pd.read.sql.

In [33]:
# Changing the setting for FOREIGN_KEY_CHECKS with the connection
q = """SET @@FOREIGN_KEY_CHECKS=0"""
engine.execute(q)

<sqlalchemy.engine.cursor.LegacyCursorResult at 0x13b89b100>

In [34]:

# Confirm the checks are deactiavated
q = """SELECT @@FOREIGN_KEY_CHECKS"""
pd.read_sql(q,engine)

Unnamed: 0,@@FOREIGN_KEY_CHECKS
0,0


- We have deactivated the checks (the value is 0). Now we can try the to_sql command again!

In [35]:
# Inserting the data now that foreign key checks are disabled
employees.to_sql("employee",engine,index=False, if_exists='append')

8

In [36]:

# confirm the data has been added
q = """SELECT * FROM employee;"""
pd.read_sql(q,engine)

Unnamed: 0,EMP_ID,F_NAME,L_NAME,SSN,B_DATE,SEX,ADDRESS,SALARY,MANAGER_ID,created_at,updated_at,JOB_ID,DEP_ID
0,E1001,John,Thomas,123456,1976-01-09,M,"5631 Rice, OakPark,IL",100000.0,30001,,,100,2
1,E1002,Alice,James,123457,1972-07-31,F,"980 Berry ln, Elgin,IL",80000.0,30002,,,200,5
2,E1003,Steve,Wells,123458,1980-08-10,M,"291 Springs, Gary,IL",50000.0,30002,,,300,5
3,E1004,Santosh,Kumar,123459,1985-07-20,M,"511 Aurora Av, Aurora,IL",60000.0,30004,,,400,5
4,E1005,Ahmed,Hussain,123410,1981-01-04,M,"216 Oak Tree, Geneva,IL",70000.0,30001,,,500,2
5,E1006,Nancy,Allen,123411,1978-02-06,F,"111 Green Pl, Elgin,IL",90000.0,30001,,,600,2
6,E1007,Mary,Thomas,123412,1975-05-05,F,"100 Rose Pl, Gary,IL",65000.0,30003,,,650,7
7,E1008,Bharath,Gupta,123413,1985-05-06,M,"145 Berry Ln, Naperville,IL",65000.0,30003,,,660,7


In [37]:
# check the describe again to confirm no changes
q = """DESCRIBE employee;"""
pd.read_sql(q,engine)

Unnamed: 0,Field,Type,Null,Key,Default,Extra
0,EMP_ID,varchar(9),NO,PRI,,
1,F_NAME,varchar(45),YES,,,
2,L_NAME,varchar(45),YES,,,
3,SSN,varchar(9),YES,,,
4,B_DATE,date,YES,,,
5,SEX,char(1),YES,,,
6,ADDRESS,varchar(45),YES,,,
7,SALARY,"decimal(10,2)",YES,,,
8,MANAGER_ID,char(9),YES,,,
9,created_at,datetime,YES,,,


### Job

In [38]:
q = '''DESCRIBE job;'''
describe = pd.read_sql(q, engine)
describe

Unnamed: 0,Field,Type,Null,Key,Default,Extra
0,JOB_IDENT,char(9),NO,PRI,,
1,JOB_TITLE,varchar(45),YES,,,
2,MIN_SALARY,"decimal(10,2)",YES,,,
3,MAX_SALARY,"decimal(10,2)",YES,,,
4,created_at,datetime,YES,,,
5,updated_at,datetime,YES,,,


In [39]:
# Checking describe's Field names
describe['Field'].values

array(['JOB_IDENT', 'JOB_TITLE', 'MIN_SALARY', 'MAX_SALARY', 'created_at',
       'updated_at'], dtype=object)

In [40]:
# Checking dataframe's columns
job.columns


Index(['job_ident', 'job_title', 'min_salary', 'max_salary'], dtype='object')

In [41]:
# replace original column names
job.columns = job.columns.str.upper()
job.head(2)

Unnamed: 0,JOB_IDENT,JOB_TITLE,MIN_SALARY,MAX_SALARY
0,100,Sr. Architect,60000,100000
1,200,Sr.Software Developer,60000,80000


In [42]:
# Reviewing SQL table's data types
describe[['Field','Type']]

Unnamed: 0,Field,Type
0,JOB_IDENT,char(9)
1,JOB_TITLE,varchar(45)
2,MIN_SALARY,"decimal(10,2)"
3,MAX_SALARY,"decimal(10,2)"
4,created_at,datetime
5,updated_at,datetime


In [43]:
# Reviewing dataframe's data types
job.dtypes

JOB_IDENT      int64
JOB_TITLE     object
MIN_SALARY     int64
MAX_SALARY     int64
dtype: object

In [44]:

job.to_sql("job",engine,index=False, if_exists='append')

10

In [45]:
# confirm the data has been added
q = """SELECT * FROM job;"""
pd.read_sql(q,engine)

Unnamed: 0,JOB_IDENT,JOB_TITLE,MIN_SALARY,MAX_SALARY,created_at,updated_at
0,100,Sr. Architect,60000.0,100000.0,,
1,200,Sr.Software Developer,60000.0,80000.0,,
2,220,Sr. Designer,70000.0,90000.0,,
3,234,Sr. Designer,70000.0,90000.0,,
4,300,Jr.Software Developer,40000.0,60000.0,,
5,400,Jr.Software Developer,40000.0,60000.0,,
6,500,Jr. Architect,50000.0,70000.0,,
7,600,Lead Architect,70000.0,100000.0,,
8,650,Jr. Designer,60000.0,70000.0,,
9,660,Jr. Designer,60000.0,70000.0,,


In [46]:
# check the describe again to confirm no changes
q = """DESCRIBE job;"""
pd.read_sql(q,engine)

Unnamed: 0,Field,Type,Null,Key,Default,Extra
0,JOB_IDENT,char(9),NO,PRI,,
1,JOB_TITLE,varchar(45),YES,,,
2,MIN_SALARY,"decimal(10,2)",YES,,,
3,MAX_SALARY,"decimal(10,2)",YES,,,
4,created_at,datetime,YES,,,
5,updated_at,datetime,YES,,,


### Job History

In [47]:
q = '''DESCRIBE job_history;'''
describe = pd.read_sql(q, engine)
describe

Unnamed: 0,Field,Type,Null,Key,Default,Extra
0,EMPL_ID,char(9),NO,PRI,,
1,START_DATE,date,YES,,,
2,created_at,datetime,YES,,,
3,updated_at,datetime,YES,,,
4,DEPT_ID,char(9),YES,MUL,,
5,JOBS_ID,char(9),YES,MUL,,


In [48]:
# Checking describe's Field names
describe['Field'].values

array(['EMPL_ID', 'START_DATE', 'created_at', 'updated_at', 'DEPT_ID',
       'JOBS_ID'], dtype=object)

In [49]:
# Checking dataframe's columns
job_history.columns

Index(['empl_id', 'start_date', 'dept_id', 'jobs_id'], dtype='object')

In [50]:
# replace original column names
job_history.columns = job_history.columns.str.upper()
job_history.head(2)

Unnamed: 0,EMPL_ID,START_DATE,DEPT_ID,JOBS_ID
0,E1001,08/01/2000,100,2
1,E1002,08/01/2001,200,5


In [51]:
# Reviewing SQL table's data types
describe[['Field','Type']]

Unnamed: 0,Field,Type
0,EMPL_ID,char(9)
1,START_DATE,date
2,created_at,datetime
3,updated_at,datetime
4,DEPT_ID,char(9)
5,JOBS_ID,char(9)


In [52]:
# Reviewing dataframe's data types
job_history.dtypes

EMPL_ID       object
START_DATE    object
DEPT_ID        int64
JOBS_ID        int64
dtype: object

In [53]:
# Converting START_DATE to datetime dtype
job_history['START_DATE'] = pd.to_datetime(job_history['START_DATE'])
job_history.dtypes

EMPL_ID               object
START_DATE    datetime64[ns]
DEPT_ID                int64
JOBS_ID                int64
dtype: object

In [54]:

job_history.to_sql("job_history",engine,index=False, if_exists='append')

10

In [55]:
# confirm the data has been added
q = """SELECT * FROM job_history;"""
pd.read_sql(q,engine)

Unnamed: 0,EMPL_ID,START_DATE,created_at,updated_at,DEPT_ID,JOBS_ID
0,E1001,2000-08-01,,,100,2
1,E1002,2001-08-01,,,200,5
2,E1003,2001-08-16,,,300,5
3,E1004,2000-08-16,,,400,5
4,E1005,2000-05-30,,,500,2
5,E1006,2001-08-16,,,600,2
6,E1007,2002-05-30,,,650,7
7,E1008,2010-05-06,,,660,7
8,E1009,2016-08-16,,,234,7
9,E1010,2016-08-16,,,220,5


In [56]:
# check the describe again to confirm no changes
q = """DESCRIBE job_history;"""
pd.read_sql(q,engine)

Unnamed: 0,Field,Type,Null,Key,Default,Extra
0,EMPL_ID,char(9),NO,PRI,,
1,START_DATE,date,YES,,,
2,created_at,datetime,YES,,,
3,updated_at,datetime,YES,,,
4,DEPT_ID,char(9),YES,MUL,,
5,JOBS_ID,char(9),YES,MUL,,


## Running Test Query To Verify Data
- Display the first name, last name, job title, start date and deparment of Alice James.

In [57]:

q = '''
SELECT e.f_name, e.l_name, j.job_title, jh.start_date, d.dep_name
FROM employee e
JOIN department d ON e.dep_id = d.dept_id_dep
JOIN job j ON e.job_id = j.job_ident
JOIN job_history jh ON e.emp_id = jh.empl_id
WHERE e.f_name = 'Alice' AND e.l_name = 'James';
    '''
pd.read_sql(q, engine)

OperationalError: (pymysql.err.OperationalError) (1054, "Unknown column 'd.dep_name' in 'field list'")
[SQL: 
SELECT e.f_name, e.l_name, j.job_title, jh.start_date, d.dep_name
FROM employee e
JOIN department d ON e.dep_id = d.dept_id_dep
JOIN job j ON e.job_id = j.job_ident
JOIN job_history jh ON e.emp_id = jh.empl_id
WHERE e.f_name = 'Alice' AND e.l_name = 'James';
    ]
(Background on this error at: https://sqlalche.me/e/14/e3q8)