## Task - Setup HR DB

As part of this task you need to setup the HR Database with required tables so that you can take care of exercises based upon HR Database. If you run into any issue, reach out to the support staff.
* We will be creating departments and employees table.
* **departments** is the parent table as each department can have multiple employees.

In [70]:
!ls -ltr /data/hr_db/

total 116
drwxr-xr-x 2 root root  4096 Jan 21  2021 regions
drwxr-xr-x 2 root root  4096 Jan 21  2021 locations
-rw-r--r-- 1 root root 32719 Jan 21  2021 load_data.sql
-rw-r--r-- 1 root root 30266 Jan 21  2021 load_data_pg.sql
drwxr-xr-x 2 root root  4096 Jan 21  2021 jobs
drwxr-xr-x 2 root root  4096 Jan 21  2021 job_history
drwxr-xr-x 2 root root  4096 Jan 21  2021 employees
drwxr-xr-x 2 root root  4096 Jan 21  2021 emp_details_view
-rw-r--r-- 1 root root   207 Jan 21  2021 drop_tables_pg.sql
drwxr-xr-x 2 root root  4096 Jan 21  2021 departments
-rw-r--r-- 1 root root  4380 Jan 21  2021 create_tables_pg.sql
-rw-r--r-- 1 root root  4810 Jan 21  2021 create_tables_mysql.sql
drwxr-xr-x 2 root root  4096 Jan 21  2021 countries


In [71]:
!ls -ltr /data/hr_db/employees

total 12
-rw-r--r-- 1 root root 8252 Jan 21  2021 part-m-00000


In [72]:
!ls -ltr /data/hr_db/departments

total 4
-rw-r--r-- 1 root root 608 Jan 21  2021 part-m-00000


In [73]:
%run 00_setup_database_variables.ipynb

In [74]:
%load_ext sql

The sql extension is already loaded. To reload it, use:
  %reload_ext sql


In [75]:
%env DATABASE_URL=postgresql://{username}_hr_user:{password}@{postgres_host}:{postgres_port}/{username}_hr_db

env: DATABASE_URL=postgresql://itv002461_hr_user:7ji8g7gg8p8olbqbna5vz1tjyikaixco@m01.itversity.com:5433/itv002461_hr_db


In [76]:
%%sql

DROP TABLE IF EXISTS employees CASCADE;
DROP TABLE IF EXISTS departments CASCADE;

 * postgresql://itv002461_hr_user:***@m01.itversity.com:5433/itv002461_hr_db
Done.
Done.


[]

In [77]:
%%sql

CREATE TABLE departments
   ( department_id INTEGER
   , department_name VARCHAR(30)
   , manager_id INTEGER
   , location_id INTEGER
   ) ;

CREATE UNIQUE INDEX dept_id_pk
         ON departments (department_id) ;

ALTER TABLE departments ADD
        PRIMARY KEY (department_id);

 * postgresql://itv002461_hr_user:***@m01.itversity.com:5433/itv002461_hr_db
Done.
Done.
Done.


[]

In [78]:
%%sql

CREATE TABLE employees
   ( employee_id INTEGER
   , first_name VARCHAR(20)
   , last_name VARCHAR(25)
   , email VARCHAR(25)
   , phone_number VARCHAR(20)
   , hire_date DATE
   , job_id VARCHAR(10)
   , salary NUMERIC(8,2)
   , commission_pct NUMERIC(2,2)
   , manager_id INTEGER
   , department_id INTEGER
   ) ;

 * postgresql://itv002461_hr_user:***@m01.itversity.com:5433/itv002461_hr_db
Done.


[]

In [79]:
%%sql

CREATE UNIQUE INDEX emp_emp_id_pk
         ON employees (employee_id) ;
ALTER TABLE employees ADD
   PRIMARY KEY (employee_id) ;

ALTER TABLE employees ADD
   FOREIGN KEY (department_id)
   REFERENCES departments (department_id);

 * postgresql://itv002461_hr_user:***@m01.itversity.com:5433/itv002461_hr_db
Done.
Done.
Done.


[]

In [80]:
import psycopg2
import pandas as pd
import math

In [81]:
connection = psycopg2.connect(
    host=postgres_host,
    port=postgres_port,
    database=f'{username}_hr_db',
    user=f'{username}_hr_user',
    password=password
)

In [82]:
departments = pd.read_csv('/data/hr_db/departments/part-m-00000', sep='\t', header=None)

In [83]:
departments_ins_stmt = '''
INSERT INTO departments 
    (department_id, department_name, manager_id, location_id
    ) 
VALUES 
    (%s, %s, %s, %s
    )
'''
cursor = connection.cursor()

In [84]:
def convert_dept_dtype(r):
    l = list(r)
    manager_id = int(l[2]) if not math.isnan(float(l[2])) else None
    location_id = l[3]
    l1 = l[:2]
    l1.append(manager_id)
    l1.append(location_id)
    return l1

In [85]:
department_list = list(map(lambda r: tuple(convert_dept_dtype(r)), departments.values))

In [86]:
cursor.executemany(departments_ins_stmt, department_list)

In [87]:
connection.commit()

In [88]:
employees = pd.read_csv('/data/hr_db/employees/part-m-00000', sep='\t', header=None)

In [89]:
employees_ins_stmt = '''
INSERT INTO employees 
    (employee_id, first_name, last_name, email, phone_number,
     hire_date, job_id, salary, commission_pct, manager_id, department_id
    ) 
VALUES 
    (%s, %s, %s, %s, %s,
     %s, %s, %s, %s, %s, %s
    )
'''
cursor = connection.cursor()

In [90]:
def convert_emp_dtype(r):
    l = list(r)
    manager_id = int(l[9]) if not math.isnan(float(l[9])) else None
    department_id = int(l[10]) if not math.isnan(float(l[10])) else None
    l1 = l[:9]
    l1.append(manager_id)
    l1.append(department_id)
    return l1

In [91]:
employees_list = list(map(lambda r: tuple(convert_emp_dtype(r)), employees.values))

In [92]:
cursor.executemany(employees_ins_stmt, employees_list)

In [93]:
connection.commit()

In [94]:
connection.close()

* Here are the validation steps to confirm we have both departments and employees are populated.
* If validation is successful, you can start taking next few exercises based up on these data sets.

In [95]:
%%sql

SELECT * FROM departments LIMIT 100

 * postgresql://itv002461_hr_user:***@m01.itversity.com:5433/itv002461_hr_db
27 rows affected.


department_id,department_name,manager_id,location_id
10,Administration,200.0,1700
20,Marketing,201.0,1800
30,Purchasing,114.0,1700
40,Human Resources,203.0,2400
50,Shipping,121.0,1500
60,IT,103.0,1400
70,Public Relations,204.0,2700
80,Sales,145.0,2500
90,Executive,100.0,1700
100,Finance,108.0,1700


In [96]:
%%sql

SELECT * FROM employees LIMIT 10

 * postgresql://itv002461_hr_user:***@m01.itversity.com:5433/itv002461_hr_db
10 rows affected.


employee_id,first_name,last_name,email,phone_number,hire_date,job_id,salary,commission_pct,manager_id,department_id
100,Steven,King,SKING,515.123.4567,1987-06-17,AD_PRES,24000.0,,,90
101,Neena,Kochhar,NKOCHHAR,515.123.4568,1989-09-21,AD_VP,17000.0,,100.0,90
102,Lex,De Haan,LDEHAAN,515.123.4569,1993-01-13,AD_VP,17000.0,,100.0,90
103,Alexander,Hunold,AHUNOLD,590.423.4567,1990-01-03,IT_PROG,9000.0,,102.0,60
104,Bruce,Ernst,BERNST,590.423.4568,1991-05-21,IT_PROG,6000.0,,103.0,60
105,David,Austin,DAUSTIN,590.423.4569,1997-06-25,IT_PROG,4800.0,,103.0,60
106,Valli,Pataballa,VPATABAL,590.423.4560,1998-02-05,IT_PROG,4800.0,,103.0,60
107,Diana,Lorentz,DLORENTZ,590.423.5567,1999-02-07,IT_PROG,4200.0,,103.0,60
108,Nancy,Greenberg,NGREENBE,515.124.4569,1994-08-17,FI_MGR,12000.0,,101.0,100
109,Daniel,Faviet,DFAVIET,515.124.4169,1994-08-16,FI_ACCOUNT,9000.0,,108.0,100


In [97]:
%%sql

SELECT DISTINCT department_id FROM employees

 * postgresql://itv002461_hr_user:***@m01.itversity.com:5433/itv002461_hr_db
12 rows affected.


department_id
""
70.0
80.0
60.0
40.0
30.0
10.0
90.0
50.0
100.0


In [98]:
%%sql

INSERT INTO employees(employee_id, department_id)
VALUES (2000, null)

 * postgresql://itv002461_hr_user:***@m01.itversity.com:5433/itv002461_hr_db
1 rows affected.


[]

In [99]:
%%sql

INSERT INTO departments (department_id)
VALUES (300)

 * postgresql://itv002461_hr_user:***@m01.itversity.com:5433/itv002461_hr_db
1 rows affected.


[]

In [100]:
%%sql

UPDATE employees SET department_id = 300
WHERE employee_id = 2000

 * postgresql://itv002461_hr_user:***@m01.itversity.com:5433/itv002461_hr_db
1 rows affected.


[]

In [101]:
%%sql

SELECT * FROM employees WHERE employee_id = 2000

 * postgresql://itv002461_hr_user:***@m01.itversity.com:5433/itv002461_hr_db
1 rows affected.


employee_id,first_name,last_name,email,phone_number,hire_date,job_id,salary,commission_pct,manager_id,department_id
2000,,,,,,,,,,300


In [102]:
%%sql

UPDATE employees SET department_id = 110
WHERE employee_id = 2000

 * postgresql://itv002461_hr_user:***@m01.itversity.com:5433/itv002461_hr_db
1 rows affected.


[]

In [103]:
#%%sql

#DELETE FROM departments WHERE department_id = 300

In [104]:
%%sql

INSERT INTO employees(employee_id, department_id)
VALUES (2001, 300)

 * postgresql://itv002461_hr_user:***@m01.itversity.com:5433/itv002461_hr_db
1 rows affected.


[]