## Task - Setup HR DB

As part of this task you need to setup the HR Database with required tables so that you can take care of exercises based upon HR Database. If you run into any issue, reach out to the support staff.
* We will be creating departments and employees table.
* **departments** is the parent table as each department can have multiple employees.

In [None]:
!ls -ltr /data/hr_db/

In [None]:
!ls -ltr /data/hr_db/employees

In [None]:
!ls -ltr /data/hr_db/departments

In [1]:
%load_ext sql

In [2]:
%env DATABASE_URL=postgresql://deepan:DB_PASSWORD@localhost:5432/itversity_hr_db

env: DATABASE_URL=postgresql://deepan:DB_PASSWORD@localhost:5432/itversity_hr_db


In [3]:
%%sql

DROP TABLE IF EXISTS employees CASCADE;
DROP TABLE IF EXISTS departments CASCADE;

Done.
Done.


[]

In [4]:
%%sql

CREATE TABLE departments
   ( department_id INTEGER
   , department_name VARCHAR(30)
   , manager_id INTEGER
   , location_id INTEGER
   ) ;

CREATE UNIQUE INDEX dept_id_pk
         ON departments (department_id) ;

ALTER TABLE departments ADD
        PRIMARY KEY (department_id);

 * postgresql://deepan:***@localhost:5432/itversity_hr_db
Done.
Done.
Done.


[]

In [5]:
%%sql

CREATE TABLE employees
   ( employee_id INTEGER
   , first_name VARCHAR(20)
   , last_name VARCHAR(25)
   , email VARCHAR(25)
   , phone_number VARCHAR(20)
   , hire_date DATE
   , job_id VARCHAR(10)
   , salary NUMERIC(8,2)
   , commission_pct NUMERIC(2,2)
   , manager_id INTEGER
   , department_id INTEGER
   ) ;

 * postgresql://deepan:***@localhost:5432/itversity_hr_db
Done.


[]

In [6]:
%%sql

CREATE UNIQUE INDEX emp_emp_id_pk
         ON employees (employee_id) ;
ALTER TABLE employees ADD
   PRIMARY KEY (employee_id) ;

ALTER TABLE employees ADD
   FOREIGN KEY (department_id)
   REFERENCES departments (department_id);

 * postgresql://deepan:***@localhost:5432/itversity_hr_db
Done.
Done.
Done.


[]

In [7]:
import psycopg2
import pandas as pd
import math

In [8]:
connection = psycopg2.connect(
    host='localhost',
    port='5432',
    database='itversity_hr_db',
    user='deepan',
    password='DB_PASSWORD'
)

In [9]:
departments = pd.read_csv(r'D:\BIGDATA_LEARN\bigdata-learn\data\hr_db\departments\part-m-00000', sep='\t', header=None)

In [10]:
dep1 = pd.read_csv(r'D:\BIGDATA_LEARN\bigdata-learn\data\hr_db\departments\part-m-00001', sep='\t', header=None)
dep2 = pd.read_csv(r'D:\BIGDATA_LEARN\bigdata-learn\data\hr_db\departments\part-m-00002', sep='\t', header=None)
dep3 = pd.read_csv(r'D:\BIGDATA_LEARN\bigdata-learn\data\hr_db\departments\part-m-00003', sep='\t', header=None)

In [11]:
departments_ins_stmt = '''
INSERT INTO departments 
    (department_id, department_name, manager_id, location_id
    ) 
VALUES 
    (%s, %s, %s, %s
    )
'''
cursor = connection.cursor()

In [12]:
def convert_dept_dtype(r):
    l = list(r)
    manager_id = int(l[2]) if not math.isnan(float(l[2])) else None
    location_id = l[3]
    l1 = l[:2]
    l1.append(manager_id)
    l1.append(location_id)
    return l1

In [13]:
department_list = list(map(lambda r: tuple(convert_dept_dtype(r)), departments.values))
department_list_1 = list(map(lambda r: tuple(convert_dept_dtype(r)), dep1.values))
department_list_2 = list(map(lambda r: tuple(convert_dept_dtype(r)), dep2.values))
department_list_3 = list(map(lambda r: tuple(convert_dept_dtype(r)), dep3.values))

In [14]:
department_list

[(10, 'Administration', 200, 1700),
 (20, 'Marketing', 201, 1800),
 (30, 'Purchasing', 114, 1700),
 (40, 'Human Resources', 203, 2400),
 (50, 'Shipping', 121, 1500),
 (60, 'IT', 103, 1400),
 (70, 'Public Relations', 204, 2700)]

In [15]:
cursor.executemany(departments_ins_stmt, department_list)
connection.commit()
cursor.executemany(departments_ins_stmt, department_list_1)
connection.commit()
cursor.executemany(departments_ins_stmt, department_list_2)
connection.commit()
cursor.executemany(departments_ins_stmt, department_list_3)

In [16]:
connection.commit()

In [17]:
employees = pd.read_csv(r'D:\BIGDATA_LEARN\bigdata-learn\data\hr_db\employees\part-00000.csv', sep='\t', header=None)

In [18]:
employees_ins_stmt = '''
INSERT INTO employees 
    (employee_id, first_name, last_name, email, phone_number,
     hire_date, job_id, salary, commission_pct, manager_id, department_id
    ) 
VALUES 
    (%s, %s, %s, %s, %s,
     %s, %s, %s, %s, %s, %s
    )
'''
cursor = connection.cursor()

In [19]:
def convert_emp_dtype(r):
    l = list(r)
    manager_id = int(l[9]) if not math.isnan(float(l[9])) else None
    department_id = int(l[10]) if not math.isnan(float(l[10])) else None
    l1 = l[:9]
    l1.append(manager_id)
    l1.append(department_id)
    return l1

In [20]:
employees_list = list(map(lambda r: tuple(convert_emp_dtype(r)), employees.values))

In [21]:
employees_list

[(100,
  'Steven',
  'King',
  'SKING',
  '515.123.4567',
  '1987-06-17',
  'AD_PRES',
  24000.0,
  nan,
  None,
  90),
 (101,
  'Neena',
  'Kochhar',
  'NKOCHHAR',
  '515.123.4568',
  '1989-09-21',
  'AD_VP',
  17000.0,
  nan,
  100,
  90),
 (102,
  'Lex',
  'De Haan',
  'LDEHAAN',
  '515.123.4569',
  '1993-01-13',
  'AD_VP',
  17000.0,
  nan,
  100,
  90),
 (103,
  'Alexander',
  'Hunold',
  'AHUNOLD',
  '590.423.4567',
  '1990-01-03',
  'IT_PROG',
  9000.0,
  nan,
  102,
  60),
 (104,
  'Bruce',
  'Ernst',
  'BERNST',
  '590.423.4568',
  '1991-05-21',
  'IT_PROG',
  6000.0,
  nan,
  103,
  60),
 (105,
  'David',
  'Austin',
  'DAUSTIN',
  '590.423.4569',
  '1997-06-25',
  'IT_PROG',
  4800.0,
  nan,
  103,
  60),
 (106,
  'Valli',
  'Pataballa',
  'VPATABAL',
  '590.423.4560',
  '1998-02-05',
  'IT_PROG',
  4800.0,
  nan,
  103,
  60),
 (107,
  'Diana',
  'Lorentz',
  'DLORENTZ',
  '590.423.5567',
  '1999-02-07',
  'IT_PROG',
  4200.0,
  nan,
  103,
  60),
 (108,
  'Nancy',
  'Green

In [22]:
cursor.executemany(employees_ins_stmt, employees_list)

In [23]:
connection.commit()

In [24]:
connection.close()

* Here are the validation steps to confirm we have both departments and employees are populated.
* If validation is successful, you can start taking next few exercises based up on these data sets.

In [25]:
%%sql

SELECT * FROM departments LIMIT 100

 * postgresql://deepan:***@localhost:5432/itversity_hr_db
27 rows affected.


department_id,department_name,manager_id,location_id
10,Administration,200.0,1700
20,Marketing,201.0,1800
30,Purchasing,114.0,1700
40,Human Resources,203.0,2400
50,Shipping,121.0,1500
60,IT,103.0,1400
70,Public Relations,204.0,2700
80,Sales,145.0,2500
90,Executive,100.0,1700
100,Finance,108.0,1700


In [26]:
%%sql

SELECT * FROM employees LIMIT 10

 * postgresql://deepan:***@localhost:5432/itversity_hr_db
10 rows affected.


employee_id,first_name,last_name,email,phone_number,hire_date,job_id,salary,commission_pct,manager_id,department_id
100,Steven,King,SKING,515.123.4567,1987-06-17,AD_PRES,24000.0,,,90
101,Neena,Kochhar,NKOCHHAR,515.123.4568,1989-09-21,AD_VP,17000.0,,100.0,90
102,Lex,De Haan,LDEHAAN,515.123.4569,1993-01-13,AD_VP,17000.0,,100.0,90
103,Alexander,Hunold,AHUNOLD,590.423.4567,1990-01-03,IT_PROG,9000.0,,102.0,60
104,Bruce,Ernst,BERNST,590.423.4568,1991-05-21,IT_PROG,6000.0,,103.0,60
105,David,Austin,DAUSTIN,590.423.4569,1997-06-25,IT_PROG,4800.0,,103.0,60
106,Valli,Pataballa,VPATABAL,590.423.4560,1998-02-05,IT_PROG,4800.0,,103.0,60
107,Diana,Lorentz,DLORENTZ,590.423.5567,1999-02-07,IT_PROG,4200.0,,103.0,60
108,Nancy,Greenberg,NGREENBE,515.124.4569,1994-08-17,FI_MGR,12000.0,,101.0,100
109,Daniel,Faviet,DFAVIET,515.124.4169,1994-08-16,FI_ACCOUNT,9000.0,,108.0,100


In [27]:
%%sql

SELECT DISTINCT department_id FROM employees

 * postgresql://deepan:***@localhost:5432/itversity_hr_db
12 rows affected.


department_id
""
70.0
80.0
60.0
40.0
30.0
10.0
90.0
50.0
100.0


In [28]:
%%sql

INSERT INTO employees(employee_id, department_id)
VALUES (2000, null)

 * postgresql://deepan:***@localhost:5432/itversity_hr_db
1 rows affected.


[]

In [29]:
%%sql

INSERT INTO departments (department_id)
VALUES (300)

 * postgresql://deepan:***@localhost:5432/itversity_hr_db
1 rows affected.


[]

In [30]:
%%sql

UPDATE employees SET department_id = 300
WHERE employee_id = 2000

 * postgresql://deepan:***@localhost:5432/itversity_hr_db
1 rows affected.


[]

In [31]:
%%sql

SELECT * FROM employees WHERE employee_id = 2000

 * postgresql://deepan:***@localhost:5432/itversity_hr_db
1 rows affected.


employee_id,first_name,last_name,email,phone_number,hire_date,job_id,salary,commission_pct,manager_id,department_id
2000,,,,,,,,,,300


In [32]:
%%sql

UPDATE employees SET department_id = 110
WHERE employee_id = 2000

 * postgresql://deepan:***@localhost:5432/itversity_hr_db
1 rows affected.


[]

In [36]:
%%sql

SELECT * FROM employees WHERE employee_id = 2000

 * postgresql://deepan:***@localhost:5432/itversity_hr_db
1 rows affected.


employee_id,first_name,last_name,email,phone_number,hire_date,job_id,salary,commission_pct,manager_id,department_id
2000,,,,,,,,,,110


In [33]:
%%sql

DELETE FROM departments WHERE department_id = 300

 * postgresql://deepan:***@localhost:5432/itversity_hr_db
1 rows affected.


[]

In [34]:
%%sql 

commit;

 * postgresql://deepan:***@localhost:5432/itversity_hr_db
Done.


[]

In [35]:
%%sql

INSERT INTO employees(employee_id, department_id)
VALUES (2001, 300)

 * postgresql://deepan:***@localhost:5432/itversity_hr_db


IntegrityError: (psycopg2.errors.ForeignKeyViolation) insert or update on table "employees" violates foreign key constraint "employees_department_id_fkey"
DETAIL:  Key (department_id)=(300) is not present in table "departments".

[SQL: INSERT INTO employees(employee_id, department_id)
VALUES (2001, 300)]
(Background on this error at: https://sqlalche.me/e/14/gkpj)