# An Integrated HR Data Warehouse for Employee Retention and Performance Insights

## Group 6

### 1. Data Extraction

In [None]:
# One-time environment setup, left commented out so Run All does not reinstall.
# Uncomment to pin SQLAlchemy to 1.4.52 (the recorded output below shows this
# replacing a 2.0.40 install).
# !pip install --upgrade sqlalchemy==1.4.52

Collecting sqlalchemy==1.4.52
  Downloading SQLAlchemy-1.4.52-cp311-cp311-macosx_10_9_universal2.whl.metadata (10 kB)
Downloading SQLAlchemy-1.4.52-cp311-cp311-macosx_10_9_universal2.whl (1.6 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.6/1.6 MB[0m [31m15.4 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: sqlalchemy
  Attempting uninstall: sqlalchemy
    Found existing installation: SQLAlchemy 2.0.40
    Uninstalling SQLAlchemy-2.0.40:
      Successfully uninstalled SQLAlchemy-2.0.40
Successfully installed sqlalchemy-1.4.52


In [2]:
import os

import pandas as pd
from sqlalchemy import create_engine, text

In [3]:
# Location of the raw employee CSV. Set the EMPLOYEE_CSV_PATH environment
# variable to run the notebook on another machine; the fallback is the
# original author's local path, so existing behaviour is unchanged.
employee_csv_path = os.environ.get(
    'EMPLOYEE_CSV_PATH',
    '/Users/wangxd/Desktop/M.S. of AA/APAN 5310 SQL/5310_Project/Employee.csv',
)

df = pd.read_csv(employee_csv_path)
# Preview the first rows to confirm the file parsed as expected.
df.head()

Unnamed: 0,Age,Attrition,BusinessTravel,DailyRate,Department,DistanceFromHome,Education,EducationField,EmployeeCount,EmployeeNumber,...,RelationshipSatisfaction,StandardHours,StockOptionLevel,TotalWorkingYears,TrainingTimesLastYear,WorkLifeBalance,YearsAtCompany,YearsInCurrentRole,YearsSinceLastPromotion,YearsWithCurrManager
0,41,Yes,Travel_Rarely,1102,Sales,1,2,Life Sciences,1,1,...,1,80,0,8,0,1,6,4,0,5
1,49,No,Travel_Frequently,279,Research & Development,8,1,Life Sciences,1,2,...,4,80,1,10,3,3,10,7,1,7
2,37,Yes,Travel_Rarely,1373,Research & Development,2,2,Other,1,4,...,2,80,0,7,3,3,0,0,0,0
3,33,No,Travel_Frequently,1392,Research & Development,3,4,Life Sciences,1,5,...,3,80,0,8,3,3,8,7,3,0
4,27,No,Travel_Rarely,591,Research & Development,2,1,Medical,1,7,...,4,80,1,6,3,3,2,2,2,2


In [4]:
# Print each column's dtype and non-null count to check for missing values
# before the data is split into database tables.
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1470 entries, 0 to 1469
Data columns (total 35 columns):
 #   Column                    Non-Null Count  Dtype 
---  ------                    --------------  ----- 
 0   Age                       1470 non-null   int64 
 1   Attrition                 1470 non-null   object
 2   BusinessTravel            1470 non-null   object
 3   DailyRate                 1470 non-null   int64 
 4   Department                1470 non-null   object
 5   DistanceFromHome          1470 non-null   int64 
 6   Education                 1470 non-null   int64 
 7   EducationField            1470 non-null   object
 8   EmployeeCount             1470 non-null   int64 
 9   EmployeeNumber            1470 non-null   int64 
 10  EnvironmentSatisfaction   1470 non-null   int64 
 11  Gender                    1470 non-null   object
 12  HourlyRate                1470 non-null   int64 
 13  JobInvolvement            1470 non-null   int64 
 14  JobLevel                

### Create database tables

In [5]:
# PostgreSQL connection URL. Set the HR_DB_URL environment variable to target a
# different server or to supply credentials without editing the notebook.
# NOTE(review): the fallback below still embeds the local development password;
# remove it before sharing this notebook outside the project group.
conn_url = os.environ.get(
    'HR_DB_URL',
    'postgresql://postgres:123@localhost:5432/53_Project',
)

# create_engine() is lazy: no connection is made until one is requested.
engine = create_engine(conn_url)

# Connect now so an unreachable server or bad credentials fail in this cell.
connection = engine.connect()

In [8]:
# Rebuild the normalized HR schema from scratch. Tables holding foreign keys are
# dropped before the tables they reference, so a DROP is never blocked and the
# cell can be re-run safely. Lookup tables (Departments, JobRoles,
# EducationFields) are created first, then Employees, then the per-employee
# tables keyed by EmployeeNumber.
stmt = """
DROP TABLE IF EXISTS SalaryComponents;
DROP TABLE IF EXISTS AttritionStatus;
DROP TABLE IF EXISTS EmployeeAsset;
DROP TABLE IF EXISTS WorkSchedule;
DROP TABLE IF EXISTS WorkExperience;
DROP TABLE IF EXISTS Satisfaction;
DROP TABLE IF EXISTS Performance;
DROP TABLE IF EXISTS Income;
DROP TABLE IF EXISTS Education;
DROP TABLE IF EXISTS WorkAssignments;
DROP TABLE IF EXISTS Employees;
DROP TABLE IF EXISTS Departments;
DROP TABLE IF EXISTS JobRoles;
DROP TABLE IF EXISTS EducationFields;


CREATE TABLE Departments (
    DepartmentID INT PRIMARY KEY,
    DepartmentName VARCHAR(50) NOT NULL UNIQUE
);

CREATE TABLE JobRoles (
    JobRoleID INT PRIMARY KEY,
    JobRoleName VARCHAR(100) NOT NULL UNIQUE
);

CREATE TABLE EducationFields (
    EducationFieldID INT PRIMARY KEY,
    EducationFieldName VARCHAR(100) NOT NULL UNIQUE
);

CREATE TABLE Employees (
    EmployeeNumber INT PRIMARY KEY,
    Age INT NOT NULL CHECK (Age >= 18),
    Gender VARCHAR(10) NOT NULL CHECK (Gender IN ('Male', 'Female', 'Other')),
    MaritalStatus VARCHAR(20),
    DepartmentID INT NOT NULL,
    FOREIGN KEY (DepartmentID) REFERENCES Departments(DepartmentID)
);

CREATE TABLE WorkAssignments (
    EmployeeNumber INT PRIMARY KEY,
    JobRoleID INT NOT NULL,
    BusinessTravel VARCHAR(50),
    JobLevel INT,
    FOREIGN KEY (EmployeeNumber) REFERENCES Employees(EmployeeNumber) ON DELETE CASCADE,
    FOREIGN KEY (JobRoleID) REFERENCES JobRoles(JobRoleID)
);

CREATE TABLE Education (
    EmployeeNumber INT PRIMARY KEY,
    EducationLevel INT NOT NULL,
    EducationFieldID INT NOT NULL,
    FOREIGN KEY (EmployeeNumber) REFERENCES Employees(EmployeeNumber) ON DELETE CASCADE,
    FOREIGN KEY (EducationFieldID) REFERENCES EducationFields(EducationFieldID)
);

CREATE TABLE SalaryComponents (
    EmployeeNumber INT PRIMARY KEY,
    DailyRate FLOAT,
    HourlyRate FLOAT,
    MonthlyRate FLOAT NOT NULL,
    FOREIGN KEY (EmployeeNumber) REFERENCES Employees(EmployeeNumber) ON DELETE CASCADE
);

CREATE TABLE Income (
    EmployeeNumber INT PRIMARY KEY,
    MonthlyIncome FLOAT,
    PercentSalaryHike INT,
    FOREIGN KEY (EmployeeNumber) REFERENCES Employees(EmployeeNumber) ON DELETE CASCADE
);

CREATE TABLE Performance (
    EmployeeNumber INT PRIMARY KEY,
    PerformanceRating INT,
    TrainingTimesLastYear INT,
    FOREIGN KEY (EmployeeNumber) REFERENCES Employees(EmployeeNumber) ON DELETE CASCADE
);

CREATE TABLE Satisfaction (
    EmployeeNumber INT PRIMARY KEY,
    EnvironmentSatisfaction INT,
    JobSatisfaction INT,
    RelationshipSatisfaction INT,
    WorkLifeBalance INT,
    FOREIGN KEY (EmployeeNumber) REFERENCES Employees(EmployeeNumber) ON DELETE CASCADE
);

CREATE TABLE WorkExperience (
    EmployeeNumber INT PRIMARY KEY,
    NumCompaniesWorked INT,
    TotalWorkingYears INT,
    YearsAtCompany INT,
    YearsInCurrentRole INT,
    YearsSinceLastPromotion INT,
    YearsWithCurrManager INT,
    FOREIGN KEY (EmployeeNumber) REFERENCES Employees(EmployeeNumber) ON DELETE CASCADE
);

CREATE TABLE WorkSchedule (
    EmployeeNumber INT PRIMARY KEY,
    OverTime VARCHAR(5),
    DistanceFromHome INT,
    FOREIGN KEY (EmployeeNumber) REFERENCES Employees(EmployeeNumber) ON DELETE CASCADE
);

CREATE TABLE EmployeeAsset (
    EmployeeNumber INT PRIMARY KEY,
    StockOptionLevel INT,
    FOREIGN KEY (EmployeeNumber) REFERENCES Employees(EmployeeNumber) ON DELETE CASCADE
);

CREATE TABLE AttritionStatus (
    EmployeeNumber INT PRIMARY KEY,
    Attrition VARCHAR(10) NOT NULL CHECK (Attrition IN ('Yes', 'No')),
    FOREIGN KEY (EmployeeNumber) REFERENCES Employees(EmployeeNumber) ON DELETE CASCADE
);

"""

# Execute through text() inside engine.begin() rather than passing the raw
# string to connection.execute(): SQLAlchemy 2.x rejects plain strings and no
# longer autocommits, while this form runs unchanged on 1.4 and 2.x. The
# transaction commits on success and rolls back if any statement fails.
with engine.begin() as ddl_conn:
    ddl_conn.execute(text(stmt))

<sqlalchemy.engine.cursor.LegacyCursorResult at 0x13e0a4a50>

### 2. Transformation and Loading

### 3. TBD 