In [3]:
# Python SQL toolkit and Object Relational Mapper
import sqlalchemy
from sqlalchemy import create_engine, MetaData
from sqlalchemy.ext.declarative import declarative_base
from sqlalchemy import Column, Integer, String, Numeric, Text, Float
import pandas as pd
import numpy as np
from pandas.api.types import is_string_dtype
from pandas.api.types import is_numeric_dtype
import random
from random import randint
import string

# PostgreSQL info:
# The database password is imported from the project-local `config` module
# rather than being written into the notebook itself.
# NOTE(review): neither psycopg2 nor PASSWORD is referenced by the cells visible
# in this notebook — confirm they are still needed before relying on them.
import psycopg2
from config import PGSQLDB_PSWD
PASSWORD = PGSQLDB_PSWD

In [2]:
# Load the raw IT help-desk export and relabel its columns with the HR
# help-desk vocabulary used by the rest of the notebook.
# NOTE(review): `daysOpen` holds small integers (see the preview below), not
# dates; it is renamed to DateReceived only to reserve the column name.
df_cases = pd.read_csv(
    'HR_Datasets/human-resources-data-set/IT-Help-Desk-data.csv'
).rename(
    columns={
        'ticketID': 'CaseID',
        'requestor': 'RequestorID',
        'RequestorSeniority': 'RequestorType',
        'ITOwner': 'CaseOwner',
        'FiledAgainst': 'CaseType',
        'TicketType': 'CaseStatus',
        'Severity': 'TierLevel',
        'daysOpen': 'DateReceived',
        'Satisfaction': 'SatisfactionScore',
    }
)
df_cases.head()

Unnamed: 0,CaseID,RequestorID,RequestorType,CaseOwner,CaseType,CaseStatus,TierLevel,Priority,DateReceived,SatisfactionScore
0,1,1929,1 - Junior,50,Systems,Issue,2 - Normal,0 - Unassigned,3,1 - Unsatisfied
1,2,1587,2 - Regular,15,Software,Request,1 - Minor,1 - Low,5,1 - Unsatisfied
2,3,925,2 - Regular,15,Access/Login,Request,2 - Normal,0 - Unassigned,0,0 - Unknown
3,4,413,4 - Management,22,Systems,Request,2 - Normal,0 - Unassigned,20,0 - Unknown
4,5,318,1 - Junior,22,Access/Login,Request,2 - Normal,1 - Low,1,1 - Unsatisfied


In [3]:
# Identifier columns are labels, not quantities: store them as text.
df_cases = df_cases.astype({'CaseID': str, 'RequestorID': str, 'CaseOwner': str})

# Give DateReceived a datetime64 dtype.
# NOTE(review): at this point the column still holds plain integers, which
# pd.to_datetime interprets as nanoseconds since 1970-01-01 (hence the 1970
# timestamps in the previews). The values are placeholders that a later cell
# overwrites with a generated date range.
df_cases['DateReceived'] = pd.to_datetime(df_cases['DateReceived'])

df_cases.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 100000 entries, 0 to 99999
Data columns (total 10 columns):
CaseID               100000 non-null object
RequestorID          100000 non-null object
RequestorType        100000 non-null object
CaseOwner            100000 non-null object
CaseType             100000 non-null object
CaseStatus           100000 non-null object
TierLevel            100000 non-null object
Priority             100000 non-null object
DateReceived         100000 non-null datetime64[ns]
SatisfactionScore    100000 non-null object
dtypes: datetime64[ns](1), object(9)
memory usage: 7.6+ MB


In [4]:
# Synthetic HR help-desk staff: ten specialist IDs, each mapped to one support
# tier (tierMap) and one HR department (deptMap).
HR_Specialists = ['tSjw2hYY', 'Qwsfd5ND', 'nbRWtYc6', 'vuo1zJHG', 'oDoCidPt',
                  'XLbl6V9K', 'm5kYwx5P', '3vGwUNjj', 'QFu2Ydv1', 'qabMRJqu']

tierMap = {
    'tSjw2hYY': 'Tier 1', 'Qwsfd5ND': 'Tier 1', 'nbRWtYc6': 'Tier 1',
    'vuo1zJHG': 'Tier 1', 'oDoCidPt': 'Tier 1',
    'XLbl6V9K': 'Tier 2', 'm5kYwx5P': 'Tier 2', '3vGwUNjj': 'Tier 2',
    'QFu2Ydv1': 'Tier 2', 'qabMRJqu': 'Tier 2',
}

deptMap = {
    'tSjw2hYY': 'Recruitment & Selection',
    'Qwsfd5ND': 'Benefits & Compensation',
    'nbRWtYc6': 'Training & Development',
    'vuo1zJHG': 'Attrition & Retention',
    'oDoCidPt': 'Employee Relations',
    'XLbl6V9K': 'Recruitment & Selection',
    'm5kYwx5P': 'Benefits & Compensation',
    '3vGwUNjj': 'Training & Development',
    'QFu2Ydv1': 'Attrition & Retention',
    'qabMRJqu': 'Employee Relations',
}

# Every specialist must be present in both lookups; otherwise the .map() calls
# below would silently leave NaN in CaseType / TierLevel.
assert set(HR_Specialists) == set(tierMap) == set(deptMap), \
    "HR_Specialists, tierMap and deptMap must cover the same IDs"

# Assign a random owner to every case. The generator is seeded so that
# Restart & Run All produces the same assignment each time (the seed value
# itself is arbitrary; it only has to stay fixed).
owner_rng = np.random.default_rng(1001)
df_cases['CaseOwner'] = owner_rng.choice(HR_Specialists, size=len(df_cases))

# Create the DateClosed column as a copy of DateReceived for now.
df_cases['DateClosed'] = df_cases['DateReceived']

# Case type and tier follow directly from whoever owns the case.
df_cases['CaseType'] = df_cases['CaseOwner'].map(deptMap)
df_cases['TierLevel'] = df_cases['CaseOwner'].map(tierMap)

# Relabel IT-style categories with their HR equivalents.
df_cases['RequestorType'] = df_cases['RequestorType'].replace(
    {"1 - Junior": "1 - Intern", "3 - Senior": "3 - Non-staff"}
)
df_cases['Priority'] = df_cases['Priority'].replace("0 - Unassigned", "0 - None")

df_cases.head()

Unnamed: 0,CaseID,RequestorID,RequestorType,CaseOwner,CaseType,CaseStatus,TierLevel,Priority,DateReceived,SatisfactionScore,DateClosed
0,1,1929,1 - Intern,XLbl6V9K,Recruitment & Selection,Issue,Tier 2,0 - None,1970-01-01 00:00:00.000000003,1 - Unsatisfied,1970-01-01 00:00:00.000000003
1,2,1587,2 - Regular,Qwsfd5ND,Benefits & Compensation,Request,Tier 1,1 - Low,1970-01-01 00:00:00.000000005,1 - Unsatisfied,1970-01-01 00:00:00.000000005
2,3,925,2 - Regular,m5kYwx5P,Benefits & Compensation,Request,Tier 2,0 - None,1970-01-01 00:00:00.000000000,0 - Unknown,1970-01-01 00:00:00.000000000
3,4,413,4 - Management,XLbl6V9K,Recruitment & Selection,Request,Tier 2,0 - None,1970-01-01 00:00:00.000000020,0 - Unknown,1970-01-01 00:00:00.000000020
4,5,318,1 - Intern,bRWtYc6,Training & Development,Request,Tier 1,1 - Low,1970-01-01 00:00:00.000000001,1 - Unsatisfied,1970-01-01 00:00:00.000000001


In [5]:
# Pool of employee numbers that the RequestorID assignment draws from for
# "3 - Non-staff" requestors.
# NOTE(review): 1204033041 is listed twice, which makes it twice as likely to
# be drawn as any other number — confirm whether that is intended.
Inactive_EE_Num = [
    1307060199, 1005019209, 1001417624, 1307060083, 1312063507, 1212051962, 1102024056, 1008021030, 1303054580,
    1406068293, 1504073368, 1101023394, 1012023226, 1209048697, 1001450968, 1306058816, 1104025179, 1406067865,
    1104025243, 1107027392, 1401064670, 1412071562, 1502072511, 1109029103, 1012023204, 1308060622, 1302053046,
    1008020960, 905013738, 1411071324, 1103024859, 1001167253, 1110029602, 1011022932, 1410071156, 1001944783,
    1207046956, 1102024057, 1306058509, 1403065874, 1105025721, 1107027450, 1109029264, 710007401, 903013071,
    1010022030, 1104025486, 1408069409, 1107027575, 1206042315, 1305056276, 1001138521, 1201031032, 1304055986,
    1409070245, 1205033439, 1204033041, 1204033041, 1206044851, 1001856521, 1109029531, 1112030979, 1102023965,
    1307060058, 1202031821, 1109029186, 1411071406, 1402065355, 1305057440, 1102024274, 1011022926, 1206043417,
    1208048229, 1211050793, 1402065340, 1499902991, 1206038000, 1403065625, 1111030266, 1405067492, 1001175250,
    1308060535, 1111030148, 1012023103, 711007713, 1405067138, 1405067501, 1406068345, 1404066739, 1011022777,
    1103024504, 1104025466, 1107027551, 1203032235, 1011022814, 1410071026, 1101023619, 1307060212, 1101023839,
    1308060671, 1405067188, 1001268402,
]

# Pool of employee numbers that the RequestorID assignment draws from for
# every requestor type other than "3 - Non-staff".
Active_EE_Num = [
    1409070522, 1011022883, 1501072311, 1304055947, 1107027358, 1301052124, 1212051409, 1212052023, 1101023679,
    1303054329, 1403066194, 1110029777, 1109029366, 1412071844, 1308060366, 1403066125, 1307060188, 1006020066,
    1103024456, 1101023457, 1504073313, 1408069635, 1011022820, 1301052449, 1110029990, 1101023577, 1311063114,
    1010022337, 1403065721, 1208048062, 1009919920, 1305057282, 1111030129, 1012023295, 1102024173, 1301052902,
    1407069280, 1599991009, 1403066069, 1409070567, 1311063172, 1402065303, 1411071312, 1001109612, 1201031274,
    1203032498, 1501072124, 706006285, 1408069481, 1306059197, 1209048696, 1202031618, 1101023540, 1101023754,
    1409070147, 1007020403, 1101023612, 1105025661, 1108028428, 1402065085, 1302053339, 803009012, 1006020020,
    1192991000, 1201031308, 1411071302, 1204032843, 1501072093, 1309061015, 1104025414, 1105026041, 1501072192,
    1204032927, 1203032099, 1408069539, 1411071481, 1411071212, 1988299991, 1001549006, 1106026579, 1108028351,
    1312063675, 1307060077, 1405067064, 1307059937, 1406068241, 1404066622, 1002017900, 1105025718, 1009021646,
    1211050782, 1009920000, 1106026433, 1103024924, 1408069882, 1201031438, 1503072857, 1008020942, 1412071713,
    1003018246, 1306057810, 1301052462, 1104025008, 1001495124, 1304055683, 1311062610, 1108028108, 1304055987,
    1106026572, 1009919930, 1102024115, 1412071660, 1103024679, 1405067565, 602000312, 1001504432, 1101023353,
    1209049259, 1110029623, 1308060754, 1303054625, 1209048771, 1401064327, 1406067957, 909015167, 1209049326,
    1109029256, 1012023013, 1107027351, 1404066711, 1011022863, 1001103149, 1408069503, 1001956578, 1102024121,
    1301052436, 1306057978, 1406068403, 1009919960, 1302053044, 1403066020, 1203032357, 1111030684, 1104025435,
    1501072180, 1404066949, 1103024335, 812011761, 1307059944, 1103024843, 1108027853, 1001735072, 1102024106,
    1001084890, 1401064562, 1009919940, 1502072711, 1405067642, 1011022887, 1407068885, 1203032255, 1009919950,
    710007555, 1106026933, 1012023185, 1410070998, 1106026462, 1302053362, 904013591, 807010161, 906014183,
    1205033102, 808010278, 1307059817, 1501071909, 1499902910, 1001970770, 1009919980, 1203032263, 1308060959,
    1410071137, 1102024149, 1111030244, 1000974650, 1302053333, 1106026896, 1411071295, 1405067298, 1201031310,
    1407069061, 1001644719, 1201031324, 1401064637, 1409070255, 1012023152, 1411071506, 1312063714, 1111030503,
    1106026474, 1011022818, 1009919970, 1301052347, 1009919990, 1205033180, 1012023010, 1112030816, 1110029732, 1211051232,
]

# Give every case a requestor ID: "3 - Non-staff" requestors are drawn from the
# inactive pool, all other requestor types from the active pool.
# The generator is seeded so that Restart & Run All reproduces the same IDs
# (the seed value is arbitrary; it only has to stay fixed). Both candidate
# arrays are drawn for every row and np.where picks one of them per row.
requestor_rng = np.random.default_rng(1002)
df_cases['RequestorID'] = np.where(
    df_cases['RequestorType'] == "3 - Non-staff",
    requestor_rng.choice(Inactive_EE_Num, size=len(df_cases)),
    requestor_rng.choice(Active_EE_Num, size=len(df_cases)),
)
df_cases.tail(5)

Unnamed: 0,CaseID,RequestorID,RequestorType,CaseOwner,CaseType,CaseStatus,TierLevel,Priority,DateReceived,SatisfactionScore,DateClosed
99995,99996,1209049259,2 - Regular,tSjw2hYY,Recruitment & Selection,Issue,Tier 1,1 - Low,1970-01-01 00:00:00.000000004,1 - Unsatisfied,1970-01-01 00:00:00.000000004
99996,99997,1007020403,4 - Management,oDoCidPt,Employee Relations,Request,Tier 1,2 - Medium,1970-01-01 00:00:00.000000000,0 - Unknown,1970-01-01 00:00:00.000000000
99997,99998,1110029623,1 - Intern,bRWtYc6,Training & Development,Issue,Tier 1,0 - None,1970-01-01 00:00:00.000000000,1 - Unsatisfied,1970-01-01 00:00:00.000000000
99998,99999,1107027351,2 - Regular,oDoCidPt,Employee Relations,Request,Tier 1,0 - None,1970-01-01 00:00:00.000000016,3 - Highly satisfied,1970-01-01 00:00:00.000000016
99999,100000,1104025414,2 - Regular,oDoCidPt,Employee Relations,Issue,Tier 1,1 - Low,1970-01-01 00:00:00.000000006,2 - Satisfied,1970-01-01 00:00:00.000000006


In [6]:
# Derive CaseStatus from tier and satisfaction. The rules are applied in
# order, so a later rule overrides an earlier one: any case that already has a
# satisfaction rating ends up 'Closed' regardless of its tier.
df_cases.loc[df_cases['TierLevel'] == "Tier 2", 'CaseStatus'] = 'Under Review'
df_cases.loc[df_cases['TierLevel'] == "Tier 1", 'CaseStatus'] = 'Open'
df_cases.loc[df_cases['SatisfactionScore'] != "0 - Unknown", 'CaseStatus'] = 'Closed'

# Build the public case number: zero-pad the numeric part to six digits and
# prefix it with 'HRHD' (e.g. '1' -> 'HRHD000001').
# Note: running this cell a second time would prepend the prefix again.
df_cases['CaseID'] = 'HRHD' + df_cases['CaseID'].str.zfill(6).astype(str)

df_cases.tail(10)

Unnamed: 0,CaseID,RequestorID,RequestorType,CaseOwner,CaseType,CaseStatus,TierLevel,Priority,DateReceived,SatisfactionScore,DateClosed
99990,HRHD099991,1309061015,4 - Management,bRWtYc6,Training & Development,Closed,Tier 1,2 - Medium,1970-01-01 00:00:00.000000006,3 - Highly satisfied,1970-01-01 00:00:00.000000006
99991,HRHD099992,1192991000,2 - Regular,oDoCidPt,Employee Relations,Open,Tier 1,0 - None,1970-01-01 00:00:00.000000000,0 - Unknown,1970-01-01 00:00:00.000000000
99992,HRHD099993,1011022814,3 - Non-staff,vuo1zJHG,Attrition & Retention,Closed,Tier 1,1 - Low,1970-01-01 00:00:00.000000014,2 - Satisfied,1970-01-01 00:00:00.000000014
99993,HRHD099994,1001504432,4 - Management,QFu2Ydv1,Attrition & Retention,Closed,Tier 2,3 - High,1970-01-01 00:00:00.000000000,1 - Unsatisfied,1970-01-01 00:00:00.000000000
99994,HRHD099995,602000312,1 - Intern,bRWtYc6,Training & Development,Closed,Tier 1,0 - None,1970-01-01 00:00:00.000000000,1 - Unsatisfied,1970-01-01 00:00:00.000000000
99995,HRHD099996,1209049259,2 - Regular,tSjw2hYY,Recruitment & Selection,Closed,Tier 1,1 - Low,1970-01-01 00:00:00.000000004,1 - Unsatisfied,1970-01-01 00:00:00.000000004
99996,HRHD099997,1007020403,4 - Management,oDoCidPt,Employee Relations,Open,Tier 1,2 - Medium,1970-01-01 00:00:00.000000000,0 - Unknown,1970-01-01 00:00:00.000000000
99997,HRHD099998,1110029623,1 - Intern,bRWtYc6,Training & Development,Closed,Tier 1,0 - None,1970-01-01 00:00:00.000000000,1 - Unsatisfied,1970-01-01 00:00:00.000000000
99998,HRHD099999,1107027351,2 - Regular,oDoCidPt,Employee Relations,Closed,Tier 1,0 - None,1970-01-01 00:00:00.000000016,3 - Highly satisfied,1970-01-01 00:00:00.000000016
99999,HRHD100000,1104025414,2 - Regular,oDoCidPt,Employee Relations,Closed,Tier 1,1 - Low,1970-01-01 00:00:00.000000006,2 - Satisfied,1970-01-01 00:00:00.000000006


In [7]:
# Spread the cases evenly, in row order, over the reporting window.
n_cases = len(df_cases)
df_cases["DateReceived"] = pd.date_range(start='1992-01-01', end='2022-12-31', periods=n_cases)

# Close each case 0-6 whole days after it was received.
# `pd.np` was deprecated in pandas 1.0 and removed in pandas 2.0, so numpy is
# used directly; the generator is seeded to keep the export reproducible
# (the seed value is arbitrary; it only has to stay fixed).
closure_rng = np.random.default_rng(1003)
days_to_close = closure_rng.integers(0, 7, size=n_cases)
df_cases['DateClosed'] = df_cases['DateReceived'] + pd.to_timedelta(days_to_close, unit='D')
# NOTE(review): DateClosed is filled for every row, including cases whose
# status is not 'Closed' — confirm that downstream consumers expect that.

# Everything received before the cutoff is treated as closed; Tier 2 cases
# received on or after it are marked as escalated. Using the complement of
# one mask (instead of separate `<` and `>` tests) means a case received
# exactly at the cutoff instant is no longer skipped by both rules.
status_cutoff = pd.Timestamp('2019-08-16')
received_before_cutoff = df_cases['DateReceived'] < status_cutoff
df_cases.loc[received_before_cutoff, 'CaseStatus'] = 'Closed'
df_cases.loc[~received_before_cutoff & (df_cases['TierLevel'] == 'Tier 2'), 'CaseStatus'] = 'Escalated'

df_cases.head(20)

Unnamed: 0,CaseID,RequestorID,RequestorType,CaseOwner,CaseType,CaseStatus,TierLevel,Priority,DateReceived,SatisfactionScore,DateClosed
0,HRHD000001,1009919990,1 - Intern,XLbl6V9K,Recruitment & Selection,Closed,Tier 2,0 - None,2018-01-01 00:00:00.000000000,1 - Unsatisfied,2018-01-01 00:00:00.000000000
1,HRHD000002,1302053044,2 - Regular,Qwsfd5ND,Benefits & Compensation,Closed,Tier 1,1 - Low,2018-01-01 00:15:46.089460894,1 - Unsatisfied,2018-01-02 00:15:46.089460894
2,HRHD000003,1411071481,2 - Regular,m5kYwx5P,Benefits & Compensation,Closed,Tier 2,0 - None,2018-01-01 00:31:32.178921789,0 - Unknown,2018-01-05 00:31:32.178921789
3,HRHD000004,1306059197,4 - Management,XLbl6V9K,Recruitment & Selection,Closed,Tier 2,0 - None,2018-01-01 00:47:18.268382683,0 - Unknown,2018-01-05 00:47:18.268382683
4,HRHD000005,1407068885,1 - Intern,bRWtYc6,Training & Development,Closed,Tier 1,1 - Low,2018-01-01 01:03:04.357843578,1 - Unsatisfied,2018-01-02 01:03:04.357843578
5,HRHD000006,1301052347,4 - Management,m5kYwx5P,Benefits & Compensation,Closed,Tier 2,3 - High,2018-01-01 01:18:50.447304473,0 - Unknown,2018-01-03 01:18:50.447304473
6,HRHD000007,1406067865,3 - Non-staff,tSjw2hYY,Recruitment & Selection,Closed,Tier 1,3 - High,2018-01-01 01:34:36.536765367,0 - Unknown,2018-01-03 01:34:36.536765367
7,HRHD000008,1011022820,4 - Management,XLbl6V9K,Recruitment & Selection,Closed,Tier 2,0 - None,2018-01-01 01:50:22.626226262,0 - Unknown,2018-01-05 01:50:22.626226262
8,HRHD000009,1001109612,2 - Regular,XLbl6V9K,Recruitment & Selection,Closed,Tier 2,2 - Medium,2018-01-01 02:06:08.715687156,1 - Unsatisfied,2018-01-05 02:06:08.715687156
9,HRHD000010,1101023839,3 - Non-staff,Qwsfd5ND,Benefits & Compensation,Closed,Tier 1,1 - Low,2018-01-01 02:21:54.805148051,1 - Unsatisfied,2018-01-03 02:21:54.805148051


In [8]:
# Show the last 20 rows of df_cases; because DateReceived was assigned in row
# order, these are the cases with the latest received dates.
df_cases.tail(20)

Unnamed: 0,CaseID,RequestorID,RequestorType,CaseOwner,CaseType,CaseStatus,TierLevel,Priority,DateReceived,SatisfactionScore,DateClosed
99980,HRHD099981,1307059944,2 - Regular,QFu2Ydv1,Attrition & Retention,Escalated,Tier 2,2 - Medium,2020-12-30 19:00:24.300242992,3 - Highly satisfied,2021-01-04 19:00:24.300242992
99981,HRHD099982,1103024456,2 - Regular,oDoCidPt,Employee Relations,Closed,Tier 1,0 - None,2020-12-30 19:16:10.389703888,2 - Satisfied,2021-01-01 19:16:10.389703888
99982,HRHD099983,1012023152,1 - Intern,bRWtYc6,Training & Development,Closed,Tier 1,0 - None,2020-12-30 19:31:56.479164784,1 - Unsatisfied,2021-01-05 19:31:56.479164784
99983,HRHD099984,1206043417,3 - Non-staff,tSjw2hYY,Recruitment & Selection,Closed,Tier 1,3 - High,2020-12-30 19:47:42.568625680,1 - Unsatisfied,2021-01-03 19:47:42.568625680
99984,HRHD099985,1301052436,1 - Intern,Qwsfd5ND,Benefits & Compensation,Closed,Tier 1,3 - High,2020-12-30 20:03:28.658086576,3 - Highly satisfied,2020-12-31 20:03:28.658086576
99985,HRHD099986,1011022777,3 - Non-staff,oDoCidPt,Employee Relations,Closed,Tier 1,3 - High,2020-12-30 20:19:14.747547472,2 - Satisfied,2021-01-05 20:19:14.747547472
99986,HRHD099987,1408069503,2 - Regular,oDoCidPt,Employee Relations,Closed,Tier 1,1 - Low,2020-12-30 20:35:00.837008368,3 - Highly satisfied,2021-01-01 20:35:00.837008368
99987,HRHD099988,1107027551,3 - Non-staff,m5kYwx5P,Benefits & Compensation,Escalated,Tier 2,3 - High,2020-12-30 20:50:46.926469264,1 - Unsatisfied,2021-01-05 20:50:46.926469264
99988,HRHD099989,1405067138,3 - Non-staff,tSjw2hYY,Recruitment & Selection,Closed,Tier 1,3 - High,2020-12-30 21:06:33.015930160,3 - Highly satisfied,2020-12-30 21:06:33.015930160
99989,HRHD099990,1001644719,2 - Regular,XLbl6V9K,Recruitment & Selection,Escalated,Tier 2,2 - Medium,2020-12-30 21:22:19.105391040,1 - Unsatisfied,2021-01-03 21:22:19.105391040


In [9]:
# Write the synthetic HR help-desk cases to CSV with a header row and without
# the DataFrame index.
df_cases.to_csv(
    'HR_Datasets/human-resources-data-set/HR_helpdesk_info.csv',
    index=False,  # documented boolean form (the original passed None, which is merely falsy)
    header=True,
)