### FMLA Files

#### Set source path to import code

In [4]:
%pwd

'C:\\Users\\t0272m1\\Projects\\HR Analytics\\notebooks'

In [5]:
source_path = 'E:\HR-Analytics\source'
source_path

'E:\\HR-Analytics\\source'

In [6]:
import os
os.chdir(source_path)
%pwd

'E:\\HR-Analytics\\source'

In [7]:
ls

 Volume in drive E is DATA
 Volume Serial Number is AEFF-63BE

 Directory of E:\HR-Analytics\source

09/06/2019  10:21 AM    <DIR>          .
09/06/2019  10:21 AM    <DIR>          ..
05/02/2019  11:01 AM                 2 __init__.py
09/06/2019  10:25 AM    <DIR>          __pycache__
07/15/2019  02:57 PM            16,924 base_table.py
06/13/2019  11:21 AM            26,727 calendrical.py
09/06/2019  10:06 AM             3,984 config_jnap.yml
09/06/2019  10:06 AM            13,274 config_shap.yml
09/06/2019  10:21 AM             6,370 config_tac.yml
05/01/2019  11:07 AM               498 config_wap.yml
09/06/2019  10:06 AM             6,143 config_wtap.yml
09/06/2019  10:06 AM             6,317 database.py
09/05/2019  11:13 AM             9,793 main.py
09/06/2019  10:06 AM            45,081 model.py
08/27/2019  10:16 AM             3,072 pipeline.yml
05/01/2019  02:09 PM             3,673 pipeline_brap.py
05/01/2019  02:08 PM             3,670 pipeline_bvp.py
09/06/2019  10:06 AM     

#### Imports

In [23]:
import calendar
import datetime
import glob
import itertools
import jaydebeapi as jdb
import json
import math
import matplotlib
import matplotlib.dates as mdates
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import pandas.io.sql as psql
import psycopg2
import random
import requests
import seaborn as sns
from sklearn.model_selection import train_test_split
from sqlalchemy import create_engine
import statsmodels as sm
from statsmodels.tsa.statespace.sarimax import SARIMAX
import urllib.request

In [9]:
# Internal Python Packages
from database import connect_greenplum
from database import write_frame_to_pg
from database import create_sqlalchemy_engine
from main import get_pipeline_config
from main import get_plant_config

#### Get specifications

In [10]:
pipeline_specs = get_pipeline_config(source_path)
pipeline_specs

{'datalake': {'schema': 'lab_datasci',
  'host': 'shbdmdwp001.servers.chrysler.com',
  'port': 5432,
  'user': 'datasci',
  'password': 'datasci_01',
  'database': 'odshawq'},
 'jdbc': {'driver': 'com.ibm.db2.jcc.DB2Driver',
  'server': 'jdbc:db2://SRVR1874.dbms.chrysler.com:18740/AUCERPTP',
  'user': 'datasci',
  'password': 'datasci_01',
  'jar_file': 'c:/installed/sqllib/java/db2jcc4.jar'},
 'holidays': {'calendar_us': ['2016-01-01',
   '2016-01-18',
   '2016-03-25',
   '2016-03-28',
   '2016-05-30',
   '2016-07-04',
   '2016-09-05',
   '2016-11-08',
   '2016-11-11',
   '2016-11-24',
   '2016-11-25',
   '2016-12-26',
   '2016-12-27',
   '2016-12-28',
   '2016-12-29',
   '2016-12-30',
   '2017-01-02',
   '2017-01-22',
   '2017-04-14',
   '2017-04-17',
   '2017-05-29',
   '2017-07-04',
   '2017-09-04',
   '2017-11-10',
   '2017-11-22',
   '2017-11-23',
   '2017-12-25',
   '2017-12-26',
   '2017-12-29',
   '2017-12-30',
   '2017-12-31',
   '2018-01-01',
   '2018-01-15',
   '2018-03-30'

In [11]:
pipeline_specs['datalake']

{'schema': 'lab_datasci',
 'host': 'shbdmdwp001.servers.chrysler.com',
 'port': 5432,
 'user': 'datasci',
 'password': 'datasci_01',
 'database': 'odshawq'}

In [12]:
plant_id = 'tac'
plant_id

'tac'

In [13]:
pipeline_specs['plant_id'] = plant_id
pipeline_specs['project_directory'] = source_path
pipeline_specs

{'datalake': {'schema': 'lab_datasci',
  'host': 'shbdmdwp001.servers.chrysler.com',
  'port': 5432,
  'user': 'datasci',
  'password': 'datasci_01',
  'database': 'odshawq'},
 'jdbc': {'driver': 'com.ibm.db2.jcc.DB2Driver',
  'server': 'jdbc:db2://SRVR1874.dbms.chrysler.com:18740/AUCERPTP',
  'user': 'datasci',
  'password': 'datasci_01',
  'jar_file': 'c:/installed/sqllib/java/db2jcc4.jar'},
 'holidays': {'calendar_us': ['2016-01-01',
   '2016-01-18',
   '2016-03-25',
   '2016-03-28',
   '2016-05-30',
   '2016-07-04',
   '2016-09-05',
   '2016-11-08',
   '2016-11-11',
   '2016-11-24',
   '2016-11-25',
   '2016-12-26',
   '2016-12-27',
   '2016-12-28',
   '2016-12-29',
   '2016-12-30',
   '2017-01-02',
   '2017-01-22',
   '2017-04-14',
   '2017-04-17',
   '2017-05-29',
   '2017-07-04',
   '2017-09-04',
   '2017-11-10',
   '2017-11-22',
   '2017-11-23',
   '2017-12-25',
   '2017-12-26',
   '2017-12-29',
   '2017-12-30',
   '2017-12-31',
   '2018-01-01',
   '2018-01-15',
   '2018-03-30'

In [14]:
plant_specs = get_plant_config(pipeline_specs)
plant_specs

{'plant': {'code': 2459,
  'market_id': 7,
  'shift_days': 6,
  'shift_hours': 10,
  'absence_codes': ['BERC',
   'BERE',
   'BERU',
   'BERX',
   'CARE',
   'CARU',
   'FMLA',
   'FMLD',
   'FMLU',
   'HOMD',
   'HOMF',
   'HOMU',
   'ILFE',
   'ILFU',
   'IPBE',
   'IPME',
   'IPNU',
   'IPSE',
   'JURE',
   'MISE',
   'MISU',
   'PERU',
   'PPAA',
   'PPAU',
   'TRAG',
   'WTRU'],
  'exclude_dates': ['2018-01-02',
   '2018-04-03',
   '2018-05-07',
   '2018-05-29',
   '2018-08-13',
   '2018-12-29',
   '2019-01-02',
   '2019-01-11',
   ['2019-01-14', '2019-01-18'],
   '2019-01-22',
   ['2019-05-13', '2019-06-04'],
   '2020-04-10',
   '2020-04-13',
   ['2020-07-13', '2020-07-17']]},
 'base_table': {'start_date': datetime.date(2017, 1, 1),
  'end_date': None,
  'write_table': True},
 'model': {'models': ['sarimax'],
  'target': 'absences_unplanned',
  'npreds': 6,
  'p_arima': 1,
  'd_arima': 0,
  'q_arima': 0,
  'features': ['actual_hours',
   'lost_hours',
   'absences_unplanned_rolli

#### Connect to SQL Alchemy

In [None]:
engine_dl = create_sqlalchemy_engine(pipeline_specs['datalake'])
engine_dl

#### Read in FMLA data

In [18]:
output_path = 'E:/HR-Analytics/data/FMLA'
output_path

'E:/HR-Analytics/data/FMLA'

In [19]:
os.chdir(output_path)
%pwd

'E:\\HR-Analytics\\data\\FMLA'

In [20]:
ls

 Volume in drive E is DATA
 Volume Serial Number is AEFF-63BE

 Directory of E:\HR-Analytics\data\FMLA

09/09/2019  11:26 AM    <DIR>          .
09/09/2019  11:26 AM    <DIR>          ..
09/09/2019  11:24 AM           904,031 FCA FMLA_Certs_20190311.xlsx
09/09/2019  11:24 AM           947,371 FCA FMLA_Certs_20190318.xlsx
09/09/2019  11:24 AM           987,258 FCA FMLA_Certs_20190325.xlsx
09/09/2019  11:23 AM           966,345 FCA FMLA_Certs_20190401.xlsx
09/09/2019  11:23 AM           997,392 FCA FMLA_Certs_20190408.xlsx
09/09/2019  11:23 AM         1,031,024 FCA FMLA_Certs_20190415.xlsx
09/09/2019  11:23 AM         1,048,774 FCA FMLA_Certs_20190422.xlsx
09/09/2019  11:23 AM         1,120,980 FCA FMLA_Certs_20190429.xlsx
09/09/2019  05:15 AM         1,074,869 FCA FMLA_Certs_20190506.xlsx
09/09/2019  05:15 AM         1,084,107 FCA FMLA_Certs_20190513.xlsx
09/09/2019  05:15 AM         1,033,493 FCA FMLA_Certs_20190520.xlsx
09/09/2019  05:15 AM         1,037,598 FCA FMLA_Certs_20190527.xl

In [39]:
root_file_name = 'Incapacities'
table_name = '_'.join(['abs', 'fmla', root_file_name.lower(), 'tbl'])
table_name

'abs_fmla_incapacities_tbl'

In [40]:
file_spec = '*' + root_file_name + '*'
df_all = pd.DataFrame()
for f in glob.glob(file_spec):
    print(f)
    df_csv = pd.read_excel(f)
    dt_str = f.split('.')[0][-8:]
    dt_str_formatted = dt_str[:4] + '-' + dt_str[4:6] + '-' + dt_str[6:8]
    df_csv['weekly_date'] = dt_str_formatted
    df_all = pd.concat([df_all, df_csv])

FCA FMLA_Incapacities_20190107.xlsx
FCA FMLA_Incapacities_20190114.xlsx
FCA FMLA_Incapacities_20190121.xlsx
FCA FMLA_Incapacities_20190128.xlsx
FCA FMLA_Incapacities_20190204.xlsx
FCA FMLA_Incapacities_20190211.xlsx
FCA FMLA_Incapacities_20190218.xlsx
FCA FMLA_Incapacities_20190225.xlsx
FCA FMLA_Incapacities_20190304.xlsx
FCA FMLA_Incapacities_20190311.xlsx
FCA FMLA_Incapacities_20190318.xlsx
FCA FMLA_Incapacities_20190325.xlsx
FCA FMLA_Incapacities_20190401.xlsx
FCA FMLA_Incapacities_20190408.xlsx
FCA FMLA_Incapacities_20190415.xlsx
FCA FMLA_Incapacities_20190422.xlsx
FCA FMLA_Incapacities_20190429.xlsx
FCA FMLA_Incapacities_20190506.xlsx


of pandas will change to not sort by default.

To accept the future behavior, pass 'sort=False'.


  if __name__ == '__main__':


FCA FMLA_Incapacities_20190513.xlsx
FCA FMLA_Incapacities_20190520.xlsx
FCA FMLA_Incapacities_20190527.xlsx
FCA FMLA_Incapacities_20190603.xlsx
FCA FMLA_Incapacities_20190610.xlsx
FCA FMLA_Incapacities_20190617.xlsx
FCA FMLA_Incapacities_20190624.xlsx
FCA FMLA_Incapacities_20190701.xlsx
FCA FMLA_Incapacities_20190708.xlsx
FCA FMLA_Incapacities_20190715.xlsx
FCA FMLA_Incapacities_20190722.xlsx
FCA FMLA_Incapacities_20190729.xlsx
FCA FMLA_Incapacities_20190805.xlsx
FCA FMLA_Incapacities_20190812.xlsx
FCA FMLA_Incapacities_20190819.xlsx
FCA FMLA_Incapacities_20190826.xlsx
FCA FMLA_Incapacities_20190902.xlsx


In [41]:
df_all.columns

Index(['Absence Status', 'Approved Begin Date', 'Approved End Date',
       'As of Date', 'Case Number', 'Case Type', 'Client Contract',
       'Department ID', 'EE ID', 'Hours Used', 'Incident Duration',
       'Incident Frequency', 'Leave Sub Type', 'Leave Type', 'LeaveSub Type',
       'Location Code', 'Treatment Duration', 'Treatment Frequency',
       'Unit Name', 'Unit Number', 'Weeks Available – Policy',
       'Weeks Policy Max – Policy', 'weekly_date'],
      dtype='object')

In [53]:
df_all.columns = ['absence_status',
                  'approved_begin_date',
                  'approved_end_date',
                  'as_of_date',
                  'case_number',
                  'case_type',
                  'client_contract',
                  'department_id',
                  'cid',
                  'hours_used',
                  'incident_duration',
                  'incident_frequency',
                  'leave_sub_type',
                  'leave_type',
                  'leavesub_type',
                  'location_code',
                  'treatment_duration',
                  'treatment_frequency',
                  'unit_name',
                  'unit_number',
                  'weeks_available_policy',
                  'weeks_policy_max',
                  'weekly_date']

In [49]:
df_all.fillna(0, inplace=True)

In [64]:
df_all.sample(20)

Unnamed: 0,absence_status,approved_begin_date,approved_end_date,as_of_date,case_number,case_type,client_contract,department_id,cid,hours_used,...,leave_type,leavesub_type,location_code,treatment_duration,treatment_frequency,unit_name,unit_number,weeks_available_policy,weeks_policy_max,weekly_date
10263,Approved,2019-01-07 00:00:00,2019-08-27 00:00:00,2019-07-15,301918557880001IFN,Intermittent,2262,9510,278688,178.0,...,Employee Medical,0,2459,0,0,Toledo Assembly Plant,2459,8.8407,12.0,2019-07-15
5448,Conditional,0,0,2019-03-11,301921406230001IFN,Intermittent,2262,9190,795242,0.0,...,Family Leave,Family Medical,4025,0,0,Sterling Heights Assembly,4025,12.0,12.0,2019-03-11
4088,Approved,2019-03-09 00:00:00,2019-09-05 00:00:00,2019-08-12,301920616470001IFN,Intermittent,2262,9150,1362670,160.0,...,Employee Medical,0,4012,1 day,twice / 4 weeks,Jefferson North Assembly,4012,8.0,12.0,2019-08-12
6433,Approved,2019-01-15 00:00:00,2019-12-31 00:00:00,2019-04-01,301917562340001IFN,Intermittent,2262,3803,932316,72.0,...,Employee Medical,Employee Medical,3110,0,0,Center Line PDC,3110,10.2,12.0,2019-04-01
4439,Approved,2019-01-11 00:00:00,2019-07-01 00:00:00,2019-03-04,301917496180001IFN,Intermittent,2262,3310,1362710,82.3,...,Employee Medical,Employee Medical,4012,0,0,Jefferson North Assembly,4012,9.9425,12.0,2019-03-04
5213,Approved,2019-03-20 00:00:00,2019-12-31 00:00:00,2019-06-10,301920644220001IFN,Intermittent,2262,3381,1446286,52.5,...,Employee Medical,0,2459,0,0,Toledo Assembly Plant,2459,11.1251,12.0,2019-06-10
11043,Approved,2019-02-01 00:00:00,2019-12-31 00:00:00,2019-06-03,301917647170001IFN,Intermittent,2262,3330,1399399,84.0,...,Employee Medical,0,4015,0,0,Belvidere Assembly,4015,10.6001,12.0,2019-06-03
8718,Conditional,0,0,2019-07-29,301921494310001IFN,Intermittent,2262,9100,1448911,0.0,...,Employee Medical,0,5305,1 day,4 times / month,Kokomo Transmission,5305,11.75,12.0,2019-07-29
4984,Conditional,0,0,2019-01-28,301916747820001IFN,Intermittent,2262,3300,1361336,0.0,...,Family Leave,Family Medical,4012,0,0,Jefferson North Assembly,4012,12.0,12.0,2019-01-28
7089,Conditional,0,0,2019-03-18,301918421570001IFN,Intermittent,2262,3300,79310,0.0,...,Employee Medical,Employee Medical,4012,1 day,once / month,Jefferson North Assembly,4012,12.0,12.0,2019-03-18


In [55]:
fmla_path = '/'.join([output_path, table_name + '.csv'])
df_all.to_csv(fmla_path, index=False)

In [63]:
table_tag = table_name[4:][:-4]
table_tag

'fmla_incapacities'

In [61]:
pipeline_specs['test_flag'] = False

In [62]:
write_frame_to_pg(df_all, table_tag, pipeline_specs)

### End of Notebook