In [1]:
import numpy as np
import pandas as pd 
import tensorflow as tf
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split

In [2]:
# Load the preprocessed CSV from the current working directory.
data = pd.read_csv(filepath_or_buffer='./Absenteeism_data_preprocessed.csv')
# Bare last expression so the notebook displays (n_rows, n_columns).
data.shape

(700, 15)

In [3]:
# Features: every column except the last one.
input_data = data.iloc[:, :-1]
# Target: the last column, kept as a one-column DataFrame (note the `-1:` slice).
# Using -1 instead of the hard-coded position 14 keeps the two slices
# complementary even if the number of columns in the CSV changes; for the
# 15-column frame loaded above it selects exactly the same column.
target_data = data.iloc[:, -1:]

In [4]:
# Remove three columns from the feature set.
# The frame is rebuilt from `data` (all columns but the last) instead of
# re-assigning `input_data` from itself, so this cell can be re-run safely:
# the self-referential form raises KeyError on a second run because the
# columns are already gone. A misspelled column name still raises.
input_data = data.iloc[:, :-1].drop(
    columns=['day_of_the_week', 'Distance to Work', 'Daily Work Load Average']
)

In [5]:
# 80/20 shuffled split; the fixed random_state makes the partition reproducible.
x_train, x_test, y_train, y_test = train_test_split(
    input_data,
    target_data,
    train_size=0.8,
    shuffle=True,
    random_state=2,
)

In [6]:
# Sanity check of the split sizes. The last value is the test-to-train row
# ratio (not the test share of the whole dataset).
print(x_train.shape, x_test.shape, len(x_test) / len(x_train))

(560, 11) (140, 11) 0.25


In [7]:
# Flatten the one-column targets into 1-D arrays before fitting.
# np.asarray accepts both a DataFrame and an ndarray, so re-running this cell
# is a no-op; the previous `.values` access raised AttributeError on a second
# run because y_train / y_test were already ndarrays by then.
y_train = np.asarray(y_train).reshape(-1)
y_test = np.asarray(y_test).reshape(-1)

In [8]:
from sklearn.linear_model import LogisticRegression
from sklearn import metrics

In [9]:
# Logistic-regression classifier with all scikit-learn default settings.
reg = LogisticRegression()

In [10]:
# Fit the classifier on the training split only.
reg.fit(X=x_train, y=y_train)

In [11]:
# Mean accuracy on the data the model was fitted on.
reg.score(X=x_train, y=y_train)

0.7678571428571429

In [12]:
# Display the intercept term of the fitted model (a length-1 array here).
reg.intercept_

array([-1.54784133])

In [13]:
# Display the fitted coefficients; the array is 2-D with a single row.
reg.coef_

array([[ 2.69429495,  0.53709366,  3.1836008 ,  0.69181705,  0.07383781,
         0.57599398, -0.21482804,  0.24080536,  0.04466213,  0.38611595,
        -0.32882936]])

In [14]:
# Display the feature column names that remain after the drop.
input_data.columns

Index(['reason_1', 'reason_2', 'reason_3', 'reason_4', 'month',
       'Transportation Expense', 'Age', 'Body Mass Index', 'Education',
       'Children', 'Pets'],
      dtype='object')

In [15]:
# Start the summary with one row per feature name ...
summary_table = pd.DataFrame(data=input_data.columns, columns=['feature_names'])
# ... then attach the coefficients; reg.coef_ is a single-row 2-D array, so
# its transpose is one value per row of the table.
summary_table['coeffecients'] = reg.coef_.T

In [16]:
# Put the intercept in row 0 of the summary, above the features.
# Guarded so the cell is idempotent: without the check, a second run shifts
# the index again and inserts a duplicate intercept row (or fails once extra
# columns have been added to the table).
if not summary_table['feature_names'].eq('intercept').any():
    # Shift the existing rows to 1..n to free index 0 for the intercept.
    summary_table.index = summary_table.index + 1
    summary_table.loc[0] = ['intercept', reg.intercept_[0]]
    # .loc appended the new row at the end; restore index order.
    summary_table = summary_table.sort_index()

In [17]:
# Odds ratio = exp(coefficient), computed for every row of the table.
summary_table['odds_ratio'] = np.exp(summary_table['coeffecients'])

In [18]:
# Display the table with the largest odds ratios first; summary_table itself
# is not modified because the sorted copy is not assigned.
summary_table.sort_values(by='odds_ratio', ascending=False)

Unnamed: 0,feature_names,coeffecients,odds_ratio
3,reason_3,3.183601,24.133497
1,reason_1,2.694295,14.795084
4,reason_4,0.691817,1.997342
6,Transportation Expense,0.575994,1.778898
2,reason_2,0.537094,1.711027
10,Children,0.386116,1.471255
8,Body Mass Index,0.240805,1.272273
5,month,0.073838,1.076632
9,Education,0.044662,1.045675
7,Age,-0.214828,0.80668


In [19]:
# Mean accuracy on the held-out test split.
reg.score(X=x_test, y=y_test)

0.7428571428571429

In [20]:
import pickle

In [21]:
# Serialize the fitted classifier to a file named 'model' in the working
# directory; the context manager closes the handle even if dumping fails.
with open('model', mode='wb') as model_file:
    pickle.dump(reg, model_file)