### Import te libraries

In [1]:
import numpy as np
import pandas as pd


pd.options.display.float_format = '{:,.2f}'.format

### Import the data

In [2]:
raw_data = pd.read_csv('Absenteeism_preprocessed.csv')

In [3]:
raw_data

Unnamed: 0,Reason_1,Reason_2,Reason_3,Reason_4,Month Value,Day of the Week,Transportation Expense,Distance to Work,Age,Daily Work Load Average,Body Mass Index,Education,Children,Pets,Absenteeism Time in Hours
0,0,0,0,1,7,7,289,36,33,239.55,30,0,2,1,4
1,0,0,0,0,7,14,118,13,50,239.55,31,0,1,0,0
2,0,0,0,1,7,15,179,51,38,239.55,31,0,0,0,2
3,1,0,0,0,7,16,279,5,39,239.55,24,0,2,0,4
4,0,0,0,1,7,23,289,36,33,239.55,30,0,2,1,2
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
695,1,0,0,0,5,23,179,22,40,237.66,22,1,2,0,8
696,1,0,0,0,5,23,225,26,28,237.66,24,0,1,2,3
697,1,0,0,0,5,24,330,16,28,237.66,25,1,0,0,8
698,0,0,0,1,5,24,235,16,32,237.66,25,1,0,0,2


#### The goal is to use the different columns to predict if an individual will be absent from work.

### Create the targets

In [4]:
# get the absenteeism time in hours median 
median = raw_data['Absenteeism Time in Hours'].median()
median

3.0

In [5]:
#Map each individual. if absenteeism time is more then the median then assign 1 if not assign 0
target = np.where(raw_data['Absenteeism Time in Hours']>3,1,0)
target

array([1, 0, 0, 1, 0, 0, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 0,
       1, 1, 1, 1, 0, 1, 1, 0, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1, 0, 1, 1, 1,
       0, 0, 0, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 1, 1, 0, 1, 1, 0, 1, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1,
       0, 1, 0, 1, 0, 1, 0, 0, 1, 0, 0, 1, 0, 1, 0, 1, 1, 1, 0, 0, 0, 1,
       0, 1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0,
       0, 0, 0, 0, 0, 0, 1, 1, 0, 1, 0, 1, 1, 1, 0, 0, 0, 1, 0, 1, 0, 0,
       1, 0, 1, 0, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 0, 1,
       0, 1, 1, 0, 1, 0, 0, 0, 0, 0, 0, 1, 0, 0, 1, 1, 0, 0, 1, 0, 1, 1,
       1, 1, 1, 1, 0, 0, 1, 0, 0, 1, 1, 1, 0, 0, 1, 0, 0, 0, 0, 0, 1, 1,
       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 0, 0, 0, 1, 1, 1, 1, 1, 1,
       0, 1, 1, 0, 1, 1, 1, 1, 1, 0, 1, 0, 0, 1, 0, 0, 1, 1, 0, 1, 0, 0,
       0, 0, 0, 1, 1, 1, 1, 0, 1, 0, 1, 0, 0, 0, 1, 1, 0, 1, 1, 1, 0, 0,
       0, 1, 1, 1, 1, 1, 1, 0, 0, 1, 0, 1, 0, 1, 0,

In [6]:
#add thhe target into the data frame 
raw_data['Excessive absenteeism'] = target
raw_data

Unnamed: 0,Reason_1,Reason_2,Reason_3,Reason_4,Month Value,Day of the Week,Transportation Expense,Distance to Work,Age,Daily Work Load Average,Body Mass Index,Education,Children,Pets,Absenteeism Time in Hours,Excessive absenteeism
0,0,0,0,1,7,7,289,36,33,239.55,30,0,2,1,4,1
1,0,0,0,0,7,14,118,13,50,239.55,31,0,1,0,0,0
2,0,0,0,1,7,15,179,51,38,239.55,31,0,0,0,2,0
3,1,0,0,0,7,16,279,5,39,239.55,24,0,2,0,4,1
4,0,0,0,1,7,23,289,36,33,239.55,30,0,2,1,2,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
695,1,0,0,0,5,23,179,22,40,237.66,22,1,2,0,8,1
696,1,0,0,0,5,23,225,26,28,237.66,24,0,1,2,3,0
697,1,0,0,0,5,24,330,16,28,237.66,25,1,0,0,8,1
698,0,0,0,1,5,24,235,16,32,237.66,25,1,0,0,2,0


In [7]:
# drop the column Absenteeism Time in Hours and create a check point
data_with_target = raw_data.drop('Absenteeism Time in Hours', axis=1)
data_with_target

Unnamed: 0,Reason_1,Reason_2,Reason_3,Reason_4,Month Value,Day of the Week,Transportation Expense,Distance to Work,Age,Daily Work Load Average,Body Mass Index,Education,Children,Pets,Excessive absenteeism
0,0,0,0,1,7,7,289,36,33,239.55,30,0,2,1,1
1,0,0,0,0,7,14,118,13,50,239.55,31,0,1,0,0
2,0,0,0,1,7,15,179,51,38,239.55,31,0,0,0,0
3,1,0,0,0,7,16,279,5,39,239.55,24,0,2,0,1
4,0,0,0,1,7,23,289,36,33,239.55,30,0,2,1,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
695,1,0,0,0,5,23,179,22,40,237.66,22,1,2,0,1
696,1,0,0,0,5,23,225,26,28,237.66,24,0,1,2,0
697,1,0,0,0,5,24,330,16,28,237.66,25,1,0,0,1
698,0,0,0,1,5,24,235,16,32,237.66,25,1,0,0,0


In [8]:
#check to see if the dataset is balance. If in the target column 1 and 0 are balance
data_with_target['Excessive absenteeism'].sum()/data_with_target.shape[0]

0.45571428571428574

### Select the inputs

In [9]:
data_with_target

Unnamed: 0,Reason_1,Reason_2,Reason_3,Reason_4,Month Value,Day of the Week,Transportation Expense,Distance to Work,Age,Daily Work Load Average,Body Mass Index,Education,Children,Pets,Excessive absenteeism
0,0,0,0,1,7,7,289,36,33,239.55,30,0,2,1,1
1,0,0,0,0,7,14,118,13,50,239.55,31,0,1,0,0
2,0,0,0,1,7,15,179,51,38,239.55,31,0,0,0,0
3,1,0,0,0,7,16,279,5,39,239.55,24,0,2,0,1
4,0,0,0,1,7,23,289,36,33,239.55,30,0,2,1,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
695,1,0,0,0,5,23,179,22,40,237.66,22,1,2,0,1
696,1,0,0,0,5,23,225,26,28,237.66,24,0,1,2,0
697,1,0,0,0,5,24,330,16,28,237.66,25,1,0,0,1
698,0,0,0,1,5,24,235,16,32,237.66,25,1,0,0,0


In [10]:
# the inputs are every columns beside the Éxcessive absenteeism' columns
unscaled_inputs = data_with_target.iloc[:,:-1]
unscaled_inputs

Unnamed: 0,Reason_1,Reason_2,Reason_3,Reason_4,Month Value,Day of the Week,Transportation Expense,Distance to Work,Age,Daily Work Load Average,Body Mass Index,Education,Children,Pets
0,0,0,0,1,7,7,289,36,33,239.55,30,0,2,1
1,0,0,0,0,7,14,118,13,50,239.55,31,0,1,0
2,0,0,0,1,7,15,179,51,38,239.55,31,0,0,0
3,1,0,0,0,7,16,279,5,39,239.55,24,0,2,0
4,0,0,0,1,7,23,289,36,33,239.55,30,0,2,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
695,1,0,0,0,5,23,179,22,40,237.66,22,1,2,0
696,1,0,0,0,5,23,225,26,28,237.66,24,0,1,2
697,1,0,0,0,5,24,330,16,28,237.66,25,1,0,0
698,0,0,0,1,5,24,235,16,32,237.66,25,1,0,0


### Standardize the data

In [11]:
# standardize the inputs

# standardization is one of the most common preprocessing tools
# since data of different magnitude (scale) can be biased towards high values,
# we want all inputs to be of similar magnitude
# this is a peculiarity of machine learning in general - most (but not all) algorithms do badly with unscaled data

# a very useful module we can use is StandardScaler 
# it has much more capabilities than the straightforward 'preprocessing' method
from sklearn.preprocessing import StandardScaler


# we will create a variable that will contain the scaling information for this particular dataset
# here's the full documentation: http://scikit-learn.org/stable/modules/generated/sklearn.preprocessing.StandardScaler.html

# define scaler as an object
absenteeism_scaler = StandardScaler()

In [12]:
# import the libraries needed to create the Custom Scaler
# note that all of them are a part of the sklearn package
# moreover, one of them is actually the StandardScaler module, 
# so you can imagine that the Custom Scaler is build on it

from sklearn.base import BaseEstimator, TransformerMixin
from sklearn.preprocessing import StandardScaler

# create the Custom Scaler class

class CustomScaler(BaseEstimator,TransformerMixin): 
    
    # init or what information we need to declare a CustomScaler object
    # and what is calculated/declared as we do
    
    def __init__(self,columns,copy=True,with_mean=True,with_std=True):
        
        # scaler is nothing but a Standard Scaler object
        self.scaler = StandardScaler(copy,with_mean,with_std)
        # with some columns 'twist'
        self.columns = columns
        self.mean_ = None
        self.var_ = None
        
    
    # the fit method, which, again based on StandardScale
    
    def fit(self, X, y=None):
        self.scaler.fit(X[self.columns], y)
        self.mean_ = np.mean(X[self.columns])
        self.var_ = np.var(X[self.columns])
        return self
    
    # the transform method which does the actual scaling

    def transform(self, X, y=None, copy=None):
        
        # record the initial order of the columns
        init_col_order = X.columns
        
        # scale all features that you chose when creating the instance of the class
        X_scaled = pd.DataFrame(self.scaler.transform(X[self.columns]), columns=self.columns)
        
        # declare a variable containing all information that was not scaled
        X_not_scaled = X.loc[:,~X.columns.isin(self.columns)]
        
        # return a data frame which contains all scaled features and all 'not scaled' features
        # use the original order (that you recorded in the beginning)
        return pd.concat([X_not_scaled, X_scaled], axis=1)[init_col_order]

In [13]:
# check what are all columns that we've got
unscaled_inputs.columns.values

array(['Reason_1', 'Reason_2', 'Reason_3', 'Reason_4', 'Month Value',
       'Day of the Week', 'Transportation Expense', 'Distance to Work',
       'Age', 'Daily Work Load Average', 'Body Mass Index', 'Education',
       'Children', 'Pets'], dtype=object)

In [14]:
# choose the columns to scale
# we later augmented this code and put it in comments
#columns_to_scale = ['Month Value','Day of the Week', 'Transportation Expense', 'Distance to Work',
       #'Age', 'Daily Work Load Average', 'Body Mass Index', 'Children', 'Pets']
    
# select the columns to omit
columns_to_omit = ['Reason_1', 'Reason_2', 'Reason_3', 'Reason_4','Education']

In [15]:
# create the columns to scale, based on the columns to omit
# use list comprehension to iterate over the list

columns_to_scale = [x for x in unscaled_inputs.columns.values if x not in columns_to_omit]
columns_to_scale

['Month Value',
 'Day of the Week',
 'Transportation Expense',
 'Distance to Work',
 'Age',
 'Daily Work Load Average',
 'Body Mass Index',
 'Children',
 'Pets']

In [16]:
# declare a scaler object, specifying the columns you want to scale
absenteeism_scaler = CustomScaler(columns_to_scale)



In [17]:
# fit the data (calculate mean and standard deviation); they are automatically stored inside the object 
absenteeism_scaler.fit(unscaled_inputs)



CustomScaler(columns=['Month Value', 'Day of the Week',
                      'Transportation Expense', 'Distance to Work', 'Age',
                      'Daily Work Load Average', 'Body Mass Index', 'Children',
                      'Pets'],
             copy=None, with_mean=None, with_std=None)

In [18]:
# standardizes the data, using the transform method 
# in the last line, we fitted the data - in other words
# we found the internal parameters of a model that will be used to transform data. 
# transforming applies these parameters to our data
# note that when you get new data, you can just call 'scaler' again and transform it in the same way as now
scaled_inputs = absenteeism_scaler.transform(unscaled_inputs)

In [19]:
# the scaled_inputs are now an ndarray, because sklearn works with ndarrays
scaled_inputs


Unnamed: 0,Reason_1,Reason_2,Reason_3,Reason_4,Month Value,Day of the Week,Transportation Expense,Distance to Work,Age,Daily Work Load Average,Body Mass Index,Education,Children,Pets
0,0,0,0,1,0.18,-1.10,1.01,0.41,-0.54,-0.81,0.77,0,0.88,0.27
1,0,0,0,0,0.18,-0.23,-1.57,-1.14,2.13,-0.81,1.00,0,-0.02,-0.59
2,0,0,0,1,0.18,-0.10,-0.65,1.43,0.25,-0.81,1.00,0,-0.92,-0.59
3,1,0,0,0,0.18,0.02,0.85,-1.68,0.41,-0.81,-0.64,0,0.88,-0.59
4,0,0,0,1,0.18,0.90,1.01,0.41,-0.54,-0.81,0.77,0,0.88,0.27
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
695,1,0,0,0,-0.39,0.90,-0.65,-0.53,0.56,-0.85,-1.11,1,0.88,-0.59
696,1,0,0,0,-0.39,0.90,0.04,-0.26,-1.32,-0.85,-0.64,0,-0.02,1.13
697,1,0,0,0,-0.39,1.02,1.62,-0.94,-1.32,-0.85,-0.41,1,-0.92,-0.59
698,0,0,0,1,-0.39,1.02,0.19,-0.94,-0.69,-0.85,-0.41,1,-0.92,-0.59


In [20]:
# check the shape of the inputs
scaled_inputs.shape

(700, 14)

### Split the data into testing and training. Also shuffle it


#### import the relevant module


In [21]:
from sklearn.model_selection import train_test_split

#### Split

In [22]:
target = data_with_target['Excessive absenteeism']

x_train, x_test, y_train, y_test = train_test_split(scaled_inputs, target,
                                                   train_size = 0.8,
                                                   random_state= 20)

### Fit The model and get the accuracy

#### import the relevent module

In [23]:
from sklearn.linear_model import LogisticRegression
from sklearn import metrics

In [24]:
# define the regression
reg = LogisticRegression()

In [25]:
#fit the regression
reg.fit(x_train, y_train)

LogisticRegression()

In [26]:
#get the acccuracy of the model
reg.score(x_train,y_train)

0.775

##### The model accuracy is 0.79 almost 80%

### Get the accuracy manualy

In [27]:
#predictthe out out and compare it to the target
output_model = reg.predict(x_train)

In [28]:
output_model

array([0, 1, 1, 1, 1, 0, 0, 1, 0, 1, 1, 1, 0, 1, 0, 0, 0, 0, 0, 1, 1, 0,
       0, 0, 0, 0, 0, 1, 1, 0, 0, 1, 0, 1, 0, 0, 0, 0, 0, 0, 1, 0, 1, 1,
       1, 0, 0, 0, 1, 1, 0, 1, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0,
       0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 1, 0, 0, 1, 1, 1, 0, 0, 0, 1, 1, 0,
       0, 1, 0, 0, 1, 0, 0, 0, 0, 1, 0, 1, 1, 1, 1, 0, 1, 1, 0, 0, 1, 0,
       0, 1, 0, 1, 1, 0, 0, 0, 1, 1, 1, 1, 1, 0, 0, 1, 0, 1, 0, 0, 1, 0,
       0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 1, 1, 1, 0, 0, 1, 0, 1, 0, 0, 1, 1,
       1, 1, 0, 1, 0, 0, 1, 0, 1, 0, 0, 1, 1, 0, 1, 1, 0, 0, 0, 0, 1, 0,
       0, 0, 0, 0, 1, 0, 0, 1, 1, 0, 0, 1, 0, 0, 1, 1, 1, 0, 0, 0, 0, 1,
       1, 0, 0, 1, 1, 1, 1, 0, 1, 0, 0, 0, 1, 1, 1, 0, 0, 0, 0, 0, 0, 1,
       1, 0, 0, 0, 1, 0, 0, 1, 1, 0, 0, 1, 1, 0, 0, 1, 1, 0, 0, 0, 0, 1,
       0, 0, 1, 0, 1, 1, 1, 0, 1, 0, 1, 0, 1, 0, 1, 1, 0, 1, 1, 1, 1, 1,
       0, 0, 1, 0, 0, 0, 1, 1, 0, 0, 1, 0, 1, 1, 0, 1, 0, 0, 1, 0, 1, 0,
       0, 0, 1, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 0,

In [29]:
y_train

346    0
91     1
299    1
129    1
695    1
      ..
218    1
223    1
271    0
474    1
355    0
Name: Excessive absenteeism, Length: 560, dtype: int64

In [30]:
#map the predict value and the target
output_model == y_train

346     True
91      True
299     True
129     True
695     True
       ...  
218     True
223    False
271     True
474    False
355     True
Name: Excessive absenteeism, Length: 560, dtype: bool

In [31]:
np.sum(output_model == y_train)

434

In [32]:
output_model.shape[0]

560

In [33]:
#get the %
np.sum(output_model == y_train)/output_model.shape[0]

0.775

Here we find the same accuracy as "reg.score"

In [34]:
#Check point
df_unscaled_inputs = unscaled_inputs.copy()

### Find the intercept and coefficients

In [35]:
reg.intercept_

array([-1.63616238])

In [36]:
reg.coef_

array([[ 2.79267567,  0.95497098,  3.10483492,  0.82973208,  0.15719727,
        -0.02688336,  0.61145996, -0.01749414, -0.17276895, -0.00744285,
         0.27992276, -0.22742715,  0.34715156, -0.27653973]])

In [37]:
#Create a Summary table

df_unscaled_inputs.columns.values

array(['Reason_1', 'Reason_2', 'Reason_3', 'Reason_4', 'Month Value',
       'Day of the Week', 'Transportation Expense', 'Distance to Work',
       'Age', 'Daily Work Load Average', 'Body Mass Index', 'Education',
       'Children', 'Pets'], dtype=object)

In [38]:
summary_table = pd.DataFrame(columns=['Feature name'], data = df_unscaled_inputs.columns.values)

In [39]:
summary_table['Coefficient'] = np.transpose(reg.coef_)

In [40]:
summary_table

Unnamed: 0,Feature name,Coefficient
0,Reason_1,2.79
1,Reason_2,0.95
2,Reason_3,3.1
3,Reason_4,0.83
4,Month Value,0.16
5,Day of the Week,-0.03
6,Transportation Expense,0.61
7,Distance to Work,-0.02
8,Age,-0.17
9,Daily Work Load Average,-0.01


In [41]:
summary_table.index = summary_table.index + 1


In [42]:
summary_table.index

RangeIndex(start=1, stop=15, step=1)

In [43]:
summary_table.loc[0] = ['Intercept',reg.intercept_[0]]


In [44]:
summary_table = summary_table.sort_index()
summary_table

Unnamed: 0,Feature name,Coefficient
0,Intercept,-1.64
1,Reason_1,2.79
2,Reason_2,0.95
3,Reason_3,3.1
4,Reason_4,0.83
5,Month Value,0.16
6,Day of the Week,-0.03
7,Transportation Expense,0.61
8,Distance to Work,-0.02
9,Age,-0.17


### Interpreting the coeffients

In [45]:
summary_table['Odds_ratio'] = np.exp(summary_table.Coefficient)

In [46]:
summary_table

Unnamed: 0,Feature name,Coefficient,Odds_ratio
0,Intercept,-1.64,0.19
1,Reason_1,2.79,16.32
2,Reason_2,0.95,2.6
3,Reason_3,3.1,22.31
4,Reason_4,0.83,2.29
5,Month Value,0.16,1.17
6,Day of the Week,-0.03,0.97
7,Transportation Expense,0.61,1.84
8,Distance to Work,-0.02,0.98
9,Age,-0.17,0.84


In [47]:
#sort base the summary table base on the Odds ration
summary_table.sort_values('Odds_ratio',ascending=False )

Unnamed: 0,Feature name,Coefficient,Odds_ratio
3,Reason_3,3.1,22.31
1,Reason_1,2.79,16.32
2,Reason_2,0.95,2.6
4,Reason_4,0.83,2.29
7,Transportation Expense,0.61,1.84
13,Children,0.35,1.42
11,Body Mass Index,0.28,1.32
5,Month Value,0.16,1.17
10,Daily Work Load Average,-0.01,0.99
8,Distance to Work,-0.02,0.98


### Test the model

In [48]:
reg.score(x_test,y_test)

0.7428571428571429

In [49]:
#get tre probability
predicted_proba = reg.predict_proba(x_test)
predicted_proba

array([[0.71527538, 0.28472462],
       [0.5873289 , 0.4126711 ],
       [0.43693105, 0.56306895],
       [0.78839035, 0.21160965],
       [0.08630833, 0.91369167],
       [0.34048538, 0.65951462],
       [0.30803861, 0.69196139],
       [0.12555885, 0.87444115],
       [0.77996164, 0.22003836],
       [0.75251634, 0.24748366],
       [0.50361224, 0.49638776],
       [0.21496445, 0.78503555],
       [0.07651457, 0.92348543],
       [0.72685397, 0.27314603],
       [0.30703908, 0.69296092],
       [0.5340766 , 0.4659234 ],
       [0.54543036, 0.45456964],
       [0.55456488, 0.44543512],
       [0.38853468, 0.61146532],
       [0.05394443, 0.94605557],
       [0.70123091, 0.29876909],
       [0.78782932, 0.21217068],
       [0.40994581, 0.59005419],
       [0.4172791 , 0.5827209 ],
       [0.25122925, 0.74877075],
       [0.75604916, 0.24395084],
       [0.50920014, 0.49079986],
       [0.85495828, 0.14504172],
       [0.20358505, 0.79641495],
       [0.79117919, 0.20882081],
       [0.

In [50]:
predicted_proba.shape

(140, 2)

In [51]:
#get the probabily to get 1
predicted_proba[:,:-1]

array([[0.71527538],
       [0.5873289 ],
       [0.43693105],
       [0.78839035],
       [0.08630833],
       [0.34048538],
       [0.30803861],
       [0.12555885],
       [0.77996164],
       [0.75251634],
       [0.50361224],
       [0.21496445],
       [0.07651457],
       [0.72685397],
       [0.30703908],
       [0.5340766 ],
       [0.54543036],
       [0.55456488],
       [0.38853468],
       [0.05394443],
       [0.70123091],
       [0.78782932],
       [0.40994581],
       [0.4172791 ],
       [0.25122925],
       [0.75604916],
       [0.50920014],
       [0.85495828],
       [0.20358505],
       [0.79117919],
       [0.63307945],
       [0.33028371],
       [0.32039199],
       [0.47730239],
       [0.7917337 ],
       [0.46215432],
       [0.77130039],
       [0.25403153],
       [0.58738603],
       [0.40535893],
       [0.79867545],
       [0.54148838],
       [0.7615142 ],
       [0.60900291],
       [0.16999572],
       [0.41953673],
       [0.31138836],
       [0.713

### Save the model

In [52]:
#save the model is saving 'reg'
#we are going to use pickle
import pickle

with open('model_no_drop', 'wb') as file:
    pickle.dump(reg,file)

In [53]:
with open('scaler_no_drop', 'wb') as file:
    pickle.dump(absenteeism_scaler,file)