## Libraries and Modules Import

In [1]:
import numpy as np
import pandas as pd
from sklearn.preprocessing import MinMaxScaler
from sklearn.linear_model import LogisticRegression
from sklearn.pipeline import Pipeline
from sklearn.compose import ColumnTransformer
from sklearn.preprocessing import OrdinalEncoder, OneHotEncoder

## Reading and Cleaning of the Training File

In [2]:
# Load the training sheet into a DataFrame.
# The relative path uses a forward slash: "..\E" is an invalid escape sequence
# in a normal string literal (deprecated, and a SyntaxWarning on recent Python
# versions), and a backslash separator only resolves on Windows. "../" works on
# Windows and POSIX alike.
data = pd.read_excel("../E-Commerce_train.xlsx", sheet_name="Sheet1")
data = data.drop('ID', axis=1)  # the ID column is not used as a feature
#--------------------------------------------------------------------------------------------
y = data['Reached.on.Time_Y.N']  # target column
X = data.drop('Reached.on.Time_Y.N', axis=1)  # all remaining columns are the candidate inputs

## Preparing the Training Data for Prediction

In [3]:
# Column groups, one per kind of preprocessing applied to the training inputs.
ordinal = [
    'Gender',
    'Product_importance',
]  # categorical columns turned into integer codes
nominal = [
    'Warehouse_block',
    'Mode_of_Shipment',
]  # categorical columns that get one-hot encoded
scal = [
    'Cost_of_the_Product',
    'Discount_offered',
    'Weight_in_gms',
]  # numeric columns that get rescaled

In [4]:
# One single-step pipeline per column group: integer codes for the ordinal
# columns, one-hot vectors for the nominal columns, min-max scaling for the
# numeric columns.
# NOTE(review): with categories='auto' the encoder derives the category order
# from the data, not from a domain ranking -- confirm that the resulting codes
# are meaningful for 'Product_importance'.
transformer_for_ordinal = Pipeline(steps=[('ordinal', OrdinalEncoder(categories='auto'))])
transformer_for_nominal = Pipeline(steps=[('nominal', OneHotEncoder())])
transformer_for_scal = Pipeline(steps=[('scaler', MinMaxScaler())])

# Route each column group to its pipeline. Columns that appear in none of the
# groups are not passed through (ColumnTransformer's default remainder='drop'),
# and sparse_threshold=0 makes the combined output a dense array.
Transformer = ColumnTransformer(
    transformers=[
        ('ordinal', transformer_for_ordinal, ordinal),
        ('nominal', transformer_for_nominal, nominal),
        ('scal', transformer_for_scal, scal),
    ],
    sparse_threshold=0,
)

In [5]:
# Fit the column transformer on the training inputs and keep the encoded
# features, together with the target in a 'y' column, in data_tran.
data_tran = pd.DataFrame(Transformer.fit_transform(X)).assign(y=y)
# The model inputs are the encoded features only, i.e. data_tran without 'y'.
X_tran = data_tran.drop(columns='y')

## Reading and Cleaning of the Test File

In [6]:
# Load the test sheet into a DataFrame.
# The relative path uses a forward slash: "..\E" is an invalid escape sequence
# in a normal string literal (deprecated, and a SyntaxWarning on recent Python
# versions), and a backslash separator only resolves on Windows. "../" works on
# Windows and POSIX alike.
test = pd.read_excel("../E-Commerce_test.xlsx", sheet_name="Sheet1")
test = test.drop('ID', axis=1)  # the ID column is not used as a feature

## Preparing the Test Data for Prediction

In [7]:
# Column groups for the test inputs, one per kind of preprocessing.
ordinal_1 = [
    'Gender',
    'Product_importance',
]  # categorical columns turned into integer codes
nominal_1 = [
    'Warehouse_block',
    'Mode_of_Shipment',
]  # categorical columns that get one-hot encoded
scal_1 = [
    'Cost_of_the_Product',
    'Discount_offered',
    'Weight_in_gms',
]  # numeric columns that get rescaled

In [8]:
# One single-step pipeline per test column group: integer codes for the ordinal
# columns, one-hot vectors for the nominal columns, min-max scaling for the
# numeric columns.
transformer_for_ordinal_1 = Pipeline(steps=[('ordinal', OrdinalEncoder(categories='auto'))])
transformer_for_nominal_1 = Pipeline(steps=[('nominal', OneHotEncoder())])
transformer_for_scal_1 = Pipeline(steps=[('scaler', MinMaxScaler())])

# Route each column group to its pipeline. Columns that appear in none of the
# groups are not passed through (ColumnTransformer's default remainder='drop'),
# and sparse_threshold=0 makes the combined output a dense array.
Transformer_1 = ColumnTransformer(
    transformers=[
        ('ordinal', transformer_for_ordinal_1, ordinal_1),
        ('nominal', transformer_for_nominal_1, nominal_1),
        ('scal', transformer_for_scal_1, scal_1),
    ],
    sparse_threshold=0,
)

In [9]:
# Encode the test inputs with the transformer that was already fitted on the
# training data (Transformer), rather than fitting a second one on the test set.
# A transformer fitted on the test set learns its own min/max ranges and its
# own category sets, so the model trained on X_tran would be fed features on a
# different scale -- and possibly in a different column layout -- than the ones
# it was trained on.
# With the encoders' default settings, a category value that was absent from
# the training data raises here instead of being silently re-coded.
test_tran = pd.DataFrame(Transformer.transform(test))
X_test = test_tran  # inputs handed to the model for prediction

## Prediction

In [10]:
# Create the classifier with scikit-learn's default settings; it is trained in
# a later cell.
modelLR = LogisticRegression()

In [11]:
# Train the classifier on the encoded training inputs and the target column.
modelLR.fit(X=X_tran, y=y)

In [12]:
# Predict the target label for every row of the encoded test inputs.
y_pred = modelLR.predict(X=X_test)

In [13]:
# Wrap the predicted labels (an array) in a DataFrame so they can be exported.
prediction = pd.DataFrame(data=y_pred)

In [28]:
# Name the single prediction column 'pred' (replaces the existing column label).
prediction.columns = ['pred']

In [31]:
# Write the predictions to a CSV file in the working directory, without the
# row index.
prediction.to_csv(path_or_buf='dhugueth.csv', index=False)