In [1]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier, GradientBoostingClassifier, AdaBoostClassifier
from sklearn.metrics import classification_report, mean_squared_error

In [3]:
# Step 1: Read the shoppers-intention CSV into a DataFrame and display it
dataset = pd.read_csv(filepath_or_buffer='online_shoppers_intention.csv')
dataset

Unnamed: 0,Administrative,Administrative_Duration,Informational,Informational_Duration,ProductRelated,ProductRelated_Duration,BounceRates,ExitRates,PageValues,SpecialDay,Month,OperatingSystems,Browser,Region,TrafficType,VisitorType,Weekend,Revenue
0,0,0.0,0,0.0,1,0.000000,0.200000,0.200000,0.000000,0.0,Feb,1,1,1,1,Returning_Visitor,False,False
1,0,0.0,0,0.0,2,64.000000,0.000000,0.100000,0.000000,0.0,Feb,2,2,1,2,Returning_Visitor,False,False
2,0,0.0,0,0.0,1,0.000000,0.200000,0.200000,0.000000,0.0,Feb,4,1,9,3,Returning_Visitor,False,False
3,0,0.0,0,0.0,2,2.666667,0.050000,0.140000,0.000000,0.0,Feb,3,2,2,4,Returning_Visitor,False,False
4,0,0.0,0,0.0,10,627.500000,0.020000,0.050000,0.000000,0.0,Feb,3,3,1,4,Returning_Visitor,True,False
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
12325,3,145.0,0,0.0,53,1783.791667,0.007143,0.029031,12.241717,0.0,Dec,4,6,1,1,Returning_Visitor,True,False
12326,0,0.0,0,0.0,5,465.750000,0.000000,0.021333,0.000000,0.0,Nov,3,2,1,8,Returning_Visitor,True,False
12327,0,0.0,0,0.0,6,184.250000,0.083333,0.086667,0.000000,0.0,Nov,3,2,1,13,Returning_Visitor,True,False
12328,4,75.0,0,0.0,15,346.000000,0.000000,0.021053,0.000000,0.0,Nov,2,2,3,11,Returning_Visitor,False,False


In [9]:
# Count the null entries in each column (isna is the method isnull aliases)
dataset.isna().sum()

Administrative             0
Administrative_Duration    0
Informational              0
Informational_Duration     0
ProductRelated             0
ProductRelated_Duration    0
BounceRates                0
ExitRates                  0
PageValues                 0
SpecialDay                 0
Month                      0
OperatingSystems           0
Browser                    0
Region                     0
TrafficType                0
VisitorType                0
Weekend                    0
Revenue                    0
dtype: int64

In [11]:
from sklearn.preprocessing import LabelEncoder

# Replace each categorical/boolean column with integer codes, in place.
# One fitted encoder per column is kept in `label_encoders` (keyed by column
# name) so the integer codes can be mapped back to the original labels.
# NOTE(review): LabelEncoder is documented as a target (y) encoder and numbers
# labels in sorted order, so the 'Month' codes are presumably alphabetical
# rather than calendar order -- confirm this is acceptable for the features.
cat_vars = ['Month', 'VisitorType', 'Weekend', 'Revenue']
label_encoders = {name: LabelEncoder() for name in cat_vars}
for var in cat_vars:
    dataset[var] = label_encoders[var].fit_transform(dataset[var])

In [12]:
# Pull the three target columns out of the frame; every remaining column
# becomes an input feature in X.
y_revenue, y_weekend, informational_duration = (
    dataset['Revenue'],
    dataset['Weekend'],
    dataset['Informational_Duration'],
)
X = dataset.drop(columns=['Revenue', 'Weekend', 'Informational_Duration'])

In [13]:
# Hold out 30% of the rows for testing, with a fixed seed for the shuffle.
# The features and all three targets go through a single call so that every
# array is split on the same rows; results come back as (train, test) pairs
# in the same order as the inputs.
(
    X_train, X_test,
    y_revenue_train, y_revenue_test,
    y_weekend_train, y_weekend_test,
    informational_duration_train, informational_duration_test,
) = train_test_split(
    X, y_revenue, y_weekend, informational_duration,
    test_size=0.3,
    random_state=42,
)

In [14]:
# Build the ensemble learning model (Random Forest) for predicting Revenue.
# A random forest draws bootstrap samples and feature subsets at random, so an
# unseeded model gives different trees (and different metrics) on every run.
# The seed is fixed to the same value used for the train/test split so the
# fitted model is reproducible after a kernel restart.
ensemble_model = RandomForestClassifier(random_state=42)
ensemble_model.fit(X_train, y_revenue_train)

RandomForestClassifier()

In [15]:
# Predict the encoded Revenue class for every held-out row
revenue_predictions = ensemble_model.predict(X=X_test)

In [16]:
# Print per-class precision/recall/F1 for the Revenue predictions, under a heading
print(
    'Revenue Prediction:',
    classification_report(y_true=y_revenue_test, y_pred=revenue_predictions),
    sep='\n',
)

Revenue Prediction:
              precision    recall  f1-score   support

           0       0.92      0.96      0.94      3124
           1       0.73      0.55      0.63       575

    accuracy                           0.90      3699
   macro avg       0.83      0.76      0.79      3699
weighted avg       0.89      0.90      0.89      3699



In [17]:
# Build the ensemble learning model (Random Forest) for predicting whether the
# session falls on the weekend.
# A random forest draws bootstrap samples and feature subsets at random, so an
# unseeded model gives different trees (and different metrics) on every run.
# The seed is fixed to the same value used for the train/test split so the
# fitted model is reproducible after a kernel restart.
weekend_model = RandomForestClassifier(random_state=42)
weekend_model.fit(X_train, y_weekend_train)

RandomForestClassifier()

In [18]:
# Predict the encoded Weekend class for every held-out row
weekend_predictions = weekend_model.predict(X=X_test)

In [19]:
# Print per-class precision/recall/F1 for the Weekend predictions, under a heading
print(
    'Weekend Prediction:',
    classification_report(y_true=y_weekend_test, y_pred=weekend_predictions),
    sep='\n',
)

Weekend Prediction:
              precision    recall  f1-score   support

           0       0.78      0.98      0.87      2810
           1       0.65      0.12      0.20       889

    accuracy                           0.77      3699
   macro avg       0.71      0.55      0.54      3699
weighted avg       0.75      0.77      0.71      3699

