In [1]:
# Import required libraries
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, OneHotEncoder
from sklearn.compose import ColumnTransformer
from sklearn.inspection import permutation_importance
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout
from sklearn.metrics import mean_squared_error, r2_score, accuracy_score, precision_score, recall_score, f1_score, confusion_matrix, classification_report

# from google.colab import files
# uploaded = files.upload()
from google.colab import drive
drive.mount('/content/drive')

# Load the dataset
data = pd.read_csv('/content/drive/My Drive/[[[[ECCE 2025 CUET]]]]/Implementation of Existing Model/online_shoppers_intention.csv')

# Define numerical and categorical columns
numerical_columns = ['Administrative', 'Administrative_Duration', 'Informational', 'Informational_Duration',
                     'ProductRelated', 'ProductRelated_Duration', 'BounceRates', 'ExitRates', 'PageValues', 'SpecialDay']
categorical_columns = ['Month', 'OperatingSystems', 'Browser', 'Region', 'TrafficType', 'VisitorType', 'Weekend']

# Separate features and target variable
X = data[numerical_columns + categorical_columns]
y = data['Revenue']  # Assuming 'Revenue' is the dependent variable

# Split the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

Mounted at /content/drive


**Preprocess the data**

In [2]:
# Preprocess the data
preprocessor = ColumnTransformer(
    transformers=[
        ('num', StandardScaler(), numerical_columns),
        ('cat', OneHotEncoder(drop='first', sparse_output=False, handle_unknown='ignore'), categorical_columns) # Changes made here
    ]
)

X_train_transformed = preprocessor.fit_transform(X_train)
X_test_transformed = preprocessor.transform(X_test)

**Build the DL Model**

In [6]:
# Build the Deep Learning model
model = Sequential([
    Dense(256, activation='relu', input_dim=X_train_transformed.shape[1]),  # Change input_dim to X_train_transformed.shape[1]
    Dropout(0.4),
    Dense(128, activation='relu'),
    Dropout(0.3),
    Dense(64, activation='relu'),
    Dropout(0.2),
    Dense(32, activation='relu'),
    Dense(1, activation='sigmoid')  # Sigmoid activation for binary classification
])

# Compile the model
model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


**Train the Linear Regression model and make prediction**

In [7]:
# ipython-input-4-e7b69454a6bb
# Train the model
history = model.fit(X_train_transformed, y_train, validation_data=(X_test_transformed, y_test), epochs=50, batch_size=32, verbose=1) # Changed X_train and X_test to X_train_transformed and X_test_transformed

# Evaluate the model
test_loss, test_accuracy = model.evaluate(X_test_transformed, y_test, verbose=0) # Changed X_test to X_test_transformed
print(f"Test Loss: {test_loss:.4f}, Test Accuracy: {test_accuracy:.4f}")

Epoch 1/50
[1m309/309[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 4ms/step - accuracy: 0.8698 - loss: 0.3479 - val_accuracy: 0.8832 - val_loss: 0.2659
Epoch 2/50
[1m309/309[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 3ms/step - accuracy: 0.8945 - loss: 0.2617 - val_accuracy: 0.8852 - val_loss: 0.2601
Epoch 3/50
[1m309/309[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 3ms/step - accuracy: 0.9010 - loss: 0.2423 - val_accuracy: 0.8897 - val_loss: 0.2578
Epoch 4/50
[1m309/309[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 3ms/step - accuracy: 0.8988 - loss: 0.2395 - val_accuracy: 0.8901 - val_loss: 0.2569
Epoch 5/50
[1m309/309[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 3ms/step - accuracy: 0.9095 - loss: 0.2235 - val_accuracy: 0.8889 - val_loss: 0.2548
Epoch 6/50
[1m309/309[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 3ms/step - accuracy: 0.9053 - loss: 0.2267 - val_accuracy: 0.8869 - val_loss: 0.2639
Epoch 7/50
[1m309/309[0m 

**Evaluate the model**

In [9]:
# ipython-input-8-59ed252f6e23
# Make predictions using the trained model
y_pred = model.predict(X_test_transformed)
y_pred_binary = (y_pred > 0.5).astype(int) # Convert probabilities to binary predictions (0 or 1)


# Calculate accuracy and print evaluation metrics
accuracy = accuracy_score(y_test, y_pred_binary) # Use binary predictions for accuracy calculation
print(f"Accuracy: {accuracy:.2f}")
print("Classification Report:")
print(classification_report(y_test, y_pred_binary)) # Use binary predictions for classification report
print("Confusion Matrix:")
print(confusion_matrix(y_test, y_pred_binary)) # Use binary predictions for confusion matrix

[1m78/78[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 13ms/step
Accuracy: 0.87
Classification Report:
              precision    recall  f1-score   support

       False       0.91      0.95      0.93      2055
        True       0.66      0.51      0.57       411

    accuracy                           0.87      2466
   macro avg       0.78      0.73      0.75      2466
weighted avg       0.87      0.87      0.87      2466

Confusion Matrix:
[[1948  107]
 [ 202  209]]


**Feature Importance (Coefficients)**

In [None]:
# Feature Importance (Coefficients)
feature_names = (
    numerical_columns +
    preprocessor.named_transformers_['cat'].get_feature_names_out(categorical_columns).tolist()
)


# Instead of using feature_importances_ directly, you can use permutation importance
# This method works for any model type, including neural networks

# Calculate permutation importances
# Instead of 'accuracy', use a regression metric like 'neg_mean_squared_error' or 'r2'
result = permutation_importance(model, X_test_transformed, y_test, n_repeats=10, random_state=42, scoring='neg_mean_squared_error')
# OR
# result = permutation_importance(model, X_test_transformed, y_test, n_repeats=10, random_state=42, scoring='r2')

# Get feature importances
feature_importances = result.importances_mean

coefficients = pd.DataFrame({
    'Feature': feature_names,
    'Importance': feature_importances  # Use 'Importance' instead of 'Coefficient'
}).sort_values(by='Importance', ascending=False)  # Sort by importance

print("\nFeature Importance:")
print(coefficients)

[1m78/78[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step
[1m78/78[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step
[1m78/78[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step
[1m78/78[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step
[1m78/78[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step
[1m78/78[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step
[1m78/78[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step
[1m78/78[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step
[1m78/78[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step
[1m78/78[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step
[1m78/78[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step
[1m78/78[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step
[1m78/78[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step
[1m78/78[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0

In [None]:
# Make predictions and convert to binary using a threshold (e.g., 0.5)
y_pred = model.predict(X_test_transformed) # Changed 'model' to 'dt_model'
y_pred_binary = np.where(y_pred > 0.5, 1, 0)  # Convert to 0 or 1 based on threshold

# Calculate classification metrics using the binary predictions
accuracy = accuracy_score(y_test, y_pred_binary)
precision = precision_score(y_test, y_pred_binary)
recall = recall_score(y_test, y_pred_binary)
f1 = f1_score(y_test, y_pred_binary)

print(f"Accuracy: {accuracy:.2f}")
print(f"Precision: {precision:.2f}")
print(f"Recall: {recall:.2f}")
print(f"F1 Score: {f1:.2f}")

Accuracy: 0.89
Precision: 0.78
Recall: 0.51
F1 Score: 0.62
