#### Machine Learning

In [None]:
from sklearn.model_selection import GridSearchCV

# Base XGBoost classifier handed to the search (seeded for repeatability)
xgb_model = xgb.XGBClassifier(random_state=42)

# Candidate hyper-parameters: 3 * 3 * 3 * 2 * 2 = 108 combinations
param_grid = dict(
    n_estimators=[50, 100, 150],      # fewer trees keep the model simpler
    max_depth=[3, 4, 5],              # shallow trees to limit overfitting
    learning_rate=[0.1, 0.01, 0.05],
    subsample=[0.8, 1.0],
    colsample_bytree=[0.8, 1.0],
)

# Exhaustive search: 3-fold CV per combination, scored on accuracy, all cores
grid_search = GridSearchCV(
    xgb_model,
    param_grid,
    scoring='accuracy',
    cv=3,
    verbose=1,
    n_jobs=-1,
)
grid_search.fit(X_train_preprocessed, y_train)

# Estimator with the best cross-validated configuration
best_model = grid_search.best_estimator_

# Predictions on both splits, consumed by the reporting cell further down
train_preds = best_model.predict(X_train_preprocessed)
test_preds = best_model.predict(X_test_preprocessed)


In [None]:
from sklearn.linear_model import LogisticRegression
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import classification_report, confusion_matrix
import matplotlib.pyplot as plt
import seaborn as sns

# Initializing models
# Three baseline classifiers with otherwise-default settings; random_state is
# pinned to 42 on each so repeated runs give the same result.
log_reg = LogisticRegression(random_state=42)
decision_tree = DecisionTreeClassifier(random_state=42)
random_forest = RandomForestClassifier(random_state=42)

# Train a classifier and report how it does on the held-out split
def evaluate_model(model, X_train, y_train, X_test, y_test):
    """Fit ``model`` on the training data and report its test-set performance.

    Draws the test confusion matrix as an annotated heatmap and prints the
    classification report, both labelled with the model's class name.

    Parameters
    ----------
    model : estimator exposing ``fit`` and ``predict``
    X_train, y_train : training features and labels
    X_test, y_test : evaluation features and labels

    Returns
    -------
    The same ``model`` object, now fitted.
    """
    model.fit(X_train, y_train)
    predicted = model.predict(X_test)

    # Compute both summaries up front, before anything is drawn or printed
    cm = confusion_matrix(y_test, predicted)
    report_text = classification_report(y_test, predicted)
    model_name = model.__class__.__name__

    # Confusion-matrix heatmap
    fig, ax = plt.subplots(figsize=(6, 5))
    sns.heatmap(cm, annot=True, fmt='d', cmap='Blues', cbar=False, ax=ax)
    ax.set_xlabel('Predicted')
    ax.set_ylabel('Actual')
    ax.set_title(f'{model_name} Confusion Matrix')
    plt.show()

    print(f"{model_name} Classification Report:")
    print(report_text)
    return model

# Each call below fits the given model in place on the preprocessed training
# data, then shows its test-set confusion matrix and classification report.
# The fitted model returned by the last call is echoed as the cell output.

# Evaluating Logistic Regression
evaluate_model(log_reg, X_train_preprocessed, y_train, X_test_preprocessed, y_test)

# Evaluating Decision Tree
evaluate_model(decision_tree, X_train_preprocessed, y_train, X_test_preprocessed, y_test)

# Evaluating Random Forest
evaluate_model(random_forest, X_train_preprocessed, y_train, X_test_preprocessed, y_test)


In [None]:
# Heatmap of a confusion matrix for one data split
def plot_confusion_matrix(true_values, predictions, set_name):
    """Draw the confusion matrix of ``predictions`` against ``true_values``.

    ``set_name`` (e.g. "Training") is used as the prefix of the figure title.
    The figure is shown immediately; nothing is returned.
    """
    cm = confusion_matrix(true_values, predictions)

    fig, ax = plt.subplots(figsize=(6, 5))
    sns.heatmap(cm, annot=True, fmt='d', cmap='Blues', cbar=False, ax=ax)
    ax.set_xlabel('Predicted')
    ax.set_ylabel('Actual')
    ax.set_title(f'{set_name} Confusion Matrix')
    plt.show()

# Text report (per-class metrics) for one data split
def print_classification_report(true_values, predictions, set_name):
    """Print a headed classification report for ``predictions`` vs ``true_values``.

    ``set_name`` (e.g. "Testing") is used as the prefix of the header line.
    """
    report_text = classification_report(true_values, predictions)
    # Header and report each end up on their own line, as with two prints
    print(f"{set_name} Classification Report:", report_text, sep="\n")

# Tuned-XGBoost results per split: heatmap first, then the text report
for split_label, actual, predicted in (
    ("Training", y_train, train_preds),
    ("Testing", y_test, test_preds),
):
    plot_confusion_matrix(actual, predicted, split_label)
    print_classification_report(actual, predicted, split_label)

# Winning hyper-parameters and the best cross-validation score from the search
print("Best Parameters:", grid_search.best_params_)
print("Best Score:", grid_search.best_score_)


In [None]:
# Create the final pipeline using the best model.
#
# `xgb_model` was only passed to GridSearchCV as a template, so the instance
# itself still carries the default hyper-parameters. Copy the winning settings
# onto it before building the pipeline; otherwise the exported model would
# ignore the tuning done above. The same `xgb_model` object is kept (rather
# than swapping in another estimator) because a later cell uses `xgb_model`
# directly and relies on it being fitted through this pipeline.
xgb_model.set_params(**grid_search.best_params_)

final_pipeline = Pipeline(steps=[
    ('preprocessing', preprocessor),
    ('classifier', xgb_model),
])

final_pipeline

In [None]:
# Fit the pipeline to your data
# Note: this passes X_train (not X_train_preprocessed) because the pipeline
# runs its own 'preprocessing' step before the classifier.
final_pipeline.fit(X_train, y_train)

In [None]:
# Column names of the frame the final pipeline was fitted on
X_train.columns

In [None]:
# Summary statistics of the training features
# (presumably the source of the min/max ranges used further down; confirm)
X_train.describe()

In [None]:
# 3) Save pipeline as pkl file
# The whole fitted pipeline (preprocessing + classifier) is written to the
# current working directory. joblib files are pickle-based, so only load
# artifacts that come from a trusted source.
import joblib

joblib.dump(final_pipeline, 'final_ECommerce_model.pkl')


In [None]:
# Reload the pipeline that was written to disk by the joblib.dump call above
model = joblib.load('final_ECommerce_model.pkl')

import random

class SatisfactionFinder:
    """Randomly probe a classifier for an input it labels 'Not Satisfied' (class 0).

    Parameters
    ----------
    model : object with ``predict``
        Classifier applied to the preprocessed sample.
    preprocessor : object with ``transform``
        Applied to the one-row DataFrame before prediction.
    features : list of str
        Feature names to sample; each must have an entry in the ranges table.
    trials : int, default 50
        Maximum number of random samples to try.
    seed : int or None, default None
        When given, sampling uses a private ``random.Random(seed)`` so the
        search is reproducible. When None, the global ``random`` module state
        is used, as before.
    ranges : dict or None, default None
        Optional ``{feature: (low, high)}`` overrides/additions merged on top
        of ``DEFAULT_RANGES``.
    """

    # Inclusive (low, high) sampling bounds per feature. A float lower bound
    # selects continuous sampling; an int lower bound selects integer sampling.
    DEFAULT_RANGES = {
        'estimated_vs_actual_shipping': (-189, 146),
        'time_to_delivery': (-7, 208),
        'payment_value': (0.0, 13664.08),
        'order_item_id': (1.0, 21.0),
        'late_delivery': (0, 1),  # Binary feature
    }

    def __init__(self, model, preprocessor, features, trials=50, seed=None, ranges=None):
        self.model = model
        self.preprocessor = preprocessor
        self.features = features
        self.trials = trials
        # Copy so per-instance overrides never leak into the class-level table
        self.ranges = dict(self.DEFAULT_RANGES)
        if ranges:
            self.ranges.update(ranges)
        # The `random` module exposes the same uniform/randint API as Random
        self._rng = random.Random(seed) if seed is not None else random

    def random_input(self):
        """Generate a random input within plausible ranges for each feature.

        Returns
        -------
        dict
            ``{feature: value}`` in the order of ``self.features``.

        Raises
        ------
        KeyError
            If a requested feature has no sampling range.
        """
        sample = {}
        for feature in self.features:
            if feature not in self.ranges:
                raise KeyError(f"no sampling range defined for feature {feature!r}")
            low, high = self.ranges[feature]
            if isinstance(low, float):
                sample[feature] = self._rng.uniform(low, high)
            else:
                sample[feature] = self._rng.randint(low, high)
        return sample

    def find_not_satisfied(self):
        """Loop to find a set of values that predict 'Not Satisfied'.

        Returns
        -------
        tuple
            ``(user_data, "Not Satisfied")`` for the first sample predicted as
            class 0, or ``(None, "Not found")`` once ``trials`` samples have
            been tried without success.
        """
        for _ in range(self.trials):
            # Generate random input
            user_data = self.random_input()

            # One-row frame so the preprocessor sees named columns
            input_df = pd.DataFrame([user_data])

            # Preprocess and predict
            input_preprocessed = self.preprocessor.transform(input_df)
            prediction = self.model.predict(input_preprocessed)

            # Check if prediction is 'Not Satisfied'
            if prediction[0] == 0:
                return user_data, "Not Satisfied"

        return None, "Not found"

# Relies on `xgb_model` and `preprocessor` from earlier cells, both of which
# must already be fitted (presumably via final_pipeline.fit; confirm).
# The four feature names listed here must all have a range in SatisfactionFinder.
finder = SatisfactionFinder(xgb_model, preprocessor, ['estimated_vs_actual_shipping', 'time_to_delivery', 'payment_value', 'late_delivery'], trials=200)

# Find a 'Not Satisfied' prediction
user_data, result = finder.find_not_satisfied()

# user_data is None and result is "Not found" when no trial predicted class 0
print("User Data:", user_data)
print("Result:", result)

In [None]:
# Spot-check the reloaded pipeline on one hand-written order flagged as late
model.predict(pd.DataFrame({
    'estimated_vs_actual_shipping': [130],
    'time_to_delivery': [133],
    'payment_value': [9591],
    'late_delivery': [1],
}, dtype=float))

In [None]:
# Spot-check the reloaded pipeline on one hand-written order not flagged as late
model.predict(pd.DataFrame({
    'estimated_vs_actual_shipping': [5],
    'time_to_delivery': [7],
    'payment_value': [300],
    'late_delivery': [0],
}, dtype=float))

#### Making app

In [None]:
%%writefile ECB.py

import streamlit as st
import joblib
import numpy as np

# Load your trained pipeline
model = joblib.load('final_ECommerce_model.pkl')

# Define the structure of your app
def main():
    st.title('Customer Satisfaction Prediction App')

   # Define inputs with appropriate ranges and default values based on your data
    estimated_vs_actual_shipping = st.number_input('Estimated vs Actual Shipping Days', min_value=-189, max_value=146, value=11)
    time_to_delivery = st.number_input('Time to Delivery', min_value=-7, max_value=208, value=9)
    payment_value = st.number_input('Payment Value', min_value=0.0, max_value=13664.08, value=107.78)
    late_delivery = st.number_input('Late Delivery', min_value=0, max_value=1, value=0) 

# Prediction button
    if st.button('Predict Satisfaction'):
        # Create an array with the input data
        # Make sure all inputs are included in the array in the correct order
        input_data = np.array([[estimated_vs_actual_shipping, time_to_delivery, payment_value, late_delivery]])

        # Get the prediction
        prediction = model.predict(input_data)

        # Output the prediction
        if prediction[0] == 1:
            st.success('The customer is satisfied.')
        else:
            st.error('The customer is not satisfied')

if __name__ == '__main__':
    main()

In [None]:
import ipywidgets as widgets
from IPython.display import display, HTML
import joblib
import pandas as pd

# Extra CSS so long widget labels get more room and a larger font
style = """
<style>
.widget-label { min-width: 25ex !important; }
.widget-label p { font-size: 16px !important; }
.slider-width { width: 100% !important; } /* Adjust the width as needed */
</style>
"""
display(HTML(style))

# Saved pipeline used by the prediction callback
model = joblib.load('final_ECommerce_model.pkl')

# One layout shared by all sliders (adjust the width as needed)
slider_layout = widgets.Layout(width='500px')

# Options common to every slider; 'initial' description width keeps the
# full label visible instead of collapsing it
slider_options = dict(
    style={'description_width': 'initial'},
    layout=slider_layout,
)

# Input sliders, one per model feature
estimated_vs_actual_shipping = widgets.IntSlider(
    value=11, min=-189, max=146, step=1,
    description='Estimated vs Actual Shipping Days:',
    **slider_options,
)

time_to_delivery = widgets.IntSlider(
    value=9, min=-7, max=208, step=1,
    description='Time to Delivery:',
    **slider_options,
)

payment_value = widgets.FloatSlider(
    value=107.78, min=0.0, max=13664.08, step=0.01,
    description='Payment Value:',
    **slider_options,
)

late_delivery = widgets.IntSlider(
    value=0, min=0, max=1, step=1,
    description='Late Delivery:',
    **slider_options,
)

# Button that triggers a prediction
predict_button = widgets.Button(description='Predict Satisfaction')

# Click handler: read the sliders, run the model, show the verdict
def predict_satisfaction(b):
    """Predict satisfaction from the current slider values and update the label.

    ``b`` is the clicked button passed in by ipywidgets; it is not used.
    """
    sliders = {
        'estimated_vs_actual_shipping': estimated_vs_actual_shipping,
        'time_to_delivery': time_to_delivery,
        'payment_value': payment_value,
        'late_delivery': late_delivery,
    }
    # One-row frame: each column holds the current value of its slider
    user_input = pd.DataFrame({name: [slider.value] for name, slider in sliders.items()})

    prediction = model.predict(user_input)

    # Class 1 means satisfied; anything else is reported as not satisfied
    satisfied = prediction[0] == 1
    result_label.value = (
        'The customer is satisfied.' if satisfied else 'The customer is not satisfied.'
    )

# Run predict_satisfaction whenever the button is clicked
predict_button.on_click(predict_satisfaction)

# Label the callback writes its verdict into
result_label = widgets.Label()

# Everything to render, top to bottom: sliders, button, then the result label
input_widgets = [
    estimated_vs_actual_shipping,
    time_to_delivery,
    payment_value,
    late_delivery,
    predict_button,
    result_label,
]

display(*input_widgets)

### Types of customer behavior 
1. **Complex:** occurs when customers invest significant time and effort in evaluating products before making a purchase. High-involvement products, such as cars or expensive electronics, often trigger this type of behavior.
2. **Dissonance-reducing:** takes place when customers experience post-purchase anxiety or uncertainty about their decision. This can arise when consumers feel that they had to decide quickly, without sufficient time to weigh the pros and cons, or when their choice was based on limited information.
3. **Habitual buying:** characterized by consumers relying on routines and habits when making purchasing decisions. This type of behavior is common in low-involvement product categories, such as groceries or personal care items, where consumers are less inclined to research products extensively before purchase.
4. **Variety seeking:** arises when customers actively seek new experiences, products, or brands, even if satisfied with their current choices. This behavior typically occurs in categories where products are low-involvement, low-cost commodities, and consumers feel minimal risk in trying new options.

# Market Insights

# Customer Segmentation