# Import Libraries


In [51]:
import numpy as np
import pandas as pd
from sklearn.model_selection import StratifiedKFold
from sklearn.preprocessing import LabelEncoder, MultiLabelBinarizer
from sklearn.ensemble import BaggingClassifier,StackingClassifier, RandomForestClassifier,AdaBoostClassifier, GradientBoostingClassifier
from sklearn.metrics import accuracy_score
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import VotingClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.linear_model import LogisticRegression

# Creating the Dataset

In [52]:
# Raw dataset: parallel lists of equal length (19 entries each), one entry per row.
# The five preference columns become the model features; "Pizza Topping" holds a
# comma+space separated list of toppings per row and is the multi-label target.
# NOTE: label spellings (e.g. "Fountian Pen", lowercase "bishop"/"rook"/"pineapple")
# are used verbatim by the label encoders and by the `mappings` menu, so they must
# stay in sync with those if ever corrected.
data = {
    "Chess Piece": ["Queen", "Queen", "Queen", "bishop", "rook", "Queen", "Queen", "Queen", "bishop", "Queen",
                    "bishop", "Queen", "rook", "Queen", "rook", "rook", "Queen", "Queen", "bishop"],
    "Pen": ["Gel Pen", "Gel Pen", "Ball point Pen", "Gel Pen", "Fountian Pen", "Ball point Pen", "Ball point Pen",
            "Ball point Pen", "Ball point Pen", "Gel Pen", "Fountian Pen", "Ball point Pen", "Ball point Pen",
            "Gel Pen", "Gel Pen", "Ball point Pen", "Ball point Pen", "Ball point Pen", "Ball point Pen"],
    "Sport": ["Cricket", "Basketball", "Cricket", "Football", "Cricket", "Football", "Football", "Basketball",
              "Cricket", "Football", "Football", "Cricket", "Cricket", "Cricket", "Basketball", "Basketball",
              "Basketball", "Baseball", "Football"],
    "Ghosts": ["No", "Yes", "No", "Yes", "Yes", "Yes", "No", "No", "No", "Yes", "Yes", "No", "No", "No", "Yes",
               "Yes", "No", "No", "No"],
    "Game": ["Real-Time Games", "Real-Time Games", "GTA5", "Real-Time Games", "Real-Time Games", "Real-Time Games",
             "Pokémon GO", "Real-Time Games", "Real-Time Games", "Real-Time Games", "GTA5", "Real-Time Games",
             "Real-Time Games", "Need For Speed", "Real-Time Games", "GTA5", "GTA5", "Pokémon GO", "Real-Time Games"],
    # Multi-label target: each string is split on ", " later to get the individual toppings.
    "Pizza Topping": ["Mushrooms, Onions, Olives", "Onions, pineapple, Olives", "Mushrooms, Onions, Olives",
                      "pineapple, Olives", "Mushrooms, Onions", "Mushrooms, Onions, Olives",
                      "Pepperoni, Onions, pineapple", "pineapple, Olives", "Mushrooms, Onions, Olives",
                      "Mushrooms, Onions, pineapple", "Mushrooms", "Mushrooms, Onions, pineapple",
                      "Mushrooms, Onions, pineapple", "Mushrooms, Onions, pineapple",
                      "Pepperoni, Onions, pineapple", "Mushrooms, Onions, pineapple",
                      "Mushrooms, Onions, Olives", "Pepperoni, Onions, pineapple", "Mushrooms, Onions, Olives"]
}

# Tabular view of the raw answers; later cells replace the text columns with encoded ones.
df = pd.DataFrame(data)

# One-Hot Encoding for Pizza Topping

In [53]:
# Turn each "A, B, C" topping string into a list of topping names.
topping_lists = df['Pizza Topping'].str.split(', ')

# One indicator (0/1) column per distinct topping, in the binarizer's class order.
mlb = MultiLabelBinarizer()
toppings = pd.DataFrame(mlb.fit_transform(topping_lists), columns=mlb.classes_)

# Swap the text column for the indicator columns, which end up as the last columns of df.
df = pd.concat([df.drop(columns='Pizza Topping'), toppings], axis=1)

# Encoding Other Categorical Variables


In [54]:
# The topping indicator columns were appended last, so everything before them
# is a categorical feature column that still holds text labels.
n_topping_columns = len(toppings.columns)

# Keep one fitted encoder per feature so user input can be encoded the same way later.
label_encoders = {}
for column in df.columns[:-n_topping_columns]:
    encoder = LabelEncoder().fit(df[column])
    df[column] = encoder.transform(df[column])
    label_encoders[column] = encoder

# Defining Features and Target


In [55]:
# Features: every column of df that is not a topping indicator.
X = df.drop(columns=toppings.columns)
# Targets: one 0/1 column per topping (a separate binary problem is trained for each).
y = toppings

# Model Initialization and Storage


In [56]:
# Fitted estimator(s) per topping; each ensemble section overwrites these entries.
models = dict()

# Per-fold accuracy scores collected by each ensemble section (one independent list per method).
ensemble_accuracies = {
    method: [] for method in ('Bagging', 'Boosting', 'Stacking', 'Voting')
}

# Input Mappings

In [57]:
# Menu shown to the user: feature name -> {menu number: label}.
# The menu numbers are only a selection aid; the chosen label text is what gets
# passed to the fitted label encoders afterwards, so the labels must match the
# dataset spellings exactly.
mappings = {
    "Chess Piece": dict(enumerate(["Queen", "bishop", "rook"])),
    "Pen": dict(enumerate(["Gel Pen", "Ball point Pen", "Fountian Pen"])),
    "Sport": dict(enumerate(["Cricket", "Basketball", "Football", "Baseball"])),
    "Ghosts": dict(enumerate(["No", "Yes"])),
    "Game": dict(enumerate(["GTA5", "Need For Speed", "Pokémon GO", "Real-Time Games"])),
}

# Collecting User Input


In [58]:
def get_user_input():
    """Interactively ask the user to pick one option for every feature in `mappings`.

    For each feature the numbered options are printed, then the user is
    prompted repeatedly until they type an integer that is one of the menu
    numbers. Non-numeric input and out-of-range numbers each print a hint and
    re-prompt.

    Returns:
        dict: feature name -> selected menu number (a key of that feature's mapping).
    """
    chosen = {}
    for feature, mapping in mappings.items():
        print(f"\n{feature} Options:")
        for key, value in mapping.items():
            print(f"  {key}: {value}")

        # `selection` stays outside the menu until a valid number has been typed.
        selection = None
        while selection not in mapping:
            try:
                selection = int(input(f"Select {feature} (Enter the corresponding number): "))
            except ValueError:
                print("Please enter a number.")
                continue
            if selection not in mapping:
                print("Invalid selection. Try again.")
        chosen[feature] = selection
    return chosen

# Blocks on interactive input: one prompt per feature; values are menu numbers, not labels.
user_data = get_user_input()


Chess Piece Options:
  0: Queen
  1: bishop
  2: rook
Select Chess Piece (Enter the corresponding number): 1

Pen Options:
  0: Gel Pen
  1: Ball point Pen
  2: Fountian Pen
Select Pen (Enter the corresponding number): 2

Sport Options:
  0: Cricket
  1: Basketball
  2: Football
  3: Baseball
Select Sport (Enter the corresponding number): 2

Ghosts Options:
  0: No
  1: Yes
Select Ghosts (Enter the corresponding number): 1

Game Options:
  0: GTA5
  1: Need For Speed
  2: Pokémon GO
  3: Real-Time Games
Select Game (Enter the corresponding number): 0


# Preprocess User Input


In [59]:
# Translate the menu numbers into their text labels in a *new* dict. Rewriting
# user_data in place made a second run of this cell fail with KeyError, because
# the stored values were then labels instead of menu numbers.
user_labels = {feature: mappings[feature][value] for feature, value in user_data.items()}

# Single-row frame whose columns follow label_encoders (which was filled by
# iterating the training feature columns), so the column order does not depend
# on the order in which `mappings` happens to list the features.
user_df = pd.DataFrame([user_labels], columns=list(label_encoders))

# Encode each label with the encoder fitted on the training data for that column.
for column, le in label_encoders.items():
    user_df[column] = le.transform(user_df[column])

# Split and Model Training: Bagging

Final Tuning Results:

Mushrooms -
Best Params: {'n_estimators': 5, 'estimator__max_depth': 5}, Avg Accuracy: 0.7167 ± 0.2105

Olives -
Best Params: {'n_estimators': 5, 'estimator__max_depth': 5}, Avg Accuracy: 0.3708 ± 0.1626

Onions -
Best Params: {'n_estimators': 5, 'estimator__max_depth': 5}, Avg Accuracy: 0.7000 ± 0.2646

Pepperoni -
Best Params: {'n_estimators': 5, 'estimator__max_depth': 5}, Avg Accuracy: 0.8292 ± 0.0776

pineapple -
Best Params: {'n_estimators': 5, 'estimator__max_depth': 5}, Avg Accuracy: 0.3375 ± 0.2190

param_grid = {
    'estimator__max_depth': [5, 10, 15, 20],  
    'n_estimators': [5, 10, 15, 20]   }

In [60]:
#Bagging
def _make_bagging_clf():
    """Build an unfitted bagging ensemble of 5 depth-5 decision trees (seeded)."""
    return BaggingClassifier(
        DecisionTreeClassifier(random_state=42, max_depth=5),
        n_estimators=5,
        random_state=42
    )

# Start from an empty list so re-running this cell does not append a second
# copy of the fold scores.
ensemble_accuracies['Bagging'] = []

# Seeded splitter: every split() call reproduces the same shuffled stratified folds,
# so it can be created once instead of once per topping.
skf = StratifiedKFold(n_splits=8, shuffle=True, random_state=42)

for topping in toppings:
    # Cross-validation is used only to estimate accuracy for this topping.
    for train_index, test_index in skf.split(X, y[topping]):
        X_train, X_test = X.iloc[train_index], X.iloc[test_index]
        y_train, y_test = y[topping].iloc[train_index], y[topping].iloc[test_index]

        fold_clf = _make_bagging_clf()
        fold_clf.fit(X_train, y_train)
        y_pred = fold_clf.predict(X_test)

        ensemble_accuracies['Bagging'].append(accuracy_score(y_test, y_pred))

    # The model kept for prediction is refit on all rows. Previously the stored
    # model was whichever fold happened to run last, i.e. trained on a subset.
    clf = _make_bagging_clf()
    clf.fit(X, y[topping])
    models[topping] = clf

def predict_user_input_output_BaggingClassifier(user_df):
    """Predict toppings for one encoded user row with the models currently in `models`.

    Each entry of the global `models` must be a single fitted estimator whose
    ``predict`` returns a 0/1 label for the row (as stored by the bagging
    section). Toppings are ordered by predicted label, highest first; of the
    first three, those with a label above 0.5 are printed and returned.

    Args:
        user_df: single-row frame of encoded features.

    Returns:
        list: names of the toppings predicted as present (at most three).
    """
    labels = {}
    for name, fitted in models.items():
        labels[name] = fitted.predict(user_df)[0]

    # Stable sort: toppings with equal labels keep their order from `models`.
    ranked = sorted(labels.items(), key=lambda item: item[1], reverse=True)

    print("\nFinal Prediction (Top 1 to 3):")
    chosen = []
    for rank, (name, label) in enumerate(ranked[:3], start=1):
        if not label > 0.5:
            continue
        chosen.append(name)
        print(f"  {rank}. {name}")
    return chosen
# Must run while `models` still holds the bagging estimators (later sections overwrite them).
output_BaggingClassifier=predict_user_input_output_BaggingClassifier(user_df)





Final Prediction (Top 1 to 3):
  1. Mushrooms


# Boosting


Final Tuning Results:

Mushrooms:
  AdaBoostClassifier - Best Params: {'n_estimators': 20, 'learning_rate': 0.5}, Avg Accuracy: 0.8750 ± 0.2165
  GradientBoostingClassifier - Best Params: {'n_estimators': 150, 'max_depth': 4, 'learning_rate': 0.01}, Avg Accuracy: 0.6250 ± 0.2320
  
Olives:
  AdaBoostClassifier - Best Params: {'n_estimators': 30, 'learning_rate': 0.1}, Avg Accuracy: 0.5000 ± 0.2041
  GradientBoostingClassifier - Best Params: {'n_estimators': 150, 'max_depth': 4, 'learning_rate': 0.01}, Avg Accuracy: 0.5625 ± 0.3111
  
Onions:
  AdaBoostClassifier - Best Params: {'n_estimators': 20, 'learning_rate': 0.01}, Avg Accuracy: 0.7083 ± 0.2465
  GradientBoostingClassifier - Best Params: {'n_estimators': 150, 'max_depth': 4, 'learning_rate': 0.01}, Avg Accuracy: 0.7500 ± 0.2041
  
Pepperoni:
  AdaBoostClassifier - Best Params: {'n_estimators': 20, 'learning_rate': 0.01}, Avg Accuracy: 0.9167 ± 0.1443
  GradientBoostingClassifier - Best Params: {'n_estimators': 150, 'max_depth': 4, 'learning_rate': 0.01}, Avg Accuracy: 0.9167 ± 0.1443
  
pineapple:
  AdaBoostClassifier - Best Params: {'n_estimators': 30, 'learning_rate': 0.1}, Avg Accuracy: 0.3542 ± 0.2273
  GradientBoostingClassifier - Best Params: {'n_estimators': 50, 'max_depth': 2, 'learning_rate': 0.1}, Avg Accuracy: 0.3333 ± 0.2764


ada_param_grid = {
    'n_estimators': [10, 20, 30],
    'learning_rate': [0.01, 0.1, 0.5]
}

gb_param_grid = {
    'n_estimators': [50, 100, 150],
    'learning_rate': [0.01, 0.1, 0.2],
    'max_depth': [2, 3, 4]
}



In [61]:
def _make_boosting_pair():
    """Build the unfitted (AdaBoost, GradientBoosting) pair used for every fit."""
    # AdaBoost over shallow (depth-2) decision trees
    ada = AdaBoostClassifier(
        DecisionTreeClassifier(max_depth=2),
        n_estimators=20,
        learning_rate=0.1,
        random_state=42
    )
    # Gradient boosting with shallow trees and a small learning rate
    gb = GradientBoostingClassifier(
        n_estimators=150,
        learning_rate=0.01,
        max_depth=2,
        random_state=42
    )
    return ada, gb

# Initialize lists to store accuracies. The shared 'Boosting' list is reset as
# well, so re-running this cell does not append a second copy of the fold scores.
ada_accuracies = []
gb_accuracies = []
ensemble_accuracies['Boosting'] = []

# Seeded splitter: each split() call reproduces the same shuffled stratified folds.
skf = StratifiedKFold(n_splits=8, shuffle=True, random_state=42)

for topping in toppings:
    # Cross-validation is used only to estimate accuracy for this topping.
    for train_index, test_index in skf.split(X, y[topping]):
        X_train, X_test = X.iloc[train_index], X.iloc[test_index]
        y_train, y_test = y[topping].iloc[train_index], y[topping].iloc[test_index]

        fold_ada, fold_gb = _make_boosting_pair()
        fold_ada.fit(X_train, y_train)
        fold_gb.fit(X_train, y_train)

        ada_pred = fold_ada.predict(X_test)
        gb_pred = fold_gb.predict(X_test)

        # Store individual accuracies
        ada_accuracies.append(accuracy_score(y_test, ada_pred))
        gb_accuracies.append(accuracy_score(y_test, gb_pred))

        # Two-voter combination: the mean of the 0/1 labels is rounded, and
        # np.round sends a 0.5 tie to 0, so a topping is predicted only when
        # both models agree on 1.
        final_pred = np.round((ada_pred + gb_pred) / 2).astype(int)
        ensemble_accuracies['Boosting'].append(accuracy_score(y_test, final_pred))

    # The pair kept for prediction is refit on all rows. Previously the stored
    # pair was whichever fold happened to run last, i.e. trained on a subset.
    ada_boost, gb_boost = _make_boosting_pair()
    ada_boost.fit(X, y[topping])
    gb_boost.fit(X, y[topping])
    models[topping] = (ada_boost, gb_boost)

# Calculate and print the average accuracies
ada_avg_accuracy = np.mean(ada_accuracies)
gb_avg_accuracy = np.mean(gb_accuracies)

print(f"\nAverage Accuracy for AdaBoostClassifier: {ada_avg_accuracy:.4f}")
print(f"Average Accuracy for GradientBoostingClassifier: {gb_avg_accuracy:.4f}")

def predict_user_input(user_df):
    """Predict toppings for one encoded user row with the boosting model pairs.

    Each entry of the global `models` must be an ``(ada_boost, gb_boost)`` tuple
    of fitted estimators (as stored by the boosting section). The two 0/1
    predictions are combined by majority vote; with two voters a disagreement
    resolves to 0, because ``np.bincount(...).argmax()`` returns the lowest
    label on a tie.

    Toppings are ordered by voted label, highest first. Of the first three,
    only those whose voted label is above 0.5 are printed and returned, the
    same rule the bagging, stacking and voting predictors apply. Without this
    check the function listed three toppings even when the ensemble voted 0
    for them.

    Args:
        user_df: single-row frame of encoded features.

    Returns:
        list: names of the toppings predicted as present (at most three).
    """
    predictions = {}
    for topping, (ada_boost, gb_boost) in models.items():
        preds = np.array([
            ada_boost.predict(user_df),
            gb_boost.predict(user_df)
        ])
        # Column-wise majority vote across the two models
        final_pred = np.apply_along_axis(lambda x: np.bincount(x).argmax(), axis=0, arr=preds)
        predictions[topping] = final_pred[0]

    # Stable sort: toppings with equal votes keep their order from `models`.
    sorted_predictions = sorted(predictions.items(), key=lambda x: x[1], reverse=True)
    print("\nFinal Prediction (Top 1 to 3):")
    res = []
    for i, (topping, vote) in enumerate(sorted_predictions[:3], start=1):
        if vote > 0.5:
            res.append(topping)
            print(f"  {i}. {topping}")
    return res

# Must run while `models` still holds the (AdaBoost, GradientBoosting) pairs from this section.
output_Boosting = predict_user_input(user_df)





Average Accuracy for AdaBoostClassifier: 0.6833
Average Accuracy for GradientBoostingClassifier: 0.6250

Final Prediction (Top 1 to 3):
  1. Mushrooms
  2. Olives
  3. Onions


# Stacking

Final Tuning Results for Stacking Classifier:

Mushrooms:
  DecisionTreeClassifier - Best Params: {'max_depth': 20}
  KNeighborsClassifier - Best Params: {'n_neighbors': 5}
  MetaModel (LogisticRegression) - Best Params: {'C': 10}
  Avg Accuracy: 0.6190 ± 0.0753

Olives:
  DecisionTreeClassifier - Best Params: {'max_depth': 20}
  KNeighborsClassifier - Best Params: {'n_neighbors': 2}
  MetaModel (LogisticRegression) - Best Params: {'C': 0.1}
  Avg Accuracy: 0.5238 ± 0.2259

Onions:
  DecisionTreeClassifier - Best Params: {'min_samples_split': 10, 'max_depth': 15}
  KNeighborsClassifier - Best Params: {'n_neighbors': 5}
  MetaModel (LogisticRegression) - Best Params: {'C': 0.1}
  Avg Accuracy: 0.8571 ± 0.1650

Pepperoni:
  DecisionTreeClassifier - Best Params: {'max_depth': 20}
  KNeighborsClassifier - Best Params: { 'n_neighbors': 2}
  MetaModel (LogisticRegression) - Best Params: {'C': 0.1}
  Avg Accuracy: 0.8571 ± 0.1650

pineapple:
  DecisionTreeClassifier - Best Params: { 'max_depth': 20}
  KNeighborsClassifier - Best Params: {'n_neighbors': 2}
  MetaModel (LogisticRegression) - Best Params: {'C': 0.1}
  Avg Accuracy: 0.5714 ± 0.1214



  ## Parameter Grids for Tuning
dt_param_grid = {
    'max_depth': [10, 15, 20],
}

knn_param_grid = {
    'n_neighbors': [2, 3, 5],
}

meta_param_grid = {
    'C': [0.01, 0.1, 1, 10]
}

In [62]:
def _make_stacking_parts():
    """Build unfitted (decision tree, k-NN, stacking classifier) estimators.

    The stacking classifier takes the tree and k-NN as base estimators and a
    logistic-regression meta-model with C=0.1.
    """
    dt = DecisionTreeClassifier(max_depth=20, random_state=42)
    knn = KNeighborsClassifier(n_neighbors=2)
    meta_model = LogisticRegression(C=0.1, random_state=42)  # More regularization
    stacking = StackingClassifier(
        estimators=[
            ('dt', dt),
            ('knn', knn)
        ],
        final_estimator=meta_model,
        cv=5
    )
    return dt, knn, stacking

# Initialize lists to store accuracies. The shared 'Stacking' list is reset as
# well, so re-running this cell does not append a second copy of the fold scores.
dt_accuracies = []
knn_accuracies = []
ensemble_accuracies['Stacking'] = []

# Seeded splitter: each split() call reproduces the same shuffled stratified folds.
skf = StratifiedKFold(n_splits=6, shuffle=True, random_state=42)

for topping in toppings:
    # Cross-validation is used only to estimate accuracy for this topping.
    for train_index, test_index in skf.split(X, y[topping]):
        X_train, X_test = X.iloc[train_index], X.iloc[test_index]
        y_train, y_test = y[topping].iloc[train_index], y[topping].iloc[test_index]

        dt_clf, knn_clf, fold_stacking = _make_stacking_parts()

        # Base models are also fitted on their own to report their individual accuracy.
        dt_clf.fit(X_train, y_train)
        knn_clf.fit(X_train, y_train)

        dt_pred = dt_clf.predict(X_test)
        knn_pred = knn_clf.predict(X_test)

        dt_accuracies.append(accuracy_score(y_test, dt_pred))
        knn_accuracies.append(accuracy_score(y_test, knn_pred))

        # Stacked model, scored on the same held-out fold
        fold_stacking.fit(X_train, y_train)
        y_pred = fold_stacking.predict(X_test)
        ensemble_accuracies['Stacking'].append(accuracy_score(y_test, y_pred))

    # The model kept for prediction is refit on all rows. Previously the stored
    # model was whichever fold happened to run last, i.e. trained on a subset.
    _, _, stacking_clf = _make_stacking_parts()
    stacking_clf.fit(X, y[topping])
    models[topping] = stacking_clf

# Calculate and print the average accuracies
dt_avg_accuracy = np.mean(dt_accuracies)
knn_avg_accuracy = np.mean(knn_accuracies)

print(f"\nAverage Accuracy for DecisionTreeClassifier: {dt_avg_accuracy:.4f}")
print(f"Average Accuracy for KNeighborsClassifier: {knn_avg_accuracy:.4f}")

def predict_user_input(user_df):
    """Predict toppings for one encoded user row with the models currently in `models`.

    Expects every entry of the global `models` to be a single fitted estimator
    (as stored by the stacking section). Note that this redefines the
    `predict_user_input` declared by the boosting section.

    Args:
        user_df: single-row frame of encoded features.

    Returns:
        list: toppings among the three highest-ranked ones whose predicted
        label is above 0.5.
    """
    label_by_topping = {name: fitted.predict(user_df)[0] for name, fitted in models.items()}

    # Highest label first; ties keep the order in which toppings appear in `models`.
    top_three = sorted(label_by_topping.items(), key=lambda pair: pair[1], reverse=True)[:3]

    print("\nFinal Prediction (Top 1 to 3):")
    selected = []
    position = 0
    for name, label in top_three:
        position += 1
        if label > 0.5:
            selected.append(name)
            print(f"  {position}. {name}")
    return selected

# Must run while `models` still holds the stacking classifiers from this section.
output_Stacking = predict_user_input(user_df)





Average Accuracy for DecisionTreeClassifier: 0.6056
Average Accuracy for KNeighborsClassifier: 0.5417

Final Prediction (Top 1 to 3):
  1. Mushrooms
  2. Onions
  3. pineapple


# Voting

Final Tuning Results for Voting Classifier:

Mushrooms:
  
  RandomForestClassifier - Best Params: {'n_estimators': 50, 'min_samples_split': 2, 'max_depth': 7}
  
  GradientBoostingClassifier - Best Params: {'n_estimators': 150, 'max_depth': 7, 'learning_rate': 0.01}
  Avg Accuracy: 0.7708 ± 0.2348

Olives:
  
  RandomForestClassifier - Best Params: {'n_estimators': 150, 'min_samples_split': 10, 'max_depth': 5}
  
  GradientBoostingClassifier - Best Params: {'n_estimators': 150, 'max_depth': 7, 'learning_rate': 0.01}
  Avg Accuracy: 0.3750 ± 0.1614

Onions:
  
  RandomForestClassifier - Best Params: {'n_estimators': 150, 'min_samples_split': 10, 'max_depth': 5}
  
  GradientBoostingClassifier - Best Params: {'n_estimators': 150, 'max_depth': 7, 'learning_rate': 0.01}
  Avg Accuracy: 0.7500 ± 0.2041

Pepperoni:
  
  RandomForestClassifier - Best Params: {'n_estimators': 150, 'min_samples_split': 10, 'max_depth': 5}
  
  GradientBoostingClassifier - Best Params: {'n_estimators': 150, 'max_depth': 7, 'learning_rate': 0.01}
  Avg Accuracy: 0.8750 ± 0.1614

pineapple:
  
  RandomForestClassifier - Best Params: {'n_estimators': 150, 'min_samples_split': 10, 'max_depth': 5}
  
  GradientBoostingClassifier - Best Params: {'n_estimators': 50, 'max_depth': 3, 'learning_rate': 0.1}
  Avg Accuracy: 0.4167 ± 0.1863


  ## Parameter Grids for Tuning
rf_param_grid = {
    'n_estimators': [50, 100, 150],
    'max_depth': [5, 7, 10],
    'min_samples_split': [2, 5, 10]
}

gb_param_grid = {
    'n_estimators': [50, 100, 150],
    'learning_rate': [0.01, 0.1, 0.2],
    'max_depth': [3, 5, 7]
}

In [63]:
def _make_voting_parts():
    """Build unfitted (random forest, gradient boosting, hard-voting classifier) estimators."""
    rf = RandomForestClassifier(random_state=42, n_estimators=150, max_depth=5)
    gb = GradientBoostingClassifier(random_state=42, n_estimators=150, learning_rate=0.01, max_depth=7)
    voting = VotingClassifier(
        estimators=[
            ('rf', rf),
            ('gb', gb)
        ],
        voting='hard'
    )
    return rf, gb, voting

# Initialize lists to store accuracies. The shared 'Voting' list is reset as
# well, so re-running this cell does not append a second copy of the fold scores.
rf_accuracies = []
gb_accuracies = []
ensemble_accuracies['Voting'] = []

# Seeded splitter: each split() call reproduces the same shuffled stratified folds.
skf = StratifiedKFold(n_splits=8, shuffle=True, random_state=42)

for topping in toppings.columns:
    # Cross-validation is used only to estimate accuracy for this topping.
    for train_index, test_index in skf.split(X, y[topping]):
        X_train, X_test = X.iloc[train_index], X.iloc[test_index]
        y_train, y_test = y[topping].iloc[train_index], y[topping].iloc[test_index]

        rf_clf, gb_clf, fold_voting = _make_voting_parts()

        # Base models are also fitted on their own to report their individual accuracy.
        rf_clf.fit(X_train, y_train)
        gb_clf.fit(X_train, y_train)

        rf_pred = rf_clf.predict(X_test)
        gb_pred = gb_clf.predict(X_test)

        rf_accuracies.append(accuracy_score(y_test, rf_pred))
        gb_accuracies.append(accuracy_score(y_test, gb_pred))

        # Hard-voting ensemble, scored on the same held-out fold
        fold_voting.fit(X_train, y_train)
        y_pred = fold_voting.predict(X_test)
        ensemble_accuracies['Voting'].append(accuracy_score(y_test, y_pred))

    # The model kept for prediction is refit on all rows. Previously the stored
    # model was whichever fold happened to run last, i.e. trained on a subset.
    _, _, voting_clf = _make_voting_parts()
    voting_clf.fit(X, y[topping])
    models[topping] = voting_clf

# Calculate and print the average accuracies
rf_avg_accuracy = np.mean(rf_accuracies)
gb_avg_accuracy = np.mean(gb_accuracies)

print(f"\nAverage Accuracy for RandomForestClassifier: {rf_avg_accuracy:.4f}")
print(f"Average Accuracy for GradientBoostingClassifier: {gb_avg_accuracy:.4f}")

def predict_user_input(user_df):
    """Predict toppings for one encoded user row with the models currently in `models`.

    Expects every entry of the global `models` to be a single fitted estimator
    (as stored by the voting section). Note that this redefines the
    `predict_user_input` declared by earlier sections.

    Args:
        user_df: single-row frame of encoded features.

    Returns:
        list: toppings among the three highest-ranked ones whose predicted
        label is above 0.5.
    """
    scored = [(name, estimator.predict(user_df)[0]) for name, estimator in models.items()]

    # Descending by label; the sort is stable, so ties follow the order of `models`.
    scored.sort(key=lambda entry: entry[1], reverse=True)
    shortlist = scored[:3]

    print("\nFinal Prediction (Top 1 to 3):")
    kept = []
    for index in range(len(shortlist)):
        name, label = shortlist[index]
        if label > 0.5:
            kept.append(name)
            print(f"  {index + 1}. {name}")
    return kept

# Must run while `models` still holds the voting classifiers from this section.
output_Voting = predict_user_input(user_df)





Average Accuracy for RandomForestClassifier: 0.6833
Average Accuracy for GradientBoostingClassifier: 0.6750

Final Prediction (Top 1 to 3):
  1. Mushrooms


# Display Predictions - Output


In [64]:
# One line per ensemble method, preceded by a blank line.
summary_lines = [
    "",
    f"output_BaggingClassifier : {output_BaggingClassifier}",
    f"Boosting output: {output_Boosting}",
    f"output_Stacking: {output_Stacking}",
    f"output_Voting:{output_Voting}",
]
print("\n".join(summary_lines))

 
output_BaggingClassifier : ['Mushrooms']
Boosting output: ['Mushrooms', 'Olives', 'Onions']
output_Stacking: ['Mushrooms', 'Onions', 'pineapple']
output_Voting:['Mushrooms']


# Accuracy

In [65]:

# Print a two-column table: ensemble method and its mean fold accuracy in percent.
for header_line in ("\nEnsemble Method  |  Accuracy", "============================="):
    print(header_line)

# Mean over every fold score collected for the method (across all toppings).
mean_by_method = {
    method: np.mean(accuracies) * 100
    for method, accuracies in ensemble_accuracies.items()
}
for method, mean_accuracy in mean_by_method.items():
    print(f"{method:<15} |  {mean_accuracy:.1f}%")



Ensemble Method  |  Accuracy
Bagging         |  64.2%
Boosting        |  65.4%
Stacking        |  68.6%
Voting          |  63.7%


### This is not the main output; it is just an idea:
### if the user picks something other than the predicted topping, what might it be?

In [66]:
from collections import Counter

# Pool the toppings predicted by all four ensemble methods (a topping appears
# once for every method that predicted it).
all_predictions = output_BaggingClassifier + output_Boosting + output_Stacking + output_Voting

# Count how many methods predicted each topping
topping_counter = Counter(all_predictions)

# Up to three toppings with the highest counts, most common first
top_3_toppings = topping_counter.most_common(3)

print("\nTop 3 Toppings Count from All Models (Descending Order):")
for topping, count in top_3_toppings:
    print(f"  {topping}: {count}")

# Every method may have predicted "no topping", which leaves nothing to vote
# on; indexing top_3_toppings[0] would then raise IndexError.
most_common_topping = top_3_toppings[0][0] if top_3_toppings else None

if most_common_topping is None:
    print("\nFinal Topping Prediction: none (no model predicted a topping)")
else:
    print(f"\nFinal Topping Prediction: {most_common_topping}")


Top 3 Toppings Count from All Models (Descending Order):
  Mushrooms: 4
  Onions: 2
  Olives: 1

Final Topping Prediction: Mushrooms
