In [1]:
import pandas as pd
import os 
import sys

p = os.path.abspath(os.path.join(os.path.dirname("__file__"), '..'))
sys.path.append(p)

from DecisionTree.ID3Tree import ID3Tree
from DecisionTree.Ruleset import Ruleset
from DecisionTree.Bootstrap_Aggregating import Bagging
from sklearn.model_selection import train_test_split


In [2]:
results = pd.DataFrame(columns=['ID3', 'Ruleset', 'Bagging', 'Actual'])
results.columns

Index(['ID3', 'Ruleset', 'Bagging', 'Actual'], dtype='object')

In [None]:
data_file = os.path.join(p, 'datasets', 'monte_carlo_data.csv')
data = pd.read_csv(data_file, delimiter=';')
data = data.drop(columns=['turn'])
data['played'] = data['played'].astype(int)
feature_names = data.columns[:-1].tolist()
data

Unnamed: 0,cel1,cel2,cel3,cel4,cel5,cel6,cel7,cel8,cel9,cel10,...,cel35,cel36,cel37,cel38,cel39,cel40,cel41,cel42,pieces,played
0,0,0,0,0,0,0,0,0,0,0,...,-1,0,-1,-1,1,1,-1,1,12,5
1,0,0,0,0,0,0,0,0,0,0,...,1,0,-1,1,1,-1,-1,-1,13,5
2,0,0,0,0,0,0,0,0,0,0,...,0,0,-1,-1,1,-1,1,1,7,5
3,0,0,0,0,0,-1,0,0,0,0,...,-1,-1,1,0,1,-1,1,-1,17,3
4,0,0,0,0,0,0,0,0,0,0,...,-1,0,0,0,0,0,-1,1,5,6
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
30012,0,0,0,0,0,0,0,0,0,0,...,0,-1,1,-1,-1,0,1,1,10,3
30013,0,0,0,0,0,0,0,0,0,0,...,0,0,-1,-1,1,0,1,0,9,3
30014,0,0,0,0,0,0,0,0,0,0,...,0,1,1,-1,-1,1,-1,1,17,1
30015,0,0,0,0,0,0,1,0,0,0,...,-1,1,-1,-1,1,-1,-1,1,21,2


In [4]:
X, y = data.iloc[:, :-1], data.iloc[:, -1]
X = X.to_numpy().tolist()
y = y.to_numpy().tolist()
print(y)

[5, 5, 5, 3, 6, 4, 6, 3, 5, 2, 3, 2, 5, 4, 2, 4, 0, 3, 5, 4, 4, 4, 3, 3, 4, 4, 3, 5, 2, 4, 2, 5, 3, 4, 6, 1, 4, 4, 1, 2, 2, 1, 2, 1, 1, 5, 5, 3, 2, 6, 3, 1, 3, 2, 4, 3, 2, 4, 3, 6, 4, 2, 2, 6, 0, 1, 3, 2, 3, 3, 5, 4, 4, 1, 4, 1, 2, 0, 1, 5, 3, 1, 0, 1, 5, 3, 1, 2, 4, 0, 4, 1, 1, 4, 1, 5, 3, 6, 2, 1, 2, 4, 0, 4, 3, 3, 6, 3, 2, 3, 1, 2, 1, 3, 3, 4, 0, 6, 5, 3, 2, 3, 0, 0, 2, 4, 5, 5, 4, 3, 5, 3, 4, 2, 4, 4, 6, 1, 1, 5, 5, 4, 1, 3, 5, 6, 3, 4, 2, 3, 4, 4, 2, 4, 6, 4, 3, 2, 3, 3, 1, 1, 2, 3, 3, 2, 4, 3, 4, 4, 3, 5, 3, 4, 1, 3, 4, 0, 5, 1, 4, 2, 3, 2, 4, 2, 5, 3, 0, 0, 4, 5, 3, 4, 4, 5, 5, 2, 3, 3, 4, 5, 4, 1, 3, 5, 0, 4, 2, 6, 2, 2, 5, 1, 6, 4, 3, 5, 4, 3, 2, 4, 3, 1, 0, 3, 1, 3, 0, 4, 1, 3, 4, 2, 1, 6, 2, 2, 3, 6, 2, 4, 3, 3, 6, 3, 5, 5, 3, 4, 6, 2, 3, 4, 2, 3, 2, 3, 3, 6, 2, 4, 2, 2, 1, 3, 3, 4, 2, 5, 6, 3, 3, 3, 4, 1, 2, 5, 3, 5, 6, 4, 4, 5, 1, 2, 2, 0, 4, 4, 3, 2, 0, 3, 6, 4, 3, 3, 4, 1, 6, 5, 2, 4, 4, 2, 1, 3, 3, 0, 2, 3, 4, 6, 3, 3, 3, 3, 2, 2, 3, 3, 4, 3, 2, 2, 3, 5, 2, 2, 4, 3, 2, 

In [5]:
def attribute_vartypes(attribute):
    """
    Function to determine the attribute types of the dataset.
    :param X: The dataset
    :return: A list of attribute types
    """
    return 'continuous' if attribute == 'pieces' else 'categorical'

In [6]:
data = [x + [label] for x, label in zip(X, y)]
train_data, test_data = train_test_split(data, test_size=0.3, random_state=42)
type_map = {attribute : attribute_vartypes(attribute) for attribute in feature_names}
print(train_data[0])

[0, 0, 0, 1, 0, 0, 0, 0, 0, 0, -1, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0, -1, 1, 0, -1, -1, -1, 1, -1, 1, 0, 1, 1, -1, -1, 1, -1, 1, 1, -1, -1, -1, 23, 2]


In [None]:
from concurrent.futures import ThreadPoolExecutor

def process_row(row, rules, ruleset, bagging):
    # ID3 Tree Prediction
    id3_pred = None
    for rule in rules:
        id3_pred = rule.predict(row)
        if id3_pred is not None:
            break
    if id3_pred is None:
        id3_pred = -1  # ERROR_CLASS

    # Ruleset Prediction
    ruleset_pred, _ = ruleset.predict(row)

    # Bagging Prediction
    bagging_pred, _ = bagging.predict(row)

    return [id3_pred, ruleset_pred, bagging_pred, row[-1]]

def generate_results():
    print("Training ID3 Tree")
    tree = ID3Tree(attributes=feature_names, data=train_data, default=0, type_map=type_map)
    tree.train()
    rules = tree.build_rules()

    print("Training Ruleset")
    ruleset = Ruleset(feature_names, train_data, 0, type_map)
    ruleset.train()

    print("Training Bagging")
    bagging = Bagging(feature_names, train_data, 0, type_map)
    bagging.train()

    print("Starting predictions with threading...")
    with ThreadPoolExecutor() as executor:
        futures = [executor.submit(process_row, row, rules, ruleset, bagging) for row in test_data]
        results_list = [f.result() for f in futures]

    results_df = pd.DataFrame(results_list, columns=["ID3_Pred", "Ruleset_Pred", "Bagging_Pred", "Actual"])
    results_df.to_csv(os.path.join('datasets', 'decision_tree_data.csv'), index=False, sep=',')

    print("Results saved to decision_tree_data.csv")


print ("DANGER: you are about to overwrite the file with the new data")
print ("This process will take a long time to run and will overwrite the file, potentially losing the already generated data")
string = input("Are you sure you want to continue? (yes/no): ")
if (string == "yes"): 
    generate_results()       # PLEASE BE CAREFUL WITH THIS FUNCTION, IT WILL TAKE A LONG TIME TO RUN AND WILL OVERWRITE THE FILE
#                            # POTENTIALLY LOSING THE ALREADY GENERATED DATA

DANGER: you are about to overwrite the file with the new data
This process will take a long time to run and will overwrite the file, potentially losing the already generated data
