In [None]:
import pandas as pd
import numpy as np
from collections import Counter
import math
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report, accuracy_score

# Read the CSV and encode the cell symbols numerically in a single pass:
# 'x' -> 1, 'o' -> -1, 'b' -> 0
data = pd.read_csv('/content/tictac.csv').replace({'x': 1, 'o': -1, 'b': 0})

# The 'class' column is the target; every remaining column is a feature
y = data['class']
X = data.drop(columns=['class'])

In [None]:
# Hold out 20% of the rows for testing; the fixed seed keeps the split repeatable
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)


In [None]:
def entropy(y):
    """Return the Shannon entropy (in bits) of the labels in ``y``.

    Args:
        y: Sized iterable of hashable class labels.

    Returns:
        0 for an empty ``y``; otherwise ``-sum(p * log2(p))`` taken over the
        relative frequency ``p`` of each distinct label.
    """
    total = len(y)
    if total == 0:
        return 0

    terms = []
    for count in Counter(y).values():
        share = count / total
        if share > 0:
            terms.append(share * math.log2(share))
    return -sum(terms)

def information_gain(y, y_left, y_right):
    """Return the entropy reduction obtained by splitting ``y`` in two.

    Args:
        y: Labels of the node being split (must be non-empty).
        y_left: Labels routed to the left child.
        y_right: Labels routed to the right child.

    Returns:
        ``entropy(y)`` minus the entropies of the two children, the left one
        weighted by ``len(y_left) / len(y)`` and the right one by the
        complement of that weight.
    """
    left_weight = len(y_left) / len(y)
    parent_entropy = entropy(y)
    left_term = left_weight * entropy(y_left)
    right_term = (1 - left_weight) * entropy(y_right)
    return parent_entropy - left_term - right_term

class DecisionTree:
    """Binary decision tree that splits on ``column == value`` tests.

    An internal node is stored as the tuple
    ``(column, value, left_tree, right_tree)``: rows where
    ``row[column] == value`` follow ``left_tree``, all other rows follow
    ``right_tree``. A leaf is stored as the bare class label.

    Args:
        criterion: ``'entropy'`` scores candidate splits with
            ``information_gain``; any other value scores them with
            ``gini_index`` (which must be defined before ``fit`` is called).
        max_depth: Depth at which growth stops, or ``None`` for no limit.
    """

    def __init__(self, criterion='entropy', max_depth=None):
        self.criterion = criterion
        self.max_depth = max_depth
        self.tree = None  # set by fit()

    def fit(self, X, y):
        """Grow the tree from feature DataFrame ``X`` and label Series ``y``."""
        self.tree = self._build_tree(X, y)

    def _build_tree(self, X, y, depth=0):
        """Recursively build the subtree for the rows in ``X`` / ``y``.

        Returns either a leaf label (the most common label in ``y``) or an
        internal-node tuple ``(column, value, left_tree, right_tree)``.
        """
        depth_exhausted = self.max_depth is not None and depth >= self.max_depth
        if len(set(y)) == 1 or depth_exhausted:
            return Counter(y).most_common(1)[0][0]

        use_entropy = self.criterion == 'entropy'
        # gini_index returns the weighted impurity of the two children, where
        # LOWER is better, so it cannot be maximised directly. Passing the
        # node itself plus an empty slice as the "children" yields the node's
        # own impurity; subtracting a candidate's weighted impurity from it
        # gives an impurity reduction that is maximised like information gain.
        parent_impurity = None if use_entropy else gini_index(y, y, y[:0])

        best_gain = 0
        best_split = None
        for column in X.columns:
            feature = X[column]
            for value in feature.unique():
                left_mask = feature == value
                y_left, y_right = y[left_mask], y[~left_mask]
                if use_entropy:
                    gain = information_gain(y, y_left, y_right)
                else:
                    gain = parent_impurity - gini_index(y, y_left, y_right)
                # Strict '>' means zero-gain splits (including ones that leave
                # a child empty) are never selected.
                if gain > best_gain:
                    best_gain = gain
                    best_split = (column, value)

        # No split improves on the node itself: emit a majority-vote leaf.
        if best_split is None:
            return Counter(y).most_common(1)[0][0]

        column, value = best_split
        left_mask = X[column] == value
        right_mask = ~left_mask
        left_tree = self._build_tree(X[left_mask], y[left_mask], depth + 1)
        right_tree = self._build_tree(X[right_mask], y[right_mask], depth + 1)
        return (column, value, left_tree, right_tree)

    def predict(self, X):
        """Return a list with one predicted label per row of DataFrame ``X``."""
        return [self._predict_one(row) for _, row in X.iterrows()]

    def _predict_one(self, row):
        """Walk the tree for a single row and return the leaf label reached."""
        node = self.tree
        # Internal nodes are tuples; anything else is a leaf label.
        while isinstance(node, tuple):
            column, value, left_tree, right_tree = node
            node = left_tree if row[column] == value else right_tree
        return node

# Fit the hand-written tree on the training split using the entropy criterion
dt_entropy = DecisionTree(criterion='entropy')
dt_entropy.fit(X_train, y_train)

# Predict the held-out split, then report per-class metrics and overall accuracy
y_pred_entropy = dt_entropy.predict(X_test)
print(
    "Entropy-based Decision Tree Classification Report:",
    classification_report(y_test, y_pred_entropy),
    sep="\n",
)
print("Accuracy:", accuracy_score(y_test, y_pred_entropy))


Entropy-based Decision Tree Classification Report:
              precision    recall  f1-score   support

       False       0.93      0.93      0.93        67
        True       0.96      0.96      0.96       125

    accuracy                           0.95       192
   macro avg       0.94      0.94      0.94       192
weighted avg       0.95      0.95      0.95       192

Accuracy: 0.9479166666666666


In [None]:
def gini_index(y, y_left, y_right):
    """Return the size-weighted Gini impurity of the two children of a split.

    Args:
        y: Labels of the node being split (must be non-empty); only its
            length is used.
        y_left: Labels routed to the left child.
        y_right: Labels routed to the right child.

    Returns:
        ``(len(y_left) / len(y)) * gini(y_left) +
        (len(y_right) / len(y)) * gini(y_right)``. Lower values mean purer
        children. Labels are compared element-wise with ``==``, so the inputs
        are expected to be NumPy arrays or pandas Series.
    """
    def _impurity(labels):
        # 1 - sum of squared class shares; an empty input has no classes and
        # therefore evaluates to 1.0 (its weight below is then zero).
        size = len(labels)
        squared_shares = [
            (np.sum(labels == cls) / size) ** 2 for cls in np.unique(labels)
        ]
        return 1.0 - sum(squared_shares)

    total = len(y)
    left_weight = len(y_left) / total
    right_weight = len(y_right) / total
    return left_weight * _impurity(y_left) + right_weight * _impurity(y_right)


In [None]:
def information_gain(y, y_left, y_right):
    """Return the entropy reduction obtained by splitting ``y`` in two.

    This definition replaces the ``information_gain`` defined in an earlier
    cell; it carries its own NumPy-based entropy helper.

    Args:
        y: Labels of the node being split (must be non-empty).
        y_left: Labels routed to the left child.
        y_right: Labels routed to the right child.

    Returns:
        Entropy of ``y`` minus the entropies of the children, each weighted
        by its share of ``len(y)``. Labels are compared element-wise with
        ``==``, so the inputs are expected to be NumPy arrays or pandas Series.
    """
    def _entropy(labels):
        # Class shares are derived once each and reused in p * log2(p);
        # an empty input has no classes and evaluates to 0.
        size = len(labels)
        shares = (np.sum(labels == cls) / size for cls in np.unique(labels))
        return -sum(share * np.log2(share) for share in shares)

    total = len(y)
    left_weight = len(y_left) / total
    right_weight = len(y_right) / total
    return _entropy(y) - left_weight * _entropy(y_left) - right_weight * _entropy(y_right)


In [None]:
class DecisionTree:
    """Binary decision tree that splits on ``feature <= threshold`` tests.

    The tree is made of ``Node`` objects: internal nodes carry
    ``feature_idx`` / ``threshold`` and two children, leaves carry ``value``.

    Args:
        criterion: ``'entropy'`` scores candidate splits with
            ``information_gain``; any other value (default ``'gini'``) scores
            them by the reduction in ``gini_index`` impurity.
        max_depth: Depth at which growth stops, or ``None`` for no limit.
    """

    def __init__(self, criterion='gini', max_depth=None):
        self.criterion = criterion
        self.max_depth = max_depth
        self.tree = None  # root Node, set by fit()

    def fit(self, X, y):
        """Grow the tree from a 2-D feature table ``X`` and labels ``y``.

        Both inputs are converted with ``np.asarray`` because the builder
        uses NumPy positional indexing (``X[:, j]``), which a pandas
        DataFrame does not support.
        """
        self.tree = self._build_tree(np.asarray(X), np.asarray(y))

    def predict(self, X):
        """Return a list with one predicted label per row of ``X``.

        Raises:
            ValueError: If called before ``fit``.
        """
        if self.tree is None:
            raise ValueError("DecisionTree must be fitted before calling predict")
        return [self._predict_one(row) for row in np.asarray(X)]

    def _predict_one(self, row):
        """Follow the ``<= threshold`` tests for one row down to a leaf."""
        node = self.tree
        while not node.is_leaf_node():
            go_left = row[node.feature_idx] <= node.threshold
            node = node.left if go_left else node.right
        return node.value

    def _build_tree(self, X, y, depth=0):
        """Recursively build the subtree for the rows in ``X`` / ``y``."""
        n_samples, n_features = X.shape
        n_labels = len(np.unique(y))

        if n_labels == 1 or n_samples <= 1 or (self.max_depth is not None and depth >= self.max_depth):
            return Node(value=self._most_common_label(y))

        use_entropy = self.criterion == 'entropy'
        # gini_index returns the weighted impurity of the two children, where
        # LOWER is better, so it cannot be maximised directly. Passing the
        # node itself plus an empty slice as the "children" yields the node's
        # own impurity; the score of a candidate is the reduction from that.
        parent_impurity = None if use_entropy else gini_index(y, y, y[:0])

        best_gain = -np.inf
        split_idx, split_thresh = None, None

        for feature_idx in range(n_features):
            X_column = X[:, feature_idx]
            # np.unique returns sorted values. The largest one would send
            # every row left and leave the right child empty; recursing on
            # such a split never shrinks the node, so it is not a candidate.
            for threshold in np.unique(X_column)[:-1]:
                left_mask = X_column <= threshold
                y_left, y_right = y[left_mask], y[~left_mask]
                if use_entropy:
                    gain = information_gain(y, y_left, y_right)
                else:
                    gain = parent_impurity - gini_index(y, y_left, y_right)
                if gain > best_gain:
                    best_gain = gain
                    split_idx = feature_idx
                    split_thresh = threshold

        # Every feature is constant within this node: nothing left to split on.
        if split_idx is None:
            return Node(value=self._most_common_label(y))

        left_mask = X[:, split_idx] <= split_thresh
        right_mask = ~left_mask
        left_subtree = self._build_tree(X[left_mask, :], y[left_mask], depth + 1)
        right_subtree = self._build_tree(X[right_mask, :], y[right_mask], depth + 1)
        return Node(split_idx, split_thresh, left_subtree, right_subtree)

    def _most_common_label(self, y):
        """Return the most frequent label in ``y`` (smallest label on ties).

        Uses ``np.unique`` counts rather than ``np.bincount`` so that labels
        need not be non-negative integers and the label itself, not its
        bin index, is returned.
        """
        labels, counts = np.unique(y, return_counts=True)
        return labels[counts.argmax()]

class Node:
    """One node of a decision tree.

    Args:
        feature_idx: Column index tested at this node.
        threshold: Value the feature is compared against.
        left: Child node for the left branch.
        right: Child node for the right branch.
        value: Prediction stored at this node; ``None`` when the node is not
            a leaf.
    """

    def __init__(self, feature_idx=None, threshold=None, left=None, right=None, value=None):
        # Split description
        self.feature_idx, self.threshold = feature_idx, threshold
        # Child subtrees
        self.left, self.right = left, right
        # Stored prediction (None unless this node is a leaf)
        self.value = value

    def is_leaf_node(self):
        """Return True when this node stores a prediction (``value`` is set)."""
        return not (self.value is None)


In [None]:
import pandas as pd

# Sample DataFrame. It is built directly from the literal so that the name
# `data`, which holds the dataset loaded at the top of the notebook, is not
# rebound to a plain dict (re-running the earlier cells would then fail).
df = pd.DataFrame({
    'A': [1, 2, 3],
    'B': [4, 5, 6],
    'C': [7, 8, 9]
})

# Correct indexing using .iloc for integer-based indexing
try:
    print(df.iloc[:, 0])  # Access all rows in the first column
except Exception as e:
    print(f"Error with iloc: {e}")

# Correct indexing using .loc for label-based indexing
try:
    print(df.loc[:, 'A'])  # Access all rows in column 'A'
except Exception as e:
    print(f"Error with loc: {e}")

# Slicing example
try:
    print(df.iloc[:, :2])  # Access all rows and the first two columns
except Exception as e:
    print(f"Error with slicing: {e}")


0    1
1    2
2    3
Name: A, dtype: int64
0    1
1    2
2    3
Name: A, dtype: int64
   A  B
0  1  4
1  2  5
2  3  6


In [None]:
from sklearn.tree import DecisionTreeClassifier

# Training scikit-learn decision tree classifier with entropy-based criterion.
# random_state is pinned (to the same seed as the train/test split) because
# scikit-learn permutes the candidate features at every split, so equally
# good splits can otherwise be resolved differently from run to run.
sklearn_dt_entropy = DecisionTreeClassifier(criterion='entropy', random_state=42)
sklearn_dt_entropy.fit(X_train, y_train)
y_pred_sklearn_entropy = sklearn_dt_entropy.predict(X_test)
print("scikit-learn Entropy-based Decision Tree Classification Report:")
print(classification_report(y_test, y_pred_sklearn_entropy))
print("Accuracy:", accuracy_score(y_test, y_pred_sklearn_entropy))

# Training scikit-learn decision tree classifier with Gini index (same seed)
sklearn_dt_gini = DecisionTreeClassifier(criterion='gini', random_state=42)
sklearn_dt_gini.fit(X_train, y_train)
y_pred_sklearn_gini = sklearn_dt_gini.predict(X_test)
print("scikit-learn Gini Index-based Decision Tree Classification Report:")
print(classification_report(y_test, y_pred_sklearn_gini))
print("Accuracy:", accuracy_score(y_test, y_pred_sklearn_gini))


scikit-learn Entropy-based Decision Tree Classification Report:
              precision    recall  f1-score   support

       False       0.98      0.96      0.97        67
        True       0.98      0.99      0.98       125

    accuracy                           0.98       192
   macro avg       0.98      0.97      0.98       192
weighted avg       0.98      0.98      0.98       192

Accuracy: 0.9791666666666666
scikit-learn Gini Index-based Decision Tree Classification Report:
              precision    recall  f1-score   support

       False       0.97      0.96      0.96        67
        True       0.98      0.98      0.98       125

    accuracy                           0.97       192
   macro avg       0.97      0.97      0.97       192
weighted avg       0.97      0.97      0.97       192

Accuracy: 0.9739583333333334
