# ID3

In [3]:
import numpy as np
import pandas as pd
from collections import Counter

# Compute the entropy of a label vector
def entropy(y):
    """Return the Shannon entropy, in bits, of the labels in ``y``.

    Class frequencies are obtained with ``np.unique`` rather than
    ``np.bincount``, so labels are not restricted to small non-negative
    integers: negative integers, floats and strings are counted as well, and
    no bins are allocated for label values that never occur.

    Args:
        y: 1-D array-like of class labels.

    Returns:
        The entropy ``-sum(p * log2(p))`` over the observed classes.  A
        single-class or empty ``y`` yields zero.
    """
    y = np.asarray(y)
    # Every count returned by np.unique is >= 1, so log2 never sees a zero.
    _, counts = np.unique(y, return_counts=True)
    probabilities = counts / y.size
    return -np.sum(probabilities * np.log2(probabilities))

# Partition a dataset on a single feature threshold
def split_dataset(X, y, feature_index, threshold):
    """Split ``(X, y)`` into the rows at or below a threshold and the rest.

    Args:
        X: 2-D NumPy array of samples (rows) by features (columns).
        y: NumPy array of labels aligned with the rows of ``X``.
        feature_index: Column of ``X`` to compare.
        threshold: Rows with ``X[:, feature_index] <= threshold`` go left.

    Returns:
        The tuple ``(X_left, y_left, X_right, y_right)``.
    """
    goes_left = X[:, feature_index] <= threshold
    goes_right = ~goes_left
    left_part = (X[goes_left], y[goes_left])
    right_part = (X[goes_right], y[goes_right])
    return left_part + right_part

# Compute the information gain of one candidate split
def information_gain(X, y, feature_index, threshold):
    """Return the entropy reduction achieved by a threshold split.

    Args:
        X: 2-D NumPy array of samples by features.
        y: NumPy array of labels aligned with the rows of ``X``.
        feature_index: Column of ``X`` to split on.
        threshold: Split value passed to ``split_dataset``.

    Returns:
        Parent entropy minus the size-weighted entropy of the two children,
        or ``0`` when the split leaves either side empty.
    """
    before = entropy(y)
    _, labels_left, _, labels_right = split_dataset(X, y, feature_index, threshold)
    n_left, n_right = len(labels_left), len(labels_right)
    # A split that sends every row to one side separates nothing.
    if n_left == 0 or n_right == 0:
        return 0
    n_total = len(y)
    after = (n_left / n_total) * entropy(labels_left) + (n_right / n_total) * entropy(labels_right)
    return before - after

# Search for the best split
def best_split(X, y):
    """Find the (feature, threshold) pair with the highest information gain.

    Every observed value of every feature is tried as a threshold, except the
    largest one of each feature.

    Args:
        X: 2-D NumPy array of samples by features.
        y: NumPy array of labels aligned with the rows of ``X``.

    Returns:
        ``(best_feature, best_threshold)``, or ``(None, None)`` when no
        candidate split has a strictly positive gain.
    """
    best_gain = 0
    best_feature = None
    best_threshold = None
    for feature_index in range(X.shape[1]):
        # np.unique returns sorted values.  Thresholding at the last one sends
        # every row left, which information_gain scores as 0 and can never
        # beat best_gain, so that candidate is dropped up front.
        candidates = np.unique(X[:, feature_index])[:-1]
        for threshold in candidates:
            gain = information_gain(X, y, feature_index, threshold)
            # Strict comparison keeps the first of several equally good splits.
            if gain > best_gain:
                best_gain = gain
                best_feature = feature_index
                best_threshold = threshold
    return best_feature, best_threshold

# Decision tree classifier (ID3 variant)
class DecisionTreeID3:
    """Binary threshold decision tree stored as nested dicts.

    Internal nodes are dicts with the keys ``'feature'``, ``'threshold'``,
    ``'left'`` and ``'right'``; any value that is not a dict is a leaf holding
    the predicted label.  Split selection is delegated to the module-level
    ``best_split`` and partitioning to ``split_dataset``.
    """

    def __init__(self, max_depth=None):
        """Create an unfitted tree.

        Args:
            max_depth: Maximum number of split levels, or ``None`` for no
                limit.
        """
        self.max_depth = max_depth
        self.tree = None  # root node, set by fit()

    def fit(self, X, y, depth=0):
        """Grow the tree on ``(X, y)`` and store its root in ``self.tree``.

        Args:
            X: 2-D NumPy array of samples by features.
            y: NumPy array of labels aligned with the rows of ``X``.
            depth: Depth assigned to the root; compared against
                ``max_depth`` while growing.

        Returns:
            The root of the fitted tree: a node dict, or a bare label when
            the data cannot or need not be split.

        Raises:
            ValueError: If ``y`` is empty.
        """
        if len(y) == 0:
            raise ValueError("cannot fit a decision tree on an empty dataset")
        # Assign the root exactly once.  This also covers the case where the
        # root itself is a leaf (single class, max_depth reached, or no
        # useful split), which previously left self.tree as None.
        self.tree = self._grow(X, y, depth)
        return self.tree

    def _grow(self, X, y, depth):
        """Recursively build and return the subtree for ``(X, y)``."""
        labels = np.unique(y)
        if len(labels) == 1:
            return labels[0]
        if self.max_depth is not None and depth >= self.max_depth:
            return self._majority_label(y)
        feature, threshold = best_split(X, y)
        if feature is None:
            # No split improves on the parent: fall back to the majority.
            return self._majority_label(y)
        X_left, y_left, X_right, y_right = split_dataset(X, y, feature, threshold)
        return {
            'feature': feature,
            'threshold': threshold,
            'left': self._grow(X_left, y_left, depth + 1),
            'right': self._grow(X_right, y_right, depth + 1),
        }

    @staticmethod
    def _majority_label(y):
        """Return the most frequent label in ``y``."""
        return Counter(y).most_common(1)[0][0]

    def predict_one(self, x, tree):
        """Route a single sample ``x`` through ``tree`` and return its label.

        Args:
            x: Indexable feature vector.
            tree: Node dict or leaf label to start from.
        """
        node = tree
        while isinstance(node, dict):
            if x[node['feature']] <= node['threshold']:
                node = node['left']
            else:
                node = node['right']
        return node

    def predict(self, X):
        """Return a NumPy array with the predicted label of every row of ``X``.

        Raises:
            RuntimeError: If the model has not been fitted.
        """
        if self.tree is None:
            raise RuntimeError("fit must be called before predict")
        return np.array([self.predict_one(x, self.tree) for x in X])

# Load the Iris dataset from scikit-learn
from sklearn import datasets
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder

# Use the numeric Iris features as-is
iris = datasets.load_iris()
X = iris.data
y = iris.target

# Hold out 20% of the samples for testing (fixed seed)
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

# Build and train the ID3 model
model = DecisionTreeID3()
model.fit(X_train, y_train)

# Predict the held-out samples
y_pred = model.predict(X_test)

# Accuracy is the fraction of predictions that match the true labels
accuracy = np.mean(y_test == y_pred)
print(f'Akurasi Model ID3: {accuracy * 100:.2f}%')

# Example: predict one sample taken from the test set
sample_data = np.array(X_test[:1])
predicted_class = model.predict(sample_data)[0]
print(f'Prediksi untuk sampel: {sample_data[0]} adalah kelas {iris.target_names[predicted_class]}')

Akurasi Model ID3: 100.00%
Prediksi untuk sampel: [6.1 2.8 4.7 1.2] adalah kelas versicolor


# C4.5

In [4]:
import numpy as np
import pandas as pd
from collections import Counter

# Compute the entropy of a label vector
def entropy(y):
    """Return the Shannon entropy, in bits, of the labels in ``y``.

    Class frequencies come from ``np.unique`` instead of ``np.bincount``, so
    the labels may be negative integers, floats or strings as well as small
    non-negative integers, and no bins are created for absent label values.

    Args:
        y: 1-D array-like of class labels.

    Returns:
        The entropy ``-sum(p * log2(p))`` over the observed classes.  A
        single-class or empty ``y`` yields zero.
    """
    y = np.asarray(y)
    # Counts from np.unique are always >= 1, so log2 is never given a zero.
    _, counts = np.unique(y, return_counts=True)
    probabilities = counts / y.size
    return -np.sum(probabilities * np.log2(probabilities))

# Partition a dataset on a single feature threshold
def split_dataset(X, y, feature_index, threshold):
    """Separate the rows at or below ``threshold`` from the remaining rows.

    Args:
        X: 2-D NumPy array of samples by features.
        y: NumPy array of labels aligned with the rows of ``X``.
        feature_index: Column of ``X`` used for the comparison.
        threshold: Rows with a value ``<= threshold`` form the left part.

    Returns:
        The tuple ``(X_left, y_left, X_right, y_right)``.
    """
    column = X[:, feature_index]
    at_or_below = column <= threshold
    above = ~at_or_below
    X_left, X_right = X[at_or_below], X[above]
    y_left, y_right = y[at_or_below], y[above]
    return X_left, y_left, X_right, y_right

# Compute the information gain of one candidate split
def information_gain(X, y, feature_index, threshold):
    """Return how much a threshold split lowers the entropy of ``y``.

    Args:
        X: 2-D NumPy array of samples by features.
        y: NumPy array of labels aligned with the rows of ``X``.
        feature_index: Column of ``X`` to split on.
        threshold: Split value passed to ``split_dataset``.

    Returns:
        Parent entropy minus the size-weighted child entropy, or ``0`` when
        one of the two sides is empty.
    """
    entropy_before = entropy(y)
    parts = split_dataset(X, y, feature_index, threshold)
    labels_left, labels_right = parts[1], parts[3]
    # Nothing is gained when every row ends up on the same side.
    if len(labels_left) == 0 or len(labels_right) == 0:
        return 0
    total = len(y)
    entropy_after = (len(labels_left) / total) * entropy(labels_left) + (len(labels_right) / total) * entropy(labels_right)
    return entropy_before - entropy_after

# Compute the split information of one candidate split
def split_information(X, feature_index, threshold):
    """Return the entropy, in bits, of the left/right size proportions.

    Args:
        X: 2-D NumPy array of samples by features.
        feature_index: Column of ``X`` to compare.
        threshold: Rows with ``X[:, feature_index] <= threshold`` go left.

    Returns:
        ``-(p_left * log2(p_left) + p_right * log2(p_right))`` when both
        sides are non-empty, otherwise ``1``.
    """
    goes_left = X[:, feature_index] <= threshold
    total = len(X)
    p_left = np.sum(goes_left) / total
    p_right = np.sum(~goes_left) / total
    # A one-sided split has no defined log term; fall back to 1.
    if not (p_left > 0 and p_right > 0):
        return 1
    return -(p_left * np.log2(p_left) + p_right * np.log2(p_right))

# Compute the gain ratio of one candidate split
def gain_ratio(X, y, feature_index, threshold):
    """Return the information gain divided by the split information.

    Args:
        X: 2-D NumPy array of samples by features.
        y: NumPy array of labels aligned with the rows of ``X``.
        feature_index: Column of ``X`` to split on.
        threshold: Split value for that column.

    Returns:
        ``information_gain / split_information``, or ``0`` when the split
        information is zero.
    """
    ig = information_gain(X, y, feature_index, threshold)
    si = split_information(X, feature_index, threshold)
    if si == 0:
        # Guard against dividing by zero.
        return 0
    return ig / si

# Search for the best split by gain ratio
def best_split(X, y):
    """Find the (feature, threshold) pair with the highest gain ratio.

    Every observed value of every feature is tried as a threshold, except the
    largest one of each feature.

    Args:
        X: 2-D NumPy array of samples by features.
        y: NumPy array of labels aligned with the rows of ``X``.

    Returns:
        ``(best_feature, best_threshold)``, or ``(None, None)`` when no
        candidate split has a strictly positive gain ratio.
    """
    best_gain_ratio = 0
    best_feature = None
    best_threshold = None
    for feature_index in range(X.shape[1]):
        # np.unique returns sorted values.  Thresholding at the last one sends
        # every row left, which gives a gain (and gain ratio) of 0 and can
        # never beat best_gain_ratio, so that candidate is dropped up front.
        candidates = np.unique(X[:, feature_index])[:-1]
        for threshold in candidates:
            gr = gain_ratio(X, y, feature_index, threshold)
            # Strict comparison keeps the first of several equally good splits.
            if gr > best_gain_ratio:
                best_gain_ratio = gr
                best_feature = feature_index
                best_threshold = threshold
    return best_feature, best_threshold

# Decision tree classifier (C4.5 variant)
class DecisionTreeC45:
    """Binary threshold decision tree stored as nested dicts.

    Internal nodes are dicts with the keys ``'feature'``, ``'threshold'``,
    ``'left'`` and ``'right'``; any value that is not a dict is a leaf holding
    the predicted label.  Split selection is delegated to the module-level
    ``best_split`` and partitioning to ``split_dataset``.
    """

    def __init__(self, max_depth=None):
        """Create an unfitted tree.

        Args:
            max_depth: Maximum number of split levels, or ``None`` for no
                limit.
        """
        self.max_depth = max_depth
        self.tree = None  # root node, set by fit()

    def fit(self, X, y, depth=0):
        """Grow the tree on ``(X, y)`` and store its root in ``self.tree``.

        Args:
            X: 2-D NumPy array of samples by features.
            y: NumPy array of labels aligned with the rows of ``X``.
            depth: Depth assigned to the root; compared against
                ``max_depth`` while growing.

        Returns:
            The root of the fitted tree: a node dict, or a bare label when
            the data cannot or need not be split.

        Raises:
            ValueError: If ``y`` is empty.
        """
        if len(y) == 0:
            raise ValueError("cannot fit a decision tree on an empty dataset")
        # Assign the root exactly once.  This also covers a root that is
        # itself a leaf (single class, max_depth reached, or no useful
        # split), which previously left self.tree as None.
        self.tree = self._grow(X, y, depth)
        return self.tree

    def _grow(self, X, y, depth):
        """Recursively build and return the subtree for ``(X, y)``."""
        labels = np.unique(y)
        if len(labels) == 1:
            return labels[0]
        if self.max_depth is not None and depth >= self.max_depth:
            return self._majority_label(y)
        feature, threshold = best_split(X, y)
        if feature is None:
            # No split improves on the parent: fall back to the majority.
            return self._majority_label(y)
        X_left, y_left, X_right, y_right = split_dataset(X, y, feature, threshold)
        return {
            'feature': feature,
            'threshold': threshold,
            'left': self._grow(X_left, y_left, depth + 1),
            'right': self._grow(X_right, y_right, depth + 1),
        }

    @staticmethod
    def _majority_label(y):
        """Return the most frequent label in ``y``."""
        return Counter(y).most_common(1)[0][0]

    def predict_one(self, x, tree):
        """Route a single sample ``x`` through ``tree`` and return its label.

        Args:
            x: Indexable feature vector.
            tree: Node dict or leaf label to start from.
        """
        node = tree
        while isinstance(node, dict):
            if x[node['feature']] <= node['threshold']:
                node = node['left']
            else:
                node = node['right']
        return node

    def predict(self, X):
        """Return a NumPy array with the predicted label of every row of ``X``.

        Raises:
            RuntimeError: If the model has not been fitted.
        """
        if self.tree is None:
            raise RuntimeError("fit must be called before predict")
        return np.array([self.predict_one(x, self.tree) for x in X])

# Load the Iris dataset from scikit-learn
from sklearn import datasets
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder

# Use the numeric Iris features as-is
iris = datasets.load_iris()
X = iris.data
y = iris.target

# Hold out 20% of the samples for testing (fixed seed)
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

# Build and train the C4.5 model
model = DecisionTreeC45()
model.fit(X_train, y_train)

# Predict the held-out samples
y_pred = model.predict(X_test)

# Accuracy is the fraction of predictions that match the true labels
accuracy = np.mean(y_test == y_pred)
print(f'Akurasi Model C4.5: {accuracy * 100:.2f}%')

# Example: predict one sample taken from the test set
sample_data = np.array(X_test[:1])
predicted_class = model.predict(sample_data)[0]
print(f'Prediksi untuk sampel: {sample_data[0]} adalah kelas {iris.target_names[predicted_class]}')

Akurasi Model C4.5: 100.00%
Prediksi untuk sampel: [6.1 2.8 4.7 1.2] adalah kelas versicolor


# CART

In [5]:
import numpy as np
import pandas as pd
from collections import Counter

# Compute the Gini index of a label vector
def gini_index(y):
    """Return the Gini impurity ``1 - sum(p ** 2)`` of the labels in ``y``.

    Class frequencies come from ``np.unique`` instead of ``np.bincount``, so
    the labels may be negative integers, floats or strings as well as small
    non-negative integers, and no bins are created for absent label values.

    Args:
        y: 1-D array-like of class labels.

    Returns:
        The impurity over the observed classes; ``0`` for a single class.
        An empty ``y`` has no class terms and therefore yields ``1``.
    """
    y = np.asarray(y)
    _, counts = np.unique(y, return_counts=True)
    probabilities = counts / y.size
    return 1 - np.sum(probabilities ** 2)

# Partition a dataset on a single feature threshold
def split_dataset(X, y, feature_index, threshold):
    """Divide ``(X, y)`` by comparing one feature column with a threshold.

    Args:
        X: 2-D NumPy array of samples by features.
        y: NumPy array of labels aligned with the rows of ``X``.
        feature_index: Column of ``X`` used for the comparison.
        threshold: Rows with a value ``<= threshold`` form the left part.

    Returns:
        The tuple ``(X_left, y_left, X_right, y_right)``.
    """
    is_left = X[:, feature_index] <= threshold
    pieces = []
    # Left side first, then its complement, each as an (X, y) pair.
    for selector in (is_left, ~is_left):
        pieces.append(X[selector])
        pieces.append(y[selector])
    return tuple(pieces)

# Compute the Gini gain of one candidate split
def gini_gain(X, y, feature_index, threshold):
    """Return the drop in Gini impurity achieved by a threshold split.

    Args:
        X: 2-D NumPy array of samples by features.
        y: NumPy array of labels aligned with the rows of ``X``.
        feature_index: Column of ``X`` to split on.
        threshold: Split value passed to ``split_dataset``.

    Returns:
        Parent impurity minus the size-weighted child impurity, or ``0``
        when one of the two sides is empty.
    """
    impurity_before = gini_index(y)
    _, labels_left, _, labels_right = split_dataset(X, y, feature_index, threshold)
    n_left, n_right = len(labels_left), len(labels_right)
    # Nothing is gained when every row ends up on the same side.
    if n_left == 0 or n_right == 0:
        return 0
    n_total = len(y)
    impurity_after = (n_left / n_total) * gini_index(labels_left) + (n_right / n_total) * gini_index(labels_right)
    return impurity_before - impurity_after

# Search for the best split by Gini gain
def best_split(X, y):
    """Find the (feature, threshold) pair with the highest Gini gain.

    Every observed value of every feature is tried as a threshold, except the
    largest one of each feature.

    Args:
        X: 2-D NumPy array of samples by features.
        y: NumPy array of labels aligned with the rows of ``X``.

    Returns:
        ``(best_feature, best_threshold)``, or ``(None, None)`` when no
        candidate split has a strictly positive gain.
    """
    best_gini_gain = 0
    best_feature = None
    best_threshold = None
    for feature_index in range(X.shape[1]):
        # np.unique returns sorted values.  Thresholding at the last one sends
        # every row left, which gini_gain scores as 0 and can never beat
        # best_gini_gain, so that candidate is dropped up front.
        candidates = np.unique(X[:, feature_index])[:-1]
        for threshold in candidates:
            gain = gini_gain(X, y, feature_index, threshold)
            # Strict comparison keeps the first of several equally good splits.
            if gain > best_gini_gain:
                best_gini_gain = gain
                best_feature = feature_index
                best_threshold = threshold
    return best_feature, best_threshold

# Decision tree classifier (CART variant)
class DecisionTreeCART:
    """Binary threshold decision tree stored as nested dicts.

    Internal nodes are dicts with the keys ``'feature'``, ``'threshold'``,
    ``'left'`` and ``'right'``; any value that is not a dict is a leaf holding
    the predicted label.  Split selection is delegated to the module-level
    ``best_split`` and partitioning to ``split_dataset``.
    """

    def __init__(self, max_depth=None):
        """Create an unfitted tree.

        Args:
            max_depth: Maximum number of split levels, or ``None`` for no
                limit.
        """
        self.max_depth = max_depth
        self.tree = None  # root node, set by fit()

    def fit(self, X, y, depth=0):
        """Grow the tree on ``(X, y)`` and store its root in ``self.tree``.

        Args:
            X: 2-D NumPy array of samples by features.
            y: NumPy array of labels aligned with the rows of ``X``.
            depth: Depth assigned to the root; compared against
                ``max_depth`` while growing.

        Returns:
            The root of the fitted tree: a node dict, or a bare label when
            the data cannot or need not be split.

        Raises:
            ValueError: If ``y`` is empty.
        """
        if len(y) == 0:
            raise ValueError("cannot fit a decision tree on an empty dataset")
        # Assign the root exactly once.  This also covers a root that is
        # itself a leaf (single class, max_depth reached, or no useful
        # split), which previously left self.tree as None.
        self.tree = self._grow(X, y, depth)
        return self.tree

    def _grow(self, X, y, depth):
        """Recursively build and return the subtree for ``(X, y)``."""
        labels = np.unique(y)
        if len(labels) == 1:
            return labels[0]
        if self.max_depth is not None and depth >= self.max_depth:
            return self._majority_label(y)
        feature, threshold = best_split(X, y)
        if feature is None:
            # No split improves on the parent: fall back to the majority.
            return self._majority_label(y)
        X_left, y_left, X_right, y_right = split_dataset(X, y, feature, threshold)
        return {
            'feature': feature,
            'threshold': threshold,
            'left': self._grow(X_left, y_left, depth + 1),
            'right': self._grow(X_right, y_right, depth + 1),
        }

    @staticmethod
    def _majority_label(y):
        """Return the most frequent label in ``y``."""
        return Counter(y).most_common(1)[0][0]

    def predict_one(self, x, tree):
        """Route a single sample ``x`` through ``tree`` and return its label.

        Args:
            x: Indexable feature vector.
            tree: Node dict or leaf label to start from.
        """
        node = tree
        while isinstance(node, dict):
            if x[node['feature']] <= node['threshold']:
                node = node['left']
            else:
                node = node['right']
        return node

    def predict(self, X):
        """Return a NumPy array with the predicted label of every row of ``X``.

        Raises:
            RuntimeError: If the model has not been fitted.
        """
        if self.tree is None:
            raise RuntimeError("fit must be called before predict")
        return np.array([self.predict_one(x, self.tree) for x in X])

# Load the Iris dataset from scikit-learn
from sklearn import datasets
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder

# Use the numeric Iris features as-is
iris = datasets.load_iris()
X = iris.data
y = iris.target

# Hold out 20% of the samples for testing (fixed seed)
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

# Build and train the CART model
model = DecisionTreeCART()
model.fit(X_train, y_train)

# Predict the held-out samples
y_pred = model.predict(X_test)

# Accuracy is the fraction of predictions that match the true labels
accuracy = np.mean(y_test == y_pred)
print(f'Akurasi Model CART: {accuracy * 100:.2f}%')

# Example: predict one sample taken from the test set
sample_data = np.array(X_test[:1])
predicted_class = model.predict(sample_data)[0]
print(f'Prediksi untuk sampel: {sample_data[0]} adalah kelas {iris.target_names[predicted_class]}')


Akurasi Model CART: 100.00%
Prediksi untuk sampel: [6.1 2.8 4.7 1.2] adalah kelas versicolor
