# **Code of Decision Tree:**

---






In [None]:
import numpy as np
from tensorflow.keras.datasets import cifar10
from sklearn.metrics import accuracy_score
from sklearn.preprocessing import StandardScaler

# Load the CIFAR-10 dataset through the Keras helper as (train, test) pairs of
# images and labels; the label arrays are flattened to 1-D later in this script.
(x_train, y_train), (x_test, y_test) = cifar10.load_data()

# Enhanced feature extraction
def extract_features(images):
    """Turn each image into a 66-value hand-crafted feature vector.

    Per image the vector is laid out as:
      * 16-bin normalized histogram of the grayscale image,
      * 16-bin normalized histograms of channels 0, 1 and 2,
      * mean and standard deviation of the grayscale image.

    The grayscale image is the per-pixel channel average truncated to uint8.
    Every histogram covers the range [0, 256) and is divided by its own sum.

    Args:
        images: iterable of arrays indexed as [row, column, channel] with at
            least three channels.

    Returns:
        A numpy array with one feature row per input image.
    """
    def _unit_histogram(plane):
        # 16 equal-width bins over [0, 256), scaled so the bins sum to 1.
        counts, _edges = np.histogram(plane, bins=16, range=(0, 256))
        return counts / counts.sum()

    rows = []
    for image in images:
        gray = np.mean(image, axis=2).astype(np.uint8)

        pieces = [_unit_histogram(gray)]
        pieces.extend(_unit_histogram(image[:, :, channel]) for channel in range(3))
        pieces.append([np.mean(gray), np.std(gray)])

        rows.append(np.concatenate(pieces))

    return np.array(rows)

# Build the hand-crafted feature matrices for both splits (full training set)
x_train_features = extract_features(x_train)
x_test_features = extract_features(x_test)

# Collapse the label arrays to 1-D vectors
y_train, y_test = y_train.flatten(), y_test.flatten()

# Standardize features: learn the statistics from the training features only,
# then apply the same transform to both splits
scaler = StandardScaler().fit(x_train_features)
x_train_features = scaler.transform(x_train_features)
x_test_features = scaler.transform(x_test_features)

# Decision tree utilities
def compute_entropy(y):
    """Return the Shannon entropy (in bits) of the label collection ``y``.

    Args:
        y: 1-D array-like of class labels.

    Returns:
        The entropy ``-sum(p * log2(p))`` over the observed classes, or 0.0
        when ``y`` is empty.
    """
    if len(y) == 0:
        return 0.0

    _, counts = np.unique(y, return_counts=True)
    probabilities = counts / counts.sum()
    # np.unique only reports classes that occur, so every probability is > 0
    # and log2 is always defined. No smoothing term is added: it would bias
    # the result (a pure node would get a slightly negative entropy).
    entropy = -np.sum(probabilities * np.log2(probabilities))
    # Adding 0.0 turns the IEEE "-0.0" of a pure node into a plain 0.0.
    return entropy + 0.0

def split_dataset(X, node_indices, feature):
    """Partition the samples of a node around the median of one feature.

    The threshold is the median of ``X[node_indices, feature]``, i.e. it is
    computed from the samples of this node only.

    Args:
        X: 2-D feature matrix indexed as [sample, feature].
        node_indices: indices (array or list) of the samples in the node.
        feature: column of ``X`` to split on.

    Returns:
        ``(left_indices, right_indices)`` as integer arrays: samples whose
        feature value is <= the threshold go left, the rest go right. The
        relative order of ``node_indices`` is preserved on both sides.
    """
    node_indices = np.asarray(node_indices, dtype=int)
    if node_indices.size == 0:
        # Nothing to split; avoid taking the median of an empty slice.
        return np.array([], dtype=int), np.array([], dtype=int)

    values = X[node_indices, feature]
    threshold = np.median(values)  # Use median as the threshold
    # Two separate masks (rather than a mask and its negation) so that a NaN
    # value ends up on neither side, as with the element-wise comparisons.
    return node_indices[values <= threshold], node_indices[values > threshold]

# Code Assignment starts here
def compute_information_gain(X, y, node_indices, feature):
    """Return the entropy reduction obtained by splitting a node on ``feature``.

    The node is partitioned with ``split_dataset`` and the gain is the node's
    entropy minus the size-weighted entropy of the two children.

    Args:
        X: 2-D feature matrix indexed as [sample, feature].
        y: 1-D numpy array of class labels aligned with the rows of ``X``.
        node_indices: indices of the samples that belong to the node.
        feature: column of ``X`` to evaluate.

    Returns:
        The information gain in bits; 0.0 for an empty node.
    """
    y_node = y[node_indices]
    if len(y_node) == 0:
        # An empty node has nothing to gain and would otherwise divide by zero.
        return 0.0

    left_indices, right_indices = split_dataset(X, node_indices, feature)
    y_left = y[left_indices]
    y_right = y[right_indices]

    entropy_before = compute_entropy(y_node)
    left_weight = len(y_left) / len(y_node)
    right_weight = len(y_right) / len(y_node)
    entropy_after = left_weight * compute_entropy(y_left) + right_weight * compute_entropy(y_right)

    return entropy_before - entropy_after

def get_best_split(X, y, node_indices):
    """Pick the feature whose median split gives the largest information gain.

    Args:
        X: 2-D feature matrix indexed as [sample, feature].
        y: 1-D numpy array of class labels aligned with the rows of ``X``.
        node_indices: indices of the samples that belong to the node.

    Returns:
        The index of the winning feature (the first one on ties), or -1 when
        the node holds a single class or ``X`` has no feature columns.
    """
    feature_count = X.shape[1]

    # A node with only one class cannot be improved by any split.
    if len(np.unique(y[node_indices])) == 1:
        return -1

    winner = -1
    top_gain = -float('inf')
    for candidate in range(feature_count):
        gain = compute_information_gain(X, y, node_indices, candidate)
        if gain > top_gain:
            winner, top_gain = candidate, gain

    return winner
# Code Assignment stops here

def build_tree_recursive(X, y, node_indices, max_depth, current_depth):
    """Recursively grow a median-split decision tree.

    Tree representation:
      * leaf: the majority class label of the samples that reached it;
      * internal node: ``(feature, threshold, left_child, right_child)``,
        where samples with ``x[feature] <= threshold`` go to ``left_child``.

    The threshold is stored in the node because it is the median of the
    samples *in that node*; it cannot be recovered at prediction time from the
    full training set.

    Args:
        X: 2-D feature matrix indexed as [sample, feature].
        y: 1-D numpy array of non-negative integer class labels (required by
            ``np.bincount``).
        node_indices: indices of the samples that belong to the current node.
        max_depth: depth at which the recursion stops and a leaf is emitted.
        current_depth: depth of the current node (0 for the root).

    Returns:
        The (sub)tree rooted at this node, or -1 when called with no samples.
    """
    # No samples at all: there is no class to predict.
    if len(node_indices) == 0:
        return -1

    majority_class = np.bincount(y[node_indices]).argmax()

    # Stop recursion if max depth is reached
    if current_depth == max_depth:
        return majority_class

    # Find the best feature to split
    best_feature = get_best_split(X, y, node_indices)
    if best_feature == -1:
        return majority_class

    # Same threshold rule as split_dataset: the median over this node's samples.
    threshold = np.median(X[node_indices, best_feature])
    left_indices, right_indices = split_dataset(X, node_indices, best_feature)

    # A split that sends every sample to one side separates nothing and would
    # create an empty child; emit a leaf instead of an invalid -1 label.
    if len(left_indices) == 0 or len(right_indices) == 0:
        return majority_class

    # Recursively build left and right subtrees
    left_child = build_tree_recursive(X, y, left_indices, max_depth, current_depth + 1)
    right_child = build_tree_recursive(X, y, right_indices, max_depth, current_depth + 1)

    # Return the tree structure as a tuple
    return (best_feature, threshold, left_child, right_child)

# Build the decision tree
node_indices = np.arange(len(x_train_features))
decision_tree = build_tree_recursive(x_train_features, y_train, node_indices, max_depth=6, current_depth=0)  # Increased max_depth

# Function to predict using the decision tree
def predict(tree, x):
    """Classify one feature vector with a tree from ``build_tree_recursive``.

    Args:
        tree: a leaf label or a ``(feature, threshold, left, right)`` tuple.
        x: 1-D feature vector, scaled the same way as the training features.

    Returns:
        The class label stored at the leaf that ``x`` reaches.
    """
    while isinstance(tree, tuple):  # Traverse the tree until reaching a leaf
        feature, threshold, left_child, right_child = tree
        # Use the threshold learned for this node during training.
        tree = left_child if x[feature] <= threshold else right_child
    return tree  # Return the class label at the leaf

# Classify every test feature vector with the tree, then score the predictions
y_test_pred = [predict(decision_tree, sample) for sample in x_test_features]
test_accuracy = accuracy_score(y_test, y_test_pred)
print(f"Test Accuracy: {test_accuracy * 100:.2f}%")


Downloading data from https://www.cs.toronto.edu/~kriz/cifar-10-python.tar.gz
[1m170498071/170498071[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 0us/step
Test Accuracy: 23.65%


# **Code of Random Forest:**

---



In [None]:
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score
from sklearn.decomposition import PCA

# Unroll every image into a single row of raw pixel values
x_train_flatten = x_train.reshape(len(x_train), -1)
x_test_flatten = x_test.reshape(len(x_test), -1)

# Standardize pixels using statistics learned from the training rows only
scaler = StandardScaler().fit(x_train_flatten)
x_train_scaled = scaler.transform(x_train_flatten)
x_test_scaled = scaler.transform(x_test_flatten)

# Reduce dimensionality with PCA (n_components=0.95); the projection is
# fitted on the training data and reused for the test data
pca = PCA(n_components=0.95)
x_train_pca = pca.fit_transform(x_train_scaled)
x_test_pca = pca.transform(x_test_scaled)

# Random forest hyperparameters (fixed seed for reproducibility)
clf_rf = RandomForestClassifier(
    n_estimators=200,
    max_depth=25,
    max_features='sqrt',
    min_samples_split=5,
    min_samples_leaf=3,
    random_state=42
)
# y_train / y_test are the label vectors prepared earlier in this script
clf_rf.fit(x_train_pca, y_train)

# Score the forest on the projected test set
y_test_rf_pred = clf_rf.predict(x_test_pca)
test_rf_accuracy = accuracy_score(y_test, y_test_rf_pred)
print(f"Test Accuracy with RF: {test_rf_accuracy * 100:.2f}%")


Test Accuracy with All Features (RF): 46.86%
