# Implement Logistic Regression or Decision Tree without using scikit-learn. Show how pruning works in the case of Decision Tree (DT) and Random Forest algorithms.

In [None]:
import numpy as np
from collections import Counter
import matplotlib.pyplot as plt
import networkx as nx
import math
from models import LogisticRegressionScratch, DecisionTreeScratch, RandomForestScratch, DecisionTreeNode

In [None]:
# Toy binary-classification data: six 2-D points, the first three labelled 0
# and the last three labelled 1.
np.random.seed(0)
X = np.array([
    [0.5, 1.5],
    [1.0, 1.0],
    [1.5, 0.5],
    [3.0, 3.5],
    [2.0, 2.0],
    [3.5, 2.5],
])
y = np.array([0, 0, 0, 1, 1, 1])

# Fit the from-scratch logistic regression on the toy data
model = LogisticRegressionScratch(lr=0.1, n_iter=1000)
model.fit(X, y)

# Query the fitted model on the same points it was trained on
predictions = model.predict(X)
probabilities = model.predict_proba(X)

# Show model output next to the ground truth for a quick visual comparison
for caption, values in (
    ("Predictions:", predictions),
    ("Probabilities:", probabilities),
    ("Actual labels:", y),
):
    print(caption, values)

## Decision Tree from Scratch
A decision tree classifier using entropy for splitting.

In [None]:
# The DecisionTreeScratch and DecisionTreeNode classes are now imported from the models.py file.

In [None]:
# Toy dataset for the tree demo: six 2-D points, the first three labelled 0
# and the last three labelled 1.
X_dt = np.array([
    [0.5, 1.5],
    [1.0, 1.0],
    [1.5, 0.5],
    [3.0, 3.5],
    [2.0, 2.0],
    [3.5, 2.5],
])
y_dt = np.array([0, 0, 0, 1, 1, 1])

# Build a tree limited to depth 3 and fit it on the toy data
dt_model = DecisionTreeScratch(max_depth=3)
dt_model.fit(X_dt, y_dt)

# Predict on the training points themselves
dt_predictions = dt_model.predict(X_dt)

# Show predictions next to the ground truth
for caption, values in (
    ("Decision Tree Predictions:", dt_predictions),
    ("Actual labels:", y_dt),
):
    print(caption, values)

## Decision Tree Pruning
Pruning helps prevent overfitting by removing branches that have little predictive power.

In [None]:
import matplotlib.pyplot as plt
import networkx as nx

# Function to visualize the tree using networkx
def visualize_tree(node, graph=None, parent=None, edge_label=None, node_id=0):
    """Add ``node`` and all of its descendants to a directed graph.

    Every tree node becomes one graph vertex keyed by an integer id. Ids are
    handed out in pre-order, starting at ``node_id``. A vertex carries a
    ``label`` attribute: ``"Leaf: <value>"`` when ``node.value`` is set,
    otherwise the split test ``"X[<feature>] <= <threshold>"``. The edge to
    the left child is labelled ``'True'`` and the edge to the right child
    ``'False'``.

    Args:
        node: Tree node exposing ``value``, ``feature``, ``threshold``,
            ``left`` and ``right`` attributes.
        graph: Graph to populate (anything with ``add_node``/``add_edge``).
            When omitted, a fresh ``nx.DiGraph`` is created.
        parent: Graph id of the parent vertex, or ``None`` for the root.
        edge_label: Label stored on the edge from ``parent`` to this vertex.
        node_id: Graph id to assign to ``node``.

    Returns:
        If ``graph`` was supplied: the next unused integer id (this is what
        the recursion relies on). If ``graph`` was omitted: the newly created
        graph, so the caller can actually use what was built.
    """
    # Remember whether this call created the graph; only then is the graph
    # itself the useful return value. Recursive calls always pass a graph.
    owns_graph = graph is None
    if owns_graph:
        graph = nx.DiGraph()

    if node.value is not None:
        label = f"Leaf: {node.value}"
    else:
        label = f"X[{node.feature}] <= {node.threshold:.2f}"
    graph.add_node(node_id, label=label)
    if parent is not None:
        graph.add_edge(parent, node_id, label=edge_label)

    # Each subtree reports the first id it did not consume, which keeps ids
    # unique across siblings.
    next_id = node_id + 1
    if node.left is not None:
        next_id = visualize_tree(node.left, graph, node_id, 'True', next_id)
    if node.right is not None:
        next_id = visualize_tree(node.right, graph, node_id, 'False', next_id)

    return graph if owns_graph else next_id

# Helper to plot the tree graph
def plot_tree_graph(graph):
    """Render ``graph`` with matplotlib using a Graphviz ``dot`` layout.

    Vertices are drawn with their ``label`` attribute as text, and edges are
    annotated with their ``label`` attribute. The figure is displayed with
    ``plt.show()``; nothing is returned.
    """
    node_text = nx.get_node_attributes(graph, 'label')
    edge_text = nx.get_edge_attributes(graph, 'label')
    layout = nx.nx_pydot.graphviz_layout(graph, prog='dot')

    nx.draw(
        graph,
        layout,
        labels=node_text,
        with_labels=True,
        arrows=True,
        node_size=2000,
        node_color='lightblue',
    )
    nx.draw_networkx_edge_labels(graph, layout, edge_labels=edge_text)
    plt.show()

# Pruning function: tries to simplify the tree if it doesn't hurt accuracy
def prune_tree(node, X_val, y_val):
    """Bottom-up pruning of a decision tree against validation data.

    The tree is walked depth-first. At every split the validation samples
    are divided between the two children, so each subtree is judged only on
    the samples that can reach it. When both children of a node are leaves,
    the node is replaced by a single leaf predicting the majority validation
    label if that leaf is at least as accurate as the subtree on those
    samples.

    NOTE(review): the routing assumes samples with
    ``X[:, node.feature] <= node.threshold`` go to ``node.left``. This matches
    the split label drawn by ``visualize_tree``; confirm it against
    ``DecisionTreeScratch.predict_one``.

    Args:
        node: Root of the (sub)tree to prune. Child links are modified in
            place.
        X_val: 2-D array-like of validation features reaching ``node``.
        y_val: 1-D array-like of validation labels aligned with ``X_val``.

    Returns:
        The node that should occupy this position: ``node`` itself, or a
        new ``DecisionTreeNode`` leaf when the subtree was pruned.
    """
    if node.value is not None:
        return node

    X_val = np.asarray(X_val)
    y_val = np.asarray(y_val)
    if len(y_val) == 0:
        # No validation sample reaches this subtree, so there is no evidence
        # for or against pruning; keep it unchanged. This also avoids taking
        # the majority label of an empty set.
        return node

    # Route validation samples down the same split the tree uses.
    goes_left = X_val[:, node.feature] <= node.threshold
    node.left = prune_tree(node.left, X_val[goes_left], y_val[goes_left])
    node.right = prune_tree(node.right, X_val[~goes_left], y_val[~goes_left])

    # Only a node whose children are both leaves is a pruning candidate.
    if node.left.value is None or node.right.value is None:
        return node

    leaf_value = Counter(y_val).most_common(1)[0][0]

    # One predictor instance is enough to score all samples at this node.
    predictor = DecisionTreeScratch()
    subtree_pred = np.array([predictor.predict_one(x, node) for x in X_val])
    acc_subtree = np.mean(subtree_pred == y_val)
    acc_leaf = np.mean(y_val == leaf_value)
    print(f"Pruning check: subtree acc={acc_subtree:.2f}, leaf acc={acc_leaf:.2f}")

    if acc_leaf >= acc_subtree:
        print("Pruned a branch!")
        return DecisionTreeNode(value=leaf_value)
    return node

# Build a fresh graph for the tree rooted at `root`, print a caption, and plot it
def _render_tree(root, caption):
    graph = nx.DiGraph()
    visualize_tree(root, graph)
    print(caption)
    plot_tree_graph(graph)
    return graph

# Snapshot of the tree as trained
G_before = _render_tree(dt_model.root, "Tree before pruning:")

# Prune in place; the training data doubles as validation data to keep the demo simple
dt_model.root = prune_tree(dt_model.root, X_dt, y_dt)

# Snapshot of the tree after pruning
G_after = _render_tree(dt_model.root, "Tree after pruning:")

## Random Forest from Scratch
Random Forest combines multiple decision trees trained on bootstrapped samples and random feature subsets.

In [None]:
# The RandomForestScratch class is now imported from the models.py file.

In [None]:
# Fit a five-tree forest (each tree limited to depth 3) on the toy dataset
rf_model = RandomForestScratch(n_trees=5, max_depth=3)
rf_model.fit(X_dt, y_dt)

# Predict on the training points themselves
rf_predictions = rf_model.predict(X_dt)

# Show predictions next to the ground truth
for caption, values in (
    ("Random Forest Predictions:", rf_predictions),
    ("Actual labels:", y_dt),
):
    print(caption, values)

## Next Steps: Streamlit Integration
You can now build a user interface for these models and deploy them for real-time use with Streamlit.