# In [1]:
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import accuracy_score, mean_squared_error

# Linear Regression
class LinearRegressionFromScratch:
    """Linear regression fitted with full-batch gradient descent on the MSE loss.

    Parameters
    ----------
    learning_rate : float
        Step size applied to every gradient update.
    iterations : int
        Number of gradient steps performed by ``fit``.

    Attributes
    ----------
    weights : numpy.ndarray or None
        Coefficient vector of shape ``(n_features,)``; ``None`` until ``fit``.
    bias : float or None
        Intercept term; ``None`` until ``fit``.
    """

    def __init__(self, learning_rate=0.01, iterations=1000):
        self.learning_rate = learning_rate
        self.iterations = iterations
        self.weights = None
        self.bias = None

    def fit(self, X, y):
        """Estimate ``weights`` and ``bias`` from training data.

        Parameters
        ----------
        X : array-like of shape (n_samples, n_features)
            Training inputs.
        y : array-like of shape (n_samples,) or (n_samples, 1)
            Training targets. A column vector is flattened before use.

        Returns
        -------
        LinearRegressionFromScratch
            ``self``, so calls can be chained.

        Raises
        ------
        ValueError
            If the data is empty or ``y`` does not hold exactly one value
            per row of ``X``.
        """
        X = np.asarray(X, dtype=float)
        # Flatten the target. With y of shape (n, 1), `prediction - y` would
        # broadcast to (n, n) and the in-place weight update would fail with
        # "non-broadcastable output operand".
        y = np.asarray(y, dtype=float).reshape(-1)

        n_samples, n_features = X.shape
        if n_samples == 0:
            raise ValueError("cannot fit on an empty dataset")
        if y.shape[0] != n_samples:
            raise ValueError(
                f"y has {y.shape[0]} values but X has {n_samples} samples"
            )

        self.weights = np.zeros(n_features)
        self.bias = 0.0

        # Gradient descent
        for _ in range(self.iterations):
            # The residual is shared by both gradients, so compute it once.
            residual = np.dot(X, self.weights) + self.bias - y

            # Gradients of the (halved) mean squared error
            dw = (1 / n_samples) * np.dot(X.T, residual)
            db = (1 / n_samples) * np.sum(residual)

            # Update parameters
            self.weights -= self.learning_rate * dw
            self.bias -= self.learning_rate * db

        return self

    def predict(self, X):
        """Return the predicted target for each row of ``X`` as a 1-D array."""
        return np.dot(np.asarray(X, dtype=float), self.weights) + self.bias

# Logistic Regression
class LogisticRegressionFromScratch:
    """Binary logistic regression fitted with full-batch gradient descent.

    Parameters
    ----------
    learning_rate : float
        Step size applied to every gradient update.
    iterations : int
        Number of gradient steps performed by ``fit``.

    Attributes
    ----------
    weights : numpy.ndarray or None
        Coefficient vector of shape ``(n_features,)``; ``None`` until ``fit``.
    bias : float or None
        Intercept term; ``None`` until ``fit``.
    """

    # exp(500) is still representable as a float64, so clipping the logit to
    # this magnitude avoids overflow while changing the result by < 1e-200.
    _MAX_LOGIT = 500.0

    def __init__(self, learning_rate=0.01, iterations=1000):
        self.learning_rate = learning_rate
        self.iterations = iterations
        self.weights = None
        self.bias = None

    def sigmoid(self, z):
        """Return the logistic function ``1 / (1 + exp(-z))`` element-wise.

        The input is clipped first so that very negative logits do not
        overflow inside ``np.exp``.
        """
        z = np.clip(z, -self._MAX_LOGIT, self._MAX_LOGIT)
        return 1 / (1 + np.exp(-z))

    def fit(self, X, y):
        """Estimate ``weights`` and ``bias`` from labelled training data.

        Parameters
        ----------
        X : array-like of shape (n_samples, n_features)
            Training inputs.
        y : array-like of shape (n_samples,) or (n_samples, 1)
            Binary targets (0 or 1). A column vector is flattened before use.

        Returns
        -------
        LogisticRegressionFromScratch
            ``self``, so calls can be chained.

        Raises
        ------
        ValueError
            If the data is empty or ``y`` does not hold exactly one value
            per row of ``X``.
        """
        X = np.asarray(X, dtype=float)
        # Flatten the target. With y of shape (n, 1), `probability - y` would
        # broadcast to (n, n) and the in-place weight update would fail.
        y = np.asarray(y, dtype=float).reshape(-1)

        n_samples, n_features = X.shape
        if n_samples == 0:
            raise ValueError("cannot fit on an empty dataset")
        if y.shape[0] != n_samples:
            raise ValueError(
                f"y has {y.shape[0]} values but X has {n_samples} samples"
            )

        self.weights = np.zeros(n_features)
        self.bias = 0.0

        # Gradient descent
        for _ in range(self.iterations):
            linear_model = np.dot(X, self.weights) + self.bias
            # The residual is shared by both gradients, so compute it once.
            residual = self.sigmoid(linear_model) - y

            # Gradients of the mean cross-entropy loss
            dw = (1 / n_samples) * np.dot(X.T, residual)
            db = (1 / n_samples) * np.sum(residual)

            # Update parameters
            self.weights -= self.learning_rate * dw
            self.bias -= self.learning_rate * db

        return self

    def predict(self, X):
        """Return a list of 0/1 labels, one per row of ``X``.

        A sample is labelled 1 only when its predicted probability is
        strictly greater than 0.5.
        """
        linear_model = np.dot(np.asarray(X, dtype=float), self.weights) + self.bias
        probabilities = self.sigmoid(linear_model)
        return (probabilities > 0.5).astype(int).tolist()

# Decision Tree
class DecisionTreeClassifier:
    """Binary-split decision tree classifier using entropy / information gain.

    The fitted tree is stored in ``tree`` as nested dictionaries. An internal
    node has the keys ``'feature'``, ``'threshold'``, ``'left'`` and
    ``'right'``; a leaf has the single key ``'value'`` holding the predicted
    label. Samples with ``x[feature] <= threshold`` go to the left child.

    Parameters
    ----------
    max_depth : int or None
        Maximum depth of the tree. ``None`` (the default) grows the tree
        until every leaf is pure or cannot be split any further.
    """

    def __init__(self, max_depth=None):
        self.max_depth = max_depth
        self.tree = None

    def _entropy(self, y):
        """Return the Shannon entropy (in bits) of the labels in ``y``."""
        if len(y) == 0:
            return 0.0
        _, counts = np.unique(y, return_counts=True)
        # Every count is >= 1, so each probability is strictly positive and
        # log2 is always defined; no epsilon is needed.
        probabilities = counts / len(y)
        return -np.sum(probabilities * np.log2(probabilities))

    def _information_gain(self, parent, left_child, right_child):
        """Return parent entropy minus the size-weighted entropy of the children."""
        parent_entropy = self._entropy(parent)
        left_entropy = self._entropy(left_child)
        right_entropy = self._entropy(right_child)

        n = len(parent)
        n_l, n_r = len(left_child), len(right_child)

        # Weighted entropy
        child_entropy = (n_l / n) * left_entropy + (n_r / n) * right_entropy
        return parent_entropy - child_entropy

    def _split(self, X, feature_idx, threshold):
        """Return boolean row masks ``(left, right)`` for ``X[:, feature_idx] <= threshold``."""
        left_mask = X[:, feature_idx] <= threshold
        right_mask = ~left_mask
        return left_mask, right_mask

    def _build_tree(self, X, y, depth=0):
        """Recursively build and return the subtree for the samples ``(X, y)``."""
        n_samples, n_features = X.shape
        labels, counts = np.unique(y, return_counts=True)

        # Majority label. np.unique returns sorted labels and argmax picks the
        # first maximum, so ties resolve to the smallest label.
        leaf = {'value': labels[counts.argmax()]}

        # Stopping criteria (max_depth=None means "no depth limit")
        depth_exhausted = self.max_depth is not None and depth >= self.max_depth
        if depth_exhausted or len(labels) == 1 or n_samples < 2:
            return leaf

        # Find best split
        best_gain = -1
        best_feature, best_threshold = None, None

        for feature_idx in range(n_features):
            thresholds = np.unique(X[:, feature_idx])
            for threshold in thresholds:
                left_mask, right_mask = self._split(X, feature_idx, threshold)

                # Skip splits that leave one side empty. The masks always
                # have n_samples entries, so test their contents, not length.
                if not left_mask.any() or not right_mask.any():
                    continue

                gain = self._information_gain(
                    y,
                    y[left_mask],
                    y[right_mask]
                )

                if gain > best_gain:
                    best_gain = gain
                    best_feature = feature_idx
                    best_threshold = threshold

        # No threshold separates the samples (e.g. all rows are identical):
        # fall back to a majority-vote leaf instead of recursing forever.
        if best_feature is None:
            return leaf

        # Recursively build tree; both children are non-empty and strictly
        # smaller than the parent, so the recursion always terminates.
        left_mask, right_mask = self._split(X, best_feature, best_threshold)
        left_subtree = self._build_tree(X[left_mask], y[left_mask], depth + 1)
        right_subtree = self._build_tree(X[right_mask], y[right_mask], depth + 1)

        return {
            'feature': best_feature,
            'threshold': best_threshold,
            'left': left_subtree,
            'right': right_subtree
        }

    def fit(self, X, y):
        """Build the tree from training data and return ``self``.

        Parameters
        ----------
        X : array-like of shape (n_samples, n_features)
            Training inputs.
        y : array-like of shape (n_samples,) or (n_samples, 1)
            Class labels. A column vector is flattened before use.

        Raises
        ------
        ValueError
            If ``X`` is not 2-D, the data is empty, or ``y`` does not hold
            exactly one label per row of ``X``.
        """
        X = np.asarray(X)
        y = np.asarray(y).reshape(-1)
        if X.ndim != 2:
            raise ValueError("X must be a 2-D array of shape (n_samples, n_features)")
        if y.shape[0] == 0:
            raise ValueError("cannot fit on an empty dataset")
        if y.shape[0] != X.shape[0]:
            raise ValueError(
                f"y has {y.shape[0]} labels but X has {X.shape[0]} samples"
            )

        self.tree = self._build_tree(X, y)
        return self

    def _predict_single(self, x, tree):
        """Walk ``tree`` for one sample ``x`` and return the label of the leaf reached."""
        if 'value' in tree:
            return tree['value']

        feature_val = x[tree['feature']]
        if feature_val <= tree['threshold']:
            return self._predict_single(x, tree['left'])
        else:
            return self._predict_single(x, tree['right'])

    def predict(self, X):
        """Return a list with the predicted label for each row of ``X``."""
        return [self._predict_single(x, self.tree) for x in np.asarray(X)]

# Demonstration of usage
def ml_algorithm_demo():
    """Train each from-scratch model on synthetic data and print its training-set score.

    The global NumPy random generator is seeded with 42 so the three printed
    metrics are reproducible. Every model is scored on the same data it was
    fitted on.
    """
    np.random.seed(42)

    # --- Linear regression: y = 2x + 1 plus Gaussian noise (std 0.1) ---
    reg_inputs = np.random.rand(100, 1)
    reg_noise = np.random.normal(0, 0.1, (100, 1))
    # Flatten to 1-D so the targets line up with the model's 1-D predictions.
    reg_targets = (2 * reg_inputs + 1 + reg_noise).reshape(-1)

    regressor = LinearRegressionFromScratch()
    regressor.fit(reg_inputs, reg_targets)
    reg_mse = mean_squared_error(reg_targets, regressor.predict(reg_inputs))
    print("Linear Regression MSE:", reg_mse)

    # --- Logistic regression: class 1 when the two features sum to > 0 ---
    clf_inputs = np.random.randn(100, 2)
    feature_sum = clf_inputs[:, 0] + clf_inputs[:, 1]
    clf_labels = (feature_sum > 0).astype(int)

    logistic = LogisticRegressionFromScratch()
    logistic.fit(clf_inputs, clf_labels)
    logistic_acc = accuracy_score(clf_labels, logistic.predict(clf_inputs))
    print("Logistic Regression Accuracy:", logistic_acc)

    # --- Decision tree: class 1 when the first feature is positive ---
    tree_inputs = np.random.randn(100, 2)
    tree_labels = (tree_inputs[:, 0] > 0).astype(int)

    tree_model = DecisionTreeClassifier(max_depth=5)
    tree_model.fit(tree_inputs, tree_labels)
    tree_acc = accuracy_score(tree_labels, tree_model.predict(tree_inputs))
    print("Decision Tree Accuracy:", tree_acc)

# Run the demonstration: executes as soon as this cell/module is evaluated and
# prints one metric line per model (linear, logistic, decision tree).
ml_algorithm_demo()

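# Recorded cell output. It appears to come from a run made before `y_reg` was
# flattened with `reshape(-1)` in ml_algorithm_demo: a (100, 1) target passed to
# LinearRegressionFromScratch.fit reproduces exactly this shape mismatch.
# ValueError: non-broadcastable output operand with shape (1,) doesn't match the broadcast shape (1,100)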