In [4]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix
from sklearn.neural_network import MLPClassifier
from sklearn.svm import SVC
from sklearn.multiclass import OneVsRestClassifier, OneVsOneClassifier
from sklearn.pipeline import Pipeline
import seaborn as sns
from sklearn.ensemble import VotingClassifier
from sklearn.base import BaseEstimator, ClassifierMixin
from scipy.spatial.distance import cdist


# Custom RBF Network implementation
class RBFNetwork(BaseEstimator, ClassifierMixin):
    def __init__(self, n_centers=52, sigma=1.0, learning_rate=0.01, epochs=100, batch_size=10, 
                 initialization='kmeans', center_learning_rate=0.01):
        self.n_centers = n_centers
        self.sigma = sigma
        self.learning_rate = learning_rate
        self.epochs = epochs
        self.batch_size = batch_size
        self.initialization = initialization
        self.center_learning_rate = center_learning_rate
        self.centers = None
        self.weights = None
        self.classes_ = None
        self.n_classes = None
        self.sigmas = None
        
    def _initialize_centers(self, X, y):
        n_samples, n_features = X.shape
        
        if self.initialization == 'random':
            # Random initialization
            idx = np.random.choice(n_samples, self.n_centers, replace=False)
            self.centers = X[idx]
            
        elif self.initialization == 'kmeans':
            # Use K-means for center initialization
            from sklearn.cluster import KMeans
            kmeans = KMeans(n_clusters=self.n_centers, random_state=42)
            kmeans.fit(X)
            self.centers = kmeans.cluster_centers_
            
        elif self.initialization == 'per_class':
            # Initialize centers from each class
            centers_per_class = max(1, self.n_centers // self.n_classes)
            centers = []
            
            for i in range(self.n_classes):
                class_samples = X[y == i]
                if len(class_samples) == 0:
                    continue
                    
                if len(class_samples) < centers_per_class:
                    # If we have fewer samples than required centers, use all samples
                    centers.append(class_samples)
                else:
                    # Randomly select centers_per_class samples
                    idx = np.random.choice(len(class_samples), centers_per_class, replace=False)
                    centers.append(class_samples[idx])
                    
            self.centers = np.vstack(centers)
            if len(self.centers) < self.n_centers:
                # If we couldn't get enough centers, fill with random samples
                remaining = self.n_centers - len(self.centers)
                idx = np.random.choice(n_samples, remaining, replace=False)
                self.centers = np.vstack([self.centers, X[idx]])
        
        # Initialize sigmas (widths) for each center
        # Start with a constant sigma for all centers
        self.sigmas = np.ones(self.n_centers) * self.sigma
        
    def _compute_rbf_outputs(self, X):
        # Compute RBF activations for each center
        # Shape: (n_samples, n_centers)
        distances = cdist(X, self.centers)
        # Apply RBF function using individual sigmas for each center
        return np.exp(-0.5 * np.square(distances) / np.square(self.sigmas.reshape(1, -1)))
    
    def fit(self, X, y):
        # Convert y to numerical if needed
        self.classes_ = np.unique(y)
        self.n_classes = len(self.classes_)
        y_numeric = np.zeros(len(y), dtype=int)
        for i, cls in enumerate(self.classes_):
            y_numeric[y == cls] = i
        
        # One-hot encode the target
        y_onehot = np.zeros((len(y), self.n_classes))
        for i in range(len(y)):
            y_onehot[i, y_numeric[i]] = 1
        
        # Initialize centers
        self._initialize_centers(X, y_numeric)
        
        # Initialize weights (n_centers x n_classes)
        self.weights = np.random.normal(0, 0.1, (self.n_centers, self.n_classes))
        
        # Training loop
        n_samples = X.shape[0]
        for epoch in range(self.epochs):
            # Shuffle the data
            indices = np.arange(n_samples)
            np.random.shuffle(indices)
            X_shuffled = X[indices]
            y_onehot_shuffled = y_onehot[indices]
            
            # Mini-batch training
            for i in range(0, n_samples, self.batch_size):
                end = min(i + self.batch_size, n_samples)
                X_batch = X_shuffled[i:end]
                y_batch = y_onehot_shuffled[i:end]
                
                # Forward pass
                rbf_outputs = self._compute_rbf_outputs(X_batch)
                y_pred = np.dot(rbf_outputs, self.weights)
                
                # Compute error and deltas
                error = y_batch - y_pred
                weight_deltas = self.learning_rate * np.dot(rbf_outputs.T, error)
                
                # Update weights
                self.weights += weight_deltas
                
                # Two-phase learning: Update centers if needed
                if self.center_learning_rate > 0:
                    # Compute the contribution of each center to the error
                    center_deltas = np.zeros_like(self.centers)
                    sigma_deltas = np.zeros_like(self.sigmas)
                    
                    for j in range(self.n_centers):
                        # For each center, calculate how it affects the output error
                        for k in range(len(X_batch)):
                            # Compute the influence of this center on the output
                            rbf_value = rbf_outputs[k, j]
                            delta = np.sum(error[k] * self.weights[j])
                            
                            # Gradient for center update
                            diff = X_batch[k] - self.centers[j]
                            center_gradient = delta * rbf_value * diff / (self.sigmas[j] ** 2)
                            center_deltas[j] += center_gradient
                            
                            # Gradient for sigma update
                            dist_squared = np.sum(np.square(diff))
                            sigma_gradient = delta * rbf_value * dist_squared / (self.sigmas[j] ** 3)
                            sigma_deltas[j] += sigma_gradient
                    
                    # Update centers and sigmas
                    self.centers += self.center_learning_rate * center_deltas / len(X_batch)
                    self.sigmas += self.center_learning_rate * sigma_deltas / len(X_batch)
                    
            # Optional: Add validation to monitor progress and early stopping
        
        return self
    
    def predict_proba(self, X):
        rbf_outputs = self._compute_rbf_outputs(X)
        raw_output = np.dot(rbf_outputs, self.weights)
        
        # Apply softmax to get probabilities
        exp_scores = np.exp(raw_output - np.max(raw_output, axis=1, keepdims=True))
        return exp_scores / np.sum(exp_scores, axis=1, keepdims=True)
    
    def predict(self, X):
        proba = self.predict_proba(X)
        return self.classes_[np.argmax(proba, axis=1)]


# Function to load and prepare data
def load_data(train_data_path='train_data.txt', test_data_path='test_data.txt'):
    """
    Load and preprocess the multi-font character recognition data.
    
    Returns:
        X_train: Training features
        y_train: Training labels
        X_test: Test features
        y_test: Test labels
    """
    # Manually construct the dataset as per provided format
    # For demonstration, I'll generate sample data based on the description
    # In practice, you would load from the text files
    
    # Function to parse the data from text files
    def parse_data_file(file_path):
        X = []
        y = []
        
        with open(file_path, 'r') as f:
            lines = f.readlines()
            
        i = 0
        while i < len(lines):
            # Skip non-data lines
            if len(lines[i].strip()) < 5 or "Pattern" in lines[i]:
                i += 1
                continue
                
            # Find line starting with "Inputs Outputs" or containing data
            if "Inputs Outputs" in lines[i]:
                i += 1
                continue
                
            # Try to parse the data line
            try:
                parts = lines[i].strip().split()
                if len(parts) >= 40:  # Expect 14 inputs + 26 outputs
                    inputs = [float(val) for val in parts[:14]]
                    outputs = [int(val) for val in parts[14:40]]
                    
                    X.append(inputs)
                    # Convert one-hot to class index (0-25 for A-Z)
                    y.append(np.argmax(outputs))
            except:
                pass
                
            i += 1
        
        return np.array(X), np.array(y)
    
    try:
        # Try to load from the actual files
        X_train, y_train = parse_data_file('train_data_3.txt')
        X_test, y_test = parse_data_file('test_data_3.txt')
        
        print(f"Loaded data: Train shapes X:{X_train.shape}, y:{y_train.shape}, Test shapes X:{X_test.shape}, y:{y_test.shape}")
        
    except Exception as e:
        print(f"Error loading data: {e}")
        print("Generating synthetic data for demonstration...")
        
        # Generate synthetic data if file loading fails
        # This is just for demonstration purposes
        np.random.seed(42)
        
        # 78 training samples (26 letters x 3 fonts)
        X_train = np.random.rand(78, 14) * 20
        y_train = np.repeat(np.arange(26), 3)
        
        # 78 test samples (26 letters x 3 fonts)
        X_test = np.random.rand(78, 14) * 20
        y_test = np.repeat(np.arange(26), 3)
    
    return X_train, y_train, X_test, y_test


# Main function to run experiments

def experiment_mlp(X_train, y_train, X_test, y_test):
    """
    Run different MLP experiments.
    
    Returns:
        Dictionary with MLP results
    """
    results = {}
    
    # 1. Basic MLP with one hidden layer
    print("  1.1 Testing basic MLP architecture...")
    mlp_basic = MLPClassifier(
        hidden_layer_sizes=(28,),
        activation='relu',
        solver='adam',
        alpha=0.0001,
        batch_size='auto',
        learning_rate='adaptive',
        max_iter=1000,
        random_state=42
    )
    
    mlp_basic.fit(X_train, y_train)
    y_pred = mlp_basic.predict(X_test)
    accuracy = accuracy_score(y_test, y_pred)
    
    print(f"    Basic MLP accuracy: {accuracy:.4f}")
    results['basic'] = {
        'model': mlp_basic,
        'accuracy': accuracy,
        'y_pred': y_pred,
        'report': classification_report(y_test, y_pred, output_dict=True)
    }
    
    # 2. MLP with two hidden layers
    print("  1.2 Testing two-layer MLP architecture...")
    mlp_two_layer = MLPClassifier(
        hidden_layer_sizes=(28, 14),
        activation='relu',
        solver='adam',
        alpha=0.0001,
        batch_size='auto',
        learning_rate='adaptive',
        max_iter=1000,
        random_state=42
    )
    
    mlp_two_layer.fit(X_train, y_train)
    y_pred = mlp_two_layer.predict(X_test)
    accuracy = accuracy_score(y_test, y_pred)
    
    print(f"    Two-layer MLP accuracy: {accuracy:.4f}")
    results['two_layer'] = {
        'model': mlp_two_layer,
        'accuracy': accuracy,
        'y_pred': y_pred,
        'report': classification_report(y_test, y_pred, output_dict=True)
    }
    
    # 3. MLP with regularization
    print("  1.3 Testing MLP with regularization...")
    mlp_reg = MLPClassifier(
        hidden_layer_sizes=(28, 14),
        activation='relu',
        solver='adam',
        alpha=0.01,  # L2 regularization
        batch_size='auto',
        learning_rate='adaptive',
        max_iter=1000,
        random_state=42
    )
    
    mlp_reg.fit(X_train, y_train)
    y_pred = mlp_reg.predict(X_test)
    accuracy = accuracy_score(y_test, y_pred)
    
    print(f"    MLP with regularization accuracy: {accuracy:.4f}")
    results['regularized'] = {'model': mlp_reg,
        'accuracy': accuracy,
        'y_pred': y_pred,
        'report': classification_report(y_test, y_pred, output_dict=True)
    }
    
    # 4. Find best MLP parameters using grid search
    print("  1.4 Finding optimal MLP parameters with grid search...")
    
    param_grid = {
        'hidden_layer_sizes': [(28,), (28, 14), (56,), (56, 28)],
        'activation': ['relu', 'tanh'],
        'alpha': [0.0001, 0.001, 0.01],
        'learning_rate': ['constant', 'adaptive'],
    }
    
    grid_search = GridSearchCV(
        MLPClassifier(max_iter=1000, random_state=42),
        param_grid=param_grid,
        cv=3,
        n_jobs=-1,
        verbose=0
    )
    
    grid_search.fit(X_train, y_train)
    best_mlp = grid_search.best_estimator_
    y_pred = best_mlp.predict(X_test)
    accuracy = accuracy_score(y_test, y_pred)
    
    print(f"    Optimal MLP accuracy: {accuracy:.4f}")
    print(f"    Best MLP parameters: {grid_search.best_params_}")
    
    results['optimal'] = {
        'model': best_mlp,
        'accuracy': accuracy,
        'y_pred': y_pred,
        'report': classification_report(y_test, y_pred, output_dict=True),
        'best_params': grid_search.best_params_
    }
    
    return results


def experiment_rbf(X_train, y_train, X_test, y_test):
    """
    Run different RBF Network experiments.
    
    Returns:
        Dictionary with RBF results
    """
    results = {}
    
    # 1. Basic RBF
    print("  2.1 Testing basic RBF Network...")
    rbf_basic = RBFNetwork(
        n_centers=52,  # 2 centers per class
        sigma=1.0,
        learning_rate=0.01,
        epochs=100,
        batch_size=10,
        initialization='random'
    )
    
    rbf_basic.fit(X_train, y_train)
    y_pred = rbf_basic.predict(X_test)
    accuracy = accuracy_score(y_test, y_pred)
    
    print(f"    Basic RBF accuracy: {accuracy:.4f}")
    results['basic'] = {
        'model': rbf_basic,
        'accuracy': accuracy,
        'y_pred': y_pred,
        'report': classification_report(y_test, y_pred, output_dict=True)
    }
    
    # 2. RBF with K-means initialization
    print("  2.2 Testing RBF with K-means initialization...")
    rbf_kmeans = RBFNetwork(
        n_centers=52,
        sigma=1.0,
        learning_rate=0.01,
        epochs=100,
        batch_size=10,
        initialization='kmeans'
    )
    
    rbf_kmeans.fit(X_train, y_train)
    y_pred = rbf_kmeans.predict(X_test)
    accuracy = accuracy_score(y_test, y_pred)
    
    print(f"    RBF with K-means initialization accuracy: {accuracy:.4f}")
    results['kmeans_init'] = {
        'model': rbf_kmeans,
        'accuracy': accuracy,
        'y_pred': y_pred,
        'report': classification_report(y_test, y_pred, output_dict=True)
    }
    
    # 3. RBF with per-class center initialization
    print("  2.3 Testing RBF with per-class initialization...")
    rbf_per_class = RBFNetwork(
        n_centers=52,  # 2 centers per class
        sigma=1.0,
        learning_rate=0.01,
        epochs=100,
        batch_size=10,
        initialization='per_class'
    )
    
    rbf_per_class.fit(X_train, y_train)
    y_pred = rbf_per_class.predict(X_test)
    accuracy = accuracy_score(y_test, y_pred)
    
    print(f"    RBF with per-class initialization accuracy: {accuracy:.4f}")
    results['per_class_init'] = {
        'model': rbf_per_class,
        'accuracy': accuracy,
        'y_pred': y_pred,
        'report': classification_report(y_test, y_pred, output_dict=True)
    }
    
    # 4. RBF with adaptive centers (two-phase learning)
    print("  2.4 Testing RBF with adaptive centers...")
    rbf_adaptive = RBFNetwork(
        n_centers=52,
        sigma=1.0,
        learning_rate=0.01,
        epochs=100,
        batch_size=10,
        initialization='kmeans',
        center_learning_rate=0.001  # Enable center adaptation
    )
    
    rbf_adaptive.fit(X_train, y_train)
    y_pred = rbf_adaptive.predict(X_test)
    accuracy = accuracy_score(y_test, y_pred)
    
    print(f"    RBF with adaptive centers accuracy: {accuracy:.4f}")
    results['adaptive'] = {
        'model': rbf_adaptive,
        'accuracy': accuracy,
        'y_pred': y_pred,
        'report': classification_report(y_test, y_pred, output_dict=True)
    }
    
    # 5. Experiment with different numbers of centers
    print("  2.5 Testing different numbers of RBF centers...")
    center_counts = [26, 52, 78]  # 1x, 2x, 3x the number of classes
    for n_centers in center_counts:
       
        rbf = RBFNetwork(
            n_centers=n_centers,
            sigma=1.0,
            learning_rate=0.01,
            epochs=100,
            batch_size=10,
            initialization='kmeans'
        )
        
        rbf.fit(X_train, y_train)
        y_pred = rbf.predict(X_test)
        accuracy = accuracy_score(y_test, y_pred)
        
        print(f"    RBF with {n_centers} centers accuracy: {accuracy:.4f}")
        results[f'centers_{n_centers}'] = {
            'model': rbf,
            'accuracy': accuracy,
            'y_pred': y_pred,
            'report': classification_report(y_test, y_pred, output_dict=True)
        }
    
    return results


def experiment_svm(X_train, y_train, X_test, y_test):
    """
    Run different SVM experiments.
    
    Returns:
        Dictionary with SVM results
    """
    results = {}
    
    # 1. Linear SVM
    print("  3.1 Testing Linear SVM...")
    svm_linear = SVC(kernel='linear', C=1.0, random_state=42)
    
    svm_linear.fit(X_train, y_train)
    y_pred = svm_linear.predict(X_test)
    accuracy = accuracy_score(y_test, y_pred)
    
    print(f"    Linear SVM accuracy: {accuracy:.4f}")
    results['linear'] = {
        'model': svm_linear,
        'accuracy': accuracy,
        'y_pred': y_pred,
        'report': classification_report(y_test, y_pred, output_dict=True)
    }
    
    # 2. RBF SVM
    print("  3.2 Testing RBF SVM...")
    svm_rbf = SVC(kernel='rbf', C=1.0, gamma='scale', random_state=42)
    
    svm_rbf.fit(X_train, y_train)
    y_pred = svm_rbf.predict(X_test)
    accuracy = accuracy_score(y_test, y_pred)
    
    print(f"    RBF SVM accuracy: {accuracy:.4f}")
    results['rbf'] = {
        'model': svm_rbf,
        'accuracy': accuracy,
        'y_pred': y_pred,
        'report': classification_report(y_test, y_pred, output_dict=True)
    }
    
    # 3. SVM with different C values
    print("  3.3 Testing SVM with different C values...")
    c_values = [0.1, 1.0, 10.0, 100.0]
    for c in c_values:
        svm = SVC(kernel='rbf', C=c, gamma='scale', random_state=42)
        
        svm.fit(X_train, y_train)
        y_pred = svm.predict(X_test)
        accuracy = accuracy_score(y_test, y_pred)
        
        print(f"    SVM with C={c} accuracy: {accuracy:.4f}")
        results[f'c_{c}'] = {
            'model': svm,
            'accuracy': accuracy,
            'y_pred': y_pred,
            'report': classification_report(y_test, y_pred, output_dict=True)
        }
    
    # 4. One-vs-Rest Strategy
    print("  3.4 Testing One-vs-Rest SVM...")
    ovr_svm = OneVsRestClassifier(SVC(kernel='rbf', C=1.0, gamma='scale', random_state=42))
    
    ovr_svm.fit(X_train, y_train)
    y_pred = ovr_svm.predict(X_test)
    accuracy = accuracy_score(y_test, y_pred)
    
    print(f"    One-vs-Rest SVM accuracy: {accuracy:.4f}")
    results['ovr'] = {
        'model': ovr_svm,
        'accuracy': accuracy,
        'y_pred': y_pred,
        'report': classification_report(y_test, y_pred, output_dict=True)
    }
    
    # 5. One-vs-One Strategy
    print("  3.5 Testing One-vs-One SVM...")
    ovo_svm = OneVsOneClassifier(SVC(kernel='rbf', C=1.0, gamma='scale', random_state=42))
    
    ovo_svm.fit(X_train, y_train)
    y_pred = ovo_svm.predict(X_test)
    accuracy = accuracy_score(y_test, y_pred)
    
    print(f"    One-vs-One SVM accuracy: {accuracy:.4f}")
    results['ovo'] = {
        'model': ovo_svm,
        'accuracy': accuracy,
        'y_pred': y_pred,
        'report': classification_report(y_test, y_pred, output_dict=True)
    }
    
    # 6. Grid search for best SVM parameters
    print("  3.6 Finding optimal SVM parameters with grid search...")
    
    param_grid = {
        'kernel': ['rbf', 'poly'],
        'C': [0.1, 1, 10, 100],
        'gamma': ['scale', 'auto', 0.1, 0.01]
    }
    
    grid_search = GridSearchCV(
        SVC(random_state=42),
        param_grid=param_grid,
        cv=3,
        n_jobs=-1,
        verbose=0
    )
    
    grid_search.fit(X_train, y_train)
    best_svm = grid_search.best_estimator_
    y_pred = best_svm.predict(X_test)
    accuracy = accuracy_score(y_test, y_pred)
    
    print(f"    Optimal SVM accuracy: {accuracy:.4f}")
    print(f"    Best SVM parameters: {grid_search.best_params_}")
    
    results['optimal'] = {
        'model': best_svm,
        'accuracy': accuracy,
        'y_pred': y_pred,
        'report': classification_report(y_test, y_pred, output_dict=True),
        'best_params': grid_search.best_params_
    }
    
    return results


def compare_models(results):
    """
    Compare all models and visualize results.
    
    Args:
        results: Dictionary with results of all experiments
    """
    # 1. Create a summary table of accuracies
    print("\nModel Accuracy Comparison:")
    print("-" * 60)
    print(f"{'Model':<30} {'Accuracy':<10}")
    print("-" * 60)
    
    accuracies = {}
    
    for model_type, model_results in results.items():
        for variant, variant_results in model_results.items():
            model_name = f"{model_type} ({variant})"
            accuracy = variant_results['accuracy']
            accuracies[model_name] = accuracy
            print(f"{model_name:<30} {accuracy:.4f}")
    
    print("-" * 60)
    
    # 2. Visualize accuracies
    plt.figure(figsize=(12, 6))
    models = list(accuracies.keys())
    accs = list(accuracies.values())
    
    # Sort by accuracy
    sorted_indices = np.argsort(accs)[::-1]  # Descending order
    sorted_models = [models[i] for i in sorted_indices]
    sorted_accs = [accs[i] for i in sorted_indices]
    
    plt.bar(sorted_models, sorted_accs, color='skyblue')
    plt.xlabel('Model')
    plt.ylabel('Accuracy')
    plt.title('Model Accuracy Comparison')
    plt.xticks(rotation=90)
    plt.tight_layout()
    plt.savefig('model_comparison.png')
    plt.close()
    
    # 3. Find the best model overall
    best_model_name = sorted_models[0]
    best_accuracy = sorted_accs[0]
    print(f"\nBest Model: {best_model_name} with accuracy {best_accuracy:.4f}")
    
    # 4. Visualize confusion matrix for best model
    model_type, variant = best_model_name.split(' (')
    variant = variant.rstrip(')')
    
    best_results = results[model_type][variant]
    y_true = results[model_type][variant]['y_pred']  # Assuming same test set for all
    y_pred = best_results['y_pred']
    
    cm = confusion_matrix(y_true, y_pred)
    
    plt.figure(figsize=(10, 8))
    sns.heatmap(cm, annot=True, fmt='d', cmap='Blues')
    plt.xlabel('Predicted')
    plt.ylabel('True')
    plt.title(f'Confusion Matrix - {best_model_name}')
    plt.tight_layout()
    plt.savefig('best_model_confusion_matrix.png')
    plt.close()
    
    # 5. Visualize classification report for best model
    report = best_results['report']
    
    # Extract metrics per class
    class_precision = [report[str(i)]['precision'] for i in range(26)]
    class_recall = [report[str(i)]['recall'] for i in range(26)]
    # Continuing from line 741
    class_f1 = [report[str(i)]['f1-score'] for i in range(26)]
    
    # Create dataframe for visualization
    class_labels = [chr(65 + i) for i in range(26)]  # A-Z
    metrics_df = pd.DataFrame({
        'Class': class_labels,
        'Precision': class_precision,
        'Recall': class_recall,
        'F1-Score': class_f1
    })
    
    # Plot metrics
    plt.figure(figsize=(12, 6))
    
    metrics_df.plot(x='Class', y=['Precision', 'Recall', 'F1-Score'], kind='bar', figsize=(12, 6))
    plt.title(f'Classification Metrics by Class - {best_model_name}')
    plt.xlabel('Character Class')
    plt.ylabel('Score')
    plt.ylim(0, 1.0)
    plt.legend(loc='lower right')
    plt.tight_layout()
    plt.savefig('best_model_class_metrics.png')
    plt.close()
    
    # 6. Compare best models of each type
    best_models = {}
    for model_type, model_results in results.items():
        best_variant = max(model_results.items(), key=lambda x: x[1]['accuracy'])
        best_models[model_type] = best_variant[1]
    
    print("\nBest Model of Each Type:")
    print("-" * 60)
    print(f"{'Model Type':<15} {'Best Variant':<15} {'Accuracy':<10}")
    print("-" * 60)
    
    for model_type, model_results in best_models.items():
        variant = next((k for k, v in results[model_type].items() if v == model_results), "")
        print(f"{model_type:<15} {variant:<15} {model_results['accuracy']:.4f}")
    
    print("-" * 60)
    
    # 7. Create an ensemble of the best models
    print("\n5. Creating an ensemble of the best models...")
    
    # Get best model instances
    best_mlp = best_models['MLP']['model']
    best_rbf = best_models['RBF']['model']
    best_svm = best_models['SVM']['model']
    
    # Create a voting classifier
    ensemble = VotingClassifier(
        estimators=[
            ('mlp', best_mlp),
            ('rbf', best_rbf),
            ('svm', best_svm)
        ],
        voting='hard'
    )
    
    # Need to refit since we already fit the individual models
    ensemble.fit(X_train, y_train)
    y_pred_ensemble = ensemble.predict(X_test)
    accuracy_ensemble = accuracy_score(y_test, y_pred_ensemble)
    
    print(f"Ensemble model accuracy: {accuracy_ensemble:.4f}")
    
    # Compare with best individual model
    if accuracy_ensemble > best_accuracy:
        print(f"Ensemble outperforms best individual model by {accuracy_ensemble - best_accuracy:.4f}")
    else:
        print(f"Best individual model outperforms ensemble by {best_accuracy - accuracy_ensemble:.4f}")
    
    # 8. Additional visualizations
    
    # Visualize learning curves for best MLP
    if 'MLP' in best_models:
        plt.figure(figsize=(10, 6))
        plt.plot(best_mlp.loss_curve_)
        plt.title('MLP Learning Curve')
        plt.xlabel('Iterations')
        plt.ylabel('Loss')
        plt.grid(True)
        plt.savefig('mlp_learning_curve.png')
        plt.close()
    
    # Visualize decision boundaries (only if 2D features available or using PCA)
    # Not implemented here since we have 14-dimensional data
    
    return {
        'accuracies': accuracies,
        'best_model_name': best_model_name,
        'best_accuracy': best_accuracy,
        'best_models': best_models,
        'ensemble_accuracy': accuracy_ensemble
    }

In [5]:
"""
Main function to run the entire experiment pipeline.
"""
print("Multi-Font Character Recognition Experiment")
print("===========================================")

# 1. Load and prepare data
print("\nLoading data...")
X_train, y_train, X_test, y_test = load_data()

Multi-Font Character Recognition Experiment

Loading data...
Loaded data: Train shapes X:(78, 14), y:(78,), Test shapes X:(78, 14), y:(78,)


In [6]:
# Convert class indices to letters for better readability
class_names = [chr(65 + i) for i in range(26)]  # A-Z

# 2. Display data information
print(f"\nTraining set: {X_train.shape[0]} samples, {X_train.shape[1]} features")
print(f"Test set: {X_test.shape[0]} samples, {X_test.shape[1]} features")
print(f"Number of classes: {len(np.unique(y_train))}")

# 3. Explore data
print("\nExploring data...")

# Class distribution
train_class_counts = np.bincount(y_train, minlength=26)
test_class_counts = np.bincount(y_test, minlength=26)

# Visualize class distribution
plt.figure(figsize=(12, 6))
bar_width = 0.35
x = np.arange(26)

plt.bar(x - bar_width/2, train_class_counts, bar_width, label='Train')
plt.bar(x + bar_width/2, test_class_counts, bar_width, label='Test')
plt.xlabel('Character Class')
plt.ylabel('Number of Samples')
plt.title('Class Distribution in Training and Test Sets')
plt.xticks(x, class_names)
plt.legend()
plt.tight_layout()
plt.savefig('class_distribution.png')
plt.close()


Training set: 78 samples, 14 features
Test set: 78 samples, 14 features
Number of classes: 26

Exploring data...


In [7]:
"""
Run experiments with MLP, RBF, and SVM models.

Args:
    X_train: Training features
    y_train: Training labels
    X_test: Test features
    y_test: Test labels

Returns:
    Dictionary with results of all experiments
"""
results = {}

print("Starting experiments with MLP, RBF, and SVM for multi-font character recognition...")

# Create copies of the data for potential different preprocessing
X_train_scaled = StandardScaler().fit_transform(X_train)
X_test_scaled = StandardScaler().fit_transform(X_test)

Starting experiments with MLP, RBF, and SVM for multi-font character recognition...


In [8]:
# 1. MLP Experiments
print("\n1. Running MLP experiments...")
mlp_results = experiment_mlp(X_train_scaled, y_train, X_test_scaled, y_test)
results['MLP'] = mlp_results


1. Running MLP experiments...
  1.1 Testing basic MLP architecture...
    Basic MLP accuracy: 0.8462
  1.2 Testing two-layer MLP architecture...
    Two-layer MLP accuracy: 0.8590
  1.3 Testing MLP with regularization...
    MLP with regularization accuracy: 0.8590
  1.4 Finding optimal MLP parameters with grid search...
    Optimal MLP accuracy: 0.8846
    Best MLP parameters: {'activation': 'relu', 'alpha': 0.0001, 'hidden_layer_sizes': (56,), 'learning_rate': 'constant'}


In [9]:
# 2. RBF Experiments
print("\n2. Running RBF Network experiments...")
rbf_results = experiment_rbf(X_train_scaled, y_train, X_test_scaled, y_test)
results['RBF'] = rbf_results


2. Running RBF Network experiments...
  2.1 Testing basic RBF Network...
    Basic RBF accuracy: 0.8846
  2.2 Testing RBF with K-means initialization...


[WinError 2] The system cannot find the file specified
  File "C:\Users\NM TRADERS\anaconda3\Lib\site-packages\joblib\externals\loky\backend\context.py", line 257, in _count_physical_cores
    cpu_info = subprocess.run(
               ^^^^^^^^^^^^^^^
  File "C:\Users\NM TRADERS\anaconda3\Lib\subprocess.py", line 548, in run
    with Popen(*popenargs, **kwargs) as process:
         ^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "C:\Users\NM TRADERS\anaconda3\Lib\subprocess.py", line 1026, in __init__
    self._execute_child(args, executable, preexec_fn, close_fds,
  File "C:\Users\NM TRADERS\anaconda3\Lib\subprocess.py", line 1538, in _execute_child
    hp, ht, pid, tid = _winapi.CreateProcess(executable, args,
                       ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^


    RBF with K-means initialization accuracy: 0.8718
  2.3 Testing RBF with per-class initialization...
    RBF with per-class initialization accuracy: 0.8462
  2.4 Testing RBF with adaptive centers...




    RBF with adaptive centers accuracy: 0.8718
  2.5 Testing different numbers of RBF centers...




    RBF with 26 centers accuracy: 0.7564


  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


    RBF with 52 centers accuracy: 0.8718


  return fit_method(estimator, *args, **kwargs)


    RBF with 78 centers accuracy: 0.8718


In [10]:
# 3. SVM Experiments
print("\n3. Running SVM experiments...")
svm_results = experiment_svm(X_train_scaled, y_train, X_test_scaled, y_test)
results['SVM'] = svm_results


3. Running SVM experiments...
  3.1 Testing Linear SVM...
    Linear SVM accuracy: 0.8718
  3.2 Testing RBF SVM...
    RBF SVM accuracy: 0.8333
  3.3 Testing SVM with different C values...
    SVM with C=0.1 accuracy: 0.8077
    SVM with C=1.0 accuracy: 0.8333
    SVM with C=10.0 accuracy: 0.8590
    SVM with C=100.0 accuracy: 0.8718
  3.4 Testing One-vs-Rest SVM...
    One-vs-Rest SVM accuracy: 0.8462
  3.5 Testing One-vs-One SVM...
    One-vs-One SVM accuracy: 0.8333
  3.6 Finding optimal SVM parameters with grid search...
    Optimal SVM accuracy: 0.8590
    Best SVM parameters: {'C': 10, 'gamma': 'scale', 'kernel': 'rbf'}


In [19]:
# 4. Compare all models
print("\n4. Comparing all models...")
compare_models(results)


4. Comparing all models...

Model Accuracy Comparison:
------------------------------------------------------------
Model                          Accuracy  
------------------------------------------------------------
MLP (basic)                    0.8462
MLP (two_layer)                0.8590
MLP (regularized)              0.8590
MLP (optimal)                  0.8846
RBF (basic)                    0.8846
RBF (kmeans_init)              0.8718
RBF (per_class_init)           0.8462
RBF (adaptive)                 0.8718
RBF (centers_26)               0.7564
RBF (centers_52)               0.8718
RBF (centers_78)               0.8718
SVM (linear)                   0.8718
SVM (rbf)                      0.8333
SVM (c_0.1)                    0.8077
SVM (c_1.0)                    0.8333
SVM (c_10.0)                   0.8590
SVM (c_100.0)                  0.8718
SVM (ovr)                      0.8462
SVM (ovo)                      0.8333
SVM (optimal)                  0.8590
--------------------

{'accuracies': {'MLP (basic)': 0.8461538461538461,
  'MLP (two_layer)': 0.8589743589743589,
  'MLP (regularized)': 0.8589743589743589,
  'MLP (optimal)': 0.8846153846153846,
  'RBF (basic)': 0.8846153846153846,
  'RBF (kmeans_init)': 0.8717948717948718,
  'RBF (per_class_init)': 0.8461538461538461,
  'RBF (adaptive)': 0.8717948717948718,
  'RBF (centers_26)': 0.7564102564102564,
  'RBF (centers_52)': 0.8717948717948718,
  'RBF (centers_78)': 0.8717948717948718,
  'SVM (linear)': 0.8717948717948718,
  'SVM (rbf)': 0.8333333333333334,
  'SVM (c_0.1)': 0.8076923076923077,
  'SVM (c_1.0)': 0.8333333333333334,
  'SVM (c_10.0)': 0.8589743589743589,
  'SVM (c_100.0)': 0.8717948717948718,
  'SVM (ovr)': 0.8461538461538461,
  'SVM (ovo)': 0.8333333333333334,
  'SVM (optimal)': 0.8589743589743589},
 'best_model_name': 'MLP (optimal)',
 'best_accuracy': 0.8846153846153846,
 'best_models': {'MLP': {'model': MLPClassifier(hidden_layer_sizes=(56,), max_iter=1000, random_state=42),
   'accuracy': 0.8

<Figure size 1200x600 with 0 Axes>