<a href="https://colab.research.google.com/github/abar-1/SDR-ML-Project/blob/decisionTree/manus_decision_tree_gaussian_impulse_combo.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Manus Impulse and Gaussian Decision Tree - QAM-16 Constellation Classifier

This program implements a decision tree classifier for QAM-16 constellation
classification with multiclass output from 0 to 15. It handles complex number
inputs with both Gaussian and impulse noise by extracting appropriate features.
"""

import numpy as np
import matplotlib.pyplot as plt
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, confusion_matrix
from sklearn import tree
import seaborn as sns

# Seed NumPy's global random generator so repeated runs draw the same
# symbols and noise (the functions below all sample through np.random)
np.random.seed(42)

def generate_qam16_constellation():
    """
    Build the normalized 16-QAM constellation and its label lookup.

    Returns:
        constellation: Array of the 16 complex constellation points, scaled
                       by 1/sqrt(10) so the mean symbol energy is 1
        mapping: Dictionary from each constellation point (as a Python
                 complex) to its integer label (0-15)
    """
    # Walk the 4x4 grid with the real level in the outer loop and the
    # imaginary level in the inner loop, so label = 4 * real_idx + imag_idx
    levels = (-3, -1, 1, 3)
    grid = [complex(re_level, im_level)
            for re_level in levels
            for im_level in levels]
    constellation = np.array(grid) / np.sqrt(10)

    # Label of a point is simply its position in the constellation array
    mapping = {}
    for label, point in enumerate(constellation):
        mapping[complex(point)] = label

    return constellation, mapping

def add_gaussian_noise(signal, snr_db):
    """
    Corrupt a complex signal with complex Gaussian noise at a given SNR.

    The noise power is derived from the mean power of `signal` itself and is
    split equally between the real and imaginary components.

    Args:
        signal: Complex signal array
        snr_db: Signal-to-noise ratio in dB

    Returns:
        Signal with Gaussian noise
    """
    # Noise power = mean signal power / linear SNR
    mean_power = np.mean(np.abs(signal)**2)
    noise_power = mean_power / 10**(snr_db/10)

    # Draw the real component first, then the imaginary one
    shape = signal.shape
    noise_real = np.random.randn(*shape)
    noise_imag = np.random.randn(*shape)

    # Half of the noise power goes to each component
    sigma = np.sqrt(noise_power/2)

    return signal + sigma * (noise_real + 1j * noise_imag)
def add_impulse_noise(signal, impulse_prob=0.05, impulse_amplitude=5.0):
    """
    Replace randomly chosen samples of a signal with fixed-magnitude impulses.

    Each sample is selected independently with probability `impulse_prob`.
    A selected sample is overwritten (not summed) with a complex value of
    magnitude `impulse_amplitude` and a phase drawn uniformly from
    [0, 2*pi). The input array is never modified.

    Args:
        signal: Signal array. Real-valued input is promoted to complex so the
                impulses keep their imaginary part.
        impulse_prob: Probability of impulse occurrence (0-1)
        impulse_amplitude: Absolute magnitude of each impulse

    Returns:
        Complex copy of the signal with impulses written at the selected
        positions
    """
    # Work on a copy; promote real input so complex impulses can be stored
    noisy_signal = np.array(signal, copy=True)
    if not np.iscomplexobj(noisy_signal):
        noisy_signal = noisy_signal.astype(complex)

    # Create a mask for impulse locations
    impulse_mask = np.random.random(noisy_signal.shape) < impulse_prob

    # Generate one random-phase impulse per selected position
    impulse_phases = np.random.uniform(0, 2*np.pi, size=np.sum(impulse_mask))
    impulses = impulse_amplitude * np.exp(1j * impulse_phases)

    # Overwrite the selected samples (boolean indexing visits them in C order)
    noisy_signal[impulse_mask] = impulses

    return noisy_signal

def add_combined_noise(signal, snr_db, impulse_prob, impulse_amplitude):
    """
    Apply Gaussian noise followed by impulse noise to a complex signal.

    Args:
        signal: Complex signal array
        snr_db: Signal-to-noise ratio in dB for the Gaussian stage
        impulse_prob: Probability of impulse occurrence (0-1)
        impulse_amplitude: Amplitude passed to the impulse stage

    Returns:
        Signal after both noise stages
    """
    # Order matters: the impulse stage runs on the Gaussian-corrupted signal
    return add_impulse_noise(
        add_gaussian_noise(signal, snr_db),
        impulse_prob,
        impulse_amplitude,
    )

def generate_dataset(num_samples=10000, noise_types=None):
    """
    Generate QAM-16 datasets, one per configured noise type

    All datasets share the same underlying symbol sequence, so a single label
    vector applies to every one of them.

    Args:
        num_samples: Number of samples to generate
        noise_types: Dictionary mapping a dataset name to its noise
                    configuration. Each configuration is a dictionary with:
                      "gaussian": bool, add Gaussian noise (default False)
                      "impulse": bool, add impulse noise (default False)
                      "snr_db_range": (low, high), required if gaussian
                      "impulse_prob_range": (low, high), required if impulse
                      "impulse_amplitude_range": (low, high), required if
                                                 impulse
                    The noise applied is decided by the two flags, not by the
                    dataset name. If None, four default datasets are built:
                    "Clean", "Gaussian", "Impulse" and "Combined".

    Returns:
        datasets: Dictionary of datasets for each noise type; each entry holds
                  the noisy samples under "complex" and the extracted feature
                  matrix under "features"
        y: Labels (0-15)
    """
    # Default noise configurations if none provided
    if noise_types is None:
        noise_types = {
            "Clean": {"gaussian": False, "impulse": False},
            "Gaussian": {"gaussian": True, "impulse": False, "snr_db_range": (10, 30)},
            "Impulse": {"gaussian": False, "impulse": True, "impulse_prob_range": (0.01, 0.1), "impulse_amplitude_range": (2.0, 5.0)},
            "Combined": {"gaussian": True, "impulse": True, "snr_db_range": (10, 30), "impulse_prob_range": (0.01, 0.1), "impulse_amplitude_range": (2.0, 5.0)}
        }

    # Only the constellation points are needed; the label mapping is unused
    constellation, _ = generate_qam16_constellation()

    # Pick a random constellation point per sample; its index is the label
    indices = np.random.randint(0, 16, size=num_samples)
    y = indices.copy()

    # Clean (noise-free) symbols via a single vectorized lookup
    X_clean = constellation[indices]

    datasets = {}
    for noise_name, config in noise_types.items():
        # The flags, not the dataset name, decide which noise is applied, so
        # custom-named configurations behave as their flags say
        use_gaussian = config.get("gaussian", False)
        use_impulse = config.get("impulse", False)

        X_noisy = X_clean.copy()

        # Draw the per-sample noise parameters up front, always in the order
        # SNR -> impulse probability -> impulse amplitude
        if use_gaussian:
            snr_range = config["snr_db_range"]
            snr_values = np.random.uniform(snr_range[0], snr_range[1], size=num_samples)
        if use_impulse:
            prob_range = config["impulse_prob_range"]
            amp_range = config["impulse_amplitude_range"]
            probs = np.random.uniform(prob_range[0], prob_range[1], size=num_samples)
            amps = np.random.uniform(amp_range[0], amp_range[1], size=num_samples)

        if use_gaussian or use_impulse:
            # Noise is applied one sample at a time because every sample has
            # its own parameters.
            # NOTE: add_gaussian_noise therefore measures the power of that
            # single symbol, so the SNR is relative to each symbol's own power.
            for i in range(num_samples):
                sample = np.array([X_clean[i]])
                if use_gaussian:
                    sample = add_gaussian_noise(sample, snr_values[i])
                if use_impulse:
                    # Gaussian first, then impulse, as in add_combined_noise
                    sample = add_impulse_noise(sample, probs[i], amps[i])
                X_noisy[i] = sample[0]

        # Store the noisy samples together with their extracted features
        datasets[noise_name] = {
            "complex": X_noisy,
            "features": extract_features(X_noisy)
        }

    return datasets, y
def extract_features(X_complex):
    """
    Turn complex samples into a real-valued feature matrix for the classifiers

    Columns, in order: real part, imaginary part, magnitude, phase,
    log(1 + magnitude), rank of the real part within the batch, rank of the
    imaginary part within the batch, quadrant code.

    Note that the two rank columns depend on the whole batch passed in, not
    only on the individual sample.

    Args:
        X_complex: Array of complex inputs

    Returns:
        X_features: Array of extracted features, one row per input sample
    """
    def _batch_rank(values):
        # Double argsort yields each element's position in sorted order;
        # dividing by the batch size scales it to [0, 1)
        return np.argsort(np.argsort(values)) / len(values)

    re_part = X_complex.real
    im_part = X_complex.imag
    magnitude = np.abs(X_complex)

    # Quadrant code: 0 = (+,+), 1 = (-,+), 2 = (-,-), 3 = (+,-); zero counts
    # as non-negative, and anything matching no condition falls back to 0
    quadrant = np.select(
        [
            (re_part < 0) & (im_part >= 0),
            (re_part < 0) & (im_part < 0),
            (re_part >= 0) & (im_part < 0),
        ],
        [1, 2, 3],
        default=0,
    )

    feature_columns = (
        re_part,
        im_part,
        magnitude,
        np.angle(X_complex),
        np.log1p(magnitude),  # log(1+x) stays finite at zero magnitude
        _batch_rank(re_part),
        _batch_rank(im_part),
        quadrant,
    )

    return np.column_stack(feature_columns)

def visualize_constellation(X_complex, y, title="QAM-16 Constellation"):
    """
    Scatter-plot complex samples colored by label and save the figure

    The image is written to 'qam16_constellation_<title>.png', where <title>
    is the plot title lower-cased with spaces replaced by underscores.

    Args:
        X_complex: Complex input samples
        y: Labels (0-15)
        title: Plot title
    """
    output_path = f'qam16_constellation_{title.replace(" ", "_").lower()}.png'

    fig, ax = plt.subplots(figsize=(10, 8))

    # One scatter series per class so each label gets its own legend entry
    for label in range(16):
        points = X_complex[y == label]
        ax.scatter(points.real, points.imag, label=f'{label}', alpha=0.6)

    ax.grid(True)
    ax.set_xlabel('Real Part')
    ax.set_ylabel('Imaginary Part')
    ax.set_title(title)
    ax.legend()

    fig.savefig(output_path)
    plt.close(fig)

def visualize_decision_tree(clf, feature_names, class_names, title="Decision Tree"):
    """
    Draw a fitted decision tree and save the figure

    The image is written to 'qam16_decision_tree_<title>.png', where <title>
    is the plot title lower-cased with spaces replaced by underscores.

    Args:
        clf: Trained decision tree classifier
        feature_names: Names of features
        class_names: Names of classes
        title: Plot title
    """
    output_path = f'qam16_decision_tree_{title.replace(" ", "_").lower()}.png'

    fig, ax = plt.subplots(figsize=(20, 10))
    tree.plot_tree(
        clf,
        feature_names=feature_names,
        class_names=class_names,
        filled=True,
        rounded=True,
        fontsize=10,
        ax=ax,
    )
    ax.set_title(title)

    fig.savefig(output_path)
    plt.close(fig)

def plot_confusion_matrix(y_true, y_pred, title="Confusion Matrix"):
    """
    Compute a confusion matrix, save it as an annotated heatmap, and return it

    The image is written to 'qam16_confusion_matrix_<title>.png', where
    <title> is the plot title lower-cased with spaces replaced by underscores.

    Args:
        y_true: True labels
        y_pred: Predicted labels
        title: Plot title

    Returns:
        cm: The confusion matrix computed from y_true and y_pred
    """
    cm = confusion_matrix(y_true, y_pred)
    output_path = f'qam16_confusion_matrix_{title.replace(" ", "_").lower()}.png'

    fig, ax = plt.subplots(figsize=(12, 10))
    # fmt='d' prints the integer counts in every cell
    sns.heatmap(cm, annot=True, fmt='d', cmap='Blues', ax=ax)
    ax.set_title(title)
    ax.set_ylabel('True Label')
    ax.set_xlabel('Predicted Label')

    fig.savefig(output_path)
    plt.close(fig)

    return cm
def compare_noise_types(datasets, y, test_size=0.2):
    """
    Compare model performance across different noise types

    A decision tree is trained on the training portion of every dataset and
    then scored on the held-out portion of every dataset. All datasets share
    one train/test partition of the sample indices, so each model is always
    evaluated on the same held-out symbols. The resulting accuracy matrix is
    also plotted via plot_cross_performance.

    Args:
        datasets: Dictionary of datasets for each noise type; each entry must
                  provide its feature matrix under "features"
        y: Labels (0-15)
        test_size: Proportion of data to use for testing

    Returns:
        results: Dictionary with two entries:
                 "train_on": per training noise type, the fitted "model" and
                             its "test_data" tuple (X_test, y_test)
                 "cross_performance": per training noise type, a dictionary of
                             accuracy (in percent) for each testing noise type
    """
    results = {
        "train_on": {},
        "cross_performance": {}
    }

    y = np.asarray(y)

    # Split the sample indices once. With a fixed random_state the partition
    # depends only on the number of samples, so this is the same split that
    # splitting each feature matrix separately would produce.
    train_idx, test_idx = train_test_split(
        np.arange(len(y)), test_size=test_size, random_state=42
    )
    y_train = y[train_idx]
    y_test = y[test_idx]

    # Held-out features of every noise type, sliced once and reused below
    test_features = {
        noise_name: np.asarray(dataset["features"])[test_idx]
        for noise_name, dataset in datasets.items()
    }

    # Train a model on each noise type
    for train_noise, dataset in datasets.items():
        print(f"\nTraining on {train_noise} noise...")

        X_train = np.asarray(dataset["features"])[train_idx]

        # Create and train decision tree
        clf = DecisionTreeClassifier(
            max_depth=12,
            min_samples_split=8,
            min_samples_leaf=4,
            random_state=42
        )
        clf.fit(X_train, y_train)

        # Keep the model together with its own held-out data
        results["train_on"][train_noise] = {
            "model": clf,
            "test_data": (test_features[train_noise], y_test)
        }

        # Score this model on the held-out samples of every noise type
        noise_accuracies = {}
        for test_noise, X_test_noise in test_features.items():
            accuracy = accuracy_score(y_test, clf.predict(X_test_noise))
            noise_accuracies[test_noise] = accuracy * 100

            print(f"  Tested on {test_noise} noise: {accuracy * 100:.2f}%")

        results["cross_performance"][train_noise] = noise_accuracies

    # Plot cross-performance matrix
    plot_cross_performance(results["cross_performance"])

    return results

def plot_cross_performance(cross_performance):
    """
    Save a heatmap of accuracy for every (training, testing) noise pair

    Rows are the training noise types and columns the testing noise types,
    both in the key order of `cross_performance`. The image is written to
    'qam16_cross_performance_matrix.png'.

    Args:
        cross_performance: Dictionary mapping each training noise type to a
                           dictionary of accuracy per testing noise type
    """
    noise_types = list(cross_performance)
    count = len(noise_types)

    # Flatten row by row, then fold back into a square matrix
    accuracies = [
        cross_performance[train_noise][test_noise]
        for train_noise in noise_types
        for test_noise in noise_types
    ]
    matrix = np.array(accuracies, dtype=float).reshape(count, count)

    fig, ax = plt.subplots(figsize=(12, 10))
    sns.heatmap(
        matrix,
        annot=True,
        fmt='.1f',
        cmap='YlGn',
        xticklabels=noise_types,
        yticklabels=noise_types,
        ax=ax,
    )
    ax.set_title('Cross-Performance Matrix (Accuracy %)')
    ax.set_xlabel('Testing Noise Type')
    ax.set_ylabel('Training Noise Type')

    fig.savefig('qam16_cross_performance_matrix.png')
    plt.close(fig)
def compare_classifiers(datasets, y, test_size=0.2):
    """
    Train several tree-based classifiers on the combined-noise dataset

    Every classifier is fitted on the same training split and scored on the
    same test split. A confusion matrix is saved for each classifier, a tree
    diagram for each single decision tree, and a bar chart of all accuracies
    is written to 'qam16_classifier_comparison.png'.

    Args:
        datasets: Dictionary of datasets for each noise type; the "Combined"
                  entry is the one used here
        y: Labels (0-15)
        test_size: Proportion of data to use for testing

    Returns:
        results: Dictionary mapping classifier name to test accuracy in percent
    """
    # Axis labels shared by every decision-tree diagram
    feature_names = [
        'Real', 'Imaginary', 'Magnitude', 'Phase',
        'Log Magnitude', 'Real Rank', 'Imag Rank', 'Quadrant'
    ]
    class_names = [str(i) for i in range(16)]

    X_train, X_test, y_train, y_test = train_test_split(
        datasets["Combined"]["features"], y,
        test_size=test_size, random_state=42
    )

    # (display name, unfitted estimator) pairs, evaluated in this order
    candidates = [
        ("Decision Tree (max_depth=8)",
         DecisionTreeClassifier(max_depth=8, random_state=42)),
        ("Decision Tree (max_depth=12)",
         DecisionTreeClassifier(max_depth=12, random_state=42)),
        ("Decision Tree (max_depth=16)",
         DecisionTreeClassifier(max_depth=16, random_state=42)),
        ("Random Forest (10 trees)",
         RandomForestClassifier(n_estimators=10, max_depth=12, random_state=42)),
        ("Random Forest (50 trees)",
         RandomForestClassifier(n_estimators=50, max_depth=12, random_state=42)),
    ]

    results = {}
    for name, clf in candidates:
        print(f"\nTraining {name}...")

        clf.fit(X_train, y_train)
        y_pred = clf.predict(X_test)

        accuracy = accuracy_score(y_test, y_pred)
        results[name] = accuracy * 100

        print(f"  Test accuracy: {accuracy * 100:.2f}%")

        plot_confusion_matrix(y_test, y_pred, title=f"Confusion Matrix - {name}")

        # Only the single trees are drawn; forests are skipped
        if "Decision Tree" in name:
            visualize_decision_tree(clf, feature_names, class_names, title=name)

    # Bar chart comparing all classifiers
    names = list(results)
    accuracies = list(results.values())
    positions = range(len(names))

    plt.figure(figsize=(12, 6))
    plt.bar(positions, accuracies)
    plt.xticks(positions, names, rotation=45, ha='right')
    plt.ylabel('Accuracy (%)')
    plt.title('Classifier Performance Comparison')
    plt.ylim(0, 100)
    plt.tight_layout()
    # Print each accuracy just above its bar
    for position, value in enumerate(accuracies):
        plt.text(position, value + 1, f"{value:.1f}%", ha='center')
    plt.savefig('qam16_classifier_comparison.png')
    plt.close()

    return results
def evaluate_feature_importance(clf, feature_names):
    """
    Print the features ranked by importance and save a bar chart of them

    The chart is written to 'qam16_feature_importance.png' with the bars
    ordered from most to least important.

    Args:
        clf: Trained classifier with feature_importances_ attribute
        feature_names: Names of features
    """
    importances = clf.feature_importances_

    # Feature indices from most to least important
    order = np.argsort(importances)[::-1]

    print("\nFeature ranking:")
    for rank, idx in enumerate(order, start=1):
        print(f"{rank}. {feature_names[idx]} ({importances[idx]:.4f})")

    positions = range(len(feature_names))
    ordered_names = [feature_names[idx] for idx in order]

    fig, ax = plt.subplots(figsize=(12, 6))
    ax.bar(positions, importances[order])
    ax.set_xticks(positions)
    ax.set_xticklabels(ordered_names, rotation=45, ha='right')
    ax.set_xlabel('Features')
    ax.set_ylabel('Importance')
    ax.set_title('Feature Importance')
    fig.tight_layout()

    fig.savefig('qam16_feature_importance.png')
    plt.close(fig)

def main():
    """Run the whole experiment: build datasets, plot them, compare models, rank features"""
    print("=== Manus Impulse and Gaussian Decision Tree - QAM-16 Classifier ===")

    print("\nGenerating QAM-16 datasets with different noise types...")
    datasets, y = generate_dataset(num_samples=20000)

    # Plot only the first samples of each dataset to keep the figures readable
    print("Visualizing QAM-16 constellations...")
    preview = 2000
    for noise_name, dataset in datasets.items():
        visualize_constellation(
            dataset["complex"][:preview],
            y[:preview],
            title=f"QAM-16 Constellation with {noise_name} Noise"
        )

    print("\nComparing performance across noise types...")
    results = compare_noise_types(datasets, y)

    # Called for its printed report and saved figures; the return value is not needed
    print("\nComparing different classifier types on combined noise...")
    compare_classifiers(datasets, y)

    # Feature importance is reported for the tree trained on combined noise
    print("\nEvaluating feature importance...")
    feature_names = [
        'Real', 'Imaginary', 'Magnitude', 'Phase',
        'Log Magnitude', 'Real Rank', 'Imag Rank', 'Quadrant'
    ]
    evaluate_feature_importance(
        results["train_on"]["Combined"]["model"],
        feature_names
    )

    summary_lines = (
        "\nResults saved as PNG files:",
        "- QAM-16 constellation visualizations for each noise type",
        "- Decision tree visualizations",
        "- Confusion matrices",
        "- Cross-performance matrix",
        "- Classifier comparison",
        "- Feature importance",
    )
    for line in summary_lines:
        print(line)

# Run the experiment only when this file is executed directly, not on import
if __name__ == "__main__":
    main()


=== Manus Impulse and Gaussian Decision Tree - QAM-16 Classifier ===

Generating QAM-16 datasets with different noise types...
Visualizing QAM-16 constellations...

Comparing performance across noise types...

Training on Clean noise...
  Tested on Clean noise: 99.98%
  Tested on Gaussian noise: 96.05%
  Tested on Impulse noise: 93.77%
  Tested on Combined noise: 89.05%

Training on Gaussian noise...
  Tested on Clean noise: 98.85%
  Tested on Gaussian noise: 97.32%
  Tested on Impulse noise: 93.08%
  Tested on Combined noise: 90.50%

Training on Impulse noise...
  Tested on Clean noise: 99.05%
  Tested on Gaussian noise: 75.78%
  Tested on Impulse noise: 94.97%
  Tested on Combined noise: 72.78%

Training on Combined noise...
  Tested on Clean noise: 98.55%
  Tested on Gaussian noise: 96.33%
  Tested on Impulse noise: 94.45%
  Tested on Combined noise: 91.30%

Comparing different classifier types on combined noise...

Training Decision Tree (max_depth=8)...
  Test accuracy: 91.72%

Tr