Manus Impulse and Gaussian Decision Tree - QAM-16 Constellation Classifier

This program implements a decision tree classifier for QAM-16 constellation
classification with multiclass output from 0 to 15. It handles complex number
inputs with both Gaussian and impulse noise by extracting appropriate features.

In [25]:
import numpy as np
import matplotlib.pyplot as plt
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, confusion_matrix
from sklearn import tree
import seaborn as sns
from typing import Tuple, Dict, Any

In [26]:
# Set random seed for reproducibility
np.random.seed(42)

Creates the 16 complex numbers that represent the ideal QAM-16 constellation points. These points are arranged in a 4x4 grid in the complex plane.
Normalizes the energy of the constellation points.
Returns the array of constellation points and a dictionary that maps each complex constellation point to its corresponding integer label (0 to 15)

In [27]:
# --- Data Generation ---
def generate_qam16_constellation() -> Tuple[np.ndarray, Dict[complex, int]]:
    """Generates the 16-QAM constellation and its mapping."""
    constellation_points = np.array([
        -3-3j, -3-1j, -3+1j, -3+3j,
        -1-3j, -1-1j, -1+1j, -1+3j,
        1-3j,  1-1j,  1+1j,  1+3j,
        3-3j,  3-1j,  3+1j,  3+3j
    ]) / np.sqrt(10)  # Normalize energy
    mapping = {complex(point): i for i, point in enumerate(constellation_points)}
    return constellation_points, mapping

# Example usage (moved outside the function definition):
constellation, mapping = generate_qam16_constellation()
print("Constellation Points:")
print(constellation)
print("\nMapping (Complex Point -> Index):")
print(mapping)

Constellation Points:
[-0.9486833 -0.9486833j  -0.9486833 -0.31622777j -0.9486833 +0.31622777j
 -0.9486833 +0.9486833j  -0.31622777-0.9486833j  -0.31622777-0.31622777j
 -0.31622777+0.31622777j -0.31622777+0.9486833j   0.31622777-0.9486833j
  0.31622777-0.31622777j  0.31622777+0.31622777j  0.31622777+0.9486833j
  0.9486833 -0.9486833j   0.9486833 -0.31622777j  0.9486833 +0.31622777j
  0.9486833 +0.9486833j ]

Mapping (Complex Point -> Index):
{(-0.9486832980505138-0.9486832980505138j): 0, (-0.9486832980505138-0.31622776601683794j): 1, (-0.9486832980505138+0.31622776601683794j): 2, (-0.9486832980505138+0.9486832980505138j): 3, (-0.31622776601683794-0.9486832980505138j): 4, (-0.31622776601683794-0.31622776601683794j): 5, (-0.31622776601683794+0.31622776601683794j): 6, (-0.31622776601683794+0.9486832980505138j): 7, (0.31622776601683794-0.9486832980505138j): 8, (0.31622776601683794-0.31622776601683794j): 9, (0.31622776601683794+0.31622776601683794j): 10, (0.31622776601683794+0.9486832980505

add_gaussian_noise(): Adds complex Gaussian noise to a given signal. The amount of noise is controlled by the Signal-to-Noise Ratio (SNR) in dB.

In [28]:
def add_gaussian_noise(signal: np.ndarray, snr_db: float) -> np.ndarray:
    """Adds Gaussian noise to a complex signal based on SNR."""
    signal_power = np.mean(np.abs(signal)**2)
    snr_linear = 10**(snr_db/10)
    noise_power = signal_power / snr_linear
    noise_std = np.sqrt(noise_power/2)
    noise = noise_std * (np.random.randn(*signal.shape) + 1j * np.random.randn(*signal.shape))
    return signal + noise

# Example usage:
# Create a sample signal (replace with your actual signal)
sample_signal = np.array([1+1j, 2-1j, -0.5+0.8j, -1-2j])
snr_db_value = 20  # Example SNR in dB

noisy_signal = add_gaussian_noise(sample_signal, snr_db_value)
print("Original Signal:")
print(sample_signal)
print("\nNoisy Signal (with SNR = {} dB):".format(snr_db_value))
print(noisy_signal)

Original Signal:
[ 1. +1.j   2. -1.j  -0.5+0.8j -1. -2.j ]

Noisy Signal (with SNR = 20 dB):
[ 1.06305042+0.97027774j  1.98244942-1.02972018j -0.41778564+1.00045741j
 -0.80667417-1.90258566j]


add_impulse_noise(): Adds impulse noise to a signal. Impulse noise introduces sudden, high-amplitude disturbances with a specified probability and amplitude.

In [29]:
def add_impulse_noise(signal: np.ndarray, impulse_prob: float = 0.05, impulse_amplitude: float = 5.0) -> np.ndarray:

    # Create a mask for impulse locations
    impulse_mask = np.random.random(signal.shape) < impulse_prob

    # Generate complex impulses with random phases
    impulse_phases = np.random.uniform(0, 2*np.pi, size=np.sum(impulse_mask))
    impulses = impulse_amplitude * np.exp(1j * impulse_phases)

    # Create noisy signal (copy to avoid modifying original)
    noisy_signal = signal.copy()

    # Apply impulses to the masked positions
    noisy_signal.flat[np.flatnonzero(impulse_mask)] = impulses

    return noisy_signal

# Example usage:
# Create a sample complex signal
sample_signal = np.array([1+0j, 0.5+0.5j, -1-1j, 0-2j, 2+1.5j])

# Add impulse noise
noisy_signal = add_impulse_noise(sample_signal, impulse_prob=0.1, impulse_amplitude=3.0)

print("Original Signal:")
print(sample_signal)
print("\nNoisy Signal with Impulse Noise:")
print(noisy_signal)

Original Signal:
[ 1. +0.j   0.5+0.5j -1. -1.j   0. -2.j   2. +1.5j]

Noisy Signal with Impulse Noise:
[ 1.21898415+2.7411818j  0.5       +0.5j       -1.        -1.j
  0.        -2.j         2.        +1.5j      ]


add_impulse_noise(): Adds impulse noise to a signal. Impulse noise introduces sudden, high-amplitude disturbances with a specified probability and amplitude.

In [30]:
def add_combined_noise(signal: np.ndarray, snr_db: float, impulse_prob: float, impulse_amplitude: float) -> np.ndarray:
    """Adds both Gaussian and impulse noise to a complex signal."""
    noisy_signal = add_gaussian_noise(signal, snr_db)
    noisy_signal = add_impulse_noise(noisy_signal, impulse_prob, impulse_amplitude)
    return noisy_signal

print("add_combined_noise:")
print(noisy_signal)

add_combined_noise:
[ 1.21898415+2.7411818j  0.5       +0.5j       -1.        -1.j
  0.        -2.j         2.        +1.5j      ]


generate_dataset():

Creates a dataset of QAM-16 symbols with different types of noise.
num_samples: Specifies the total number of data points to generate.
noise_types: A dictionary that configures which noise types to generate and their parameters (SNR range for Gaussian, probability and amplitude range for impulse). If None, it uses default configurations for "Clean", "Gaussian", "Impulse", and "Combined" noise.
For each specified noise type:
Generates clean QAM-16 symbols by randomly selecting from the constellation.
Adds the corresponding noise based on the configuration.
Extracts features from the noisy complex signals using the extract_features() function.
Returns a dictionary datasets where keys are noise types and values are dictionaries containing the noisy complex signals (complex) and the extracted features (features), along with the original labels y.

In [31]:
def generate_dataset(num_samples: int = 10000, noise_types: Dict[str, Dict[str, Any]] = None) -> Tuple[Dict[str, Dict[str, np.ndarray]], np.ndarray]:
    """Generates a QAM-16 dataset with configurable noise types."""
    if noise_types is None:
        noise_types = {
            "Clean": {"gaussian": False, "impulse": False},
            "Gaussian": {"gaussian": True, "impulse": False, "snr_db_range": (10, 30)},
            "Impulse": {"gaussian": False, "impulse": True, "impulse_prob_range": (0.01, 0.1), "impulse_amplitude_range": (2.0, 5.0)},
            "Combined": {"gaussian": True, "impulse": True, "snr_db_range": (10, 30), "impulse_prob_range": (0.01, 0.1), "impulse_amplitude_range": (2.0, 5.0)}
        }

    constellation, _ = generate_qam16_constellation()
    indices = np.random.randint(0, 16, size=num_samples)
    labels = indices.copy()
    clean_signals = constellation[indices]
    datasets = {}
    for noise_name, config in noise_types.items():
        noisy_signals = clean_signals.copy()

        if noise_name == "Gaussian" and config.get("gaussian"):
            snr_range = config.get("snr_db_range", (20, 20))
            snr_values = np.random.uniform(snr_range[0], snr_range[1], size=num_samples)
            noisy_signals = np.array([add_gaussian_noise(np.array([s]), snr)[0] for s, snr in zip(clean_signals, snr_values)])

        elif noise_name == "Impulse" and config.get("impulse"):
            prob_range = config.get("impulse_prob_range", (0.05, 0.05))
            amp_range = config.get("impulse_amplitude_range", (3.0, 3.0))
            probs = np.random.uniform(prob_range[0], prob_range[1], size=num_samples)
            amps = np.random.uniform(amp_range[0], amp_range[1], size=num_samples)
            noisy_signals = np.array([add_impulse_noise(np.array([s]), p, a)[0] for s, p, a in zip(clean_signals, probs, amps)])

        elif noise_name == "Combined" and config.get("gaussian") and config.get("impulse"):
            snr_range = config.get("snr_db_range", (20, 20))
            prob_range = config.get("impulse_prob_range", (0.05, 0.05))
            amp_range = config.get("impulse_amplitude_range", (3.0, 3.0))
            snr_values = np.random.uniform(snr_range[0], snr_range[1], size=num_samples)
            probs = np.random.uniform(prob_range[0], prob_range[1], size=num_samples)
            amps = np.random.uniform(amp_range[0], amp_range[1], size=num_samples)
            noisy_signals = np.array([add_combined_noise(np.array([s]), snr, p, a)[0] for s, snr, p, a in zip(clean_signals, snr_values, probs, amps)])

        features = extract_features(noisy_signals)
        datasets[noise_name] = {"complex": noisy_signals, "features": features}

    return datasets, labels

# Example usage:
if __name__ == "__main__":
    datasets, labels = generate_dataset(num_samples=5000)

    print("generate_dataset:")
    print("Datasets:")
    for noise_type, data in datasets.items():
        print(f"  {noise_type}:")
        print(f"    Complex Signals Shape: {data['complex'].shape}")
        print(f"    Features Shape: {data['features'].shape}")

    print("\nLabels:")
    print(f"  Shape: {labels.shape}")
    print(f"  First 10 labels: {labels[:10]}")

generate_dataset:
Datasets:
  Clean:
    Complex Signals Shape: (5000,)
    Features Shape: (5000, 2)
  Gaussian:
    Complex Signals Shape: (5000,)
    Features Shape: (5000, 2)
  Impulse:
    Complex Signals Shape: (5000,)
    Features Shape: (5000, 2)
  Combined:
    Complex Signals Shape: (5000,)
    Features Shape: (5000, 2)

Labels:
  Shape: (5000,)
  First 10 labels: [11  9  5 12 11  8  0 10 10 14]


extract_features():

Takes an array of complex numbers as input.
Extracts several features from each complex number that are intended to be useful for classification and robust to noise:
Real part
Imaginary part
Magnitude
Phase (angle)
Logarithm of the magnitude (can help with robustness to large impulse noise values)
Rank of the real part within the batch (normalized to 0-1) - robust to outliers.
Rank of the imaginary part within the batch (normalized to 0-1) - robust to outliers.
Quadrant of the complex number in the complex plane.
Returns a 2D NumPy array where each row represents a sample and each column represents a feature.

In [32]:
# --- Feature Extraction ---
def extract_features(X_complex: np.ndarray) -> np.ndarray:
    """Extracts features from complex inputs robust to noise."""
    real_part = X_complex.real
    imag_part = X_complex.imag
    magnitude = np.abs(X_complex)
    phase = np.angle(X_complex)
    log_magnitude = np.log1p(magnitude)

    real_rank = np.argsort(np.argsort(real_part)) / len(real_part)
    imag_rank = np.argsort(np.argsort(imag_part)) / len(imag_part)

    quadrant = np.zeros_like(real_part, dtype=int)
    quadrant[(real_part >= 0) & (imag_part >= 0)] = 0
    quadrant[(real_part < 0) & (imag_part >= 0)] = 1
    quadrant[(real_part < 0) & (imag_part < 0)] = 2
    quadrant[(real_part >= 0) & (imag_part < 0)] = 3

    return np.column_stack((real_part, imag_part, magnitude, phase,
                             log_magnitude, real_rank, imag_rank, quadrant))

 Visualization Functions (visualize_constellation, visualize_decision_tree, plot_confusion_matrix, plot_cross_performance, plot_classifier_comparison, plot_feature_importance):

visualize_constellation(): Plots the received (noisy) QAM-16 constellation points, color-coded by their true labels.
visualize_decision_tree(): Visualizes the structure of a trained decision tree classifier. Requires graphviz to be installed for more complex trees.
plot_confusion_matrix(): Generates and displays a confusion matrix, which shows the counts of true vs. predicted labels.
plot_cross_performance(): Visualizes a heatmap showing the accuracy of a model trained on one noise type and tested on another.
plot_classifier_comparison(): Creates a bar plot comparing the accuracy of different classifier models on the combined noise dataset.
plot_feature_importance(): Displays a bar plot showing the importance of each feature as determined by a trained classifier (if the classifier supports feature importance).

In [33]:
# --- Visualization ---
def visualize_constellation(X_complex: np.ndarray, y: np.ndarray, title: str = "QAM-16 Constellation", filename_suffix: str = "") -> None:
    """Visualizes the QAM-16 constellation points."""
    plt.figure(figsize=(10, 8))
    for i in range(16):
        mask = y == i
        plt.scatter(X_complex[mask].real, X_complex[mask].imag, label=f'{i}', alpha=0.6)
    plt.grid(True)
    plt.xlabel('Real Part')
    plt.ylabel('Imaginary Part')
    plt.title(title)
    plt.legend(fontsize='small')
    plt.savefig(f'qam16_constellation_{title.replace(" ", "_").lower()}{filename_suffix}.png')
    plt.close()

In [34]:
def main():
    """Main function to generate and visualize the QAM-16 constellation with different noise types."""
    num_samples = 1000
    datasets, labels = generate_dataset(num_samples=num_samples)
    constellation, original_labels = generate_qam16_constellation()
    original_labels_array = np.array(list(original_labels.values()))

    print("--- Visualizing QAM-16 Constellations ---")

    # Visualize the clean constellation
    visualize_constellation(constellation, original_labels_array, title="Clean QAM-16 Constellation")
    print("Saved: qam16_constellation_clean_qam-16_constellation.png")

    # Visualize constellations with different noise types
    for noise_type, data in datasets.items():
        visualize_constellation(data['complex'], labels, title=f"{noise_type} QAM-16 Constellation")
        print(f"Saved: qam16_constellation_{noise_type.lower()}_qam-16_constellation.png")

if __name__ == "__main__":
    main()

--- Visualizing QAM-16 Constellations ---
Saved: qam16_constellation_clean_qam-16_constellation.png
Saved: qam16_constellation_clean_qam-16_constellation.png
Saved: qam16_constellation_gaussian_qam-16_constellation.png
Saved: qam16_constellation_impulse_qam-16_constellation.png
Saved: qam16_constellation_combined_qam-16_constellation.png


In [35]:
def visualize_decision_tree(clf: DecisionTreeClassifier, feature_names: list[str], class_names: list[str], title: str = "Decision Tree") -> None:
    """Visualizes the decision tree."""
    plt.figure(figsize=(20, 10))
    tree.plot_tree(clf, feature_names=feature_names, class_names=class_names, filled=True, rounded=True, fontsize=10)
    plt.title(title)
    plt.savefig(f'qam16_decision_tree_{title.replace(" ", "_").lower()}.png')
    plt.close()

In [36]:
def main():
    """Main function to generate a dataset, train a Decision Tree, and visualize it."""
    num_samples = 2000
    datasets, labels = generate_dataset(num_samples=num_samples, noise_types={"Gaussian": {"gaussian": True, "impulse": False, "snr_db_range": (15, 20)}})

    # Use the Gaussian noisy dataset for training
    gaussian_data = datasets.get("Gaussian")
    if gaussian_data:
        X = gaussian_data['features']
        y = labels

        # Split data into training and testing sets
        X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)

        # Initialize and train the Decision Tree classifier
        clf = DecisionTreeClassifier(max_depth=5)  # You can adjust hyperparameters
        clf.fit(X_train, y_train)

        # Make predictions on the test set
        y_pred = clf.predict(X_test)
        accuracy = accuracy_score(y_test, y_pred)
        print(f"Decision Tree Accuracy on Test Set: {accuracy:.4f}")

        # Define feature names and class names
        feature_names = ["real", "imag", "magnitude", "phase", "log_magnitude", "real_rank", "imag_rank", "quadrant"]
        class_names = [str(i) for i in range(16)]  # QAM-16 has 16 classes

        # Visualize the trained Decision Tree
        visualize_decision_tree(clf, feature_names=feature_names, class_names=class_names, title="QAM-16 Decision Tree (Gaussian Noise)")
    else:
        print("Error: Gaussian noisy dataset not found.")

if __name__ == "__main__":
    main()

Decision Tree Accuracy on Test Set: 0.8017


In [37]:
def plot_confusion_matrix(y_true: np.ndarray, y_pred: np.ndarray, title: str = "Confusion Matrix") -> np.ndarray:
    """Plots the confusion matrix."""
    cm = confusion_matrix(y_true, y_pred)
    plt.figure(figsize=(12, 10))
    sns.heatmap(cm, annot=True, fmt='d', cmap='Blues', xticklabels=np.arange(16), yticklabels=np.arange(16))
    plt.title(title)
    plt.ylabel('True Label')
    plt.xlabel('Predicted Label')
    plt.savefig(f'qam16_confusion_matrix_{title.replace(" ", "_").lower()}.png')
    plt.close()
    return cm

    print(f"Confusion matrix saved to: qam16_confusion_matrix_{title.replace(' ', '_').lower()}.png")
    print("Confusion Matrix:")
    print(cm)


In [38]:
def main():
    """Main function to generate a dataset, train a Decision Tree, make predictions, and plot the confusion matrix."""
    num_samples = 2000
    noise_type = "Gaussian"
    datasets, labels = generate_dataset(num_samples=num_samples, noise_types={noise_type: {"gaussian": True, "impulse": False, "snr_db_range": (15, 20)}})

    gaussian_data = datasets.get(noise_type)
    if gaussian_data:
        X = gaussian_data['features']
        y = labels

        X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)

        clf = DecisionTreeClassifier(max_depth=5, random_state=42)
        clf.fit(X_train, y_train)

        y_pred = clf.predict(X_test)
        accuracy = accuracy_score(y_test, y_pred)
        print(f"Decision Tree Accuracy on Test Set ({noise_type} Noise): {accuracy:.4f}")

        plot_confusion_matrix(y_test, y_pred, title=f"Confusion Matrix (Decision Tree, {noise_type} Noise)")
    else:
        print(f"Error: {noise_type} noisy dataset not found.")

if __name__ == "__main__":
    main()

Decision Tree Accuracy on Test Set (Gaussian Noise): 0.8083


In [39]:
def plot_cross_performance(cross_performance: Dict[str, Dict[str, float]]) -> None:
    """Plots the matrix of model performance across different noise types."""
    noise_types = list(cross_performance.keys())
    matrix = np.array([[cross_performance[train][test] for test in noise_types] for train in noise_types])
    plt.figure(figsize=(12, 10))
    sns.heatmap(matrix, annot=True, fmt='.1f', cmap='YlGn', xticklabels=noise_types, yticklabels=noise_types)
    plt.title('Cross-Performance Matrix (Accuracy %)')
    plt.xlabel('Testing Noise Type')
    plt.ylabel('Training Noise Type')
    plt.savefig('qam16_cross_performance_matrix.png')
    plt.close()
    print("Cross-Performance Matrix plot saved to: qam16_cross_performance_matrix.png")
    print("Cross-Performance Data:")
    for train_type, test_results in cross_performance.items():
        print(f"  Trained on {train_type}:")
        for test_type, accuracy in test_results.items():
            print(f"    Tested on {test_type}: {accuracy:.1f}%")


In [40]:
def main():
    """Main function to generate datasets, train a Decision Tree on each, and evaluate cross-performance."""
    num_samples = 1000
    datasets, labels = generate_dataset(num_samples=num_samples)
    noise_types = list(datasets.keys())
    cross_performance = {train_type: {} for train_type in noise_types}

    print("--- Cross-Performance Evaluation ---")

    for train_type in noise_types:
        train_data = datasets[train_type]['features']
        train_labels = labels

        # Train a Decision Tree classifier
        clf = DecisionTreeClassifier(max_depth=10, random_state=42)
        clf.fit(train_data, train_labels)
        print(f"\nTrained Decision Tree on: {train_type}")

        for test_type in noise_types:
            test_data = datasets[test_type]['features']
            test_labels = labels

            # Make predictions
            y_pred = clf.predict(test_data)
            accuracy = accuracy_score(test_labels, y_pred) * 100
            cross_performance[train_type][test_type] = accuracy
            print(f"  Evaluated on {test_type}: Accuracy = {accuracy:.1f}%")

    print("\n--- Cross-Performance Matrix ---")
    plot_cross_performance(cross_performance)

if __name__ == "__main__":
    main()

--- Cross-Performance Evaluation ---

Trained Decision Tree on: Clean
  Evaluated on Clean: Accuracy = 100.0%
  Evaluated on Gaussian: Accuracy = 96.5%
  Evaluated on Impulse: Accuracy = 94.6%
  Evaluated on Combined: Accuracy = 91.8%

Trained Decision Tree on: Gaussian
  Evaluated on Clean: Accuracy = 99.2%
  Evaluated on Gaussian: Accuracy = 100.0%
  Evaluated on Impulse: Accuracy = 94.6%
  Evaluated on Combined: Accuracy = 91.0%

Trained Decision Tree on: Impulse
  Evaluated on Clean: Accuracy = 99.4%
  Evaluated on Gaussian: Accuracy = 82.8%
  Evaluated on Impulse: Accuracy = 99.8%
  Evaluated on Combined: Accuracy = 79.0%

Trained Decision Tree on: Combined
  Evaluated on Clean: Accuracy = 96.6%
  Evaluated on Gaussian: Accuracy = 93.1%
  Evaluated on Impulse: Accuracy = 95.5%
  Evaluated on Combined: Accuracy = 99.6%

--- Cross-Performance Matrix ---
Cross-Performance Matrix plot saved to: qam16_cross_performance_matrix.png
Cross-Performance Data:
  Trained on Clean:
    Tested o

In [41]:
def plot_classifier_comparison(results: Dict[str, float]) -> None:
    """Plots the comparison of different classifier accuracies."""
    names = list(results.keys())
    accuracies = list(results.values())
    plt.figure(figsize=(12, 6))
    plt.bar(range(len(names)), accuracies)
    plt.xticks(range(len(names)), names, rotation=45, ha='right')
    plt.ylabel('Accuracy (%)')
    plt.title('Classifier Performance Comparison')
    plt.ylim(0, 100)
    plt.tight_layout()
    for i, v in enumerate(accuracies):
        plt.text(i, v + 1, f"{v:.1f}%", ha='center')
    plt.savefig('qam16_classifier_comparison.png')
    plt.close()

    print("Classifier comparison plot saved to: qam16_classifier_comparison.png")
    print("Classifier Accuracies:")
    for name, accuracy in results.items():
        print(f"  {name}: {accuracy:.1f}%")

In [42]:
def main():
    """Main function to generate a dataset and compare the performance of different classifiers."""
    num_samples = 1000
    noise_type = "Gaussian_Moderate"  # Choose a noise type for comparison
    datasets, labels = generate_dataset(num_samples=num_samples, noise_types={
        noise_type: {"gaussian": True, "impulse": False, "snr_db_range": (18, 22)}
    })

    data = datasets.get(noise_type)
    if data:
        X = data['features']
        y = labels
        X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)

        classifiers = {
            "Decision Tree": DecisionTreeClassifier(max_depth=10, random_state=42),

        }

        results = {}
        print(f"--- Classifier Comparison on {noise_type} Noise ---")
        for name, clf in classifiers.items():
            clf.fit(X_train, y_train)
            y_pred = clf.predict(X_test)
            accuracy = accuracy_score(y_test, y_pred) * 100
            results[name] = accuracy
            print(f"{name}: Accuracy = {accuracy:.1f}%")

        print("\n--- Classifier Performance Comparison ---")
        plot_classifier_comparison(results)

    else:
        print(f"Error: {noise_type} dataset not found.")

if __name__ == "__main__":
    main()

--- Classifier Comparison on Gaussian_Moderate Noise ---
Decision Tree: Accuracy = 99.3%

--- Classifier Performance Comparison ---
Classifier comparison plot saved to: qam16_classifier_comparison.png
Classifier Accuracies:
  Decision Tree: 99.3%


In [43]:
def plot_feature_importance(clf: Any, feature_names: list[str]) -> None:
    """Plots the feature importances of a trained classifier."""
    if hasattr(clf, 'feature_importances_'):
        importances = clf.feature_importances_
        indices = np.argsort(importances)[::-1]
        plt.figure(figsize=(12, 6))
        plt.bar(range(len(feature_names)), importances[indices])
        plt.xticks(range(len(feature_names)), [feature_names[i] for i in indices], rotation=45, ha='right')
        plt.xlabel('Features')
        plt.ylabel('Importance')
        plt.title('Feature Importance')
        plt.tight_layout()
        plt.savefig('qam16_feature_importance.png')
        plt.close()
    else:
        print("Classifier does not have feature_importances_ attribute.")


In [44]:
def main():
    """Main function to generate a dataset, train a Decision Tree, and plot feature importance."""
    num_samples = 1000
    noise_type = "Gaussian_Moderate"  # Choose a noise type
    datasets, labels = generate_dataset(num_samples=num_samples, noise_types={
        noise_type: {"gaussian": True, "impulse": False, "snr_db_range": (18, 22)}
    })

    data = datasets.get(noise_type)
    if data:
        X = data['features']
        y = labels
        X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)

        # Train a Decision Tree classifier (it has feature_importances_)
        clf = DecisionTreeClassifier(max_depth=10, random_state=42)
        clf.fit(X_train, y_train)

        # Define feature names (must match the order in extract_features)
        feature_names = ["real", "imag", "magnitude", "phase", "log_magnitude", "real_rank", "imag_rank", "quadrant"]

        # Plot feature importance
        plot_feature_importance(clf, feature_names)

    else:
        print(f"Error: {noise_type} dataset not found.")

if __name__ == "__main__":
    main()

In [45]:
# --- Model Training and Evaluation ---
def train_and_evaluate_classifier(X_train: np.ndarray, X_test: np.ndarray, y_train: np.ndarray, y_test: np.ndarray, classifier: Any) -> Tuple[Any, float, np.ndarray]:
    """Trains and evaluates a classifier."""
    classifier.fit(X_train, y_train)
    y_pred = classifier.predict(X_test)
    accuracy = accuracy_score(y_test, y_pred)
    return classifier, accuracy, y_pred

In [46]:
def compare_noise_types(datasets: Dict[str, Dict[str, np.ndarray]], y: np.ndarray, test_size: float = 0.2) -> Dict[str, Dict[str, float]]:
    """Compares decision tree model performance across different noise types."""
    results = {"train_on": {}, "cross_performance": {}}

    for train_noise, train_data in datasets.items():
        print(f"\nTraining on {train_noise} noise...")
        X_train_all, X_test_all, y_train, y_test = train_test_split(
            train_data["features"], y, test_size=test_size, random_state=42
        )
        clf = DecisionTreeClassifier(max_depth=12, min_samples_split=8, min_samples_leaf=4, random_state=42)
        trained_clf, train_accuracy, _ = train_and_evaluate_classifier(X_train_all, X_test_all, y_train, y_test, clf)
        results["train_on"][train_noise] = {"model": trained_clf, "accuracy": train_accuracy * 100}
        print(f"  Training accuracy: {train_accuracy * 100:.2f}%")

        noise_accuracies = {}
        for test_noise, test_data in datasets.items():
            X_test_noise_all = test_data["features"]
            _, X_test_noise, _, y_test_consistent = train_test_split(
                X_test_noise_all, y, test_size=test_size, random_state=42
            )
            y_pred_cross = trained_clf.predict(X_test_noise)
            cross_accuracy = accuracy_score(y_test_consistent, y_pred_cross)
            noise_accuracies[test_noise] = cross_accuracy * 100
            print(f"  Tested on {test_noise} noise: {cross_accuracy * 100:.2f}%")
        results["cross_performance"][train_noise] = noise_accuracies

    plot_cross_performance(results["cross_performance"])
    return results

In [47]:
# Assume extract_features and generate_dataset functions are defined as in previous examples
def extract_features(X_complex: np.ndarray) -> np.ndarray:
    """Placeholder for feature extraction function."""
    return np.column_stack((X_complex.real, X_complex.imag))

def generate_dataset(num_samples: int = 1000, noise_types: Dict[str, Dict[str, Any]] = None) -> Tuple[Dict[str, Dict[str, np.ndarray]], np.ndarray]:
    """Placeholder for dataset generation function."""
    if noise_types is None:
        noise_types = {
            "Combined": {"gaussian": True, "impulse": True, "snr_db_range": (18, 22), "impulse_prob_range": (0.03, 0.06), "impulse_amplitude_range": (2.0, 4.0)}
        }
    datasets = {}
    labels = np.random.randint(0, 16, num_samples)
    for noise_name, config in noise_types.items():
        complex_signals = np.random.randn(num_samples) + 1j * np.random.randn(num_samples)
        features = extract_features(complex_signals)
        datasets[noise_name] = {"complex": complex_signals, "features": features}
    return datasets, labels

def compare_classifiers(datasets: Dict[str, Dict[str, np.ndarray]], y: np.ndarray, test_size: float = 0.2) -> Dict[str, float]:
    """Compares different classifier types on the combined noise dataset."""
    if "Combined" not in datasets or "features" not in datasets["Combined"]:
        print("Error: 'Combined' noise dataset with 'features' not found.")
        return {}  # Return an empty dictionary instead of None

    X_features = datasets["Combined"]["features"]
    X_train, X_test, y_train, y_test = train_test_split(X_features, y, test_size=test_size, random_state=42)
    classifiers = {
        "Decision Tree (max_depth=8)": DecisionTreeClassifier(max_depth=8, random_state=42),
        "Decision Tree (max_depth=12)": DecisionTreeClassifier(max_depth=12, random_state=42),
        "Decision Tree (max_depth=16)": DecisionTreeClassifier(max_depth=16, random_state=42),
        "Random Forest (10 trees)": RandomForestClassifier(n_estimators=10, random_state=42)
    }
    results = {}
    for name, clf in classifiers.items():
        clf.fit(X_train, y_train)
        y_pred = clf.predict(X_test)
        accuracy = accuracy_score(y_test, y_pred)
        results[name] = accuracy
    return results

def main():
    """Main function to generate a dataset and compare different classifiers."""
    num_samples = 2000
    noise_types_to_generate = {
        "Combined": {"gaussian": True, "impulse": True, "snr_db_range": (18, 22), "impulse_prob_range": (0.03, 0.06), "impulse_amplitude_range": (2.0, 4.0)}
    }
    datasets, labels = generate_dataset(num_samples=num_samples, noise_types=noise_types_to_generate)

    if labels.shape[0] != num_samples:
        print(f"Warning: Number of labels ({labels.shape[0]}) does not match the expected number of samples ({num_samples}).")

    comparison_results = compare_classifiers(datasets, labels, test_size=0.3)

    print("--- Classifier Comparison Results (Combined Noise) ---")
    if comparison_results:  # Check if comparison_results is not None or empty
        for name, accuracy in comparison_results.items():
            print(f"{name}: Accuracy = {accuracy:.4f}")
    else:
        print("No classifier comparison results to display.")

if __name__ == "__main__":
    main()

--- Classifier Comparison Results (Combined Noise) ---
Decision Tree (max_depth=8): Accuracy = 0.0550
Decision Tree (max_depth=12): Accuracy = 0.0483
Decision Tree (max_depth=16): Accuracy = 0.0550
Random Forest (10 trees): Accuracy = 0.0533
