In [None]:
import pandas as pd
import numpy as np
import tensorflow as tf
from tensorflow.keras import layers, models, callbacks
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import r2_score, mean_squared_error, mean_absolute_error
import matplotlib.pyplot as plt
import seaborn as sns
import warnings
from scipy.special import erfc # To plot the "truth"

# 1. SETUP
# --------------------------
# Quieten library warnings and pick the seaborn look used by every figure.
warnings.filterwarnings("ignore")
sns.set_theme(style="darkgrid")

# Seed the NumPy and TensorFlow random generators with the same fixed value.
SEED = 42
np.random.seed(SEED)
tf.random.set_seed(SEED)

print("--- Starting 'AWGN-Only' Model (Phase 1: Proof-of-Concept) ---")

# 2. LOAD AND FILTER DATA
# --------------------------
# Read the full BER dataset and keep only the rows measured on the AWGN
# channel. Any failure here is fatal for the rest of the script.
print("\n--- 1. Loading and Filtering Data ---")
try:
    df = pd.read_csv("ber_dataset_improved.csv")
    df_awgn = df[df['Channel'] == 'AWGN'].copy()
    print(f"Filtered 'AWGN-Only' size: {len(df_awgn)} rows")
except Exception as e:
    print(f"Error loading data: {e}")
    # `exit()` is the interactive-shell helper added by the `site` module and
    # ends with a success status; raising SystemExit stops the run with a
    # failure status in scripts and notebooks alike.
    raise SystemExit(1) from e

# An empty filter result would only surface later as an opaque error from
# train_test_split, so stop here with a clear message instead.
if df_awgn.empty:
    print("Error loading data: no rows with Channel == 'AWGN' were found")
    raise SystemExit(1)

# 3. PREPROCESSING (The "Golden Rule")
# --------------------------
# Floor BER at 1e-10 before taking log10 so zero-error rows stay finite;
# the floored BER is written back and log10(BER) becomes the training target.
print("\n--- 2. Preprocessing Target Variable ---")
ber_floored = df_awgn['BER'].clip(lower=1e-10)
df_awgn['BER'] = ber_floored
df_awgn['logBER'] = np.log10(ber_floored)
print("Created 'logBER' as the new target.")

# 4. DEFINE FEATURES (X) AND TARGET (y)
# --------------------------
features = ['SNR_dB', 'Modulation']
target = 'logBER'
X = df_awgn[features]
y = df_awgn[target]

# The linear-scale BER rides along through the split so the test rows keep
# their original BER values for the evaluation step.
X_with_ber = X.join(df_awgn['BER'])
split_parts = train_test_split(X_with_ber, y, test_size=0.2, random_state=42)
X_train_df, X_test_df, y_train, y_test = split_parts

# Pull the BER column back out: it is a reference value, not a model input.
y_test_original_ber = X_test_df['BER']
X_train_df = X_train_df.drop(columns=['BER'])
X_test_df = X_test_df.drop(columns=['BER'])

# 5. PREPROCESSING FEATURES
# --------------------------
# One-hot encode the modulation and standardise SNR_dB, fitting every
# transformation on the training split only.
print("\n--- 3. Preprocessing Features ---")

# dtype=float keeps the whole frame numeric. Newer pandas versions default to
# bool dummies, and a bool/float mix converts to an object array that some
# TensorFlow/Keras versions refuse to turn into a tensor.
X_train_processed = pd.get_dummies(X_train_df, columns=['Modulation'], dtype=float)
X_test_processed = pd.get_dummies(X_test_df, columns=['Modulation'], dtype=float)

# The training columns define the model inputs. Re-index the test frame onto
# them: a category missing from the test split becomes an all-zero column, and
# a category never seen in training is dropped. (The previous inner join would
# instead have silently removed the feature from the training frame too.)
X_train_final = X_train_processed.copy()
X_test_final = X_test_processed.reindex(
    columns=X_train_processed.columns, fill_value=0.0
)

numerical_features = ['SNR_dB']
scaler = StandardScaler()
# Fit the scaler on training data only; the test split reuses those statistics.
X_train_final[numerical_features] = scaler.fit_transform(
    X_train_final[numerical_features]
)
X_test_final[numerical_features] = scaler.transform(
    X_test_final[numerical_features]
)
print("Preprocessing complete.")

# 6. BUILD THE ANN MODEL
# --------------------------
# Fully connected regressor: three ReLU hidden layers (64 -> 128 -> 64) and a
# single linear output that predicts logBER.
print("\n--- 4. Building ANN Model ---")
n_inputs = X_train_final.shape[1]
input_shape = (n_inputs,)

hidden_units = (64, 128, 64)
network_layers = [layers.Input(shape=input_shape)]
network_layers += [layers.Dense(units, activation='relu') for units in hidden_units]
network_layers.append(layers.Dense(1))
model = models.Sequential(network_layers)

adam = tf.keras.optimizers.Adam(learning_rate=0.001)
model.compile(
    optimizer=adam,
    loss='mean_squared_error',
    metrics=['mean_absolute_error'],
)
model.summary()

# 7. TRAIN THE ANN MODEL
# --------------------------
print("\n--- 5. Training the ANN Model ---")
# Stop once validation loss has not improved for 20 epochs and roll the model
# back to the best weights seen.
early_stopping = callbacks.EarlyStopping(
    restore_best_weights=True,
    patience=20,
    monitor='val_loss',
)
fit_settings = {
    'epochs': 200,
    'batch_size': 32,
    'validation_split': 0.2,
    'callbacks': [early_stopping],
    'verbose': 1,
}
history = model.fit(X_train_final, y_train, **fit_settings)
print("Model training complete.")

# 8. EVALUATE THE ANN MODEL
# --------------------------
print("\n--- 6. Evaluating Model ---")
# The network predicts log10(BER); undo the transform to get linear BER.
y_pred_log = model.predict(X_test_final).flatten()
y_pred_ber = 10**y_pred_log

# Linear-scale metrics, compared against the original (floored) BER values.
r2 = r2_score(y_test_original_ber, y_pred_ber)
mse = mean_squared_error(y_test_original_ber, y_pred_ber)
mae = mean_absolute_error(y_test_original_ber, y_pred_ber)

print(f"  Final R-squared (R²): {r2:.6f}")
print(f"  Final MSE (on BER):   {mse:.2e}")
print(f"  Final MAE (on BER):   {mae:.2e}")

# Linear-scale scores are dominated by the largest BER values, so also report
# the fit in the log domain the model was actually trained on.
r2_log = r2_score(y_test, y_pred_log)
mae_log = mean_absolute_error(y_test, y_pred_log)
print(f"  R-squared (on logBER): {r2_log:.6f}")
print(f"  MAE (on logBER):       {mae_log:.4f} decades")

# Store results for plotting
results_df = X_test_df.copy()
results_df['BER_Actual'] = y_test_original_ber
results_df['BER_Predicted'] = y_pred_ber

# 9. GENERATING ALL PLOTS
# --------------------------
print("\n--- 7. Generating All Report Plots ---")

# --- PLOT 1: Loss vs. Epochs ---
# Training and validation loss per epoch, saved as a PNG for the report.
try:
    loss_fig, loss_ax = plt.subplots(figsize=(10, 6))
    loss_series = (('loss', 'Training Loss'), ('val_loss', 'Validation Loss'))
    for history_key, curve_label in loss_series:
        loss_ax.plot(history.history[history_key], label=curve_label)
    loss_ax.set_title('AWGN Model Training: Loss vs. Epochs')
    loss_ax.set_xlabel('Epoch')
    loss_ax.set_ylabel('Mean Squared Error (on logBER)')
    loss_ax.legend()
    loss_ax.grid(True)
    loss_fig.savefig("plot_awgn_loss_curve.png")  # FILENAME FOR REPORT
    print("Saved plot_awgn_loss_curve.png")
    plt.close(loss_fig)
except Exception as e:
    print(f"Error plotting loss curve: {e}")

# --- PLOT 2: Predicted vs. True Scatter Plot ---
# Log-log scatter of predicted against actual BER with the y = x reference.
try:
    scatter_fig, scatter_ax = plt.subplots(figsize=(8, 8))

    # The diagonal spans the combined range of actual and predicted values.
    lower = min(y_test_original_ber.min(), y_pred_ber.min())
    upper = max(y_test_original_ber.max(), y_pred_ber.max())
    diagonal = [lower, upper]
    scatter_ax.plot(diagonal, diagonal, 'r--', lw=2, label='Perfect Fit (y=x)')

    sns.scatterplot(
        x=y_test_original_ber,
        y=y_pred_ber,
        alpha=0.6,
        label='Model Prediction',
        ax=scatter_ax,
    )
    scatter_ax.set_title('Predicted BER vs. Actual BER (AWGN Test Set)')
    scatter_ax.set_xlabel('Actual BER (log scale)')
    scatter_ax.set_ylabel('Predicted BER (log scale)')
    scatter_ax.set_xscale('log')
    scatter_ax.set_yscale('log')
    scatter_ax.grid(True, which='both')
    scatter_ax.legend()
    scatter_fig.savefig("plot_awgn_pred_vs_true.png")  # FILENAME FOR REPORT
    print("Saved plot_awgn_pred_vs_true.png")
    plt.close(scatter_fig)
except Exception as e:
    print(f"Error plotting scatter plot: {e}")

# --- PLOT 3: Final BER Curves (FacetGrid) ---
# One facet per modulation showing the test points, the ANN prediction and the
# closed-form AWGN BER curve.
print("Generating AWGN BER curves (with theoretical truth)...")
try:
    modulation_order = ['BPSK', 'QPSK', '16QAM']
    g = sns.FacetGrid(
        results_df,
        row='Modulation',
        hue='Modulation',
        sharey=False,
        height=4,
        aspect=1.5,
        row_order=modulation_order
    )

    def _draw_predicted_curve(data, color, **kwargs):
        """Draw the ANN prediction for one facet as a red line ordered by SNR."""
        data.sort_values('SNR_dB').plot(
            x='SNR_dB', y='BER_Predicted', color='red', ax=plt.gca()
        )

    # 1. Plot the actual test data points
    g.map(sns.scatterplot, 'SNR_dB', 'BER_Actual', alpha=0.4, color='gray')
    # 2. Plot the model's predicted curve
    g.map_dataframe(_draw_predicted_curve)

    # 3. Add the "Ground Truth" analytical formulas
    # NOTE(review): the QPSK and 16QAM formulas treat SNR_dB as symbol SNR
    # (Es/N0), as the original comments did - confirm against the generator
    # of ber_dataset_improved.csv.
    snr_db_truth = np.linspace(0, 20, 100)
    snr_linear_truth = 10**(snr_db_truth / 10.0)

    # BPSK: Es = Eb, so Pb = 0.5 * erfc(sqrt(SNR)).
    ber_bpsk = 0.5 * erfc(np.sqrt(snr_linear_truth))
    # QPSK: Eb/N0 = (Es/N0) / 2, so Pb = 0.5 * erfc(sqrt(SNR / 2)).
    ber_qpsk_vs_snr = 0.5 * erfc(np.sqrt(snr_linear_truth / 2))
    # 16QAM (Gray-coded approximation): Pb ~= (3/8) * erfc(sqrt(SNR / 10)),
    # i.e. the symbol error rate 1.5 * erfc(sqrt(SNR / 10)) divided by the
    # 4 bits per symbol. The earlier code divided by 2, doubling the curve.
    ber_16qam_vs_snr = (3.0 / 8.0) * erfc(np.sqrt(snr_linear_truth / 10.0))

    truth_data = {
        'BPSK': ber_bpsk,
        'QPSK': ber_qpsk_vs_snr,
        '16QAM': ber_16qam_vs_snr
    }

    # Plot the truth curves on the correct axes (rows follow modulation_order)
    for i, mod in enumerate(modulation_order):
        ax = g.axes[i, 0]
        ax.plot(snr_db_truth, truth_data[mod], 'k--', linewidth=2)

    g.set_titles(row_template="{row_name} - AWGN Channel")
    g.set_axis_labels("SNR (dB)", "Bit Error Rate (BER)")
    g.set(yscale='log')
    g.set(ylim=(1e-7, 1.0))

    # Create a custom legend
    from matplotlib.lines import Line2D
    legend_elements = [
        Line2D([0], [0], marker='o', color='w', label='Actual Test Data',
               markerfacecolor='gray', markersize=7, alpha=0.5),
        Line2D([0], [0], color='red', lw=2, label='ANN Predicted Curve'),
        Line2D([0], [0], color='black', linestyle='--', lw=2, label='Theoretical Formula (Truth)')
    ]
    g.fig.legend(handles=legend_elements, loc='upper center',
                 bbox_to_anchor=(0.5, -0.01), ncol=3, title="Data Type")

    g.fig.subplots_adjust(top=0.9, bottom=0.1)
    g.fig.suptitle('AWGN Proof-of-Concept: Model vs. Truth',
                   fontsize=16, y=1.02)

    # The legend hangs below y=0 and the suptitle sits at y=1.02, both outside
    # the default canvas; a tight bounding box keeps them in the saved image.
    g.fig.savefig("plot_awgn_ber_curves.png", bbox_inches='tight')  # FILENAME FOR REPORT
    print("Saved plot_awgn_ber_curves.png")
    plt.close(g.fig)
except Exception as e:
    print(f"Error plotting BER vs SNR curves: {e}")

print("\n--- AWGN script complete. ---")

--- Starting 'AWGN-Only' Model (Phase 1: Proof-of-Concept) ---

--- 1. Loading and Filtering Data ---
Filtered 'AWGN-Only' size: 615 rows

--- 2. Preprocessing Target Variable ---
Created 'logBER' as the new target.

--- 3. Preprocessing Features ---
Preprocessing complete.

--- 4. Building ANN Model ---



--- 5. Training the ANN Model ---
Epoch 1/200
[1m13/13[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 56ms/step - loss: 33.9627 - mean_absolute_error: 4.4269 - val_loss: 27.5306 - val_mean_absolute_error: 3.7541
Epoch 2/200
[1m13/13[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 22ms/step - loss: 23.6975 - mean_absolute_error: 3.5211 - val_loss: 13.1239 - val_mean_absolute_error: 2.8234
Epoch 3/200
[1m13/13[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 21ms/step - loss: 9.0431 - mean_absolute_error: 2.4375 - val_loss: 2.3350 - val_mean_absolute_error: 1.3732
Epoch 4/200
[1m13/13[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 14ms/step - loss: 2.4976 - mean_absolute_error: 1.3173 - val_loss: 1.2417 - val_mean_absolute_error: 0.8070
Epoch 5/200
[1m13/13[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 14ms/step - loss: 1.3624 - mean_absolute_error: 0.8827 - val_loss: 1.1815 - val_mean_absolute_error: 0.7895
Epoch 6/200
[1m13/13[0m [32m━━━━━━━━━━

In [None]:
import pandas as pd
import numpy as np
import tensorflow as tf
from tensorflow.keras import layers, models, callbacks
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import r2_score, mean_squared_error, mean_absolute_error
import matplotlib.pyplot as plt
import seaborn as sns
import warnings

# 1. SETUP
# --------------------------
# Quieten library warnings and pick the seaborn look used by every figure.
warnings.filterwarnings("ignore")
sns.set_theme(style="darkgrid")

# Seed the NumPy and TensorFlow random generators with the same fixed value.
SEED = 42
np.random.seed(SEED)
tf.random.set_seed(SEED)

print("--- Starting 'Rayleigh-Only' Model (Phase 2: Innovation) ---")

# 2. LOAD AND FILTER DATA
# --------------------------
# Read the full BER dataset and keep only the rows measured on the Rayleigh
# channel. Any failure here is fatal for the rest of the script.
print("\n--- 1. Loading and Filtering Data ---")
try:
    df = pd.read_csv("ber_dataset_improved.csv")
    df_rayleigh = df[df['Channel'] == 'Rayleigh'].copy()
    print(f"Filtered 'Rayleigh-Only' size: {len(df_rayleigh)} rows")
except Exception as e:
    print(f"Error loading data: {e}")
    # `exit()` is the interactive-shell helper added by the `site` module and
    # ends with a success status; raising SystemExit stops the run with a
    # failure status in scripts and notebooks alike.
    raise SystemExit(1) from e

# An empty filter result would only surface later as an opaque error from
# train_test_split, so stop here with a clear message instead.
if df_rayleigh.empty:
    print("Error loading data: no rows with Channel == 'Rayleigh' were found")
    raise SystemExit(1)

# 3. PREPROCESSING (The "Golden Rule")
# --------------------------
# Floor BER at 1e-10 before taking log10 so zero-error rows stay finite;
# the floored BER is written back and log10(BER) becomes the training target.
print("\n--- 2. Preprocessing Target Variable ---")
ber_floored = df_rayleigh['BER'].clip(lower=1e-10)
df_rayleigh['BER'] = ber_floored
df_rayleigh['logBER'] = np.log10(ber_floored)
print("Created 'logBER' as the new target.")

# 4. DEFINE FEATURES (X) AND TARGET (y)
# --------------------------
features = ['SNR_dB', 'Modulation']
target = 'logBER'
X = df_rayleigh[features]
y = df_rayleigh[target]

# The linear-scale BER rides along through the split so the test rows keep
# their original BER values for the evaluation step.
X_with_ber = X.join(df_rayleigh['BER'])
split_parts = train_test_split(X_with_ber, y, test_size=0.2, random_state=42)
X_train_df, X_test_df, y_train, y_test = split_parts

# Pull the BER column back out: it is a reference value, not a model input.
y_test_original_ber = X_test_df['BER']
X_train_df = X_train_df.drop(columns=['BER'])
X_test_df = X_test_df.drop(columns=['BER'])

# 5. PREPROCESSING FEATURES
# --------------------------
# One-hot encode the modulation and standardise SNR_dB, fitting every
# transformation on the training split only.
print("\n--- 3. Preprocessing Features ---")

# dtype=float keeps the whole frame numeric. Newer pandas versions default to
# bool dummies, and a bool/float mix converts to an object array that some
# TensorFlow/Keras versions refuse to turn into a tensor.
X_train_processed = pd.get_dummies(X_train_df, columns=['Modulation'], dtype=float)
X_test_processed = pd.get_dummies(X_test_df, columns=['Modulation'], dtype=float)

# The training columns define the model inputs. Re-index the test frame onto
# them: a category missing from the test split becomes an all-zero column, and
# a category never seen in training is dropped. (The previous inner join would
# instead have silently removed the feature from the training frame too.)
X_train_final = X_train_processed.copy()
X_test_final = X_test_processed.reindex(
    columns=X_train_processed.columns, fill_value=0.0
)

numerical_features = ['SNR_dB']
scaler = StandardScaler()
# Fit the scaler on training data only; the test split reuses those statistics.
X_train_final[numerical_features] = scaler.fit_transform(
    X_train_final[numerical_features]
)
X_test_final[numerical_features] = scaler.transform(
    X_test_final[numerical_features]
)
print("Preprocessing complete.")

# 6. BUILD THE ANN MODEL
# --------------------------
# Fully connected regressor: three ReLU hidden layers (64 -> 128 -> 64) and a
# single linear output that predicts logBER.
print("\n--- 4. Building ANN Model ---")
n_inputs = X_train_final.shape[1]
input_shape = (n_inputs,)

hidden_units = (64, 128, 64)
network_layers = [layers.Input(shape=input_shape)]
network_layers += [layers.Dense(units, activation='relu') for units in hidden_units]
network_layers.append(layers.Dense(1))
model = models.Sequential(network_layers)

adam = tf.keras.optimizers.Adam(learning_rate=0.001)
model.compile(
    optimizer=adam,
    loss='mean_squared_error',
    metrics=['mean_absolute_error'],
)
model.summary()

# 7. TRAIN THE ANN MODEL
# --------------------------
print("\n--- 5. Training the ANN Model ---")
# Stop once validation loss has not improved for 20 epochs and roll the model
# back to the best weights seen.
early_stopping = callbacks.EarlyStopping(
    restore_best_weights=True,
    patience=20,
    monitor='val_loss',
)
fit_settings = {
    'epochs': 200,
    'batch_size': 32,
    'validation_split': 0.2,
    'callbacks': [early_stopping],
    'verbose': 1,
}
history = model.fit(X_train_final, y_train, **fit_settings)
print("Model training complete.")

# 8. EVALUATE THE ANN MODEL
# --------------------------
print("\n--- 6. Evaluating Model ---")
# The network predicts log10(BER); undo the transform to get linear BER.
y_pred_log = model.predict(X_test_final).flatten()
y_pred_ber = 10**y_pred_log

# Linear-scale metrics, always computed from this run's predictions (never
# copied from an earlier notebook result).
r2 = r2_score(y_test_original_ber, y_pred_ber)
mse = mean_squared_error(y_test_original_ber, y_pred_ber)
mae = mean_absolute_error(y_test_original_ber, y_pred_ber)

print(f"  Final R-squared (R²): {r2:.6f}")
print(f"  Final MSE (on BER):   {mse:.2e}")
print(f"  Final MAE (on BER):   {mae:.2e}")

# Linear-scale scores are dominated by the largest BER values, so also report
# the fit in the log domain the model was actually trained on.
r2_log = r2_score(y_test, y_pred_log)
mae_log = mean_absolute_error(y_test, y_pred_log)
print(f"  R-squared (on logBER): {r2_log:.6f}")
print(f"  MAE (on logBER):       {mae_log:.4f} decades")

# Store results for plotting
results_df = X_test_df.copy()
results_df['BER_Actual'] = y_test_original_ber
results_df['BER_Predicted'] = y_pred_ber

# 9. GENERATING ALL PLOTS
# --------------------------
print("\n--- 7. Generating All Report Plots ---")

# --- PLOT 1: Loss vs. Epochs ---
# Training and validation loss per epoch, saved as a PNG for the report.
try:
    loss_fig, loss_ax = plt.subplots(figsize=(10, 6))
    loss_series = (('loss', 'Training Loss'), ('val_loss', 'Validation Loss'))
    for history_key, curve_label in loss_series:
        loss_ax.plot(history.history[history_key], label=curve_label)
    loss_ax.set_title('Rayleigh Model Training: Loss vs. Epochs')
    loss_ax.set_xlabel('Epoch')
    loss_ax.set_ylabel('Mean Squared Error (on logBER)')
    loss_ax.legend()
    loss_ax.grid(True)
    loss_fig.savefig("plot_rayleigh_loss_curve.png")  # FILENAME FOR REPORT
    print("Saved plot_rayleigh_loss_curve.png")
    plt.close(loss_fig)
except Exception as e:
    print(f"Error plotting loss curve: {e}")

# --- PLOT 2: Predicted vs. True Scatter Plot ---
# Log-log scatter of predicted against actual BER with the y = x reference.
try:
    scatter_fig, scatter_ax = plt.subplots(figsize=(8, 8))

    # The diagonal spans the combined range of actual and predicted values.
    lower = min(y_test_original_ber.min(), y_pred_ber.min())
    upper = max(y_test_original_ber.max(), y_pred_ber.max())
    diagonal = [lower, upper]
    scatter_ax.plot(diagonal, diagonal, 'r--', lw=2, label='Perfect Fit (y=x)')

    sns.scatterplot(
        x=y_test_original_ber,
        y=y_pred_ber,
        alpha=0.6,
        label='Model Prediction',
        ax=scatter_ax,
    )
    scatter_ax.set_title('Predicted BER vs. Actual BER (Rayleigh Test Set)')
    scatter_ax.set_xlabel('Actual BER (log scale)')
    scatter_ax.set_ylabel('Predicted BER (log scale)')
    scatter_ax.set_xscale('log')
    scatter_ax.set_yscale('log')
    scatter_ax.grid(True, which='both')
    scatter_ax.legend()
    scatter_fig.savefig("plot_rayleigh_pred_vs_true.png")  # FILENAME FOR REPORT
    print("Saved plot_rayleigh_pred_vs_true.png")
    plt.close(scatter_fig)
except Exception as e:
    print(f"Error plotting scatter plot: {e}")

# --- PLOT 3: Final BER Curves (FacetGrid) ---
# One facet per modulation showing the test points and the ANN prediction.
print("Generating Rayleigh BER curves...")
try:
    g = sns.FacetGrid(
        results_df,
        row='Modulation',
        hue='Modulation',
        sharey=False,
        height=4,
        aspect=1.5,
        row_order=['BPSK', 'QPSK', '16QAM']
    )

    def _draw_predicted_curve(data, color, **kwargs):
        """Draw the ANN prediction for one facet as a red line ordered by SNR."""
        data.sort_values('SNR_dB').plot(
            x='SNR_dB', y='BER_Predicted', color='red', ax=plt.gca()
        )

    # 1. Plot the actual test data points
    g.map(sns.scatterplot, 'SNR_dB', 'BER_Actual', alpha=0.4, color='gray')
    # 2. Plot the model's predicted curve
    g.map_dataframe(_draw_predicted_curve)

    g.set_titles(row_template="{row_name} - Rayleigh Channel")
    g.set_axis_labels("SNR (dB)", "Bit Error Rate (BER)")
    g.set(yscale='log')
    # Lower y-limit: one decade under the smallest actual BER but never below
    # 1e-7; fall back to 1e-10 if the smallest value is not positive (a log
    # axis cannot show it).
    lowest_actual = results_df['BER_Actual'].min()
    safe_min = max(1e-7, lowest_actual * 0.1) if lowest_actual > 0 else 1e-10
    g.set(ylim=(safe_min, 1.0))

    # Create a custom legend
    from matplotlib.lines import Line2D
    legend_elements = [
        Line2D([0], [0], marker='o', color='w', label='Actual Test Data',
               markerfacecolor='gray', markersize=7, alpha=0.5),
        Line2D([0], [0], color='red', lw=2, label='ANN Predicted Curve')
    ]
    g.fig.legend(handles=legend_elements, loc='upper center',
                 bbox_to_anchor=(0.5, -0.01), ncol=2, title="Data Type")

    g.fig.subplots_adjust(top=0.9, bottom=0.1)
    g.fig.suptitle('Rayleigh Channel (Innovation): Model vs. Actual',
                   fontsize=16, y=1.02)

    # The legend hangs below y=0 and the suptitle sits at y=1.02, both outside
    # the default canvas; a tight bounding box keeps them in the saved image.
    g.fig.savefig("plot_rayleigh_ber_curves.png", bbox_inches='tight')  # FILENAME FOR REPORT
    print("Saved plot_rayleigh_ber_curves.png")
    plt.close(g.fig)
except Exception as e:
    print(f"Error plotting BER vs SNR curves: {e}")

print("\n--- Rayleigh script complete. ---")

--- Starting 'Rayleigh-Only' Model (Phase 2: Innovation) ---

--- 1. Loading and Filtering Data ---
Filtered 'Rayleigh-Only' size: 615 rows

--- 2. Preprocessing Target Variable ---
Created 'logBER' as the new target.

--- 3. Preprocessing Features ---
Preprocessing complete.

--- 4. Building ANN Model ---



--- 5. Training the ANN Model ---
Epoch 1/200
[1m13/13[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 21ms/step - loss: 1.6243 - mean_absolute_error: 1.1135 - val_loss: 0.5569 - val_mean_absolute_error: 0.5979
Epoch 2/200
[1m13/13[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 8ms/step - loss: 0.3758 - mean_absolute_error: 0.5031 - val_loss: 0.1280 - val_mean_absolute_error: 0.2845
Epoch 3/200
[1m13/13[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 8ms/step - loss: 0.1071 - mean_absolute_error: 0.2567 - val_loss: 0.0298 - val_mean_absolute_error: 0.1434
Epoch 4/200
[1m13/13[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 9ms/step - loss: 0.0228 - mean_absolute_error: 0.1245 - val_loss: 0.0069 - val_mean_absolute_error: 0.0666
Epoch 5/200
[1m13/13[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 9ms/step - loss: 0.0063 - mean_absolute_error: 0.0647 - val_loss: 0.0034 - val_mean_absolute_error: 0.0499
Epoch 6/200
[1m13/13[0m [32m━━━━━━━━━━━━━━━━━━