In [5]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import mean_squared_error, r2_score
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout
from tensorflow.keras.optimizers import Adam
import warnings
warnings.filterwarnings('ignore')
from openpyxl import load_workbook

# Load the data
data = pd.read_excel("Normalized Data Set.xlsx")

# Assuming `data` is your DataFrame
X = data.iloc[:, :-2].values
y = data.iloc[:, -4:].values

# Split data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Scale the input data
scaler_X = StandardScaler()
X_train = scaler_X.fit_transform(X_train)
X_test = scaler_X.transform(X_test)

# Define the neural network model
def create_model(input_dim):
    model = Sequential()
    model.add(Dense(32, input_dim=input_dim, activation='relu'))
    model.add(Dropout(0.2))
    model.add(Dense(16, activation='relu'))
    model.add(Dense(8, activation='relu'))
    model.add(Dense(1, activation='linear'))
    model.compile(optimizer=Adam(), loss='mse')
    return model

# Initialize lists to store evaluation metrics for each output
mse_scores = []
r2_scores_train = []
r2_scores_test = []
avg_relative_errors_train = []
avg_relative_errors_test = []
# Initialize lists to store true and predicted values
train_data = []
test_data = []
recoveries = [[0.2, 0.75, 0.25, 2.75]]

# Train and evaluate a separate model for each output
for i in range(4):
    print(f"\nProcessing Output {i+1}...\n")
    
    # Scale the output data
    scaler_y = StandardScaler()
    y_train_scaled = scaler_y.fit_transform(y_train[:, i].reshape(-1, 1))
    y_test_scaled = scaler_y.transform(y_test[:, i].reshape(-1, 1))

    # Create and train the model
    model = create_model(X_train.shape[1])
    model.fit(X_train, y_train_scaled, epochs=100, batch_size=32, verbose=1, validation_split=0.1)

    # Make predictions on training data
    y_train_pred_scaled = model.predict(X_train)
    y_train_pred = scaler_y.inverse_transform(y_train_pred_scaled)

    # Make predictions on testing data
    y_test_pred_scaled = model.predict(X_test)
    y_test_pred = scaler_y.inverse_transform(y_test_pred_scaled)

    # Store true and predicted values
    train_data.append(pd.DataFrame({
        'True Values': y_train[:, i],
        'Predicted Values': y_train_pred.flatten()
    }))
    
    test_data.append(pd.DataFrame({
        'True Values': y_test[:, i],
        'Predicted Values': y_test_pred.flatten()
    }))

    # Calculate metrics for training data
    r2_train = r2_score(y_train[:, i], y_train_pred)
    relative_errors_train = np.abs((y_train[:, i] - y_train_pred) / y_train[:, i])
    avg_relative_error_train = np.mean(relative_errors_train)

    # Calculate metrics for testing data
    mse_test = mean_squared_error(y_test[:, i], y_test_pred)
    r2_test = r2_score(y_test[:, i], y_test_pred)
    relative_errors_test = np.abs((y_test[:, i] - y_test_pred) / y_test[:, i])
    avg_relative_error_test = np.mean(relative_errors_test)

    mse_scores.append(mse_test)
    r2_scores_train.append(r2_train)
    r2_scores_test.append(r2_test)
    avg_relative_errors_train.append(avg_relative_error_train)
    avg_relative_errors_test.append(avg_relative_error_test)

     # Print training data 
    print(f"Training Data for Output {i+1}:")
    print(train_data[-1])
    # Print testing data
    print(f"Testing Data for Output {i+1}:")
    print(test_data[-1])

    # Print training data and metrics
    #print(f"Training Data for Output {i+1}:")
    #print(pd.DataFrame({
     #   'True Values': y_train[:, i],
      #  'Predicted Values': y_train_pred.flatten()
    #}))
    print(f"Output {i+1} - Training R2 Score: {r2_train}")
    print(f"Output {i+1} - Training Average Relative Error: {avg_relative_error_train}")

    # Print testing data and metrics
    #print(f"Testing Data for Output {i+1}:")
    #print(pd.DataFrame({
     #   'True Values': y_test[:, i],
      #  'Predicted Values': y_test_pred.flatten()
    #}))
    print(f"Output {i+1} - Testing Mean Squared Error: {mse_test}")
    print(f"Output {i+1} - Testing R2 Score: {r2_test}")
    print(f"Output {i+1} - Testing Average Relative Error: {avg_relative_error_test}")

    # Plot the true vs predicted values for the current output
    output_names = data.columns[-4:]
    plt.figure()
    plt.scatter(y_test[:, i], y_test_pred)
    plt.xlabel('True Values')
    plt.ylabel('Predicted Values')
    plt.title(output_names[i])
    max_value = max(np.max(y_test[:, i]), np.max(y_test_pred))
    plt.xlim(0, max_value + 0.1 * max_value)
    plt.ylim(0, max_value + 0.1 * max_value)
    plt.xlim(-1,1)
    plt.ylim(-1,1)
    plt.savefig(f'output_{i+1}_scatter.png')
    plt.close()

# Save overall scores to a CSV file
metrics_df = pd.DataFrame({
    'Output': [f'Output {i+1}' for i in range(4)],
    'Mean Squared Error': mse_scores,
    'R2 Score (Train)': r2_scores_train,
    'R2 Score (Test)': r2_scores_test,
    'Average Relative Error (Train)': avg_relative_errors_train,
    'Average Relative Error (Test)': avg_relative_errors_test
})

metrics_df.to_csv('model_metrics.csv', index=False)
  
# Save all data to an Excel file
with pd.ExcelWriter('model_evaluation.xlsx') as writer:
    # Save metrics
    metrics_df.to_excel(writer, sheet_name='Metrics', index=False)

    # Save true and predicted values for training and testing data
    for i in range(4):
        train_data[i].to_excel(writer, sheet_name=f'Training Output {i+1}', index=False)
        test_data[i].to_excel(writer, sheet_name=f'Testing Output {i+1}', index=False)


# Print overall scores
print("Overall Scores:")
print(metrics_df)

# Save overall scores plot
plt.figure()
plt.plot(metrics_df['Output'], metrics_df['Mean Squared Error'], label='Mean Squared Error')
plt.plot(metrics_df['Output'], metrics_df['R2 Score (Train)'], label='R2 Score (Train)')
plt.plot(metrics_df['Output'], metrics_df['R2 Score (Test)'], label='R2 Score (Test)')
plt.plot(metrics_df['Output'], metrics_df['Average Relative Error (Train)'], label='Average Relative Error (Train)')
plt.plot(metrics_df['Output'], metrics_df['Average Relative Error (Test)'], label='Average Relative Error (Test)')
plt.xlabel('Output')
plt.ylabel('Metrics')
plt.title('Model Metrics')
plt.legend()
plt.savefig('model_metrics.png')
plt.close()



Processing Output 1...

Epoch 1/100
[1m12/12[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 20ms/step - loss: 1.0500 - val_loss: 0.6845
Epoch 2/100
[1m12/12[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 5ms/step - loss: 0.9894 - val_loss: 0.6156
Epoch 3/100
[1m12/12[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 5ms/step - loss: 0.8156 - val_loss: 0.5608
Epoch 4/100
[1m12/12[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 5ms/step - loss: 0.7855 - val_loss: 0.4883
Epoch 5/100
[1m12/12[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 5ms/step - loss: 0.7476 - val_loss: 0.4168
Epoch 6/100
[1m12/12[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 5ms/step - loss: 0.5853 - val_loss: 0.3368
Epoch 7/100
[1m12/12[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 5ms/step - loss: 0.5258 - val_loss: 0.2819
Epoch 8/100
[1m12/12[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 5ms/step - loss: 0.4702 - val_loss: 0.2365
Epoch 9/100
[1m12/12

In [3]:
print(metrics_df)

     Output  Mean Squared Error  R2 Score (Train)  R2 Score (Test)  \
0  Output 1            0.011723          0.982130         0.969180   
1  Output 2            0.008441          0.973045         0.939189   
2  Output 3            0.014213          0.958968         0.891998   
3  Output 4            0.013591          0.945692         0.898550   

   Average Relative Error (Train)  Average Relative Error (Test)  
0                        4.903148                       2.341714  
1                        0.870637                       0.931142  
2                        0.928750                       1.186975  
3                        1.110064                       1.275070  
