In [3]:
# Import required libraries
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.ensemble import RandomForestRegressor, GradientBoostingRegressor, AdaBoostRegressor
from sklearn.svm import SVR
from sklearn.neighbors import KNeighborsRegressor
from sklearn.metrics import mean_squared_error
from sklearn.multioutput import MultiOutputRegressor
import xgboost as xgb

# Load data from CSV file
data = pd.read_csv('Book2.csv')

# Create interaction features
data['R_phi2_interaction'] = data['R'] * data['phi 2']

# Select features and target variables
X = data[['R', 'phi 2', 'R_phi2_interaction']].values
y = data[['A=B=0', 'A=0  B=1', 'A=1  B=0', 'A=B=1']].values

# Split the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Initialize the models (wrap models with MultiOutputRegressor)
models = {
    'Linear Regression': MultiOutputRegressor(LinearRegression()),
    'Random Forest': MultiOutputRegressor(RandomForestRegressor(n_estimators=100, random_state=42)),
    'Gradient Boosting': MultiOutputRegressor(GradientBoostingRegressor(n_estimators=100, random_state=42)),
    'Support Vector Regression': MultiOutputRegressor(SVR()),
    'K-Nearest Neighbors': MultiOutputRegressor(KNeighborsRegressor()),
    'AdaBoost Regressor': MultiOutputRegressor(AdaBoostRegressor(n_estimators=100, random_state=42)),
    'XGBoost Regressor': MultiOutputRegressor(xgb.XGBRegressor(n_estimators=100, random_state=42))
}

# Train and evaluate models
results = {}

for model_name, model in models.items():
    # Train the model
    model.fit(X_train, y_train)
    
    # Make predictions
    y_pred = model.predict(X_test)
    
    # Evaluate the model using Mean Squared Error
    mse = mean_squared_error(y_test, y_pred)
    results[model_name] = mse

# Print the results
for model_name, mse in results.items():
    print(f'{model_name} MSE: {mse}')


Linear Regression MSE: 0.031925526997707024
Random Forest MSE: 0.04831769200000004
Gradient Boosting MSE: 0.022358337586655355
Support Vector Regression MSE: 0.09430770865896448
K-Nearest Neighbors MSE: 0.13040739999999998
AdaBoost Regressor MSE: 0.061412534722222224
XGBoost Regressor MSE: 0.03683723025589658


In [5]:
import pandas as pd
import numpy as np
from sklearn.ensemble import GradientBoostingRegressor
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import mean_absolute_error
from sklearn.multioutput import MultiOutputRegressor

# Load data from CSV file
data = pd.read_csv('Book2.csv')

# Create interaction features
data['R_phi2_interaction'] = data['R'] * data['phi 2']

# Select features and target variables
X = data[['R', 'phi 2', 'R_phi2_interaction']].values
y = data[['A=B=0', 'A=0  B=1', 'A=1  B=0', 'A=B=1']].values

# Check shape of y (should be 2D)
print(f"Shape of y: {y.shape}")

# Normalize the features
scaler = MinMaxScaler()
X = scaler.fit_transform(X)

# Split data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Train separate Gradient Boosting models for each target variable using MultiOutputRegressor
gb_model = MultiOutputRegressor(GradientBoostingRegressor(n_estimators=100, random_state=42))

# Fit the model to the training data
gb_model.fit(X_train, y_train)

# Predict on the test set
gb_test_predictions = gb_model.predict(X_test)

# Evaluate the Gradient Boosting models
mae_scenario = {}
for i in range(4):
    mae_scenario[f'Scenario {i}'] = mean_absolute_error(y_test[:, i], gb_test_predictions[:, i])
    print(f"Mean Absolute Error (MAE) for Scenario {i}: {mae_scenario[f'Scenario {i}']}")

# Function to predict new values
def predict_new_values(R, phi2):
    new_data = np.array([[R, phi2, R * phi2]])
    new_data = scaler.transform(new_data)
    predictions = gb_model.predict(new_data)
    return predictions[0]

# Define the input values for R and phi2
R_values = [0.2, 0.2, 0.2, 0.2, 0.2, 
    0.3, 0.3, 0.3, 0.3, 0.3,
    0.4, 0.4, 0.4, 0.4, 0.4, 
    0.5, 0.5, 0.5, 0.5, 0.5, 
    0.6, 0.6, 0.6, 0.6, 0.6]
phi2_values = [0, 45, 90, -45, -90, 
    0, 45, 90, -45, -90, 
    0, 45, 90, -45, -90, 
    0, 45, 90, -45, -90, 
    0, 45, 90, -45, -90]

# Generate unique combinations of R and phi2
unique_pairs = list(set(zip(R_values, phi2_values)))

# Predict for each unique combination of R and phi2
results_dict = {}
predictions = {'A=B=0': [], 'A=0  B=1': [], 'A=1  B=0': [], 'A=B=1': []}
for R, phi2 in unique_pairs:
    pred = predict_new_values(R, phi2)
    results_dict[(R, phi2)] = pred
    predictions['A=B=0'].append(pred[0])
    predictions['A=0  B=1'].append(pred[1])
    predictions['A=1  B=0'].append(pred[2])
    predictions['A=B=1'].append(pred[3])

# Convert predictions to numpy arrays for calculations
predictions = {key: np.array(value) for key, value in predictions.items()}

# Filter out predictions where 'A=B=0' or 'A=0  B=1' is above 1.1
filter_mask = (predictions['A=B=0'] <= 1.1) & (predictions['A=0  B=1'] <= 1.05)
filtered_predictions = {key: predictions[key][filter_mask] for key in predictions}

# Calculate optimize_R formulas
optimize_R_XOR = (1 * filtered_predictions['A=1  B=0']) * filtered_predictions['A=0  B=1'] / (1 * filtered_predictions['A=B=1'])

# Prepare R and phi2 values for the DataFrame
R_filtered = np.array([pair[0] for pair in unique_pairs])[filter_mask]
phi2_filtered = np.array([pair[1] for pair in unique_pairs])[filter_mask]

# Store the results in a DataFrame
results_df = pd.DataFrame({
    'R': R_filtered,
    'phi2': phi2_filtered,
    'GB_preds_AB_0': filtered_predictions['A=B=0'],
    'GB_preds_A_1B_0': filtered_predictions['A=0  B=1'],
    'GB_preds_A_0B_1': filtered_predictions['A=1  B=0'],
    'GB_preds_AB_1': filtered_predictions['A=B=1'],
    'optimize_R_XOR': optimize_R_XOR,
})

# Sort by optimize_R_XOR in descending order
sorted_XOR = results_df.sort_values(by='optimize_R_XOR', ascending=False).head(7)
print("\nTop 7 sorted by optimize_R_XOR:")
print(sorted_XOR)


Shape of y: (25, 4)
Mean Absolute Error (MAE) for Scenario 0: 0.0
Mean Absolute Error (MAE) for Scenario 1: 5.716013040601986e-06
Mean Absolute Error (MAE) for Scenario 2: 4.1223291073810845e-06
Mean Absolute Error (MAE) for Scenario 3: 0.2596359995208488

Top 7 sorted by optimize_R_XOR:
      R  phi2  GB_preds_AB_0  GB_preds_A_1B_0  GB_preds_A_0B_1  GB_preds_AB_1  \
15  0.4   -90            0.0         0.530002         0.699999       0.031725   
10  0.3   -90            0.0         0.400005         0.799996       0.090402   
1   0.5   -90            0.0         0.749996         0.550002       0.200270   
5   0.2   -45            0.0         0.350007         0.819995       0.141639   
16  0.3   -45            0.0         0.400005         0.799996       0.202827   
23  0.2   -90            0.0         0.350007         0.819995       0.198205   
11  0.6   -90            0.0         0.999990         0.350008       0.399008   

    optimize_R_XOR  
15       11.694437  
10        3.539766  