In [1]:
import numpy as np
from sklearn.ensemble import GradientBoostingRegressor, RandomForestRegressor
from sklearn.linear_model import LinearRegression
from sklearn.gaussian_process import GaussianProcessRegressor
from sklearn.gaussian_process.kernels import RBF, ConstantKernel as C
from scipy.optimize import minimize
import warnings
warnings.filterwarnings('ignore')

# Historical data for Function 6 (weeks 1-3)
X = np.array([
    [0.75, 0.75, 0.75, 0.75, 0.75],           # Week 1 input
    [0.3, 0.3, 0.3, 0.3, 0.3],                # Week 2 input
    [0.489055, 0.019291, 0.44768, 0.402488, 0.315673]   # Week 3 input
])

y = np.array([
    -1.5206776627405865,   # Week 1 output
    -1.1386161416875031,   # Week 2 output
    -1.1230704411246568    # Week 3 output
])

print("Function 6 Training Data:")
print("="*60)
for i in range(len(X)):
    print(f"Week {i+1}: Input = {X[i]} → Output = {y[i]:.6f}")
print()

# Method 1: Linear Regression
print("Method 1: Linear Regression")
print("-"*60)
lr_model = LinearRegression()
lr_model.fit(X, y)

# Method 2: Gradient Boosting
print("Method 2: Gradient Boosting")
print("-"*60)
gb_model = GradientBoostingRegressor(n_estimators=100, random_state=42)
gb_model.fit(X, y)

# Method 3: Random Forest
print("Method 3: Random Forest")
print("-"*60)
rf_model = RandomForestRegressor(n_estimators=100, random_state=42)
rf_model.fit(X, y)

# Method 4: Gaussian Process
print("Method 4: Gaussian Process")
print("-"*60)
kernel = C(1.0, (1e-3, 1e3)) * RBF(1.0, (1e-3, 1e3))
gp_model = GaussianProcessRegressor(kernel=kernel, n_restarts_optimizer=10, random_state=42)
gp_model.fit(X, y)

# Method 5: Bayesian Optimization using GP
print("Method 5: Bayesian Optimization")
print("-"*60)

def acquisition_function(x, model):
    """Expected Improvement acquisition function"""
    x = x.reshape(1, -1)
    mu, sigma = model.predict(x, return_std=True)
    # We want to maximize, so we negate for minimization
    return -mu[0]

# Use Bayesian Optimization to find best input
best_score = float('-inf')
best_input_bo = None

# Try multiple random starts for optimization
for _ in range(30):
    x0 = np.random.uniform(0.001, 0.5, 5)
    
    result = minimize(
        lambda x: acquisition_function(x, gp_model),
        x0,
        method='L-BFGS-B',
        bounds=[(0.001, 1.0)] * 5
    )
    
    if result.success:
        pred_value = gp_model.predict(result.x.reshape(1, -1))[0]
        if pred_value > best_score:
            best_score = pred_value
            best_input_bo = result.x

print(f"Bayesian Optimization found: {best_input_bo}")
print()

# Test candidate inputs
candidates = np.array([
    [0.2, 0.2, 0.2, 0.2, 0.2],
    [0.25, 0.25, 0.25, 0.25, 0.25],
    [0.3, 0.3, 0.3, 0.3, 0.3],
    [0.35, 0.35, 0.35, 0.35, 0.35],
    [0.4, 0.4, 0.4, 0.4, 0.4],
    [0.28, 0.32, 0.3, 0.29, 0.31],  # Slightly varied
    best_input_bo  # Add BO result
])

print("\nPredictions for candidate inputs:")
print("="*60)
print(f"{'Input':<50} {'LR':<10} {'GB':<10} {'RF':<10} {'GP':<10}")
print("-"*60)

all_predictions = []
for candidate in candidates:
    lr_pred = lr_model.predict([candidate])[0]
    gb_pred = gb_model.predict([candidate])[0]
    rf_pred = rf_model.predict([candidate])[0]
    gp_pred = gp_model.predict([candidate])[0]
    
    avg_pred = (lr_pred + gb_pred + rf_pred + gp_pred) / 4
    all_predictions.append(avg_pred)
    
    print(f"{str(candidate):<50} {lr_pred:<10.6f} {gb_pred:<10.6f} {rf_pred:<10.6f} {gp_pred:<10.6f}")

print()
print("Ensemble Predictions (average of all models):")
print("-"*60)
for i, candidate in enumerate(candidates):
    print(f"{str(candidate):<50} → {all_predictions[i]:.6f}")

# Choose best based on ensemble
best_idx = np.argmax(all_predictions)
best_input = candidates[best_idx]

print()
print("="*60)
print(f"RECOMMENDED Week 4 submission for Function 6: {best_input}")
print(f"Expected output: {all_predictions[best_idx]:.6f}")
print("="*60)

# Additional analysis
print("\nAdditional Analysis:")
print("-"*60)
print("Observation: All outputs are negative")
print("Week 1: [0.75] (uniform high) → -1.52 (worst)")
print("Week 2: [0.30] (uniform medium) → -1.14 (better)")
print("Week 3: [varied] → -1.12 (best so far)")
print("Pattern: Lower and/or more varied values seem to perform better")
print("Goal: Get closest to zero (maximize)")

Function 6 Training Data:
Week 1: Input = [0.75 0.75 0.75 0.75 0.75] → Output = -1.520678
Week 2: Input = [0.3 0.3 0.3 0.3 0.3] → Output = -1.138616
Week 3: Input = [0.489055 0.019291 0.44768  0.402488 0.315673] → Output = -1.123070

Method 1: Linear Regression
------------------------------------------------------------
Method 2: Gradient Boosting
------------------------------------------------------------
Method 3: Random Forest
------------------------------------------------------------
Method 4: Gaussian Process
------------------------------------------------------------
Method 5: Bayesian Optimization
------------------------------------------------------------
Bayesian Optimization found: [0.001 0.001 0.001 0.001 0.001]


Predictions for candidate inputs:
Input                                              LR         GB         RF         GP        
------------------------------------------------------------
[0.2 0.2 0.2 0.2 0.2]                              -1.053714  -1.1386

In [3]:
import numpy as np
from sklearn.gaussian_process import GaussianProcessRegressor
from sklearn.gaussian_process.kernels import RBF, ConstantKernel as C, WhiteKernel
from scipy.optimize import minimize
import warnings

# Suppress warnings for cleaner output
warnings.filterwarnings("ignore")

# 1. DATA PREPARATION (Extracted from your files)
# Inputs for Function 6 (3 weeks of history)
X = np.array([
    [0.75, 0.75, 0.75, 0.75, 0.75],                 # Week 1
    [0.3, 0.3, 0.3, 0.3, 0.3],                      # Week 2
    [0.489055, 0.019291, 0.44768, 0.402488, 0.315673] # Week 3
])

# Outputs for Function 6
y = np.array([
    -1.5206776627405865,
    -1.1386161416875031,
    -1.1230704411246568
])

# 2. MODEL DEFINITION
# We use a Gaussian Process with a Radial Basis Function (RBF) kernel.
# We include a WhiteKernel to account for potential noise in the observations.
kernel = C(1.0, (1e-3, 1e3)) * RBF(length_scale=1.0, length_scale_bounds=(1e-2, 1e2)) + \
         WhiteKernel(noise_level=1e-5, noise_level_bounds=(1e-10, 1e-1))

gp = GaussianProcessRegressor(kernel=kernel, n_restarts_optimizer=10, normalize_y=True)

# 3. TRAINING
gp.fit(X, y)

# 4. OPTIMIZATION (Bayesian Optimization Step)
# We want to find x that maximizes the GP prediction. 
# Since scipy.minimize finds the minimum, we minimize the negative prediction.
def objective_function(x):
    # Reshape x to (1, -1) as predict expects a 2D array
    prediction, sigma = gp.predict(x.reshape(1, -1), return_std=True)
    
    # Exploration vs Exploitation (UCB - Upper Confidence Bound)
    # Kappa controls the balance. Higher kappa = more exploration.
    kappa = 1.96 
    return -(prediction + kappa * sigma) # Negative because we want to maximize

# Constraints: Inputs must be positive. 
# We assume a reasonable search bound of [0.0, 1.0] based on previous data.
# If you suspect values can be higher, change 1.0 to a higher number.
bounds = [(0.001, 1.0) for _ in range(5)]

# Start the search from the best point we have found so far (Week 3)
x0 = X[2]

# Run the optimizer
res = minimize(
    objective_function, 
    x0, 
    bounds=bounds, 
    method='L-BFGS-B'
)

# 5. RESULT
print("--- Recommended Submission for Function 6 (Week 4) ---")
print("Predicted Optimal Input:", np.round(res.x, 6))
print("Predicted Output Value: ", np.round(-res.fun, 6)) # Note: this is the UCB value

--- Recommended Submission for Function 6 (Week 4) ---
Predicted Optimal Input: [0.688569 0.001    0.03538  0.001    0.001   ]
Predicted Output Value:  -0.861688
