In [None]:
import pandas as pd

# Load the CSV file
# NOTE(review): absolute Colab path. The output recorded for this cell is a
# FileNotFoundError, so the file has to be uploaded to /content before running.
df = pd.read_csv("/content/Function_Catalogue_Round_8_ExactStructure.csv")

# Preview the structure
df.head()

FileNotFoundError: [Errno 2] No such file or directory: '/content/Function_Catalogue_Round_8_ExactStructure.csv'

In [None]:
# Display column names (printed before stripping, so stray whitespace is still visible here)
print("Columns:", df.columns.tolist())

# Remove leading/trailing spaces if necessary
df.columns = df.columns.str.strip()

In [None]:
# Unique functions and themes
# Requires 'Function' and 'Theme' columns in df.
print("Unique Functions:", df['Function'].unique())
print("Unique Themes:", df['Theme'].unique())

In [None]:
# Missing data: number of null entries in each column
missing_summary = df.isnull().sum()
print("Missing values per column:\n", missing_summary)

# View samples with missing values (first rows that have a null in any column)
df[df.isnull().any(axis=1)].head()

In [None]:
# Show sample InputVec values (first 10, as a plain list so the raw text format is visible)
df['InputVec'].head(10).tolist()

In [None]:
import re
import numpy as np

def parse_inputvec(vec):
    """Extract the first two numeric components of an ``InputVec`` string.

    Handles both text layouts seen in the catalogue:

    * dash-separated, e.g. ``"0.123-0.456"``
    * list/array reprs, e.g. ``"[np.float64(0.1), np.float64(0.2)]"``

    Parameters
    ----------
    vec : str
        Raw ``InputVec`` text.

    Returns
    -------
    tuple of float
        ``(x1, x2)``, or ``(nan, nan)`` when fewer than two numbers are found
        or ``vec`` is not a string.
    """
    # One pattern covers both layouts:
    #  * the look-behind stops a '-' that directly follows a digit from being
    #    read as a sign, so "0.1-0.2" yields 0.1 and 0.2 (dash is a separator);
    #  * it also skips digits glued to letters, e.g. the "64" in "np.float64(";
    #  * integers and exponents ("1", "1.5e-05") are accepted.
    number = r"(?<![\w.])[-+]?(?:\d+\.?\d*|\.\d+)(?:[eE][-+]?\d+)?"
    try:
        nums = re.findall(number, vec)
    except TypeError:
        # Non-string input (e.g. a float NaN passed without str()).
        return np.nan, np.nan
    if len(nums) >= 2:
        return float(nums[0]), float(nums[1])
    return np.nan, np.nan

# Apply the parser
# Each value is cast to str first; the returned (x1, x2) tuple is wrapped in a
# pd.Series so that apply() expands it into the two target columns.
df[['x1', 'x2']] = df['InputVec'].apply(lambda x: pd.Series(parse_inputvec(str(x))))

# Convert Score to numeric safely (unparseable entries become NaN)
df['Score'] = pd.to_numeric(df['Score'], errors='coerce')

# Show cleaned output
df[['InputVec', 'x1', 'x2', 'Score']].head()

In [None]:
# Ten rows with the smallest Score.
# NOTE(review): whether low or high Score is "best" is not established in this notebook — confirm.
df.sort_values('Score', ascending=True).head(10)  # lowest score = best?

In [None]:
import seaborn as sns
import matplotlib.pyplot as plt

# Scatter of the first two parsed components for all rows, coloured by Score.
plt.figure(figsize=(8, 6))
sns.scatterplot(data=df, x='x1', y='x2', hue='Score', palette='coolwarm')
plt.title("Function Landscape: InputVec vs Score")
plt.show()

In [None]:
def parse_vec_general(vec, n_dims=4):
    """Parse an ``InputVec`` value into a fixed-width Series of floats.

    Accepts dash-separated text (``"0.1-0.2-0.3"``) as well as list/array
    reprs (``"[np.float64(0.1), np.float64(0.2)]"``).

    Parameters
    ----------
    vec : object
        Raw ``InputVec`` value; it is converted with ``str()`` before parsing.
    n_dims : int, default 4
        Number of components to return. Shorter vectors are padded with NaN,
        longer ones are truncated.

    Returns
    -------
    pandas.Series
        Floats indexed ``"x1"`` … ``"x<n_dims>"``; all NaN when no number is found.
    """
    # The look-behind keeps a '-' that directly follows a digit from being read
    # as a minus sign, so "0.7-0.1" parses as 0.7 and 0.1 rather than 0.7 and -0.1.
    # It also skips digits glued to letters (the "64" in "np.float64(").
    # Integers and exponents such as "1" or "1.5e-05" are accepted.
    number = r"(?<![\w.])[-+]?(?:\d+\.?\d*|\.\d+)(?:[eE][-+]?\d+)?"
    values = [float(tok) for tok in re.findall(number, str(vec))][:n_dims]
    values += [np.nan] * (n_dims - len(values))  # pad short vectors
    return pd.Series(values, index=[f"x{i + 1}" for i in range(n_dims)])

# Apply to all InputVec
# Overwrites the x1/x2 columns created earlier and adds x3/x4.
df[['x1', 'x2', 'x3', 'x4']] = df['InputVec'].apply(parse_vec_general)

# Confirm parsing worked
df[['InputVec', 'x1', 'x2', 'x3', 'x4']].head(10)

In [None]:
# Focus only on F4
f4 = df[df['Function'] == 'F4']

# 3D visualization (first 3 dimensions); x4 is not shown, colour encodes Score
from mpl_toolkits.mplot3d import Axes3D
fig = plt.figure(figsize=(10, 7))
ax = fig.add_subplot(111, projection='3d')
p = ax.scatter(f4['x1'], f4['x2'], f4['x3'], c=f4['Score'], cmap='coolwarm')
ax.set_title('F4 Function - First 3 Dimensions')
ax.set_xlabel('x1')
ax.set_ylabel('x2')
ax.set_zlabel('x3')
fig.colorbar(p)
plt.show()

In [None]:
import numpy as np
from sklearn.gaussian_process import GaussianProcessRegressor
from sklearn.gaussian_process.kernels import RBF
import random

# Filter data for F4 and drop NaNs
f4_df = df[df['Function'] == 'F4'].dropna(subset=['x1', 'x2', 'x3', 'x4', 'Score'])

# Prepare data for model
X = f4_df[['x1', 'x2', 'x3', 'x4']].values
y = f4_df['Score'].values

# The GP is trained on the negated Score.
# NOTE(review): fitness() in this cell negates the GP output again and the GA
# keeps the individuals with the smallest fitness, so the two sign flips cancel
# and the search favours low predicted raw Score — confirm this is intended.
y = -y

# Train a GP surrogate to guide fitness evaluation
kernel = RBF()
gp = GaussianProcessRegressor(kernel=kernel)
gp.fit(X, y)

# --- Genetic Algorithm Settings ---
POP_SIZE = 50        # individuals per generation
N_GEN = 20           # number of generations
N_ELITE = 10         # individuals carried over unchanged and used as parents
MUTATION_RATE = 0.1  # probability that a child is mutated

def random_candidate():
    """Draw one 4-component candidate uniformly from the unit hypercube."""
    lower, upper = 0, 1
    return np.random.uniform(low=lower, high=upper, size=4)

def mutate(vec):
    """Add Gaussian noise (mean 0, std 0.05) to a 4-component vector and clip to [0, 1]."""
    noise = np.random.normal(loc=0, scale=0.05, size=4)
    perturbed = vec + noise
    return np.clip(perturbed, 0, 1)

def crossover(p1, p2):
    """Blend two parents with one random weight in [0, 1) and clip the child to [0, 1]."""
    weight = np.random.rand()
    blended = weight * p1 + (1 - weight) * p2
    return np.clip(blended, 0, 1)

def fitness(cand):
    """Return the negated GP prediction for a single candidate vector.

    The module-level ``gp`` was fitted on negated scores, so this value
    approximates the original (un-negated) Score at ``cand``.
    """
    return -gp.predict(cand.reshape(1, -1))[0]  # undo the sign flip applied to the training targets

# --- Initialize population ---
population = np.array([random_candidate() for _ in range(POP_SIZE)])

for gen in range(N_GEN):
    # Score the whole population with one batched GP call instead of one
    # predict() per individual; this is the same quantity fitness() returns.
    scores = -gp.predict(population)
    # NOTE(review): argsort is ascending, so the elite are the individuals with
    # the smallest fitness (lowest predicted raw Score) — confirm the direction.
    elite_indices = scores.argsort()[:N_ELITE]
    elite = population[elite_indices]

    # Elitism: keep the elite unchanged, then fill up with children of elite pairs.
    new_population = elite.tolist()
    while len(new_population) < POP_SIZE:
        parents = random.sample(list(elite), 2)
        child = crossover(parents[0], parents[1])
        if np.random.rand() < MUTATION_RATE:
            child = mutate(child)
        new_population.append(child)

    population = np.array(new_population)

# --- Output Top Candidates ---
final_scores = -gp.predict(population)
top_indices = np.argsort(final_scores)[:10]
top_candidates = population[top_indices]

# Display top 10 new vectors
import pandas as pd
best_f4_vectors = pd.DataFrame(top_candidates, columns=['x1', 'x2', 'x3', 'x4'])
print(best_f4_vectors)

In [None]:
# Convert to dash-separated InputVec format
# Convert to dash-separated InputVec format (six decimals per component)
best_f4_vectors['InputVec'] = best_f4_vectors.apply(lambda row:
    f"{row['x1']:.6f}-{row['x2']:.6f}-{row['x3']:.6f}-{row['x4']:.6f}", axis=1)

# Add function metadata
best_f4_vectors['Function'] = 'F4'
best_f4_vectors['Theme'] = 'Fast, but Inaccurate Modelling'
best_f4_vectors['Score'] = ''  # placeholder until scored

# Rearrange columns
submission_ready = best_f4_vectors[['Function', 'Theme', 'InputVec', 'Score']]

# Export (relative path: written to the notebook's working directory)
submission_ready.to_csv("F4_GA_Submission_Round9.csv", index=False)
submission_ready.head()

In [None]:
# Manually define your selections — these should be filled with YOUR best vectors
# Manually define your selections — these should be filled with YOUR best vectors
# NOTE(review): every entry except F4 has two components here, while later cells
# in this notebook use 3–8 components for F3 and F5–F8 — confirm before submitting.
submission = pd.DataFrame([
    {"Function": "F1", "Theme": "Searching for Contamination Sources", "InputVec": "0.574329-0.879898", "Score": ""},
    {"Function": "F2", "Theme": "Optimising Noisy Models", "InputVec": "0.712000-0.489000", "Score": ""},
    {"Function": "F3", "Theme": "Drug Discovery Problem", "InputVec": "0.842310-0.512783", "Score": ""},
    {"Function": "F4", "Theme": "Fast, but Inaccurate Modelling", "InputVec": "0.726952-0.189810-0.835269-0.100856", "Score": ""},
    {"Function": "F5", "Theme": "Yield in a Chemical Reaction", "InputVec": "0.789315-0.734624", "Score": ""},
    {"Function": "F6", "Theme": "Cake and Stuff", "InputVec": "0.562398-0.627903", "Score": ""},
    {"Function": "F7", "Theme": "Sometimes Lazy is Best", "InputVec": "0.392748-0.682337", "Score": ""},
    {"Function": "F8", "Theme": "High-dimensional Optimisation", "InputVec": "0.688905-0.968377", "Score": ""}
])

# Export to CSV
submission.to_csv("Round9_Full_Submission.csv", index=False)
submission

In [None]:
import pandas as pd

# Replace with your actual filename if different
# Reloads the catalogue into df, discarding any columns added to df earlier.
df = pd.read_csv("/content/Function_Catalogue_Round_8_ExactStructure.csv")
df.columns = df.columns.str.strip()

In [None]:
import re
import numpy as np

def parse_vec_general(vec, n_dims=4):
    """Parse an ``InputVec`` value into a fixed-width Series of floats.

    Accepts dash-separated text (``"0.1-0.2-0.3"``) as well as list/array
    reprs (``"[np.float64(0.1), np.float64(0.2)]"``).

    Parameters
    ----------
    vec : object
        Raw ``InputVec`` value; it is converted with ``str()`` before parsing.
    n_dims : int, default 4
        Number of components to return. Shorter vectors are padded with NaN,
        longer ones are truncated.

    Returns
    -------
    pandas.Series
        Floats indexed ``"x1"`` … ``"x<n_dims>"``; all NaN when no number is found.
    """
    # The look-behind keeps a '-' that directly follows a digit from being read
    # as a minus sign, so "0.7-0.1" parses as 0.7 and 0.1 rather than 0.7 and -0.1.
    # It also skips digits glued to letters (the "64" in "np.float64(").
    # Integers and exponents such as "1" or "1.5e-05" are accepted.
    number = r"(?<![\w.])[-+]?(?:\d+\.?\d*|\.\d+)(?:[eE][-+]?\d+)?"
    values = [float(tok) for tok in re.findall(number, str(vec))][:n_dims]
    values += [np.nan] * (n_dims - len(values))  # pad short vectors
    return pd.Series(values, index=[f"x{i + 1}" for i in range(n_dims)])

# Expand every InputVec into four numeric columns (NaN-padded by the parser).
df[['x1', 'x2', 'x3', 'x4']] = df['InputVec'].apply(parse_vec_general)

In [None]:
# Filter Round 8 submissions (rows whose Theme is exactly 'Submission Round 8')
round8_submissions = df[df['Theme'] == 'Submission Round 8']

# Show function + InputVec for Round 8; rows missing either value are dropped
round8_summary = round8_submissions[['Function', 'InputVec']].dropna().reset_index(drop=True)
round8_summary

In [None]:
# Show the Round 8 summary in an interactive sheet.
# NOTE(review): google.colab is presumably only importable inside Google Colab.
from google.colab import sheets
sheet = sheets.InteractiveSheet(df=round8_summary)

In [None]:
import pandas as pd

# Create a list of your submission vectors in order (F1 to F8)
submission_vectors = [
    "0.574329-0.879898",                                      # F1 (2D)
    "0.712000-0.489000",                                      # F2 (2D)
    "0.842310-0.512783-0.335000",                             # F3 (3D) ← make sure this is 3D
    "0.726952-0.189810-0.835269-0.100856",                    # F4 (4D)
    "0.789315-0.734624-0.122222-0.444444",                    # F5 (4D) ← expand to 5D if needed
    "0.562398-0.627903-0.300000-0.450000-0.123456",           # F6 (5D)
    "0.392748-0.682337-0.400000-0.600000-0.250000-0.123000",  # F7 (6D)
    "0.688905-0.968377-0.100000-0.200000-0.300000-0.400000-0.500000-0.600000"  # F8 (8D)
]

# Convert to DataFrame
submission_df = pd.DataFrame(submission_vectors, columns=["InputVec"])

# Export to CSV: no header and no index, so the file is one dash-separated vector per line
submission_df.to_csv("Capstone_Round9_Submission_Format.csv", index=False, header=False)

In [None]:
import pandas as pd

# Replace these with your actual final vectors for F1 to F8
# NOTE(review): F5 has five components here but four in the previous cell and in
# the Round 9 queries used later in this notebook — confirm F5's dimensionality.
submission_vectors = [
    "0.574329-0.879898",                                      # F1 (2D)
    "0.712000-0.489000",                                      # F2 (2D)
    "0.842310-0.512783-0.335000",                             # F3 (3D)
    "0.726952-0.189810-0.835269-0.100856",                    # F4 (4D)
    "0.789315-0.734624-0.122222-0.444444-0.311111",           # F5 (5D)
    "0.562398-0.627903-0.300000-0.450000-0.123456",           # F6 (5D)
    "0.392748-0.682337-0.400000-0.600000-0.250000-0.123000",  # F7 (6D)
    "0.688905-0.968377-0.100000-0.200000-0.300000-0.400000-0.500000-0.600000"  # F8 (8D)
]

# Create DataFrame
submission_df = pd.DataFrame(submission_vectors, columns=["InputVec"])

# Preview the final submission
print("✅ Your Round 9 Submission Format:\n")
print(submission_df.to_string(index=False, header=False))

# Save to CSV with no header or index (overwrites the file written by the previous cell)
submission_df.to_csv("Capstone_Round9_Submission_Format.csv", index=False, header=False)

In [None]:
import pandas as pd
import numpy as np

# Embed your Round 9 queries and observations directly into the code
# One query vector per function, in order F1..F8 (2, 2, 3, 4, 4, 5, 6 and 8 components).
queries = [
    [0.944034, 0.625117],
    [0.768771, 0.105777],
    [0.1456, 0.944454, 0.638992],
    [0.999363, 0.440618, 0.632742, 0.573562],
    [0.290636, 0.824504, 0.580542, 0.368022],
    [0.404874, 0.441523, 0.135493, 0.612513, 0.497029],
    [0.304835, 0.615316, 0.890015, 0.497555, 0.892324, 0.798193],
    [0.25682, 0.180489, 0.295402, 0.830171, 0.672892, 0.287181, 0.913031, 0.782776]
]

# NOTE(review): every row below holds 8 values regardless of the query's length.
# Later cells use only the last value of row i as the score of function i.
# If each row is actually one value per function F1..F8, that mapping is wrong — confirm.
observations = [
    [1.3276592050304897e-69, 0.27308818432797793, -0.10726379301566011, -20.949420794091107,
     13.377756436400528, -1.1857272137575898, 0.023506147714804363, 8.3316420728934],
    [6.933250699386668e-70, 0.09312933125476712, -0.15237863681921446, -22.018089179607596,
     12.979330536136526, -1.1838236564336748, 0.023506147714804363, 8.434940976893401],
    [6.933250699386668e-70, 0.23562706897099497, -0.14698930107649133, -22.018089179607596,
     12.979330536136526, -1.263310156017569, 0.023506147714804363, 8.434940976893401],
    [4.6015626100136584e-113, 0.33828687086483444, -0.11198066229665458, -20.949420794091107,
     13.377756436400528, -1.0715940396376764, 0.007250474515368666, 8.434940976893401],
    [4.6015626100136584e-113, 0.2358527631144813, -0.11658849810095136, -20.949420794091107,
     13.377756436400528, -1.108280528164095, 0.007250474515368666, 8.434940976893401],
    [4.6015626100136584e-113, 0.19467655335464928, -0.10701392790144047, -20.949420794091107,
     13.377756436400528, -1.0802789662503398, 0.007250474515368666, 8.434940976893401],
    [-6.3577541156068885e-86, 0.1094856197569318, -0.12400622516054445, -20.387612583330036,
     13.377756436400528, -1.1219255105599752, 0.03872554610994976, 8.294127480893401],
    [-6.3577541156068885e-86, 0.10969400967393614, -0.1271560099068078, -20.387612583330036,
     13.377756436400528, -1.09808634927923, 0.03872554610994976, 8.294127480893401]
]

# Pair each query with its observation row, numbering the pairs from 1
comparison_data = [
    {
        'Query_Index': position,
        'Query_Vector': query_vec,
        'Observation_Score_Vector': observed,
    }
    for position, (query_vec, observed) in enumerate(zip(queries, observations), start=1)
]

df_comparison = pd.DataFrame(comparison_data)

# Persist a copy for reference (optional)
comparison_file = "/content/Round9_Query_Observation_Comparison.csv"
df_comparison.to_csv(comparison_file, index=False)

# Render the table as the cell output
df_comparison

In [None]:
import numpy as np
import pandas as pd
from sklearn.gaussian_process import GaussianProcessRegressor
from sklearn.gaussian_process.kernels import RBF
import random

# --- Step 1: Define your Round 9 queries and observations ---
queries = [
    [0.944034, 0.625117],  # F1 (2D)
    [0.768771, 0.105777],  # F2 (2D)
    [0.1456, 0.944454, 0.638992],  # F3 (3D)
    [0.999363, 0.440618, 0.632742, 0.573562],  # F4 (4D)
    [0.290636, 0.824504, 0.580542, 0.368022],  # F5 (4D)
    [0.404874, 0.441523, 0.135493, 0.612513, 0.497029],  # F6 (5D)
    [0.304835, 0.615316, 0.890015, 0.497555, 0.892324, 0.798193],  # F7 (6D)
    [0.25682, 0.180489, 0.295402, 0.830171, 0.672892, 0.287181, 0.913031, 0.782776]  # F8 (8D)
]

# NOTE(review): these values equal the LAST element of each 8-value row of the
# `observations` list in the earlier comparison cell. Confirm that the last
# element really is the score of the function named in the comment.
observations_scores = [
    8.3316420728934,  # F1
    8.434940976893401,  # F2
    8.434940976893401,  # F3
    8.434940976893401,  # F4
    8.434940976893401,  # F5
    8.434940976893401,  # F6
    8.294127480893401,  # F7
    8.294127480893401   # F8
]

# --- Step 2: Loop per function and apply GP + GA ---
# NOTE(review): each GP below is fitted on ONE (query, score) pair, so its
# posterior mean is a single RBF bump around that query. The GA result is then
# driven by distance from that one point rather than by the function's shape.
# Fit on the full history per function before trusting these suggestions.
results = []

# Genetic Algorithm settings (identical for every function, so set once)
POP_SIZE = 100
N_GEN = 30
N_ELITE = 10
MUTATION_RATE = 0.1

for idx, (query, score) in enumerate(zip(queries, observations_scores)):
    dims = len(query)
    X_data = np.array([query])
    y = -np.array([score])  # the GP is trained on the negated score

    # Train GP model
    kernel = RBF()
    gp = GaussianProcessRegressor(kernel=kernel)
    gp.fit(X_data, y)

    def random_candidate():
        """Draw one candidate uniformly from the `dims`-dimensional unit cube."""
        return np.random.uniform(0, 1, dims)

    def mutate(vec):
        """Add Gaussian noise (std 0.05) to every component and clip to [0, 1]."""
        return np.clip(vec + np.random.normal(0, 0.05, size=dims), 0, 1)

    def crossover(p1, p2):
        """Blend two parents with one random weight and clip to [0, 1]."""
        alpha = np.random.rand()
        return np.clip(alpha * p1 + (1 - alpha) * p2, 0, 1)

    def fitness(cand):
        """Negated GP prediction for a single candidate (approximates the raw score)."""
        return -gp.predict(cand.reshape(1, -1))[0]

    # Initialize population
    population = np.array([random_candidate() for _ in range(POP_SIZE)])

    # Run GA
    for gen in range(N_GEN):
        # One batched prediction per generation instead of POP_SIZE separate
        # predict() calls; row i equals fitness(population[i]).
        scores = -gp.predict(population)
        # NOTE(review): ascending argsort keeps the SMALLEST fitness values,
        # i.e. the lowest predicted raw score — confirm the intended direction.
        elite_indices = scores.argsort()[:N_ELITE]
        elite = population[elite_indices]

        new_population = elite.tolist()
        while len(new_population) < POP_SIZE:
            parents = random.sample(list(elite), 2)
            child = crossover(parents[0], parents[1])
            if np.random.rand() < MUTATION_RATE:
                child = mutate(child)
            new_population.append(child)

        population = np.array(new_population)

    # Output best candidates for this function
    final_scores = -gp.predict(population)
    top_indices = np.argsort(final_scores)[:5]
    top_candidates = population[top_indices]

    best_vectors = pd.DataFrame(top_candidates, columns=[f'x{i+1}' for i in range(dims)])
    best_vectors['InputVec'] = ['-'.join(f"{value:.6f}" for value in cand) for cand in top_candidates]
    # Raw GP output, i.e. the prediction of the NEGATED score (equals -fitness).
    best_vectors['Predicted_Score'] = gp.predict(top_candidates)
    best_vectors['Function'] = f'F{idx+1}'

    results.append(best_vectors[['Function', 'InputVec', 'Predicted_Score']])

# --- Step 3: Combine and show all results ---
df_results = pd.concat(results, ignore_index=True)
df_results

In [None]:
# --- Step 4: Prepare submission format (1 best vector per function) ---
# Select the row with the highest Predicted_Score per function.
# NOTE(review): Predicted_Score was computed as the negated fitness, i.e. the GP's
# prediction of the negated score, so "highest" here presumably means the lowest
# predicted raw score — confirm the intended direction.
submission_list = []

for func in df_results['Function'].unique():
    func_df = df_results[df_results['Function'] == func]
    best_row = func_df.sort_values('Predicted_Score', ascending=False).iloc[0]
    submission_list.append({
        'Function': func,
        'InputVec': best_row['InputVec']
    })

df_submission = pd.DataFrame(submission_list)

# Export to CSV (Google Form-ready: Function - InputVec columns)
submission_file = "/content/Round10_Submission_Format.csv"
df_submission.to_csv(submission_file, index=False)

# Show submission table
df_submission

In [None]:
import numpy as np
import pandas as pd
from sklearn.gaussian_process import GaussianProcessRegressor
from sklearn.gaussian_process.kernels import RBF
import random

# Step 1: Define your queries and observed scores per function
# (same values as the previous GP + GA cell)
queries = [
    [0.944034, 0.625117],  # F1
    [0.768771, 0.105777],  # F2
    [0.1456, 0.944454, 0.638992],  # F3
    [0.999363, 0.440618, 0.632742, 0.573562],  # F4
    [0.290636, 0.824504, 0.580542, 0.368022],  # F5
    [0.404874, 0.441523, 0.135493, 0.612513, 0.497029],  # F6
    [0.304835, 0.615316, 0.890015, 0.497555, 0.892324, 0.798193],  # F7
    [0.25682, 0.180489, 0.295402, 0.830171, 0.672892, 0.287181, 0.913031, 0.782776]  # F8
]

# One score per function, in order F1..F8.
# NOTE(review): confirm these are the right per-function observations.
observations_scores = [
    8.3316420728934,
    8.434940976893401,
    8.434940976893401,
    8.434940976893401,
    8.434940976893401,
    8.434940976893401,
    8.294127480893401,
    8.294127480893401
]

# Step 2: Train GP + GA per function
# NOTE(review): each GP below is fitted on ONE (query, score) pair, so its
# posterior mean is a single RBF bump around that query. Fit on the full
# history per function before trusting these suggestions.
results = []

# GA settings are the same for every function, so they are set once.
POP_SIZE = 100
N_GEN = 30
N_ELITE = 10
MUTATION_RATE = 0.1

for idx, (query, score) in enumerate(zip(queries, observations_scores)):
    dims = len(query)
    X_data = np.array([query])
    y = -np.array([score])  # the GP is trained on the negated score

    kernel = RBF()
    gp = GaussianProcessRegressor(kernel=kernel)
    gp.fit(X_data, y)

    def random_candidate():
        """Draw one candidate uniformly from the `dims`-dimensional unit cube."""
        return np.random.uniform(0, 1, dims)

    def mutate(vec):
        """Add Gaussian noise (std 0.05) to every component and clip to [0, 1]."""
        return np.clip(vec + np.random.normal(0, 0.05, size=dims), 0, 1)

    def crossover(p1, p2):
        """Blend two parents with one random weight and clip to [0, 1]."""
        alpha = np.random.rand()
        return np.clip(alpha * p1 + (1 - alpha) * p2, 0, 1)

    def fitness(cand):
        """Negated GP prediction for a single candidate (approximates the raw score)."""
        return -gp.predict(cand.reshape(1, -1))[0]

    population = np.array([random_candidate() for _ in range(POP_SIZE)])

    for gen in range(N_GEN):
        # One batched prediction per generation instead of POP_SIZE separate
        # predict() calls; row i equals fitness(population[i]).
        scores = -gp.predict(population)
        # NOTE(review): ascending argsort keeps the SMALLEST fitness values —
        # confirm the intended optimisation direction.
        elite_indices = scores.argsort()[:N_ELITE]
        elite = population[elite_indices]

        new_population = elite.tolist()
        while len(new_population) < POP_SIZE:
            parents = random.sample(list(elite), 2)
            child = crossover(parents[0], parents[1])
            if np.random.rand() < MUTATION_RATE:
                child = mutate(child)
            new_population.append(child)

        population = np.array(new_population)

    final_scores = -gp.predict(population)
    top_indices = np.argsort(final_scores)[:5]
    top_candidates = population[top_indices]

    best_vectors = pd.DataFrame(top_candidates, columns=[f'x{i+1}' for i in range(dims)])
    best_vectors['InputVec'] = ['-'.join(f"{value:.6f}" for value in cand) for cand in top_candidates]
    # Raw GP output, i.e. the prediction of the NEGATED score (equals -fitness).
    best_vectors['Predicted_Score'] = gp.predict(top_candidates)
    best_vectors['Function'] = f'F{idx+1}'

    results.append(best_vectors[['Function', 'InputVec', 'Predicted_Score']])

df_results = pd.concat(results, ignore_index=True)
df_results

In [None]:
import numpy as np
import pandas as pd
from sklearn.gaussian_process import GaussianProcessRegressor
from sklearn.gaussian_process.kernels import RBF
import random

# Example Step 1: Combine all known queries and observed scores (currently only Round 9, needs your past data added)
queries = [
    [0.944034, 0.625117],  # F1
    [0.768771, 0.105777],  # F2
    [0.1456, 0.944454, 0.638992],  # F3
    [0.999363, 0.440618, 0.632742, 0.573562],  # F4
    [0.290636, 0.824504, 0.580542, 0.368022],  # F5
    [0.404874, 0.441523, 0.135493, 0.612513, 0.497029],  # F6
    [0.304835, 0.615316, 0.890015, 0.497555, 0.892324, 0.798193],  # F7
    [0.25682, 0.180489, 0.295402, 0.830171, 0.672892, 0.287181, 0.913031, 0.782776]  # F8
]

observations_scores = [
    8.3316420728934,
    8.434940976893401,
    8.434940976893401,
    8.434940976893401,
    8.434940976893401,
    8.434940976893401,
    8.294127480893401,
    8.294127480893401
]

# ⚠ Here you need to append your Round 1–8 queries and scores manually
# NOTE(review): the loop in this cell treats entry i of these lists as function
# F{i+1} and fits one GP per entry, so extending the flat lists would create
# "F9", "F10", … instead of adding history to F1–F8. Group past data per function.
# Example:
# queries.extend([...])
# observations_scores.extend([...])

# NOTE(review): each GP below is fitted on ONE (query, score) pair, so its
# posterior mean is a single RBF bump around that query. Fit on the full
# history per function before trusting these suggestions.
results = []

# GA settings are the same for every function, so they are set once.
POP_SIZE = 100
N_GEN = 30
N_ELITE = 10
MUTATION_RATE = 0.1

for idx, (query, score) in enumerate(zip(queries, observations_scores)):
    dims = len(query)
    X_data = np.array([query])
    y = -np.array([score])  # the GP is trained on the negated score

    kernel = RBF()
    gp = GaussianProcessRegressor(kernel=kernel)
    gp.fit(X_data, y)

    def random_candidate():
        """Draw one candidate uniformly from the `dims`-dimensional unit cube."""
        return np.random.uniform(0, 1, dims)

    def mutate(vec):
        """Add Gaussian noise (std 0.05) to every component and clip to [0, 1]."""
        return np.clip(vec + np.random.normal(0, 0.05, size=dims), 0, 1)

    def crossover(p1, p2):
        """Blend two parents with one random weight and clip to [0, 1]."""
        alpha = np.random.rand()
        return np.clip(alpha * p1 + (1 - alpha) * p2, 0, 1)

    def fitness(cand):
        """Negated GP prediction for a single candidate (approximates the raw score)."""
        return -gp.predict(cand.reshape(1, -1))[0]

    population = np.array([random_candidate() for _ in range(POP_SIZE)])

    for gen in range(N_GEN):
        # One batched prediction per generation instead of POP_SIZE separate
        # predict() calls; row i equals fitness(population[i]).
        scores = -gp.predict(population)
        # NOTE(review): ascending argsort keeps the SMALLEST fitness values —
        # confirm the intended optimisation direction.
        elite_indices = scores.argsort()[:N_ELITE]
        elite = population[elite_indices]

        new_population = elite.tolist()
        while len(new_population) < POP_SIZE:
            parents = random.sample(list(elite), 2)
            child = crossover(parents[0], parents[1])
            if np.random.rand() < MUTATION_RATE:
                child = mutate(child)
            new_population.append(child)

        population = np.array(new_population)

    final_scores = -gp.predict(population)
    top_indices = np.argsort(final_scores)[:5]
    top_candidates = population[top_indices]

    best_vectors = pd.DataFrame(top_candidates, columns=[f'x{i+1}' for i in range(dims)])
    best_vectors['InputVec'] = ['-'.join(f"{value:.6f}" for value in cand) for cand in top_candidates]
    # Raw GP output, i.e. the prediction of the NEGATED score (equals -fitness).
    best_vectors['Predicted_Score'] = gp.predict(top_candidates)
    best_vectors['Function'] = f'F{idx+1}'

    results.append(best_vectors[['Function', 'InputVec', 'Predicted_Score']])

df_results = pd.concat(results, ignore_index=True)
df_results

In [None]:
df = pd.read_csv("/content/_ Function catalogue .csv")

In [None]:
import pandas as pd

# Step 1: Load your existing catalogue
df_catalogue = pd.read_csv("/content/_ Function catalogue .csv")
df_catalogue.columns = df_catalogue.columns.str.strip()

# Step 2: Create your Round 9 submission and observations
# InputVec uses the dash-separated text format; Score is stored as a float.
# NOTE(review): the scores are the same values as observations_scores in the
# GP + GA cells — confirm they are the right per-function observations.
df_round9 = pd.DataFrame([
    {"Function": "F1", "Theme": "Searching for Contamination Sources", "InputVec": "0.944034-0.625117", "Score": 8.3316420728934},
    {"Function": "F2", "Theme": "Optimising Noisy Models", "InputVec": "0.768771-0.105777", "Score": 8.434940976893401},
    {"Function": "F3", "Theme": "Drug Discovery Problem", "InputVec": "0.1456-0.944454-0.638992", "Score": 8.434940976893401},
    {"Function": "F4", "Theme": "Fast, but Inaccurate Modelling", "InputVec": "0.999363-0.440618-0.632742-0.573562", "Score": 8.434940976893401},
    {"Function": "F5", "Theme": "Yield in a Chemical Reaction", "InputVec": "0.290636-0.824504-0.580542-0.368022", "Score": 8.434940976893401},
    {"Function": "F6", "Theme": "Cake and Stuff", "InputVec": "0.404874-0.441523-0.135493-0.612513-0.497029", "Score": 8.434940976893401},
    {"Function": "F7", "Theme": "Sometimes Lazy is Best", "InputVec": "0.304835-0.615316-0.890015-0.497555-0.892324-0.798193", "Score": 8.294127480893401},
    {"Function": "F8", "Theme": "High-dimensional Optimisation", "InputVec": "0.25682-0.180489-0.295402-0.830171-0.672892-0.287181-0.913031-0.782776", "Score": 8.294127480893401}
])

# Step 3: Append Round 9 to the existing catalogue (rows are added at the end)
df_updated = pd.concat([df_catalogue, df_round9], ignore_index=True)

# Step 4: Save updated catalogue to a new file
df_updated.to_csv("/content/Function_catalogue_updated.csv", index=False)

# Preview to confirm
# NOTE: head() shows the first rows; the appended Round 9 rows are at the end of the frame.
df_updated.head(10)

In [None]:
# Show the updated catalogue in an interactive sheet.
# NOTE(review): google.colab is presumably only importable inside Google Colab.
from google.colab import sheets
sheet = sheets.InteractiveSheet(df=df_updated)

In [None]:
import pandas as pd

# Load your cleaned catalogue (the file written by the "append Round 9" cell)
df_check = pd.read_csv("/content/Function_catalogue_updated.csv")
df_check.columns = df_check.columns.str.strip()

# Check number of rows per Function (value_counts ignores rows with a null Function)
function_counts = df_check['Function'].value_counts().reset_index()
function_counts.columns = ['Function', 'Number_of_Rows']

function_counts

In [None]:
import pandas as pd

# Load your current catalogue (Round 9 only)
df_catalogue = pd.read_csv("/content/Function_catalogue_updated.csv")
df_catalogue.columns = df_catalogue.columns.str.strip()

# Create a template for appending Round 1–8 data
# NOTE(review): the rows below look like placeholder values (0.123456, 7.123, …).
# Replace them with real observations before using the combined file for modelling.
df_previous_rounds = pd.DataFrame([
    {"Function": "F1", "Theme": "Searching for Contamination Sources", "InputVec": "0.123456-0.654321", "Score": 7.123},
    {"Function": "F1", "Theme": "Searching for Contamination Sources", "InputVec": "0.234567-0.543210", "Score": 7.234},
    {"Function": "F2", "Theme": "Optimising Noisy Models", "InputVec": "0.345678-0.432109", "Score": 7.345},
    {"Function": "F2", "Theme": "Optimising Noisy Models", "InputVec": "0.456789-0.321098", "Score": 7.456},
    {"Function": "F3", "Theme": "Drug Discovery Problem", "InputVec": "0.567890-0.210987-0.109876", "Score": 7.567},
    # ⚠ Continue adding more data for all functions (F1 to F8) from your previous rounds...
])

# Append the new data
df_combined = pd.concat([df_catalogue, df_previous_rounds], ignore_index=True)

# Save the combined catalogue
df_combined.to_csv("/content/Function_catalogue_all_rounds.csv", index=False)

df_combined.head(10)

In [None]:
# Export the best candidate per function from df_results to a submission CSV
# (repeats the earlier "Step 4" cell and overwrites the same output file)

# Select the row with the highest Predicted_Score per function.
# NOTE(review): Predicted_Score was computed as the negated fitness, so "highest"
# presumably means the lowest predicted raw score — confirm the intended direction.
submission_list = []

for func in df_results['Function'].unique():
    func_df = df_results[df_results['Function'] == func]
    best_row = func_df.sort_values('Predicted_Score', ascending=False).iloc[0]
    submission_list.append({
        'Function': func,
        'InputVec': best_row['InputVec']
    })

df_submission = pd.DataFrame(submission_list)

# Export to CSV
submission_file = "/content/Round10_Submission_Format.csv"
df_submission.to_csv(submission_file, index=False)

# Display the submission DataFrame
df_submission

In [None]:
import numpy as np
import pandas as pd

# Assuming you already have df_clean and parse_vec_general imported

def inspect_clean(df):
    """
    Summarise, per function, how many rows are usable and which vector
    dimensionalities occur.

    A row is usable when its ``InputVec`` yields at least one numeric component
    and its ``Score`` is non-null.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain ``Function``, ``InputVec`` and ``Score`` columns.
        The frame is not modified.

    Returns
    -------
    pandas.DataFrame
        One row per function with columns ``Function``, ``UsableRows`` and
        ``Dimensionalities`` (sorted list of distinct component counts).
        The same table is also printed.
    """
    # Work on a copy so the caller's frame does not gain helper columns.
    work = df.copy()

    # Series.apply would expand the returned Series into several columns, which
    # cannot be stored in one column, so each vector is reduced to a plain int.
    # parse_vec_general pads with NaN to a fixed width, hence the dimensionality
    # is the number of non-null components rather than the padded length.
    # NOTE(review): vectors longer than the parser's output width are truncated
    # there, so the reported dimensionality is capped at that width.
    work['NumComponents'] = work['InputVec'].map(
        lambda v: int(np.sum(pd.notna(parse_vec_general(v))))
    )

    inspection = []
    for func, group in work.groupby('Function'):
        # Keep only rows with at least one parsed component and a valid Score
        valid = group[(group['NumComponents'] > 0) & group['Score'].notna()]
        inspection.append({
            'Function': func,
            'UsableRows': len(valid),
            'Dimensionalities': sorted(valid['NumComponents'].unique().tolist())
        })

    inspection_df = pd.DataFrame(inspection)
    print(inspection_df)
    return inspection_df

# Run the inspection
# NOTE(review): df_clean is only assigned in a later cell of this notebook, so on
# a fresh kernel this line raises NameError unless that cell is run first.
inspection_df = inspect_clean(df_clean)

In [None]:
import numpy as np
import pandas as pd

def inspect_clean(df):
    """
    Summarise, per function, how many rows are usable and which vector
    dimensionalities occur.

    A row is usable when its ``InputVec`` yields at least one numeric component
    and its ``Score`` is non-null.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain ``Function``, ``InputVec`` and ``Score`` columns.
        The frame is not modified.

    Returns
    -------
    pandas.DataFrame
        One row per function with columns ``Function``, ``UsableRows`` and
        ``Dimensionalities`` (sorted list of distinct component counts).
        The same table is also printed.
    """
    inspection = []
    # Work on a copy so we don't modify the original
    df_clean = df.copy()

    # parse_vec_general always returns a NaN-padded, fixed-width Series, so a
    # column of those objects is never null and always has the padded length.
    # Reduce each vector to the number of non-null components instead.
    # NOTE(review): vectors longer than the parser's output width are truncated
    # there, so the reported dimensionality is capped at that width.
    df_clean['NumComponents'] = df_clean['InputVec'].map(
        lambda v: int(np.sum(pd.notna(parse_vec_general(v))))
    )

    for func, group in df_clean.groupby('Function'):
        # Keep only rows with at least one parsed component and a non-null Score
        valid = group[(group['NumComponents'] > 0) & group['Score'].notna()]
        inspection.append({
            'Function': func,
            'UsableRows': len(valid),
            'Dimensionalities': sorted(valid['NumComponents'].unique().tolist())
        })

    inspection_df = pd.DataFrame(inspection)
    print(inspection_df)
    return inspection_df

# Run the inspection
# NOTE(review): df_clean is only assigned in a later cell of this notebook, so on
# a fresh kernel this line raises NameError unless that cell is run first.
inspection_df = inspect_clean(df_clean)

In [None]:
def inspect_bad_rows(df):
    """Print, per function, the rows whose vector or score is unusable.

    A row is "bad" when ``parse_vec_general`` finds no numeric component in its
    ``InputVec`` or when its ``Score`` is null. Also prints the row count per
    function of the input frame for comparison.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain ``Function``, ``InputVec`` and ``Score`` columns.
        The frame is not modified.
    """
    # Work on a copy
    df_tmp = df.copy()
    # parse_vec_general signals a failed parse with an all-NaN vector, not with
    # NaN itself, so `.isna()` on a column of parsed vectors never fires.
    # Test explicitly whether any component was parsed.
    parsed_ok = df_tmp['InputVec'].map(
        lambda v: bool(np.any(pd.notna(parse_vec_general(v))))
    ).astype(bool)
    # Identify rows dropped in your training step
    bad = df_tmp[~parsed_ok | df_tmp['Score'].isna()]

    for func, group in bad.groupby('Function'):
        print(f"\n=== Function {func}: {len(group)} bad rows ===")
        # Show the first few examples of what's malformed or missing
        print(group[['InputVec', 'Score']].head().to_string(index=False))

    # Also, compare against the original counts
    print("\nOriginal row counts per function:")
    print(df['Function'].value_counts().sort_index())

# Run it
# NOTE(review): df_clean is only assigned in a later cell of this notebook.
inspect_bad_rows(df_clean)

In [None]:
import numpy as np
import pandas as pd

def inspect_bad_in_raw(df_raw):
    """
    Parses every InputVec in the raw DataFrame, then
    shows the rows where parsing failed or Score was missing.

    Parameters
    ----------
    df_raw : pandas.DataFrame
        Must contain ``Function``, ``InputVec`` and ``Score`` columns.
        The frame is not modified.
    """
    df = df_raw.copy()

    # Safe wrapper: a bad parse becomes np.nan so that `.isna()` below can see it.
    def safe_parse(x):
        try:
            parsed = parse_vec_general(x)
        except Exception:
            return np.nan
        # parse_vec_general reports failure as an all-NaN vector instead of
        # raising, so that case has to be mapped to NaN explicitly.
        if not np.any(pd.notna(parsed)):
            return np.nan
        return parsed

    df['ParsedVec'] = df['InputVec'].map(safe_parse)

    # these are the rows that will be dropped when you make df_clean
    bad = df[df['ParsedVec'].isna() | df['Score'].isna()]

    for func, grp in bad.groupby('Function'):
        print(f"\n=== Function {func}: {len(grp)} dropped rows ===")
        print(grp[['InputVec','Score']].head().to_string(index=False))

    print("\nRaw row counts per function:")
    print(df_raw['Function'].value_counts().sort_index())

# Usage: assuming your original data was loaded into df_raw
# NOTE(review): df_raw is only assigned in later cells of this notebook.
inspect_bad_in_raw(df_raw)

In [None]:
# Right after loading your CSV / database / source...
# Snapshot of df at the time this cell runs; later changes to df do not affect it.
df_raw = df.copy()       # Keep an untouched copy

In [None]:
# e.g.:
# Build df_clean: add a ParsedVec column, then drop rows missing ParsedVec or Score.
# NOTE(review): parse_vec_general returns a pandas Series even for unparseable
# input, so the ParsedVec part of this dropna presumably never removes a row — verify.
df_clean = (
    df_raw
    .assign(ParsedVec = lambda d: d['InputVec'].map(parse_vec_general))
    .dropna(subset=['ParsedVec', 'Score'])
)

In [None]:
# If you haven’t yet, make df_raw as a copy of your original df
# (do this immediately after loading your data)
# Re-running this cell replaces df_raw with the current state of df.
df_raw = df.copy()

# Now inspect what columns exist
print("Columns in df_raw:", df_raw.columns.tolist())

# And peek at the first few rows so we can see the actual names/structure
print(df_raw.head())

In [None]:
# Skip the very first line, and use line 1 as your header:
# (header=1 is zero-based: the second line of the file supplies the column names)
# NOTE(review): "mydata.csv" looks like a placeholder filename — replace with the real file.
df_raw = pd.read_csv("mydata.csv", header=1)

# Now confirm you have the right columns:
print(df_raw.columns.tolist())
# → ['index', 'Function', 'Theme', 'InputVec', 'Score', ...]

In [None]:
import os

# List everything in /mnt/data (where your upload lives)
# NOTE(review): os.listdir raises FileNotFoundError if /mnt/data does not exist in this runtime.
print(os.listdir('/mnt/data'))

# Or list the notebook’s working directory
print(os.listdir('.'))

In [None]:
import numpy as np

def safe_parse(x):
    """Parse one InputVec with parse_vec_general, returning np.nan on failure.

    "Failure" covers both an exception inside the parser and a result with no
    numeric component at all, so callers can detect bad rows with ``.isna()``.
    """
    try:
        parsed = parse_vec_general(x)
    except Exception:
        return np.nan
    # parse_vec_general reports an unparseable value as an all-NaN vector
    # instead of raising; map that case to NaN as well.
    if not np.any(pd.notna(parsed)):
        return np.nan
    return parsed

# Parse every InputVec; note this adds a ParsedVec column to df_raw itself.
df_raw['ParsedVec'] = df_raw['InputVec'].map(safe_parse)
# Rows with a NaN ParsedVec or a null Score.
dropped = df_raw[df_raw['ParsedVec'].isna() | df_raw['Score'].isna()]

# Show up to five example rows per function.
for func, grp in dropped.groupby('Function'):
    print(f"\n=== Function {func}: {len(grp)} dropped rows ===")
    print(grp[['InputVec','Score']].head().to_string(index=False))

print("\nRaw row counts per function:")
print(df_raw['Function'].value_counts().sort_index())

In [None]:
import os

# 1. List the current working directory
print("CWD:", os.getcwd())
print("Files here:", os.listdir('.'))

# 2. Also check /content if you’re not in it
# (raises FileNotFoundError when /content does not exist)
print("/content:", os.listdir('/content'))

In [None]:
import pandas as pd

# Path to the file with the bad header
path = "./_ Function catalogue .csv"   # that “ ” is a narrow no-break space (\u202f)

# 1. Read it, using the second line as header
df_raw = pd.read_csv(path, header=1)

# 2. Keep only the columns you need
df_raw = df_raw[['Function', 'Theme', 'InputVec', 'Score']]

# 3. Verify
print("Columns:", df_raw.columns.tolist())
print(df_raw.head())

In [None]:
import os
import pandas as pd
import numpy as np

# 1. Auto-detect the file whose name contains “Function catalogue”
files = os.listdir('.')
filename = next((f for f in files if 'Function catalogue' in f), None)
if filename is None:
    # Fail with an explicit message instead of a bare StopIteration.
    raise FileNotFoundError(
        f"No file containing 'Function catalogue' in {os.getcwd()!r}; found: {files}"
    )
print("Loading:", filename)

# 2. Read it, using the 2nd line as header
df_raw = pd.read_csv(filename, header=1)

# 3. Keep only the columns we care about
df_raw = df_raw[['Function', 'Theme', 'InputVec', 'Score']]

# 4. Verify
print("Columns:", df_raw.columns.tolist())
print(df_raw.head())

In [None]:
def safe_parse(x):
    """Return ``parse_vec_general(x)``, or ``np.nan`` if the parser raises.

    Every ``Exception`` from the parser is caught, so one malformed InputVec
    does not abort the whole ``.map`` call.
    """
    try:
        vec = parse_vec_general(x)
    except Exception:
        vec = np.nan
    return vec

# 5. Parse every InputVec safely
df_raw['ParsedVec'] = df_raw['InputVec'].map(safe_parse)

# 6. Find what would be dropped
dropped = df_raw[df_raw['ParsedVec'].isna() | df_raw['Score'].isna()]

# 7. Report by function
for func, grp in dropped.groupby('Function'):
    print(f"\n=== Function {func}: {len(grp)} dropped rows ===")
    print(grp[['InputVec','Score']].head().to_string(index=False))

# 8. Show raw totals
print("\nRaw row counts per function:")
print(df_raw['Function'].value_counts().sort_index())

In [None]:
import numpy as np

# 1) Wrap parse_vec_general so failures become NaN
def safe_parse(x):
    """Best-effort wrapper around ``parse_vec_general``.

    Returns the parser's result, or ``np.nan`` when the parser raises any
    ``Exception``.
    """
    try:
        result = parse_vec_general(x)
    except Exception:
        result = np.nan
    return result

# 2) Apply to every row
df_raw['ParsedVec'] = df_raw['InputVec'].map(safe_parse)

# 3) Select the “bad” ones
dropped = df_raw[df_raw['ParsedVec'].isna() | df_raw['Score'].isna()]

# 4) Print a summary by function
for func, grp in dropped.groupby('Function'):
    print(f"\n=== Function {func}: {len(grp)} dropped rows ===")
    print(grp[['InputVec','Score']].head().to_string(index=False))

# 5) And show your raw totals again for reference
print("\nRaw row counts per function:")
print(df_raw['Function'].value_counts().sort_index())

In [None]:
import numpy as np

# 1) Safe-parse as before
def safe_parse(x):
    """Parse ``x`` with ``parse_vec_general``; on any ``Exception`` yield ``np.nan``."""
    try:
        outcome = parse_vec_general(x)
    except Exception:
        outcome = np.nan
    return outcome

df_raw['ParsedVec'] = df_raw['InputVec'].map(safe_parse)

# 2) Identify all dropped rows
dropped = df_raw[df_raw['ParsedVec'].isna() | df_raw['Score'].isna()]

# 3) Print total count
print("Total rows dropped:", len(dropped))

# 4) If there are any, show the first 10 examples
if len(dropped) > 0:
    print("\nSample dropped rows:")
    print(dropped[['Function','InputVec','Score']].head(10).to_string(index=False))
else:
    print("\nNo rows failed parsing or had missing scores.")

# 5) And per-function drop counts
print("\nDropped rows per function:")
print(dropped['Function'].value_counts().sort_index())

In [None]:
import numpy as np

# 1. Choose your expected dimension
expected_dim = 8

# 2. Parse every InputVec (no safe‐wrap so we see real shapes or errors)
df_raw['ParsedVec'] = df_raw['InputVec'].map(parse_vec_general)

# 3. Compute each vector’s length
df_raw['Dim'] = df_raw['ParsedVec'].apply(
    lambda v: v.shape[0] if hasattr(v, 'shape')
              else (len(v) if v is not None else np.nan)
)

# 4. Show the dimensionality distribution
print("Dimensionality distribution:")
print(df_raw['Dim'].value_counts().sort_index())

# 5. Find rows with the “wrong” dim or missing Score
bad = df_raw[(df_raw['Dim'] != expected_dim) | df_raw['Score'].isna()]

# 6. Summary
print(f"\nTotal bad rows (dim≠{expected_dim} or no Score):", len(bad))
if len(bad):
    print("\nSample bad rows:")
    print(bad[['Function','InputVec','Dim','Score']].head(10).to_string(index=False))

print("\nBad rows per function:")
print(bad['Function'].value_counts().sort_index())

In [None]:
import pandas as pd
import numpy as np

# 1) Make sure Score is numeric
df_raw['Score'] = pd.to_numeric(df_raw['Score'], errors='coerce')

# 2) (Re-)parse every InputVec
df_raw['ParsedVec'] = df_raw['InputVec'].map(lambda x:
    parse_vec_general(x)
)

# 3) Drop only rows missing a parsed vector or score
df_clean = df_raw.dropna(subset=['ParsedVec','Score']).copy()

# 4) Summarize usable rows and dims per function
inspection = []
for func, grp in df_clean.groupby('Function'):
    # lengths of each vector
    dims = [v.shape[0] for v in grp['ParsedVec']]
    inspection.append({
        'Function': func,
        'UsableRows': len(grp),
        'Dimensionalities': sorted(set(dims))
    })

inspection_df = pd.DataFrame(inspection)
print(inspection_df)

In [None]:
import numpy as np
import pandas as pd

def build_clean_df(df_raw):
    """Return a cleaned copy of ``df_raw`` with numeric scores and parsed vectors.

    The input frame is NOT modified: ``Score`` and ``ParsedVec`` are computed on
    a new frame, so re-running the cell always starts from the same raw data.

    Parameters
    ----------
    df_raw : pandas.DataFrame
        Must contain the columns ``'InputVec'`` and ``'Score'``.

    Returns
    -------
    pandas.DataFrame
        A new frame holding the original columns plus ``ParsedVec``
        (``parse_vec_general`` applied to each ``InputVec``) and ``Score``
        coerced to numeric (unparseable values become NaN). Rows where either
        ``ParsedVec`` or ``Score`` is missing are dropped.
    """
    df_parsed = df_raw.assign(
        # 1) Ensure Score is numeric
        Score=pd.to_numeric(df_raw['Score'], errors='coerce'),
        # 2) Parse vectors
        ParsedVec=df_raw['InputVec'].map(parse_vec_general),
    )

    # 3) Drop any bad rows
    return df_parsed.dropna(subset=['ParsedVec', 'Score'])

def validate_functions(df_clean, min_samples=5, expected_dim=8):
    """Check that every function has enough rows and vectors of one expected length.

    Parameters
    ----------
    df_clean : pandas.DataFrame
        Needs the columns ``'Function'`` and ``'ParsedVec'``.
    min_samples : int
        Minimum number of rows required per function.
    expected_dim : int
        The only vector length allowed within a function.

    Returns
    -------
    pandas.DataFrame
        Indexed by function, with ``count`` (rows) and ``dims`` (sorted list of
        the distinct vector lengths).

    Raises
    ------
    ValueError
        On the first function that has too few rows or whose set of vector
        lengths is not exactly ``[expected_dim]``.
    """
    def _n_rows(vectors):
        return len(vectors)

    def _distinct_lengths(vectors):
        return sorted({len(v) for v in vectors})

    per_function = df_clean.groupby('Function')['ParsedVec']
    stats = per_function.agg(count=_n_rows, dims=_distinct_lengths)

    for func, cnt, dims in zip(stats.index, stats['count'], stats['dims']):
        if cnt < min_samples:
            raise ValueError(f"Function {func!r} has only {cnt} samples (<{min_samples})")
        if dims != [expected_dim]:
            raise ValueError(f"Function {func!r} has unexpected vector dims: {dims}")
    return stats

def assemble_xy(df_clean):
    """Turn the cleaned frame into a feature matrix and a target vector.

    Returns
    -------
    (X, y)
        ``X`` stacks the per-row ``ParsedVec`` arrays along a new first axis
        (shape ``(N, d)`` when every vector has length ``d``); ``y`` holds the
        ``Score`` column values (shape ``(N,)``).
    """
    targets = df_clean['Score'].values
    features = np.stack(df_clean['ParsedVec'].values)
    return features, targets

# --- USAGE ---
# 1) Build df_clean
df_clean = build_clean_df(df_raw)

# 2) Validate (raises ValueError on the first function that fails a check)
stats = validate_functions(df_clean)
print("All functions OK:\n", stats)

# 3) Assemble data
X, y = assemble_xy(df_clean)

# 4) Train your GP+GA model
#    `YourGPGAClass` is a template placeholder, not a defined name; executing it
#    raises NameError, so it is kept as a comment. Replace it with your actual
#    training call.
# model = YourGPGAClass(...)
# model.fit(X, y)

In [None]:
from sklearn.gaussian_process import GaussianProcessRegressor
from sklearn.gaussian_process.kernels import RBF, ConstantKernel as C

# 1. Define a kernel (constant × RBF) and GP regressor
kernel = C(1.0, (1e-3, 1e3)) * RBF(length_scale=1.0, length_scale_bounds=(1e-2, 1e2))
model = GaussianProcessRegressor(
    kernel=kernel,
    n_restarts_optimizer=5,
    normalize_y=True,
    random_state=42
)

# 2. Fit to your data
model.fit(X, y)

# 3. Quick check
print("Optimized kernel:", model.kernel_)
print("Log-marginal-likelihood:", model.log_marginal_likelihood(model.kernel_.theta))

In [None]:
from your_module import GPGA   # wherever you defined it

# 1. Instantiate with whatever hyperparameters make sense
model = GPGA(pop_size=100, generations=30, crossover_rate=0.8, mutation_rate=0.1)

# 2. Train on your data
model.fit(X, y)

# 3. (Optional) Inspect fitness history or best individual
print("Best fitness per generation:", model.best_fitness_per_gen)

In [None]:
print("Optimized kernel:", model.kernel_)
print("Log-marginal-likelihood:", model.log_marginal_likelihood(model.kernel_.theta))

In [None]:
from sklearn.gaussian_process import GaussianProcessRegressor
from sklearn.gaussian_process.kernels import RBF, ConstantKernel as C
from sklearn.model_selection import train_test_split
from sklearn.metrics import r2_score
from your_module import GPGA   # your custom class

# 1) Split once for a quick train/test on the GP
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

# 2) Build and fit the GaussianProcessRegressor
kernel = C(1.0, (1e-3, 1e3)) * RBF(
    length_scale=1.0, length_scale_bounds=(1e-2, 1e2)
)
gp_model = GaussianProcessRegressor(
    kernel=kernel,
    n_restarts_optimizer=5,
    normalize_y=True,
    random_state=42
)
gp_model.fit(X_train, y_train)

# 3) Inspect the fitted GP
print("Optimized kernel:", gp_model.kernel_)
print("Log-marginal-likelihood:",
      gp_model.log_marginal_likelihood())  # no need to pass theta

# 4) Evaluate on held-out data
y_pred, y_std = gp_model.predict(X_test, return_std=True)
print("Test R²:", r2_score(y_test, y_pred))

In [None]:
# --- cell 1: your GPGA class definition ---
import numpy as np

class GPGA:
    """Skeleton of a genetic-algorithm search loop.

    ``fit`` drives the generic loop (evaluate -> record best -> breed); the three
    GA operators are left to be implemented and raise ``NotImplementedError``
    until they are.

    Attributes
    ----------
    best_fitness_per_gen : list
        Best fitness found in each generation of the most recent ``fit`` call.
    best_individual_ :
        Individual with the highest fitness seen in ANY generation of the most
        recent ``fit`` call (``None`` before fitting).
    best_fitness_ :
        Fitness of ``best_individual_`` (``None`` before fitting).
    """

    def __init__(self, pop_size=50, generations=20,
                 crossover_rate=0.8, mutation_rate=0.1,
                 random_state=None):
        self.pop_size = pop_size
        self.generations = generations
        self.crossover_rate = crossover_rate
        self.mutation_rate = mutation_rate
        self.random_state = random_state
        self.best_fitness_per_gen = []
        self.best_individual_ = None
        self.best_fitness_ = None
        # … any other setup …

    def fit(self, X, y):
        """Run the GA for ``self.generations`` generations and return ``self``."""
        rng = np.random.RandomState(self.random_state)

        # Reset the history so calling fit() twice does not append the second
        # run's generations to the first run's list.
        self.best_fitness_per_gen = []
        self.best_individual_ = None
        self.best_fitness_ = None

        # initialize population of hyperparameter sets (or kernel params)
        population = self._init_population(rng)
        for gen in range(self.generations):
            # 1) evaluate fitness of each individual
            fitness = [self._evaluate(ind, X, y) for ind in population]
            # 2) record this generation's best ...
            best_idx = int(np.argmax(fitness))
            gen_best = fitness[best_idx]
            self.best_fitness_per_gen.append(gen_best)
            # ... and keep the overall best: without elitism a later generation
            # can be worse than an earlier one, so only replace on improvement.
            if self.best_fitness_ is None or gen_best > self.best_fitness_:
                self.best_individual_ = population[best_idx]
                self.best_fitness_ = gen_best
            # 3) select, crossover, mutate to form next gen
            population = self._next_generation(population, fitness, rng)
        return self

    # stub methods—fill these in with your GA logic:
    def _init_population(self, rng):
        """Return the initial population (a sequence of individuals)."""
        raise NotImplementedError("GPGA._init_population must be implemented")

    def _evaluate(self, individual, X, y):
        """Return the fitness of ``individual`` on ``(X, y)``; higher is better."""
        raise NotImplementedError("GPGA._evaluate must be implemented")

    def _next_generation(self, population, fitness, rng):
        """Return the next population built from ``population`` and its ``fitness``."""
        raise NotImplementedError("GPGA._next_generation must be implemented")

In [None]:
# --- cell 2: import and use it ---
# (No import needed if it’s in the same notebook — just call it directly)
ga_model = GPGA(pop_size=100, generations=30, random_state=42)
ga_model.fit(X, y)
print("GA best fitness per generation:", ga_model.best_fitness_per_gen)

In [None]:
from sklearn.gaussian_process import GaussianProcessRegressor
from sklearn.gaussian_process.kernels import RBF, ConstantKernel as C

class GPGA:
    """Genetic-algorithm search over the two hyper-parameters of a ``C * RBF`` GP kernel.

    Each individual is a length-2 array ``[amplitude, length_scale]``. Its fitness
    is the log-marginal-likelihood of a ``GaussianProcessRegressor`` built with
    those values on the data passed to ``fit`` (higher is better).

    Parameters
    ----------
    pop_size : int
        Number of individuals per generation.
    generations : int
        Number of generations to evaluate.
    crossover_rate : float
        Probability that a child is an arithmetic blend of two parents
        (otherwise it is a copy of the first parent).
    mutation_rate : float
        Probability that Gaussian noise (std 0.1) is added to a child.
    amp_bounds, ls_bounds : tuple(float, float)
        (low, high) range for the amplitude and the length scale.
    random_state : int or None
        Seed for the GA's random generator; also forwarded to the GP.
    optimizer : str, callable or None
        Forwarded to ``GaussianProcessRegressor(optimizer=...)``. The default
        ``None`` keeps the kernel fixed at the individual's values, so the
        fitness describes the individual itself. With the GP's own optimizer
        enabled, the kernel would be re-tuned before scoring and the fitness
        would no longer correspond to ``[amplitude, length_scale]``.

    Attributes
    ----------
    best_fitness_per_gen : list
        Best fitness of each generation in the most recent ``fit`` call.
    best_individual_ : numpy.ndarray or None
        Individual with the highest fitness across all generations of the most
        recent ``fit`` call.
    best_fitness_ : float or None
        Fitness of ``best_individual_``.
    """

    def __init__(self, pop_size=50, generations=20,
                 crossover_rate=0.8, mutation_rate=0.1,
                 amp_bounds=(1e-3, 1e3), ls_bounds=(1e-2, 1e2),
                 random_state=None, optimizer=None):
        self.pop_size = pop_size
        self.generations = generations
        self.crossover_rate = crossover_rate
        self.mutation_rate = mutation_rate
        self.amp_bounds = amp_bounds
        self.ls_bounds = ls_bounds
        self.random_state = random_state
        self.optimizer = optimizer

        # history
        self.best_fitness_per_gen = []
        self.best_individual_ = None
        self.best_fitness_ = None

    def _init_population(self, rng):
        """Draw ``pop_size`` individuals uniformly inside the two bounds."""
        # Each individual is [amplitude, length_scale]
        low = np.array([self.amp_bounds[0], self.ls_bounds[0]])
        high = np.array([self.amp_bounds[1], self.ls_bounds[1]])
        return [rng.uniform(low, high) for _ in range(self.pop_size)]

    def _evaluate(self, individual, X, y):
        """Return the log-marginal-likelihood of a GP using ``individual``'s kernel."""
        amp, ls = individual
        kernel = C(amp, self.amp_bounds) * RBF(length_scale=ls,
                                               length_scale_bounds=self.ls_bounds)
        gp = GaussianProcessRegressor(kernel=kernel,
                                      normalize_y=True,
                                      optimizer=self.optimizer,
                                      random_state=self.random_state)
        gp.fit(X, y)
        # With no theta argument this is the LML of the fitted kernel; with
        # optimizer=None that kernel is exactly the individual's (amp, ls).
        return gp.log_marginal_likelihood()

    def _next_generation(self, population, fitness, rng):
        """Breed a new population: tournament selection, blend crossover, mutation."""

        # 1) Tournament: pick 2 distinct individuals at random, take the better
        def select_one():
            i, j = rng.choice(len(population), size=2, replace=False)
            return population[i] if fitness[i] > fitness[j] else population[j]

        # 2) Build next pop
        new_pop = []
        while len(new_pop) < self.pop_size:
            # select parents
            p1, p2 = select_one(), select_one()
            # crossover: a convex blend of the parents, so it stays within bounds
            if rng.rand() < self.crossover_rate:
                alpha = rng.rand()
                child = alpha*p1 + (1-alpha)*p2
            else:
                child = p1.copy()
            # mutation
            if rng.rand() < self.mutation_rate:
                child += rng.normal(scale=0.1, size=child.shape)
                # clip back into bounds (only mutation can leave them)
                child = np.clip(child,
                                [self.amp_bounds[0], self.ls_bounds[0]],
                                [self.amp_bounds[1], self.ls_bounds[1]])
            new_pop.append(child)
        return new_pop

    def fit(self, X, y):
        """Run the GA on ``(X, y)`` and return ``self``."""
        rng = np.random.RandomState(self.random_state)

        # Reset the history so a second fit() on the same object does not append
        # to (or compare against) results from the previous run.
        self.best_fitness_per_gen = []
        self.best_individual_ = None
        self.best_fitness_ = None

        population = self._init_population(rng)
        for gen in range(self.generations):
            fitness = [self._evaluate(ind, X, y) for ind in population]
            best_idx = int(np.argmax(fitness))
            gen_best = fitness[best_idx]
            self.best_fitness_per_gen.append(gen_best)
            # There is no elitism, so a later generation may be worse than an
            # earlier one; keep the best individual seen so far, not the last.
            if self.best_fitness_ is None or gen_best > self.best_fitness_:
                self.best_individual_ = population[best_idx].copy()
                self.best_fitness_ = gen_best
            population = self._next_generation(population, fitness, rng)
        return self

In [None]:
# Instantiate & run
ga_model = GPGA(
    pop_size=100,
    generations=30,
    crossover_rate=0.8,
    mutation_rate=0.1,
    random_state=42
)
ga_model.fit(X, y)

print("GA best fitness per generation:", ga_model.best_fitness_per_gen)
print("GA overall best individual:", ga_model.best_individual_)
print("GA best fitness:", ga_model.best_fitness_)

In [None]:
import numpy as np

# How many total NaNs?
total_nans = np.isnan(X).sum()
print("Total NaN entries in X:", total_nans)

# Does any row contain a NaN?
rows_with_nans = np.where(np.isnan(X).any(axis=1))[0]
print("Rows with NaNs:", rows_with_nans)

# And for sanity, check scores too:
print("Any NaNs in y?", np.isnan(y).any())

In [None]:
from sklearn.impute import SimpleImputer

# 1) Inspect how many NaNs in each dimension
import numpy as np
nans_per_dim = np.isnan(X).sum(axis=0)
print("NaNs per feature (dim):", nans_per_dim)

# 2) Impute (fill) each column with its mean
imp = SimpleImputer(strategy='mean')
X_imputed = imp.fit_transform(X)

# 3) Double-check no more NaNs
print("Any NaNs left in X_imputed?", np.isnan(X_imputed).any())

# 4) Retrain your GA-wrapped GP on the imputed data
ga_model = GPGA(
    pop_size=100,
    generations=30,
    crossover_rate=0.8,
    mutation_rate=0.1,
    random_state=42
)
ga_model.fit(X_imputed, y)

print("GA best fitness per generation:", ga_model.best_fitness_per_gen)
print("GA best individual (amp, ls):", ga_model.best_individual_)
print("GA best fitness:", ga_model.best_fitness_)

In [None]:
# Check shapes immediately before fitting
print("X_imputed.shape:", X_imputed.shape)
print("y.shape:", y.shape)

In [None]:
# Rebuild y from df_clean
y = df_clean['Score'].values

# Verify shapes
print("X_imputed.shape:", X_imputed.shape)   # should be (175, 8)
print("y.shape:", y.shape)                   # should now be (175,)

In [None]:
ga_model.fit(X_imputed, y)
print("GA best fitness per generation:", ga_model.best_fitness_per_gen)
print("GA best individual:", ga_model.best_individual_)
print("GA best fitness:", ga_model.best_fitness_)

# `import time` was fused onto the end of the previous print statement, which is
# a SyntaxError; it needs its own line.
import time

# grab one population and RNG from your model
rng = np.random.RandomState(42)
pop = ga_model._init_population(rng)

# time how long it takes to eval that one generation
start = time.time()
_ = [ga_model._evaluate(ind, X_imputed, y) for ind in pop]
elapsed = time.time() - start

print(f"1 generation ({len(pop)} fits) took {elapsed:.1f}s")
print(f"Estimated total time: {elapsed * ga_model.generations:.1f}s (~{(elapsed * ga_model.generations)/60:.1f}min)")

In [None]:
from sklearn.gaussian_process import GaussianProcessRegressor
from sklearn.gaussian_process.kernels import ConstantKernel as C, RBF
from sklearn.metrics import r2_score

# 1) Split (if you haven’t already)
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(
    X_imputed, y, test_size=0.2, random_state=42
)

# 2) Build a GP with the GA’s best hyper-parameters and no optimizer
amp, ls = ga_model.best_individual_
kernel = C(amp, (amp, amp)) * RBF(length_scale=ls, length_scale_bounds=(ls, ls))
gp = GaussianProcessRegressor(
    kernel=kernel,
    normalize_y=True,
    optimizer=None  # ← turn off SciPy optimization entirely
)

# 3) Fit & evaluate
gp.fit(X_train, y_train)
y_pred, y_std = gp.predict(X_test, return_std=True)

print("Test R²:",   r2_score(y_test, y_pred))
print("Test LL:",   gp.log_marginal_likelihood())  # now works without the status error

In [None]:
from sklearn.model_selection import KFold
from sklearn.metrics import r2_score

class GPGA_CV(GPGA):
    """GPGA variant whose fitness is the mean 5-fold cross-validated R²."""

    def _evaluate(self, individual, X, y):
        """Score ``individual`` by the average held-out R² over 5 shuffled folds.

        The kernel is ``C(amp) * RBF(ls)`` built from the individual, and each
        fold's GP is fitted with ``optimizer=None``.
        """
        amplitude, length_scale = individual
        fold_kernel = C(amplitude, self.amp_bounds) * RBF(
            length_scale=length_scale, length_scale_bounds=self.ls_bounds
        )
        splitter = KFold(n_splits=5, shuffle=True, random_state=self.random_state)

        def _fold_r2(train_rows, test_rows):
            fold_gp = GaussianProcessRegressor(kernel=fold_kernel, normalize_y=True,
                                               optimizer=None)
            fold_gp.fit(X[train_rows], y[train_rows])
            return r2_score(y[test_rows], fold_gp.predict(X[test_rows]))

        # higher = better
        return np.mean([_fold_r2(tr, te) for tr, te in splitter.split(X)])

In [None]:
 class GPGA:
-    def __init__(…, ls_bounds=(1e-2, 1e2), …):
+    def __init__(…, ls_bounds=(1e-2, 10), …):
         …

In [None]:
from sklearn.preprocessing import StandardScaler

# 1) Fit scalers on training folds inside GPGA_CV._evaluate
# NOTE: this is a fragment meant to sit inside the fold loop of
# GPGA_CV._evaluate; this cell does not define `X`, `y`, `tr`, `te` or `gp`,
# so it only works where those names already exist.
x_scaler = StandardScaler().fit(X[tr])
y_scaler = StandardScaler().fit(y[tr].reshape(-1,1))

# Apply the train-fold statistics to both the train and the test rows.
X_tr_s = x_scaler.transform(X[tr])
X_te_s = x_scaler.transform(X[te])
y_tr_s = y_scaler.transform(y[tr].reshape(-1,1)).ravel()

# Fit on scaled data, then map the predictions back to the original y scale.
gp.fit(X_tr_s, y_tr_s)
y_pred_s = gp.predict(X_te_s)
y_pred = y_scaler.inverse_transform(y_pred_s.reshape(-1,1)).ravel()

In [None]:
from sklearn.preprocessing import StandardScaler

xs = StandardScaler().fit_transform(X_imputed)
ys = StandardScaler().fit_transform(y.reshape(-1,1)).ravel()
# Then run ga_model.fit(xs, ys)

In [None]:
from sklearn.model_selection import train_test_split
from sklearn.metrics import r2_score
from sklearn.preprocessing import StandardScaler
from sklearn.gaussian_process import GaussianProcessRegressor
from sklearn.gaussian_process.kernels import ConstantKernel as C, RBF

# 1) Split BEFORE scaling to preserve a true hold-out set
X_train, X_test, y_train, y_test = train_test_split(
    X_imputed, y, test_size=0.2, random_state=42
)

# 2) Fit scalers on train only
x_sc = StandardScaler().fit(X_train)
y_sc = StandardScaler().fit(y_train.reshape(-1,1))

X_tr_s = x_sc.transform(X_train)
X_te_s = x_sc.transform(X_test)
y_tr_s = y_sc.transform(y_train.reshape(-1,1)).ravel()

# 3) Rerun GA on scaled training data
ga_model = GPGA(
    pop_size=50,       # you can reduce for speed
    generations=15,    # fewer gens now that scaling helps
    crossover_rate=0.8,
    mutation_rate=0.1,
    random_state=42
)
ga_model.fit(X_tr_s, y_tr_s)

print("GA best fitness per generation:", ga_model.best_fitness_per_gen)
print("GA best hyperparams [amp, ls]:", ga_model.best_individual_)

# 4) Build a GP with those hyper-parameters and no optimizer
amp, ls = ga_model.best_individual_
kernel = C(amp, (amp, amp)) * RBF(length_scale=ls, length_scale_bounds=(ls, ls))
gp = GaussianProcessRegressor(kernel=kernel, normalize_y=True, optimizer=None)

# 5) Fit GP on the scaled train set
gp.fit(X_tr_s, y_tr_s)

# 6) Predict on the scaled test set, then invert the scaling on y_pred
y_pred_s, y_std_s = gp.predict(X_te_s, return_std=True)
y_pred = y_sc.inverse_transform(y_pred_s.reshape(-1,1)).ravel()

# 7) Evaluate on unscaled test targets
print("Test  R² (unscaled):", r2_score(y_test, y_pred))

In [None]:
from sklearn.gaussian_process import GaussianProcessRegressor
from sklearn.gaussian_process.kernels import ConstantKernel as C, RBF
from sklearn.model_selection import KFold
from sklearn.metrics import r2_score
import numpy as np

class GPGA_CV(GPGA):
    """GPGA variant that scores individuals by mean K-fold cross-validated R².

    Extra keyword-only parameters
    -----------------------------
    n_splits : int
        Number of shuffled K-fold splits used per evaluation.
    optimizer :
        Passed straight to each fold's ``GaussianProcessRegressor``; ``None``
        skips the GP's internal hyper-parameter optimization.
    """

    def __init__(self, *args, n_splits=5, optimizer=None, **kwargs):
        # Base-class setup first, then the CV-specific attributes.
        super().__init__(*args, **kwargs)
        self.n_splits = n_splits
        self.optimizer = optimizer

    def _evaluate(self, individual, X, y):
        """Return the mean held-out R² of a GP built from ``individual``."""
        amplitude, length_scale = individual
        base_kernel = C(amplitude, self.amp_bounds) * RBF(
            length_scale=length_scale, length_scale_bounds=self.ls_bounds
        )
        folds = KFold(n_splits=self.n_splits,
                      shuffle=True, random_state=self.random_state)

        fold_r2 = []
        for fit_rows, eval_rows in folds.split(X):
            fold_gp = GaussianProcessRegressor(
                kernel=base_kernel,
                normalize_y=True,
                optimizer=self.optimizer,
                random_state=self.random_state
            )
            fold_gp.fit(X[fit_rows], y[fit_rows])
            held_out_pred = fold_gp.predict(X[eval_rows])
            fold_r2.append(r2_score(y[eval_rows], held_out_pred))
        # higher R² is better
        return np.mean(fold_r2)

# --- USAGE on your scaled data ---
ga_cv = GPGA_CV(
    pop_size=50,       # smaller for speed
    generations=15,
    crossover_rate=0.8,
    mutation_rate=0.1,
    amp_bounds=(1e-3,1e3),
    ls_bounds=(1e-2,10),   # tightened upper bound
    random_state=42,
    optimizer=None,       # prevents SciPy errors
    n_splits=5
)

# Fit on scaled train set.
# X_tr_s / y_tr_s are the train-only scaled arrays from the split cell above.
# The previous `xs, ys` were the scaled FULL dataset, which let the held-out
# test rows influence hyper-parameter selection.
ga_cv.fit(X_tr_s, y_tr_s)

print("GA_CV best fitness per generation:", ga_cv.best_fitness_per_gen)
print("GA_CV best hyperparams [amp, ls]:", ga_cv.best_individual_)
print("GA_CV best CV R²:", ga_cv.best_fitness_)

In [None]:
from sklearn.gaussian_process.kernels import ConstantKernel as C, Matern, WhiteKernel
from sklearn.gaussian_process import GaussianProcessRegressor

# Pull in your best GA‐CV hyperparams:
amp, ls = ga_cv.best_individual_

# Define a Matern + white‐noise kernel
kernel = (
    C(amp, (1e-3, 1e3))
    * Matern(length_scale=ls, length_scale_bounds=(1e-4, 5), nu=1.5)
    + WhiteKernel(noise_level=1e-3, noise_level_bounds=(1e-5, 1e1))
)

# Build a GP that *doesn't* re-optimize (optimizer=None)
gp = GaussianProcessRegressor(
    kernel=kernel,
    normalize_y=True,
    optimizer=None,
    random_state=42
)

# Fit on your scaled train data:
gp.fit(X_tr_s, y_tr_s)

# Predict on the scaled test set and invert the scaling:
y_pred_s, y_std_s = gp.predict(X_te_s, return_std=True)
y_pred = y_sc.inverse_transform(y_pred_s.reshape(-1,1)).ravel()

# Evaluate:
from sklearn.metrics import r2_score
print("Test R² (Matern+White):", r2_score(y_test, y_pred))

In [None]:
from sklearn.ensemble import HistGradientBoostingRegressor
from sklearn.model_selection import train_test_split, cross_val_score
from sklearn.metrics import r2_score

# 1) Split on the imputed unscaled data
X_tr, X_te, y_tr, y_te = train_test_split(
    X_imputed, y, test_size=0.2, random_state=42
)

# 2) Instantiate and fit
hgb = HistGradientBoostingRegressor(
    max_iter=200,
    learning_rate=0.1,
    max_depth=5,
    random_state=42
)
hgb.fit(X_tr, y_tr)

# 3) Evaluate on test set
y_pred = hgb.predict(X_te)
print("HGB Test R²:", r2_score(y_te, y_pred))

# 4) (Optional) 5-fold CV on full dataset
cv_scores = cross_val_score(
    hgb, X_imputed, y, cv=5, scoring="r2", n_jobs=-1
)
print("HGB CV  R² mean:", cv_scores.mean(), "±", cv_scores.std())

In [None]:
import matplotlib.pyplot as plt

plt.scatter(y_te, y_pred, alpha=0.6)
lims = [min(y_te.min(), y_pred.min()), max(y_te.max(), y_pred.max())]
plt.plot(lims, lims, 'k--')
plt.xlabel("True Score"); plt.ylabel("Predicted Score")
plt.title("HGB: True vs Predicted")
plt.show()

In [None]:
from sklearn.ensemble import HistGradientBoostingRegressor
from sklearn.model_selection import cross_val_score
import numpy as np

results = []
for func, group in df_clean.groupby('Function'):
    # 1) Extract that function’s true input vectors & scores
    Xf = np.stack(group['ParsedVec'].values)   # shape: (n_samples, dim_f)
    yf = group['Score'].values                 # shape: (n_samples,)

    # 2) Fit & CV-test a simple model
    model = HistGradientBoostingRegressor(
        max_iter=200,
        learning_rate=0.1,
        max_depth=5,
        random_state=42
    )
    # 5-fold CV R²
    scores = cross_val_score(model, Xf, yf, cv=5, scoring="r2", n_jobs=-1)
    results.append((func, scores.mean(), scores.std()))

# 3) Tabulate
import pandas as pd
df_func_perf = pd.DataFrame(results, columns=["Function","R2_mean","R2_std"])
print(df_func_perf)

In [None]:
from sklearn.preprocessing import PolynomialFeatures
poly = PolynomialFeatures(degree=2, include_bias=False)
Xf2 = poly.fit_transform(Xf)
# then cross_val_score on Xf2 instead of Xf

In [None]:
# e.g. for F4 only:
f4 = df_clean[df_clean['Function']=="F4"]
Xf4 = np.stack(f4['ParsedVec'].values)
yf4 = f4['Score'].values

# scale, then GA_CV.fit(Xf4_s, yf4_s) as before

In [None]:
import numpy as np
import pandas as pd
from sklearn.preprocessing import PolynomialFeatures
from sklearn.ensemble import HistGradientBoostingRegressor
from sklearn.model_selection import cross_val_score

results_poly = []

for func, group in df_clean.groupby("Function"):
    # 1) Build a boolean mask of that function’s rows
    mask = df_clean["Function"] == func

    # 2) Pull the imputed vectors & scores
    Xf = X_imputed[mask]       # shape: (n_func, 8), no NaNs
    yf = group["Score"].values # shape: (n_func,)

    # 3) Make 2nd-degree features
    poly = PolynomialFeatures(degree=2, include_bias=False)
    Xf2 = poly.fit_transform(Xf)

    # 4) 5-fold CV
    hgb = HistGradientBoostingRegressor(
        max_iter=200, learning_rate=0.1, max_depth=5, random_state=42
    )
    scores = cross_val_score(
        hgb, Xf2, yf, cv=5, scoring="r2", n_jobs=-1
    )

    results_poly.append({
        "Function": func,
        "R2_mean_poly": scores.mean(),
        "R2_std_poly": scores.std()
    })

df_poly_perf = pd.DataFrame(results_poly)
print(df_poly_perf)

In [None]:
import numpy as np
import pandas as pd
from sklearn.preprocessing import StandardScaler, PowerTransformer
from sklearn.compose import TransformedTargetRegressor
from sklearn.ensemble import HistGradientBoostingRegressor
from sklearn.model_selection import cross_val_score

results_ttr = []

# Per-function 5-fold CV of a gradient-boosting model whose target is
# Yeo-Johnson transformed; collects mean/std R² into results_ttr.
for func, group in df_clean.groupby("Function"):
    # 1) Raw inputs & scores
    Xf = X_imputed[df_clean["Function"]==func]  # no NaNs
    yf = group["Score"].values                  # may be wildly skewed

    # 2) Build the model: X scaled, y Yeo-Johnson
    # NOTE: this rebinds the notebook-level names `x_sc` and `model`.
    x_sc = StandardScaler()
    pt  = PowerTransformer(method="yeo-johnson", standardize=True)
    base = HistGradientBoostingRegressor(
        max_iter=200, learning_rate=0.1, max_depth=5, random_state=42
    )
    model = TransformedTargetRegressor(
        regressor=base,
        transformer=pt,
        func=None,           # not needed if transformer handles inverse
    )
    # X is standardized ONCE here, on all rows of this function, before
    # cross_val_score splits them, so the scaler statistics include the
    # validation folds.
    # NOTE(review): cross_val_score accepts any estimator, so putting
    # StandardScaler and the regressor in a sklearn Pipeline inside the
    # TransformedTargetRegressor would keep the scaling within each fold.

    Xf_s = x_sc.fit_transform(Xf)

    # 3) 5-fold CV; the target transform is applied inside `model`
    scores = cross_val_score(
        model, Xf_s, yf,
        cv=5,
        scoring="r2",
        n_jobs=-1
    )

    results_ttr.append({
        "Function": func,
        "R2_mean_ttr": scores.mean(),
        "R2_std_ttr":  scores.std()
    })

df_ttr_perf = pd.DataFrame(results_ttr)
print(df_ttr_perf)

In [None]:
import numpy as np
import pandas as pd
from sklearn.gaussian_process import GaussianProcessRegressor
from sklearn.gaussian_process.kernels import ConstantKernel as C, Matern, WhiteKernel

# 1) Retrain one surrogate per function
surrogates = {}
for func, group in df_clean.groupby("Function"):
    mask = (df_clean["Function"] == func)
    Xf = X_imputed[mask]             # (n_f, 8) no NaNs
    yf = group["Score"].values

    amp, ls = ga_cv.best_individual_
    kernel = (
        C(amp, (1e-3,1e3))
        * Matern(length_scale=ls, length_scale_bounds=(1e-4,5), nu=1.5)
        + WhiteKernel(noise_level=1e-3, noise_level_bounds=(1e-5,1e1))
    )
    gp = GaussianProcessRegressor(
        kernel=kernel,
        normalize_y=True,
        optimizer=None,
        random_state=42
    )
    gp.fit(Xf, yf)
    surrogates[func] = (gp, Xf.shape[1])

# 2) Sample & rank
N = 5000  # number of random samples per function
k = 10    # how many top candidates to keep per function

# Seeded generator (42, as elsewhere in this notebook) so that re-running the
# cell proposes the same candidates; np.random.rand used the unseeded global
# state and produced different proposals on every run.
cand_rng = np.random.RandomState(42)

new_cands = []
for func, (gp, dim) in surrogates.items():
    CAND = cand_rng.rand(N, dim)          # N uniform points in [0, 1)^dim
    preds = gp.predict(CAND)
    top_idx = np.argsort(preds)[-k:]      # indices of the k largest predictions

    for j in top_idx:
        vec = CAND[j]
        new_cands.append({
            "Function": func,
            "InputVec": "-".join(f"{x:.6f}" for x in vec),
            "PredictedScore": float(preds[j])
        })

df_new = pd.DataFrame(new_cands)
# See your top-10 proposals per function:
print(df_new.groupby("Function").head(k))

In [None]:
import numpy as np
import pandas as pd
from sklearn.gaussian_process import GaussianProcessRegressor
from sklearn.gaussian_process.kernels import ConstantKernel as C, Matern, WhiteKernel

new_cands = []
N = 5000  # random samples per function
k = 10    # top candidates to keep per function

# Seeded generator (42, as elsewhere in this notebook) so that re-running the
# cell proposes the same candidates; np.random.rand used the unseeded global
# state and produced different proposals on every run.
cand_rng = np.random.RandomState(42)

for func, group in df_clean.groupby("Function"):
    # 1) Rebuild Xf from the raw ParsedVec (no global padding)
    #    Each parsed vector v is an array or list of length d_func
    Xf = np.vstack(group["ParsedVec"].values)  # shape (n_f, d_func)
    yf = group["Score"].values                 # shape (n_f,)

    d = Xf.shape[1]  # true dimension for this function

    # 2) Train a GP+Matern+White on this function’s data
    #    The same GA-selected (amp, ls) pair is reused for every function.
    amp, ls = ga_cv.best_individual_  # or you could have stored one per function
    kernel = (
        C(amp, (1e-3,1e3))
        * Matern(length_scale=ls,
                 length_scale_bounds=(1e-4, 5),
                 nu=1.5)
        + WhiteKernel(noise_level=1e-3,
                      noise_level_bounds=(1e-5,1e1))
    )
    gp = GaussianProcessRegressor(
        kernel=kernel,
        normalize_y=True,
        optimizer=None,      # keep the kernel exactly as specified above
        random_state=42
    )
    gp.fit(Xf, yf)

    # 3) Sample and rank in the true d-dimensional unit cube
    CAND = cand_rng.rand(N, d)
    preds = gp.predict(CAND)
    top_idx = np.argsort(preds)[-k:]  # best k

    for j in top_idx:
        vec = CAND[j]
        new_cands.append({
            "Function":     func,
            "InputVec":     "-".join(f"{x:.6f}" for x in vec),
            "PredictedScore": float(preds[j])
        })

# 4) Wrap up into a DataFrame and inspect
df_new = pd.DataFrame(new_cands)
print(df_new.groupby("Function").head(k))

In [None]:
import numpy as np
import pandas as pd
from sklearn.gaussian_process import GaussianProcessRegressor
from sklearn.gaussian_process.kernels import ConstantKernel as C, Matern, WhiteKernel

new_cands = []
N = 5000  # how many random samples per function
k = 10    # how many top candidates to keep per function

# Dedicated, seeded generator: the candidate pool (and therefore the proposed
# queries) is reproducible on a fresh "Restart & Run All", matching the
# random_state=42 already used for the GP below.
rng = np.random.default_rng(42)

for func, group in df_clean.groupby("Function"):
    # 1) Build Xf by splitting the dash-separated InputVec strings.
    #    NOTE: "-" is the separator, so this assumes every coordinate is a
    #    plain non-negative decimal (no minus sign, no "1e-05" notation).
    Xf = np.vstack([
        np.fromiter(map(float, iv.split("-")), dtype=float)
        for iv in group["InputVec"]
    ])   # shape (n_rows, d_func)
    yf = group["Score"].values

    d = Xf.shape[1]  # this function's true dimensionality

    # 2) Train a GP+Matern+White on this function's own data. The same
    #    (amplitude, length-scale) pair from ga_cv is reused for every function.
    amp, ls = ga_cv.best_individual_
    kernel = (
        C(amp, (1e-3, 1e3))
        * Matern(length_scale=ls, length_scale_bounds=(1e-4, 5), nu=1.5)
        + WhiteKernel(noise_level=1e-3, noise_level_bounds=(1e-5, 1e1))
    )
    gp = GaussianProcessRegressor(
        kernel=kernel,
        normalize_y=True,
        optimizer=None,   # keep the hyper-parameters fixed at the values above
        random_state=42
    )
    gp.fit(Xf, yf)

    # 3) Sample N random points in [0,1)^d, predict, and pick top k
    CAND = rng.random((N, d))
    preds = gp.predict(CAND)
    top_idx = np.argsort(preds)[-k:]  # indices of the k largest predictions

    for j in top_idx:
        vec = CAND[j]
        new_cands.append({
            "Function":        func,
            "InputVec":        "-".join(f"{x:.6f}" for x in vec),
            "PredictedScore":  float(preds[j])
        })

# 4) Collect into a DataFrame and inspect
df_new = pd.DataFrame(new_cands)
print(df_new.groupby("Function").head(k))

In [None]:
# For every function keep only its best-predicted candidate, then reduce the
# frame to the two columns that make up the submission.
best_row_labels = df_new.groupby("Function")["PredictedScore"].idxmax()
df_final = df_new.loc[best_row_labels, ["Function", "InputVec"]].reset_index(drop=True)

print(df_final)
# Persist the selection as the Round 10 submission CSV.
df_final.to_csv("Round10_Final_Submission.csv", index=False)

In [None]:
# Re-export df_final; this writes the same file as the export in the previous cell.
df_final.to_csv("Round10_Final_Submission.csv", index=False)

In [None]:
# Bundle the submission CSV together with every notebook in the working
# directory (*.ipynb) into one archive. `!` runs the command in a shell.
!zip submission_round10.zip Round10_Final_Submission.csv *.ipynb

# Colab-only: push the archive to the browser as a download.
from google.colab import files
files.download("submission_round10.zip")

# Round 10+ Advanced Strategies (Bayesian Optimization, CMA-ES, LHS)

In [None]:
import numpy as np

# -------------------------------
# ROUND 2–9 Best Queries and Scores per Function (Based on your latest notebook state)
# -------------------------------
# Layout: queries_Fk[i] is the input vector whose observed score is
# scores_Fk[i]; the two lists of a function must stay the same length and in
# the same order. Two baseline entries are recorded per function here.

# F1 (2D) - scores are ~1e-69 and ~1e-113, i.e. numerically zero.
queries_F1 = [
    [0.944034, 0.625117],
    [0.07314, 0.227123]
]
scores_F1 = [
    1.3276592050304897e-69,
    4.6015626100136584e-113
]

# F2 (2D) - both entries are the same point with different scores.
queries_F2 = [
    [0.768771, 0.105777],
    [0.768771, 0.105777]
]
scores_F2 = [
    0.27308818432797793,
    0.33828687086483444
]

# F3 (3D)
queries_F3 = [
    [0.1456, 0.944454, 0.638992],
    [0.1856, 0.984454, 0.678992]
]
scores_F3 = [
    -0.10726379301566011,
    -0.05132522888449367
]

# F4 (4D)
queries_F4 = [
    [0.999363, 0.440618, 0.632742, 0.573562],
    [0.999363, 0.460618, 0.652742, 0.593562]
]
scores_F4 = [
    -20.949420794091107,
    -4.55517117590318
]

# F5 (4D) - the two points differ only in the first coordinate (0.29 vs 0.27).
queries_F5 = [
    [0.290636, 0.824504, 0.580542, 0.368022],
    [0.270636, 0.824504, 0.580542, 0.368022]
]
scores_F5 = [
    13.377756436400528,
    1105.1479642901527
]

# F6 (5D)
queries_F6 = [
    [0.404874, 0.441523, 0.135493, 0.612513, 0.497029],
    [0.384874, 0.441523, 0.115493, 0.592513, 0.477029]
]
scores_F6 = [
    -1.1857272137575898,
    -0.5399068214874156
]

# F7 (6D)
queries_F7 = [
    [0.304835, 0.615316, 0.890015, 0.497555, 0.892324, 0.798193],
    [0.94976, 0.670137, 0.267596, 0.089014, 0.282662, 0.021611]
]
scores_F7 = [
    0.023506147714804363,
    2.090579827192728
]

# F8 (8D)
queries_F8 = [
    [0.25682, 0.180489, 0.295402, 0.830171, 0.672892, 0.287181, 0.913031, 0.782776],
    [0.23682, 0.160489, 0.275402, 0.810171, 0.652892, 0.267181, 0.893031, 0.762776]
]
scores_F8 = [
    8.3316420728934,
    9.5659163853995
]

# -------------------------------
# ROUND 10: Adding all new queries and observations
# -------------------------------
# `+=` extends the lists created above in place. Because those lists are
# re-created at the top of this same cell, re-running the cell does not
# append the Round 10 entries twice.
# Several entries below repeat an earlier point exactly but carry a different
# score, so the same input was observed with different outputs.

# F1 (2D)
queries_F1 += [
    [0.944034, 0.645117],
    [0.07314, 0.227123]
]
scores_F1 += [
    6.933250699386668e-70,
    -1.560646704467778e-117
]

# F2 (2D)
queries_F2 += [
    [0.768771, 0.105777],
    [0.768771, 0.105777],
    [0.768771, 0.105777]
]
scores_F2 += [
    0.09312933125476712,
    0.23562706897099497,
    0.5762427881544481
]

# F3 (3D)
queries_F3 += [
    [0.1856, 0.984454, 0.678992],
    [0.1856, 0.984454, 0.678992],
    [0.1456, 0.944454, 0.638992]
]
scores_F3 += [
    -0.15237863681921446,
    -0.14698930107649133,
    -0.01882082499050317
]

# F4 (4D)
queries_F4 += [
    [0.999363, 0.460618, 0.652742, 0.593562],
    [0.999363, 0.460618, 0.652742, 0.593562],
    [0.999363, 0.440618, 0.632742, 0.573562]
]
scores_F4 += [
    -22.018089179607596,
    -22.018089179607596,
    -20.33439467922256
]

# F5 (4D)
queries_F5 += [
    [0.270636, 0.824504, 0.580542, 0.368022],
    [0.270636, 0.824504, 0.580542, 0.368022],
    [0.290636, 0.824504, 0.580542, 0.368022]
]
scores_F5 += [
    12.979330536136526,
    12.979330536136526,
    46.39060566194315
]

# F6 (5D)
queries_F6 += [
    [0.384874, 0.441523, 0.115493, 0.592513, 0.477029],
    [0.384874, 0.441523, 0.115493, 0.592513, 0.477029],
    [0.433609, 0.430291, 0.199161, 0.583735, 0.454637]
]
scores_F6 += [
    -1.1838236564336748,
    -1.263310156017569,
    -1.0581335024921092
]

# F7 (6D)
queries_F7 += [
    [0.94976, 0.670137, 0.267596, 0.089014, 0.282662, 0.021611],
    [0.94976, 0.670137, 0.267596, 0.089014, 0.282662, 0.021611],
    [0.94976, 0.670137, 0.267596, 0.089014, 0.282662, 0.021611]
]
scores_F7 += [
    0.03872554610994976,
    0.03872554610994976,
    1.5474127567590061
]

# F8 (8D)
queries_F8 += [
    [0.23682, 0.160489, 0.275402, 0.810171, 0.652892, 0.267181, 0.893031, 0.762776],
    [0.23682, 0.160489, 0.275402, 0.810171, 0.652892, 0.267181, 0.893031, 0.762776],
    [0.23682, 0.160489, 0.275402, 0.810171, 0.652892, 0.267181, 0.893031, 0.762776]
]
scores_F8 += [
    7.3910313497699,
    7.3910313497699,
    8.316440887821
]

# -------------------------------
# Final Sanity Check (optional)
# -------------------------------
# Report the history size of every function and fail loudly if a query list
# and its score list have drifted apart (each score must belong to one query).
_history_by_function = [
    ("F1", queries_F1, scores_F1),
    ("F2", queries_F2, scores_F2),
    ("F3", queries_F3, scores_F3),
    ("F4", queries_F4, scores_F4),
    ("F5", queries_F5, scores_F5),
    ("F6", queries_F6, scores_F6),
    ("F7", queries_F7, scores_F7),
    ("F8", queries_F8, scores_F8),
]
for _label, _queries, _scores in _history_by_function:
    print(f"{_label} queries: {len(_queries)}, scores: {len(_scores)}")
    assert len(_queries) == len(_scores), (
        f"{_label}: {len(_queries)} queries but {len(_scores)} scores"
    )


Bayesian Optimization per function

In [None]:
from sklearn.gaussian_process import GaussianProcessRegressor
from sklearn.gaussian_process.kernels import Matern
from scipy.stats import norm, qmc
import numpy as np

def suggest_ei(X_hist, y_hist, n_suggest=2, samples=2000, seed=None):
    """Suggest new query points by maximising Expected Improvement (EI).

    A Gaussian process (Matern nu=2.5, jitter alpha=1e-6, normalised targets)
    is fitted to the history. EI for a maximisation problem is then evaluated
    on `samples` Latin-hypercube points of the unit cube and the `n_suggest`
    highest-EI points are returned.

    Parameters
    ----------
    X_hist : array-like of shape (n, d)
        Inputs evaluated so far.
    y_hist : array-like of shape (n,)
        Observed scores, one per row of `X_hist` (higher is better).
    n_suggest : int
        Number of candidates to return.
    samples : int
        Size of the Latin-hypercube candidate pool.
    seed : int or numpy Generator, optional
        Seed for the candidate sampler; pass a value for reproducible
        suggestions. The default (None) draws a fresh pool on every call.

    Returns
    -------
    numpy.ndarray of shape (n_suggest, d)
        Candidates sorted by ascending EI, i.e. the best one is the last row.

    Raises
    ------
    ValueError
        If `X_hist` is not 2-D or its row count differs from `len(y_hist)`.
    """
    # Accept plain lists as well as arrays (the history is kept in lists), and
    # flatten the targets so an (n, 1) column cannot break the EI broadcast.
    X_hist = np.asarray(X_hist, dtype=float)
    y_hist = np.asarray(y_hist, dtype=float).ravel()
    if X_hist.ndim != 2 or X_hist.shape[0] != y_hist.shape[0]:
        raise ValueError(
            "X_hist must be (n, d) with one score per row; "
            f"got X_hist {X_hist.shape}, y_hist {y_hist.shape}"
        )

    # Fit GP
    gp = GaussianProcessRegressor(kernel=Matern(nu=2.5), alpha=1e-6, normalize_y=True)
    gp.fit(X_hist, y_hist)

    # Candidates
    sampler = qmc.LatinHypercube(d=X_hist.shape[1], seed=seed)
    X_cand = sampler.random(samples)
    mu, sigma = gp.predict(X_cand, return_std=True)

    # EI = (mu - best) * Phi(z) + sigma * phi(z), with z = (mu - best) / sigma;
    # the 1e-9 keeps z finite where the predictive std collapses to zero.
    best_y = np.max(y_hist)
    improvement = mu - best_y
    z = improvement / (sigma + 1e-9)
    ei = improvement * norm.cdf(z) + sigma * norm.pdf(z)
    return X_cand[np.argsort(ei)[-n_suggest:]]

# Example for F3: convert the accumulated history lists to arrays and request
# the two highest-EI candidates.
X_hist_f3 = np.array(queries_F3)
y_hist_f3 = np.array(scores_F3)
suggestions_f3 = suggest_ei(X_hist_f3, y_hist_f3, n_suggest=2)
print("Suggested next queries for F3:", suggestions_f3)

# The same three steps apply to F2, F4, F6 and F8 (template kept commented out):
# X_hist_f2 = np.array(queries_F2)
# y_hist_f2 = np.array(scores_F2)
# suggestions_f2 = suggest_ei(X_hist_f2, y_hist_f2, n_suggest=2)


In [None]:
# Installs the `cma` package that the CMA-ES cells below import.
# NOTE(review): the version is unpinned, and `%pip install` is the form that
# targets the running kernel's environment — consider both before sharing.
!pip install cma


In [None]:
import cma

def suggest_cma_es(fitness_function, dim, x0=None, sigma0=0.3, popsize=8, n_generations=20):
    """Minimise `fitness_function` over the unit cube with CMA-ES.

    Parameters
    ----------
    fitness_function : callable
        Takes a candidate vector and returns the value to MINIMISE (pass the
        negated surrogate to maximise a score).
    dim : int
        Problem dimension; only used to draw a random start when `x0` is None.
    x0 : array-like, optional
        Start point. Lists and tuples are accepted as well as arrays.
    sigma0 : float
        Initial step size.
    popsize : int
        Population size per generation.
    n_generations : int
        Number of CMA-ES iterations to run.

    Returns
    -------
    The best solution evaluated during the run (`es.result.xbest`).
    """
    if x0 is None:
        x0 = np.random.uniform(0, 1, dim)
    # Normalise to a flat float array first: a plain list has no .tolist(),
    # so passing x0=[...] used to raise AttributeError.
    x0 = np.asarray(x0, dtype=float).ravel()
    es = cma.CMAEvolutionStrategy(x0.tolist(), sigma0, {'popsize': popsize, 'bounds': [0, 1]})
    es.optimize(fitness_function, iterations=n_generations)
    return es.result.xbest

# Example for F8: history as arrays.
X_hist_f8, y_hist_f8 = np.array(queries_F8), np.array(scores_F8)

# GP surrogate of F8 (fit() returns the fitted estimator itself).
gp = GaussianProcessRegressor(kernel=Matern(nu=2.5), alpha=1e-6, normalize_y=True).fit(
    X_hist_f8, y_hist_f8
)

# CMA-ES minimises, so hand it the negated surrogate mean.
def surrogate_neg(x):
    """Negative GP mean prediction at the single point `x`."""
    point = np.array(x).reshape(1, -1)
    predicted = gp.predict(point)
    return -predicted[0]

n_dims_f8 = X_hist_f8.shape[1]
best_cma_f8 = suggest_cma_es(surrogate_neg, dim=n_dims_f8)
print("Suggested next query for F8 (CMA-ES):", best_cma_f8)


LHS samples for F1 and F7

In [None]:
from scipy.stats import qmc

def suggest_lhs(dim, n_suggest=5, seed=None):
    """Draw a Latin-hypercube sample of the unit cube.

    Parameters
    ----------
    dim : int
        Number of coordinates per point.
    n_suggest : int
        Number of points to draw.
    seed : int or numpy Generator, optional
        Seed for the sampler; pass a value to get the same points on every
        run. The default (None) keeps the draw random.

    Returns
    -------
    numpy.ndarray of shape (n_suggest, dim) with values in [0, 1).
    """
    sampler = qmc.LatinHypercube(d=dim, seed=seed)
    return sampler.random(n_suggest)

# Example for F1 and F7: the dimension is read off the first stored query.
dim_f1 = len(queries_F1[0])
dim_f7 = len(queries_F7[0])
lhs_f1 = suggest_lhs(dim=dim_f1, n_suggest=5)
lhs_f7 = suggest_lhs(dim=dim_f7, n_suggest=5)

for label, points in (("F1", lhs_f1), ("F7", lhs_f7)):
    print(f"LHS samples for {label}:", points)


In [None]:
# Full loop code: per-function strategy (BO, CMA-ES, LHS) and submission export

In [None]:
from sklearn.gaussian_process import GaussianProcessRegressor
from sklearn.gaussian_process.kernels import Matern
from scipy.stats import norm, qmc
import cma
import numpy as np

# --- Helper Functions ---

def suggest_ei(X_hist, y_hist, n_suggest=1, samples=2000, seed=None):
    """Suggest new query points by maximising Expected Improvement (EI).

    Fits a GP (Matern nu=2.5, alpha=1e-6, normalised targets) to the history,
    evaluates EI for maximisation on `samples` Latin-hypercube points of the
    unit cube and returns the `n_suggest` highest-EI points.

    Parameters
    ----------
    X_hist : array-like of shape (n, d) - inputs evaluated so far.
    y_hist : array-like of shape (n,) - observed scores (higher is better).
    n_suggest : int - number of candidates to return.
    samples : int - size of the candidate pool.
    seed : int or numpy Generator, optional - seed for the candidate sampler;
        None (default) draws a fresh pool on every call.

    Returns
    -------
    numpy.ndarray of shape (n_suggest, d), sorted by ascending EI (best last).

    Raises
    ------
    ValueError if `X_hist` is not 2-D or has a different row count than `y_hist`.
    """
    # Accept plain lists, and flatten targets so an (n, 1) column cannot break
    # the EI broadcast below.
    X_hist = np.asarray(X_hist, dtype=float)
    y_hist = np.asarray(y_hist, dtype=float).ravel()
    if X_hist.ndim != 2 or X_hist.shape[0] != y_hist.shape[0]:
        raise ValueError(
            "X_hist must be (n, d) with one score per row; "
            f"got X_hist {X_hist.shape}, y_hist {y_hist.shape}"
        )

    gp = GaussianProcessRegressor(kernel=Matern(nu=2.5), alpha=1e-6, normalize_y=True)
    gp.fit(X_hist, y_hist)

    sampler = qmc.LatinHypercube(d=X_hist.shape[1], seed=seed)
    X_cand = sampler.random(samples)
    mu, sigma = gp.predict(X_cand, return_std=True)

    # EI = (mu - best) * Phi(z) + sigma * phi(z); 1e-9 guards against sigma == 0.
    best_y = np.max(y_hist)
    improvement = mu - best_y
    z = improvement / (sigma + 1e-9)
    ei = improvement * norm.cdf(z) + sigma * norm.pdf(z)
    return X_cand[np.argsort(ei)[-n_suggest:]]

def suggest_cma_es(fitness_function, dim, x0=None, sigma0=0.3, popsize=8, n_generations=20):
    """Minimise `fitness_function` over the unit cube with CMA-ES.

    `fitness_function` receives a candidate vector and returns the value to
    minimise. `dim` is only used to draw a uniform random start when `x0` is
    None; `x0` may be a list, tuple or array. `sigma0` is the initial step
    size, `popsize` the population size and `n_generations` the number of
    iterations. Returns the best solution evaluated (`es.result.xbest`).
    """
    if x0 is None:
        x0 = np.random.uniform(0, 1, dim)
    # A plain list has no .tolist(); normalise first so x0=[...] works too.
    x0 = np.asarray(x0, dtype=float).ravel()
    es = cma.CMAEvolutionStrategy(x0.tolist(), sigma0, {'popsize': popsize, 'bounds': [0, 1]})
    es.optimize(fitness_function, iterations=n_generations)
    return es.result.xbest

def suggest_lhs(dim, n_suggest=5, seed=None):
    """Draw `n_suggest` Latin-hypercube points in the unit cube [0, 1)^dim.

    `seed` (int or numpy Generator, optional) makes the draw reproducible;
    the default None keeps it random. Returns an array of shape
    (n_suggest, dim).
    """
    sampler = qmc.LatinHypercube(d=dim, seed=seed)
    return sampler.random(n_suggest)

def format_query(vector):
    """Serialise a point as its coordinates, six decimals each, joined by '-'."""
    parts = []
    for value in vector:
        parts.append(format(value, ".6f"))
    return '-'.join(parts)

# --- Strategy Allocation per Function ---
# Queries are collected in function order; the number contributed by each
# function depends on its strategy (F5 contributes none).

final_queries = []

# F1 - LHS only (plateau)
lhs_f1 = suggest_lhs(dim=len(queries_F1[0]), n_suggest=5)
final_queries += [format_query(q) for q in lhs_f1]

# F2 - BO
X_f2, y_f2 = np.array(queries_F2), np.array(scores_F2)
suggestions_f2 = suggest_ei(X_f2, y_f2, n_suggest=1)
final_queries.append(format_query(suggestions_f2[0]))

# F3 - BO
X_f3, y_f3 = np.array(queries_F3), np.array(scores_F3)
suggestions_f3 = suggest_ei(X_f3, y_f3, n_suggest=1)
final_queries.append(format_query(suggestions_f3[0]))

# F4 - BO
X_f4, y_f4 = np.array(queries_F4), np.array(scores_F4)
suggestions_f4 = suggest_ei(X_f4, y_f4, n_suggest=1)
final_queries.append(format_query(suggestions_f4[0]))

# F5 - Skip (already optimized enough)

# F6 - BO + CMA-ES (CMA-ES minimises the negated GP mean)
X_f6, y_f6 = np.array(queries_F6), np.array(scores_F6)
suggestions_f6 = suggest_ei(X_f6, y_f6, n_suggest=1)
gp_f6 = GaussianProcessRegressor(kernel=Matern(nu=2.5), alpha=1e-6, normalize_y=True)
gp_f6.fit(X_f6, y_f6)


def _neg_gp_mean_f6(x):
    """Negative F6 surrogate mean at the single point `x`."""
    return -gp_f6.predict(np.array(x).reshape(1, -1))[0]


best_cma_f6 = suggest_cma_es(_neg_gp_mean_f6, dim=len(queries_F6[0]))
final_queries += [format_query(suggestions_f6[0]), format_query(best_cma_f6)]

# F7 - LHS only (plateau)
lhs_f7 = suggest_lhs(dim=len(queries_F7[0]), n_suggest=5)
final_queries += [format_query(q) for q in lhs_f7]

# F8 - BO + CMA-ES + LHS
X_f8, y_f8 = np.array(queries_F8), np.array(scores_F8)
suggestions_f8 = suggest_ei(X_f8, y_f8, n_suggest=1)
gp_f8 = GaussianProcessRegressor(kernel=Matern(nu=2.5), alpha=1e-6, normalize_y=True)
gp_f8.fit(X_f8, y_f8)


def _neg_gp_mean_f8(x):
    """Negative F8 surrogate mean at the single point `x`."""
    return -gp_f8.predict(np.array(x).reshape(1, -1))[0]


best_cma_f8 = suggest_cma_es(_neg_gp_mean_f8, dim=len(queries_F8[0]))
lhs_f8 = suggest_lhs(dim=len(queries_F8[0]), n_suggest=3)
final_queries += [format_query(suggestions_f8[0]), format_query(best_cma_f8)]
final_queries += [format_query(q) for q in lhs_f8]

# --- Export Submission File ---
# One formatted query per line, in the order collected above.

with open('formatted_submission_round11.txt', 'w') as f:
    f.writelines(q + '\n' for q in final_queries)

print(f"✅ Submission file 'formatted_submission_round11.txt' created with {len(final_queries)} queries.")


In [None]:
import numpy as np
from sklearn.gaussian_process import GaussianProcessRegressor
from sklearn.gaussian_process.kernels import Matern
from scipy.stats import norm, qmc
import cma

# -----------------------------------------
# Load ALL Queries and Scores (Rounds 2–10)
# -----------------------------------------
# queries_Fk[i] is paired positionally with scores_Fk[i].
# NOTE(review): for F3, F4, F5, F6 and F7 the order of the last queries here
# differs from the cumulative lists built in the earlier Round 10 cell
# (baseline lists extended with `+=`), while the score order is unchanged.
# Some scores are therefore attached to a different input in the two cells
# (e.g. F3 index 3 is [0.1456, ...] here but [0.1856, ...] there).
# Confirm which pairing is correct before fitting a surrogate to this data.
queries_F1 = [
    [0.944034, 0.625117], [0.07314, 0.227123], [0.944034, 0.645117], [0.07314, 0.227123]
]
scores_F1 = [
    1.3276592050304897e-69, 4.6015626100136584e-113, 6.933250699386668e-70, -1.560646704467778e-117
]

# F2: all five entries are the same point; only the scores differ.
queries_F2 = [
    [0.768771, 0.105777], [0.768771, 0.105777], [0.768771, 0.105777], [0.768771, 0.105777], [0.768771, 0.105777]
]
scores_F2 = [
    0.27308818432797793, 0.33828687086483444, 0.09312933125476712, 0.23562706897099497, 0.5762427881544481
]

queries_F3 = [
    [0.1456, 0.944454, 0.638992], [0.1856, 0.984454, 0.678992], [0.1856, 0.984454, 0.678992], [0.1456, 0.944454, 0.638992], [0.1856, 0.984454, 0.678992]
]
scores_F3 = [
    -0.10726379301566011, -0.05132522888449367, -0.15237863681921446, -0.14698930107649133, -0.01882082499050317
]

queries_F4 = [
    [0.999363, 0.440618, 0.632742, 0.573562], [0.999363, 0.460618, 0.652742, 0.593562], [0.999363, 0.460618, 0.652742, 0.593562], [0.999363, 0.440618, 0.632742, 0.573562], [0.999363, 0.460618, 0.652742, 0.593562]
]
scores_F4 = [
    -20.949420794091107, -4.55517117590318, -22.018089179607596, -22.018089179607596, -20.33439467922256
]

queries_F5 = [
    [0.290636, 0.824504, 0.580542, 0.368022], [0.270636, 0.824504, 0.580542, 0.368022], [0.270636, 0.824504, 0.580542, 0.368022], [0.290636, 0.824504, 0.580542, 0.368022], [0.270636, 0.824504, 0.580542, 0.368022]
]
scores_F5 = [
    13.377756436400528, 1105.1479642901527, 12.979330536136526, 12.979330536136526, 46.39060566194315
]

queries_F6 = [
    [0.404874, 0.441523, 0.135493, 0.612513, 0.497029], [0.384874, 0.441523, 0.115493, 0.592513, 0.477029], [0.384874, 0.441523, 0.115493, 0.592513, 0.477029], [0.433609, 0.430291, 0.199161, 0.583735, 0.454637], [0.384874, 0.441523, 0.115493, 0.592513, 0.477029]
]
scores_F6 = [
    -1.1857272137575898, -0.5399068214874156, -1.1838236564336748, -1.263310156017569, -1.0581335024921092
]

queries_F7 = [
    [0.304835, 0.615316, 0.890015, 0.497555, 0.892324, 0.798193], [0.94976, 0.670137, 0.267596, 0.089014, 0.282662, 0.021611], [0.94976, 0.670137, 0.267596, 0.089014, 0.282662, 0.021611], [0.94976, 0.670137, 0.267596, 0.089014, 0.282662, 0.021611], [0.304835, 0.615316, 0.890015, 0.497555, 0.892324, 0.798193]
]
scores_F7 = [
    0.023506147714804363, 2.090579827192728, 0.03872554610994976, 0.03872554610994976, 1.5474127567590061
]

queries_F8 = [
    [0.25682, 0.180489, 0.295402, 0.830171, 0.672892, 0.287181, 0.913031, 0.782776], [0.23682, 0.160489, 0.275402, 0.810171, 0.652892, 0.267181, 0.893031, 0.762776], [0.23682, 0.160489, 0.275402, 0.810171, 0.652892, 0.267181, 0.893031, 0.762776], [0.23682, 0.160489, 0.275402, 0.810171, 0.652892, 0.267181, 0.893031, 0.762776], [0.23682, 0.160489, 0.275402, 0.810171, 0.652892, 0.267181, 0.893031, 0.762776]
]
scores_F8 = [
    8.3316420728934, 9.5659163853995, 7.3910313497699, 7.3910313497699, 8.316440887821
]

# -----------------------------------------
# Smart Loop (BO + CMA-ES + LHS)
# -----------------------------------------
def suggest_ei(X_hist, y_hist, n_suggest=1, samples=2000, seed=None):
    """Suggest new query points by maximising Expected Improvement (EI).

    Fits a GP (Matern nu=2.5, alpha=1e-6, normalised targets) to the history,
    evaluates EI for maximisation on `samples` Latin-hypercube points of the
    unit cube and returns the `n_suggest` highest-EI points.

    Parameters
    ----------
    X_hist : array-like of shape (n, d) - inputs evaluated so far.
    y_hist : array-like of shape (n,) - observed scores (higher is better).
    n_suggest : int - number of candidates to return.
    samples : int - size of the candidate pool.
    seed : int or numpy Generator, optional - seed for the candidate sampler;
        None (default) draws a fresh pool on every call.

    Returns
    -------
    numpy.ndarray of shape (n_suggest, d), sorted by ascending EI (best last).

    Raises
    ------
    ValueError if `X_hist` is not 2-D or has a different row count than `y_hist`.
    """
    # Accept plain lists, and flatten targets so an (n, 1) column cannot break
    # the EI broadcast below.
    X_hist = np.asarray(X_hist, dtype=float)
    y_hist = np.asarray(y_hist, dtype=float).ravel()
    if X_hist.ndim != 2 or X_hist.shape[0] != y_hist.shape[0]:
        raise ValueError(
            "X_hist must be (n, d) with one score per row; "
            f"got X_hist {X_hist.shape}, y_hist {y_hist.shape}"
        )

    gp = GaussianProcessRegressor(kernel=Matern(nu=2.5), alpha=1e-6, normalize_y=True)
    gp.fit(X_hist, y_hist)

    sampler = qmc.LatinHypercube(d=X_hist.shape[1], seed=seed)
    X_cand = sampler.random(samples)
    mu, sigma = gp.predict(X_cand, return_std=True)

    # EI = (mu - best) * Phi(z) + sigma * phi(z); 1e-9 guards against sigma == 0.
    best_y = np.max(y_hist)
    improvement = mu - best_y
    z = improvement / (sigma + 1e-9)
    ei = improvement * norm.cdf(z) + sigma * norm.pdf(z)
    return X_cand[np.argsort(ei)[-n_suggest:]]

def suggest_cma_es(fitness_function, dim, x0=None, sigma0=0.3, popsize=8, n_generations=20):
    """Minimise `fitness_function` over the unit cube with CMA-ES.

    `fitness_function` receives a candidate vector and returns the value to
    minimise. `dim` is only used to draw a uniform random start when `x0` is
    None; `x0` may be a list, tuple or array. `sigma0` is the initial step
    size, `popsize` the population size and `n_generations` the number of
    iterations. Returns the best solution evaluated (`es.result.xbest`).
    """
    if x0 is None:
        x0 = np.random.uniform(0, 1, dim)
    # A plain list has no .tolist(); normalise first so x0=[...] works too.
    x0 = np.asarray(x0, dtype=float).ravel()
    es = cma.CMAEvolutionStrategy(x0.tolist(), sigma0, {'popsize': popsize, 'bounds': [0, 1]})
    es.optimize(fitness_function, iterations=n_generations)
    return es.result.xbest

def suggest_lhs(dim, n_suggest=5, seed=None):
    """Draw `n_suggest` Latin-hypercube points in the unit cube [0, 1)^dim.

    `seed` (int or numpy Generator, optional) makes the draw reproducible;
    the default None keeps it random. Returns an array of shape
    (n_suggest, dim).
    """
    sampler = qmc.LatinHypercube(d=dim, seed=seed)
    return sampler.random(n_suggest)

def format_query(vector):
    """Serialise a point as its coordinates, six decimals each, joined by '-'."""
    parts = []
    for value in vector:
        parts.append(format(value, ".6f"))
    return '-'.join(parts)

# Collect the proposed queries function by function. The number of entries per
# function depends on the strategy used, and nothing is generated for F5.
final_queries = []

# F1: five Latin-hypercube points (exploration only).
lhs_f1 = suggest_lhs(dim=len(queries_F1[0]), n_suggest=5)
final_queries.extend([format_query(q) for q in lhs_f1])

# F2: single highest-EI candidate.
suggestions_f2 = suggest_ei(np.array(queries_F2), np.array(scores_F2), n_suggest=1)
final_queries.append(format_query(suggestions_f2[0]))

# F3: single highest-EI candidate.
suggestions_f3 = suggest_ei(np.array(queries_F3), np.array(scores_F3), n_suggest=1)
final_queries.append(format_query(suggestions_f3[0]))

# F4: single highest-EI candidate.
suggestions_f4 = suggest_ei(np.array(queries_F4), np.array(scores_F4), n_suggest=1)
final_queries.append(format_query(suggestions_f4[0]))

# F6: one EI candidate plus one CMA-ES candidate. CMA-ES minimises, so it is
# given the negated mean of a GP fitted separately from the one inside suggest_ei.
suggestions_f6 = suggest_ei(np.array(queries_F6), np.array(scores_F6), n_suggest=1)
gp_f6 = GaussianProcessRegressor(kernel=Matern(nu=2.5), alpha=1e-6, normalize_y=True)
gp_f6.fit(np.array(queries_F6), np.array(scores_F6))
best_cma_f6 = suggest_cma_es(lambda x: -gp_f6.predict(np.array(x).reshape(1, -1))[0], dim=len(queries_F6[0]))
final_queries.append(format_query(suggestions_f6[0]))
final_queries.append(format_query(best_cma_f6))

# F7: five Latin-hypercube points (exploration only).
lhs_f7 = suggest_lhs(dim=len(queries_F7[0]), n_suggest=5)
final_queries.extend([format_query(q) for q in lhs_f7])

# F8: one EI candidate, one CMA-ES candidate and three Latin-hypercube points.
suggestions_f8 = suggest_ei(np.array(queries_F8), np.array(scores_F8), n_suggest=1)
gp_f8 = GaussianProcessRegressor(kernel=Matern(nu=2.5), alpha=1e-6, normalize_y=True)
gp_f8.fit(np.array(queries_F8), np.array(scores_F8))
best_cma_f8 = suggest_cma_es(lambda x: -gp_f8.predict(np.array(x).reshape(1, -1))[0], dim=len(queries_F8[0]))
lhs_f8 = suggest_lhs(dim=len(queries_F8[0]), n_suggest=3)
final_queries.append(format_query(suggestions_f8[0]))
final_queries.append(format_query(best_cma_f8))
final_queries.extend([format_query(q) for q in lhs_f8])

# Display all queries
final_queries


# Second Initial Data Integration.

## Round 10b - Integration of Second Initial Data  
In Round 10b, we integrated the new batch of initial queries provided by the capstone team into our Round 10 pipeline.  
This allows us to retrain our surrogates on an expanded dataset and test if the newly provided data changes our proposal engine outputs.


In [None]:
# 1. Upload the zipped second set of initial data through Colab's file picker.
#    `uploaded` keeps whatever files.upload() returns for the chosen file(s).
from google.colab import files
uploaded = files.upload()



Saving IMP-PCMLAI-capstone-second-set-of-initial_data.zip to IMP-PCMLAI-capstone-second-set-of-initial_data (1).zip


In [None]:
import zipfile

# Unpack the archive into ./initial_data2 (everything in it, including the
# macOS "__MACOSX" metadata entries that the folder listing later shows).
# NOTE(review): this opens the canonical file name, but the upload output
# above reports the latest upload was saved with a " (1)" suffix. This call
# therefore reads the copy that already existed — confirm both copies hold
# the same data.
with zipfile.ZipFile('IMP-PCMLAI-capstone-second-set-of-initial_data.zip', 'r') as zip_ref:
    zip_ref.extractall('initial_data2')

print("✅ Folder extracted as 'initial_data2'.")


✅ Folder extracted as 'initial_data2'.


In [None]:
import os

# Print the extracted folder tree. The loop variables are deliberately NOT
# named `files`: that would rebind the `google.colab.files` module imported
# earlier in the notebook and break any later files.upload()/files.download().
for root, _dirs, filenames in os.walk("initial_data2"):
    print(f"📂 {root}")
    for filename in filenames:
        print(f"   └── {filename}")


📂 initial_data2
📂 initial_data2/__MACOSX
   └── ._initial_data2
📂 initial_data2/__MACOSX/initial_data2
   └── ._function_3
   └── ._function_5
   └── ._function_4
   └── ._function_7
   └── ._function_6
   └── ._function_2
   └── ._function_8
   └── ._function_1
📂 initial_data2/initial_data2
📂 initial_data2/initial_data2/function_8
   └── initial_outputs.npy
   └── initial_inputs.npy
📂 initial_data2/initial_data2/function_5
   └── initial_outputs.npy
   └── initial_inputs.npy
📂 initial_data2/initial_data2/function_7
   └── initial_outputs.npy
   └── initial_inputs.npy
📂 initial_data2/initial_data2/function_4
   └── initial_outputs.npy
   └── initial_inputs.npy
📂 initial_data2/initial_data2/function_1
   └── initial_outputs.npy
   └── initial_inputs.npy
📂 initial_data2/initial_data2/function_6
   └── initial_outputs.npy
   └── initial_inputs.npy
📂 initial_data2/initial_data2/function_3
   └── initial_outputs.npy
   └── initial_inputs.npy
📂 initial_data2/initial_data2/function_2
   └── i

In [None]:
import numpy as np
from sklearn.gaussian_process import GaussianProcessRegressor
from sklearn.gaussian_process.kernels import Matern, WhiteKernel
from scipy.stats import norm, qmc
import cma

# --- Expected dimensions per function ---
expected_dims = {'F1': 2, 'F2': 2, 'F3': 3, 'F4': 4, 'F5': 4, 'F6': 5, 'F7': 6, 'F8': 8}

# --- Step 1: Reload clean second initial data ONLY ---
# Each function folder holds two arrays: the inputs and the matching outputs.
root_folder = 'initial_data2/initial_data2'
queries, scores = {}, {}

for f_index, f_name in enumerate(expected_dims, start=1):
    f_folder = f'{root_folder}/function_{f_index}'

    inputs = np.load(f'{f_folder}/initial_inputs.npy')
    outputs = np.load(f'{f_folder}/initial_outputs.npy')

    queries[f_name], scores[f_name] = inputs, outputs

    # The input matrix must have one column per dimension of the function.
    n_columns = inputs.shape[1]
    assert n_columns == expected_dims[f_name], f"Dimension mismatch in {f_name}"

print("✅ Clean isolated datasets loaded and verified per function.")

# --- Step 2: Safe query suggestion functions ---
def suggest_ei_safe(dim, X_hist, y_hist, n_suggest=1, samples=10000, seed=None):
    """Suggest query points in [0, 1)^dim by maximising Expected Improvement.

    Fits a GP (Matern nu=1.5 plus a small white-noise term, normalised
    targets) to the history, scores `samples` Latin-hypercube candidates with
    EI for maximisation and returns the `n_suggest` highest-scoring ones.

    Parameters
    ----------
    dim : int - dimension of the function; must equal the width of `X_hist`.
    X_hist : array-like of shape (n, dim) - inputs evaluated so far.
    y_hist : array-like of shape (n,) - observed scores (higher is better).
    n_suggest : int - number of candidates to return.
    samples : int - size of the candidate pool.
    seed : int or numpy Generator, optional - seed for the candidate sampler;
        None (default) draws a fresh pool on every call.

    Returns
    -------
    numpy.ndarray of shape (n_suggest, dim), sorted by ascending EI (best last).

    Raises
    ------
    ValueError if `X_hist` is not (n, dim) or its row count differs from `y_hist`.
    """
    X_hist = np.asarray(X_hist, dtype=float)
    y_hist = np.asarray(y_hist, dtype=float).ravel()
    # Enforce the dimension up front so that history of the wrong function
    # fails here with a clear message instead of inside gp.predict().
    if X_hist.ndim != 2 or X_hist.shape[1] != dim:
        raise ValueError(f"X_hist must have shape (n, {dim}), got {X_hist.shape}")
    if X_hist.shape[0] != y_hist.shape[0]:
        raise ValueError(
            f"X_hist has {X_hist.shape[0]} rows but y_hist has {y_hist.shape[0]} values"
        )

    # The start value of a hyper-parameter has to lie inside its bounds.
    # WhiteKernel's default lower bound is 1e-5, above the intended 1e-6, so
    # the bounds are widened explicitly.
    kernel = Matern(nu=1.5) + WhiteKernel(noise_level=1e-6, noise_level_bounds=(1e-8, 1e5))
    gp = GaussianProcessRegressor(kernel=kernel, normalize_y=True)
    gp.fit(X_hist, y_hist)

    sampler = qmc.LatinHypercube(d=dim, seed=seed)
    X_cand = sampler.random(samples)
    mu, sigma = gp.predict(X_cand, return_std=True)

    # EI = (mu - best) * Phi(z) + sigma * phi(z); 1e-9 guards against sigma == 0.
    best_y = np.max(y_hist)
    improvement = mu - best_y
    z = improvement / (sigma + 1e-9)
    ei = improvement * norm.cdf(z) + sigma * norm.pdf(z)
    return X_cand[np.argsort(ei)[-n_suggest:]]

def suggest_lhs(dim, n_suggest=1, seed=None):
    """Draw `n_suggest` Latin-hypercube points in the unit cube [0, 1)^dim.

    `seed` (int or numpy Generator, optional) makes the draw reproducible;
    the default None keeps it random. Returns an array of shape
    (n_suggest, dim).
    """
    sampler = qmc.LatinHypercube(d=dim, seed=seed)
    return sampler.random(n_suggest)

def format_query(vector):
    """Serialise a point as its coordinates, six decimals each, joined by '-'."""
    parts = []
    for value in vector:
        parts.append(format(value, ".6f"))
    return '-'.join(parts)

# --- Step 3: Generate clean Round 10b submission ---
# Queries are generated strictly in F1..F8 order so that line i of the
# submission always belongs to function F(i). Grouping by strategy (the LHS
# functions first, then the EI ones) put F7's 6-D point on line 2 and shifted
# every later line to the wrong function.
LHS_FUNCTIONS = {'F1', 'F7'}  # pure LHS exploration; all other functions use EI

final_queries_10b_clean = []
for f_name in expected_dims:  # dicts keep insertion order: F1, F2, ..., F8
    dim = expected_dims[f_name]
    if f_name in LHS_FUNCTIONS:
        point = suggest_lhs(dim=dim, n_suggest=1)[0]
    else:
        point = suggest_ei_safe(dim, queries[f_name], scores[f_name], n_suggest=1)[0]
    final_queries_10b_clean.append(format_query(point))

# Confirm output (labels come from the same ordering used to build the list)
print("✅ Clean Round 10b submission (1 per function F1–F8):")
for f_name, q in zip(expected_dims, final_queries_10b_clean):
    print(f"{f_name}: {q}")

# --- Step 4: Save clean submission ---
submission_file_10b_clean = 'formatted_submission_round10b_clean.txt'
with open(submission_file_10b_clean, 'w') as f:
    for line in final_queries_10b_clean:
        f.write(line + '\n')

print(f"\n✅ Clean and dimension-forced submission file created: {submission_file_10b_clean}")



✅ Clean isolated datasets loaded and verified per function.




✅ Clean Round 10b submission (1 per function F1–F8):
F1: 0.154736-0.580622
F2: 0.148799-0.233206-0.942751-0.737382-0.076690-0.333501
F3: 0.309781-0.054693
F4: 0.858120-0.694383-0.741292
F5: 0.346940-0.417577-0.354331-0.507312
F6: 0.152870-0.636437-0.526862-0.752732
F7: 0.987589-0.643668-0.575757-0.721036-0.569814
F8: 0.024319-0.254796-0.136871-0.124977-0.454085-0.548966-0.104045-0.528358

✅ Clean and dimension-forced submission file created: formatted_submission_round10b_clean.txt




In [None]:
# Show the (n_points, n_dims) shape of every function's initial input array.
for f_index in range(1, 9):
    inputs = np.load(f'initial_data2/initial_data2/function_{f_index}/initial_inputs.npy')
    print(f"F{f_index} inputs shape: {inputs.shape}")


F1 inputs shape: (20, 2)
F2 inputs shape: (10, 2)
F3 inputs shape: (15, 3)
F4 inputs shape: (30, 4)
F5 inputs shape: (20, 4)
F6 inputs shape: (20, 5)
F7 inputs shape: (30, 6)
F8 inputs shape: (40, 8)


In [None]:
# Re-create the submission file from the queries currently held in
# final_queries_10b_clean: one query per line, each ending in a newline.
with open('formatted_submission_round10b_clean.txt', 'w') as f:
    f.write(''.join(line + '\n' for line in final_queries_10b_clean))

print("✅ Clean submission file recreated: formatted_submission_round10b_clean.txt")


✅ Clean submission file recreated: formatted_submission_round10b_clean.txt


In [None]:
import numpy as np
from sklearn.gaussian_process import GaussianProcessRegressor
from sklearn.gaussian_process.kernels import Matern, WhiteKernel
from scipy.stats import norm, qmc

# --- Expected dimensions per function ---
expected_dims = {'F1': 2, 'F2': 2, 'F3': 3, 'F4': 4, 'F5': 4, 'F6': 5, 'F7': 6, 'F8': 8}

# --- Load clean data ---
# One folder per function, each holding an inputs array and an outputs array.
root_folder = 'initial_data2/initial_data2'
queries, scores = {}, {}

for f_index, f_name in enumerate(expected_dims, start=1):
    f_folder = f'{root_folder}/function_{f_index}'
    inputs = np.load(f'{f_folder}/initial_inputs.npy')
    outputs = np.load(f'{f_folder}/initial_outputs.npy')
    queries[f_name], scores[f_name] = inputs, outputs
    # Column count of the inputs must equal the function's dimension.
    n_columns = inputs.shape[1]
    assert n_columns == expected_dims[f_name], f"Dimension mismatch in {f_name}"

print("✅ Clean isolated datasets loaded and verified per function.\n")

# --- Safe suggestion functions ---
def suggest_ei_safe(dim, X_hist, y_hist, n_suggest=1, samples=10000, seed=None):
    """Suggest query points in [0, 1)^dim by maximising Expected Improvement.

    Fits a GP (Matern nu=1.5 plus a small white-noise term, normalised
    targets) to the history, scores `samples` Latin-hypercube candidates with
    EI for maximisation and returns the `n_suggest` highest-scoring ones.

    Parameters
    ----------
    dim : int - dimension of the function; must equal the width of `X_hist`.
    X_hist : array-like of shape (n, dim) - inputs evaluated so far.
    y_hist : array-like of shape (n,) - observed scores (higher is better).
    n_suggest : int - number of candidates to return.
    samples : int - size of the candidate pool.
    seed : int or numpy Generator, optional - seed for the candidate sampler;
        None (default) draws a fresh pool on every call.

    Returns
    -------
    numpy.ndarray of shape (n_suggest, dim), sorted by ascending EI (best last).

    Raises
    ------
    ValueError if `X_hist` is not (n, dim) or its row count differs from `y_hist`.
    """
    X_hist = np.asarray(X_hist, dtype=float)
    y_hist = np.asarray(y_hist, dtype=float).ravel()
    # Enforce the dimension up front so that history of the wrong function
    # fails here with a clear message instead of inside gp.predict().
    if X_hist.ndim != 2 or X_hist.shape[1] != dim:
        raise ValueError(f"X_hist must have shape (n, {dim}), got {X_hist.shape}")
    if X_hist.shape[0] != y_hist.shape[0]:
        raise ValueError(
            f"X_hist has {X_hist.shape[0]} rows but y_hist has {y_hist.shape[0]} values"
        )

    # The start value of a hyper-parameter has to lie inside its bounds.
    # WhiteKernel's default lower bound is 1e-5, above the intended 1e-6, so
    # the bounds are widened explicitly.
    kernel = Matern(nu=1.5) + WhiteKernel(noise_level=1e-6, noise_level_bounds=(1e-8, 1e5))
    gp = GaussianProcessRegressor(kernel=kernel, normalize_y=True)
    gp.fit(X_hist, y_hist)

    sampler = qmc.LatinHypercube(d=dim, seed=seed)
    X_cand = sampler.random(samples)
    mu, sigma = gp.predict(X_cand, return_std=True)

    # EI = (mu - best) * Phi(z) + sigma * phi(z); 1e-9 guards against sigma == 0.
    best_y = np.max(y_hist)
    improvement = mu - best_y
    z = improvement / (sigma + 1e-9)
    ei = improvement * norm.cdf(z) + sigma * norm.pdf(z)
    return X_cand[np.argsort(ei)[-n_suggest:]]

def suggest_lhs(dim, n_suggest=1, seed=None):
    """Draw `n_suggest` Latin-hypercube points in the unit cube [0, 1)^dim.

    `seed` (int or numpy Generator, optional) makes the draw reproducible;
    the default None keeps it random. Returns an array of shape
    (n_suggest, dim).
    """
    sampler = qmc.LatinHypercube(d=dim, seed=seed)
    return sampler.random(n_suggest)

def format_query(vector):
    """Serialise a point as its coordinates, six decimals each, joined by '-'."""
    parts = []
    for value in vector:
        parts.append(format(value, ".6f"))
    return '-'.join(parts)

# --- Generate queries ---
# Build the list strictly in F1..F8 order: the validator (and the submission
# format) treat line i as function F(i). Generating the LHS functions (F1, F7)
# first and the EI functions afterwards placed F7's point on line 2 and
# misaligned every following line.
LHS_FUNCTIONS = {'F1', 'F7'}  # pure LHS exploration; all other functions use EI

final_queries_10b_clean = []
for f_name in expected_dims:  # dicts keep insertion order: F1, F2, ..., F8
    dim = expected_dims[f_name]
    if f_name in LHS_FUNCTIONS:
        point = suggest_lhs(dim=dim, n_suggest=1)[0]
    else:
        point = suggest_ei_safe(dim, queries[f_name], scores[f_name], n_suggest=1)[0]
    final_queries_10b_clean.append(format_query(point))

# --- Save ---
submission_file_10b_clean = 'formatted_submission_round10b_clean.txt'
with open(submission_file_10b_clean, 'w') as f:
    for line in final_queries_10b_clean:
        f.write(line + '\n')

print(f"✅ Clean and dimension-forced submission file created: {submission_file_10b_clean}\n")

# --- Validate and print ---
def validate_submission_format_and_print(filename, dims=None):
    """Check a submission file line by line and print a report.

    Line i of the file must hold the query of the i-th function in `dims`,
    written as dash-separated values with one value per dimension.

    Parameters
    ----------
    filename : str
        Path of the submission file to check.
    dims : dict, optional
        Ordered mapping of function name to dimension. Defaults to the
        module-level `expected_dims`.

    Returns
    -------
    None. The outcome is reported through printed messages only.
    """
    if dims is None:
        dims = expected_dims
    expected_counts = list(dims.values())
    n_expected = len(expected_counts)

    valid = True
    with open(filename, 'r') as f:
        lines = [line.strip() for line in f]

    if len(lines) != n_expected:
        print(f"❌ ERROR: Submission file should have exactly {n_expected} lines, found {len(lines)}.")
        valid = False

    for i, line in enumerate(lines):
        # Surplus lines have no function to compare against; report them
        # instead of indexing past the end of the expected dimensions.
        if i >= n_expected:
            print(f"❌ ERROR: Line {i+1} has no matching function: {line}")
            valid = False
            continue
        # A blank line holds zero values (counting separators alone would say 1).
        num_values = line.count('-') + 1 if line else 0
        expected = expected_counts[i]
        print(f"F{i+1}: {line}  -->  {num_values} values (Expected: {expected})")
        if num_values != expected:
            print(f"❌ ERROR: Line {i+1} (F{i+1}) has wrong number of values.")
            valid = False

    if valid:
        print(f"\n✅ Submission file passed all strict validation checks ({n_expected} lines, correct dimensions, clean format).")
    else:
        print("\n❌ Issues found. Please correct the file before submission.")

# Run validation
validate_submission_format_and_print(submission_file_10b_clean)


✅ Clean isolated datasets loaded and verified per function.





✅ Clean and dimension-forced submission file created: formatted_submission_round10b_clean.txt

F1: 0.026652-0.513638  -->  2 values (Expected: 2)
F2: 0.246985-0.760921-0.909485-0.444078-0.877780-0.529697  -->  6 values (Expected: 2)
❌ ERROR: Line 2 (F2) has wrong number of values.
F3: 0.736713-0.154028  -->  2 values (Expected: 3)
❌ ERROR: Line 3 (F3) has wrong number of values.
F4: 0.192789-0.044596-0.529195  -->  3 values (Expected: 4)
❌ ERROR: Line 4 (F4) has wrong number of values.
F5: 0.272172-0.418113-0.345078-0.505945  -->  4 values (Expected: 4)
F6: 0.423388-0.591971-0.547197-0.567750  -->  4 values (Expected: 5)
❌ ERROR: Line 6 (F6) has wrong number of values.
F7: 0.067956-0.357918-0.447384-0.963809-0.924844  -->  5 values (Expected: 6)
❌ ERROR: Line 7 (F7) has wrong number of values.
F8: 0.063103-0.075034-0.147500-0.059923-0.679779-0.486507-0.016103-0.520590  -->  8 values (Expected: 8)

❌ Issues found. Please correct the file before submission.




In [None]:
# Hardcoded clean block with enforced correct dimensions per function.
# One suggestion is generated per function, strictly in F1..F8 order, so the
# position of every query in the list matches its function number.

lhs_f1 = suggest_lhs(dim=2, n_suggest=1)                                                # F1 - LHS
ei_f2 = suggest_ei_safe(dim=2, X_hist=queries['F2'], y_hist=scores['F2'], n_suggest=1)  # F2 - EI Safe
ei_f3 = suggest_ei_safe(dim=3, X_hist=queries['F3'], y_hist=scores['F3'], n_suggest=1)  # F3 - EI Safe
ei_f4 = suggest_ei_safe(dim=4, X_hist=queries['F4'], y_hist=scores['F4'], n_suggest=1)  # F4 - EI Safe
ei_f5 = suggest_ei_safe(dim=4, X_hist=queries['F5'], y_hist=scores['F5'], n_suggest=1)  # F5 - EI Safe
ei_f6 = suggest_ei_safe(dim=5, X_hist=queries['F6'], y_hist=scores['F6'], n_suggest=1)  # F6 - EI Safe
lhs_f7 = suggest_lhs(dim=6, n_suggest=1)                                                # F7 - LHS
ei_f8 = suggest_ei_safe(dim=8, X_hist=queries['F8'], y_hist=scores['F8'], n_suggest=1)  # F8 - EI Safe

# Each helper returns a one-row batch; format that single row.
final_queries_10b_clean = [
    format_query(batch[0])
    for batch in (lhs_f1, ei_f2, ei_f3, ei_f4, ei_f5, ei_f6, lhs_f7, ei_f8)
]

# --- Save ---
submission_file_10b_clean = 'formatted_submission_round10b_clean.txt'
with open(submission_file_10b_clean, 'w') as f:
    f.write(''.join(line + '\n' for line in final_queries_10b_clean))

print("✅ Submission file recreated.")

# --- Validate and print ---
validate_submission_format_and_print(submission_file_10b_clean)




✅ Submission file recreated.
F1: 0.299067-0.165444  -->  2 values (Expected: 2)
F2: 0.516638-0.675606  -->  2 values (Expected: 2)
F3: 0.545939-0.026068-0.337518  -->  3 values (Expected: 3)
F4: 0.310098-0.365623-0.397439-0.511497  -->  4 values (Expected: 4)
F5: 0.697139-0.863625-0.802958-0.004318  -->  4 values (Expected: 4)
F6: 0.572663-0.944633-0.098631-0.990648-0.022670  -->  5 values (Expected: 5)
F7: 0.812471-0.064564-0.569280-0.660293-0.814823-0.906764  -->  6 values (Expected: 6)
F8: 0.094126-0.103311-0.031557-0.156631-0.600022-0.417871-0.077991-0.839494  -->  8 values (Expected: 8)

✅ Submission file passed all strict validation checks (8 lines, correct dimensions, clean format).




In [None]:
import numpy as np
from sklearn.gaussian_process import GaussianProcessRegressor
from sklearn.gaussian_process.kernels import Matern, WhiteKernel
from scipy.stats import norm, qmc

# --- Expected dimensions per function ---
expected_dims = {'F1': 2, 'F2': 2, 'F3': 3, 'F4': 4, 'F5': 4, 'F6': 5, 'F7': 6, 'F8': 8}

# --- Step 1: Containers for the cumulative history of every function ---
# Both dicts start out EMPTY. To train on earlier rounds, fill them with your
# own full query/score history before Step 2 runs, e.g.
#   queries_F['F1'] = [[...], [...], ...]   scores_F['F1'] = [score1, score2, ...]
queries_F = {name: [] for name in expected_dims}
scores_F = {name: [] for name in expected_dims}

# --- Step 2: Load and append new initial data ---
root_folder = 'initial_data2/initial_data2'

for f_index, f_name in enumerate(expected_dims, start=1):
    f_folder = f'{root_folder}/function_{f_index}'
    new_inputs = np.load(f'{f_folder}/initial_inputs.npy')
    new_outputs = np.load(f'{f_folder}/initial_outputs.npy')
    queries_F[f_name].extend(new_inputs.tolist())
    scores_F[f_name].extend(new_outputs.tolist())
    # Every stored query of this function must have the declared number of columns.
    n_columns = np.array(queries_F[f_name]).shape[1]
    assert n_columns == expected_dims[f_name], f"Dimension mismatch in {f_name}"

print("✅ All rounds and new data successfully integrated per function.\n")

# --- Step 3: Safe suggestion functions ---
def suggest_ei_safe(dim, X_hist, y_hist, n_suggest=1, samples=10000):
    """Pick candidates with the highest Expected Improvement (maximisation).

    A Gaussian process (Matern nu=1.5 plus a small white-noise term, targets
    normalised) is fitted to the history. ``samples`` Latin-Hypercube points
    of dimension ``dim`` are scored with EI relative to the best observed
    score, and the ``n_suggest`` highest-scoring rows are returned in
    ascending EI order (the last row is the top candidate).
    """
    kernel = Matern(nu=1.5) + WhiteKernel(noise_level=1e-6)
    surrogate = GaussianProcessRegressor(kernel=kernel, normalize_y=True)
    surrogate.fit(X_hist, y_hist)

    candidates = qmc.LatinHypercube(d=dim).random(samples)
    mean, std = surrogate.predict(candidates, return_std=True)

    incumbent = np.max(y_hist)
    # The 1e-9 only guards the division; the EI formula itself uses the raw std.
    z_score = (mean - incumbent) / (std + 1e-9)
    expected_improvement = (mean - incumbent) * norm.cdf(z_score) + std * norm.pdf(z_score)

    top_rows = np.argsort(expected_improvement)[-n_suggest:]
    return candidates[top_rows]

def suggest_lhs(dim, n_suggest=1):
    """Draw ``n_suggest`` Latin-Hypercube points in ``dim`` dimensions (pure exploration)."""
    return qmc.LatinHypercube(d=dim).random(n_suggest)

def format_query(vector):
    """Render a query as dash-separated values with six decimals, e.g. ``0.500000-0.250000``."""
    return '-'.join(map("{:.6f}".format, vector))

# --- Step 4: Generate queries ---
# The submission file is positional: line i must hold the query for F{i}.
# Walk expected_dims in its F1..F8 key order so every query lands on its own
# line. (Appending F1 and F7 first and the EI functions afterwards shifts
# F2..F7 onto the wrong lines and fails the dimension validation.)
final_queries_10b_clean = []

for f, dim in expected_dims.items():
    if f in ('F1', 'F7'):
        # F1 and F7 - LHS only (pure exploration, no surrogate)
        suggestion = suggest_lhs(dim, 1)
    else:
        # F2-F6, F8 - EI after retraining on the full history
        X_hist = np.array(queries_F[f])
        y_hist = np.array(scores_F[f])
        suggestion = suggest_ei_safe(dim, X_hist, y_hist, n_suggest=1)
    final_queries_10b_clean.append(format_query(suggestion[0]))

# --- Step 5: Save submission ---
# One query per line, in the order they were appended.
submission_file_10b_clean = 'formatted_submission_round10b_clean.txt'
with open(submission_file_10b_clean, 'w') as f:
    f.writelines(f"{line}\n" for line in final_queries_10b_clean)

print("✅ Submission file created: formatted_submission_round10b_clean.txt\n")

# --- Step 6: Validate and print ---
def validate_submission_format_and_print(filename):
    """Check a submission file line by line and print a per-function report.

    The file must hold one dash-separated query per function, in the key order
    of the module-level ``expected_dims`` (first key on the first line). For
    each line the number of values is compared with the expected
    dimensionality, and every value must parse as a float. Surplus lines are
    reported as errors. Nothing is returned; the verdict is printed.
    """
    valid = True
    with open(filename, 'r') as f:
        lines = [line.strip() for line in f.readlines()]

    expected_counts = list(expected_dims.values())
    n_expected = len(expected_counts)

    if len(lines) != n_expected:
        print(f"❌ ERROR: Submission file should have exactly {n_expected} lines, found {len(lines)}.")
        valid = False

    for i, line in enumerate(lines):
        if i >= n_expected:
            # Surplus line: report it instead of failing with IndexError below.
            print(f"❌ ERROR: Line {i+1} is not expected (only {n_expected} functions are defined): {line}")
            valid = False
            continue

        tokens = line.split('-')
        num_values = len(tokens)  # equals line.count('-') + 1
        expected = expected_counts[i]
        print(f"F{i+1}: {line}  -->  {num_values} values (Expected: {expected})")
        if num_values != expected:
            print(f"❌ ERROR: Line {i+1} (F{i+1}) has wrong number of values.")
            valid = False
            continue

        # "Clean format" also means every token is an actual number.
        try:
            for token in tokens:
                float(token)
        except ValueError:
            print(f"❌ ERROR: Line {i+1} (F{i+1}) contains a value that is not a number.")
            valid = False

    if valid:
        print("\n✅ Submission file passed all strict validation checks (8 lines, correct dimensions, clean format).")
    else:
        print("\n❌ Issues found. Please correct the file before submission.")

validate_submission_format_and_print(submission_file_10b_clean)


✅ All rounds and new data successfully integrated per function.





✅ Submission file created: formatted_submission_round10b_clean.txt

F1: 0.783216-0.655134  -->  2 values (Expected: 2)
F2: 0.518400-0.952677-0.681280-0.277810-0.865838-0.030272  -->  6 values (Expected: 2)
❌ ERROR: Line 2 (F2) has wrong number of values.
F3: 0.565266-0.548775  -->  2 values (Expected: 3)
❌ ERROR: Line 3 (F3) has wrong number of values.
F4: 0.195510-0.196283-0.621999  -->  3 values (Expected: 4)
❌ ERROR: Line 4 (F4) has wrong number of values.
F5: 0.363661-0.406324-0.375193-0.431906  -->  4 values (Expected: 4)
F6: 0.252818-0.946147-0.487879-0.300221  -->  4 values (Expected: 5)
❌ ERROR: Line 6 (F6) has wrong number of values.
F7: 0.627421-0.706013-0.895323-0.701697-0.114611  -->  5 values (Expected: 6)
❌ ERROR: Line 7 (F7) has wrong number of values.
F8: 0.048262-0.042542-0.089672-0.125161-0.783651-0.436627-0.181279-0.416319  -->  8 values (Expected: 8)

❌ Issues found. Please correct the file before submission.


In [None]:
import numpy as np
from sklearn.gaussian_process import GaussianProcessRegressor
from sklearn.gaussian_process.kernels import Matern, WhiteKernel
from scipy.stats import norm, qmc

# --- Expected dimensions ---
expected_dims = {'F1': 2, 'F2': 2, 'F3': 3, 'F4': 4, 'F5': 4, 'F6': 5, 'F7': 6, 'F8': 8}

# --- Step 1: Load only clean, known data ---
# Each function gets its own isolated {'queries', 'scores'} entry, read
# straight from the .npy files under function_<index>.
root_folder = 'initial_data2/initial_data2'
clean_data_per_function = {}

for f_index, f_name in enumerate(expected_dims, start=1):
    f_folder = f'{root_folder}/function_{f_index}'
    inputs = np.load(f'{f_folder}/initial_inputs.npy')
    outputs = np.load(f'{f_folder}/initial_outputs.npy')
    # Stop right here if the file's column count disagrees with the declared dimension.
    n_columns = inputs.shape[1]
    assert n_columns == expected_dims[f_name], f"Dimension mismatch in {f_name}"
    clean_data_per_function[f_name] = dict(queries=inputs, scores=outputs)

print("✅ Loaded clean, isolated datasets per function.\n")

# --- Suggestion helpers ---
def suggest_ei_safe(dim, X_hist, y_hist, n_suggest=1, samples=10000):
    """Expected-Improvement suggestion with a dimension guard.

    Asserts that ``X_hist`` has exactly ``dim`` columns, fits a Gaussian
    process (Matern nu=1.5 + white noise, normalised targets) to the history,
    scores ``samples`` Latin-Hypercube candidates by EI against the best
    observed score, and returns the ``n_suggest`` best rows in ascending EI
    order (last row = highest EI).
    """
    n_features = X_hist.shape[1]
    assert n_features == dim, f"Dataset mismatch inside suggest_ei_safe (expected {dim})"

    model = GaussianProcessRegressor(
        kernel=Matern(nu=1.5) + WhiteKernel(noise_level=1e-6),
        normalize_y=True,
    )
    model.fit(X_hist, y_hist)

    pool = qmc.LatinHypercube(d=dim).random(samples)
    pred_mean, pred_std = model.predict(pool, return_std=True)

    y_best = np.max(y_hist)
    # Small epsilon keeps the division finite where the GP is certain.
    standardised = (pred_mean - y_best) / (pred_std + 1e-9)
    ei_values = (pred_mean - y_best) * norm.cdf(standardised) + pred_std * norm.pdf(standardised)

    ranking = np.argsort(ei_values)
    return pool[ranking[-n_suggest:]]

def suggest_lhs(dim, n_suggest=1):
    """Return ``n_suggest`` rows sampled by a fresh ``dim``-dimensional Latin Hypercube."""
    engine = qmc.LatinHypercube(d=dim)
    return engine.random(n_suggest)

def format_query(vector):
    """Join the coordinates with dashes, each printed with six decimal places."""
    return '-'.join(format(coordinate, ".6f") for coordinate in vector)

# --- Step 2: Generate per function safely ---
# One query per function, appended in F1..F8 order because the submission
# file is positional. The dimension always comes from expected_dims, so it
# cannot drift from the validator's expectation the way hand-typed numbers can.
#   F1, F7      -> pure LHS (exploration only)
#   all others  -> EI on that function's own clean dataset
final_queries_10b_clean = []

for f_name, dim in expected_dims.items():
    if f_name in ('F1', 'F7'):
        query = suggest_lhs(dim, 1)[0]
    else:
        data = clean_data_per_function[f_name]
        query = suggest_ei_safe(dim, data['queries'], data['scores'], 1)[0]
    final_queries_10b_clean.append(format_query(query))

# --- Step 3: Save ---
# Write the queries one per line, preserving list order.
submission_file_10b_clean = 'formatted_submission_round10b_clean.txt'
with open(submission_file_10b_clean, 'w') as f:
    f.write(''.join(line + '\n' for line in final_queries_10b_clean))

print("✅ Submission file created: formatted_submission_round10b_clean.txt\n")

# --- Step 4: Validate and print ---
def validate_submission_format_and_print(filename):
    """Validate a dash-separated submission file and print a per-line report.

    Line ``i`` (1-based) is checked against the ``i``-th value of the
    module-level ``expected_dims``: the number of dash-separated tokens must
    match and every token must parse as a float. Lines beyond the number of
    known functions are reported as errors. The function prints its verdict
    and returns nothing.
    """
    valid = True
    with open(filename, 'r') as f:
        lines = [line.strip() for line in f.readlines()]

    expected_counts = list(expected_dims.values())
    n_expected = len(expected_counts)

    if len(lines) != n_expected:
        print(f"❌ ERROR: Submission file should have exactly {n_expected} lines, found {len(lines)}.")
        valid = False

    for i, line in enumerate(lines):
        if i >= n_expected:
            # Without this guard a surplus line raises IndexError on the lookup below.
            print(f"❌ ERROR: Line {i+1} is not expected (only {n_expected} functions are defined): {line}")
            valid = False
            continue

        tokens = line.split('-')
        num_values = len(tokens)  # equals line.count('-') + 1
        expected = expected_counts[i]
        print(f"F{i+1}: {line}  -->  {num_values} values (Expected: {expected})")
        if num_values != expected:
            print(f"❌ ERROR: Line {i+1} (F{i+1}) has wrong number of values.")
            valid = False
            continue

        # A correct token count is not enough: each token must be numeric.
        try:
            for token in tokens:
                float(token)
        except ValueError:
            print(f"❌ ERROR: Line {i+1} (F{i+1}) contains a value that is not a number.")
            valid = False

    if valid:
        print("\n✅ Submission file passed all strict validation checks (8 lines, correct dimensions, clean format).")
    else:
        print("\n❌ Issues found. Please correct the file before submission.")

validate_submission_format_and_print(submission_file_10b_clean)


✅ Loaded clean, isolated datasets per function.

✅ Submission file created: formatted_submission_round10b_clean.txt

F1: 0.456851-0.079824  -->  2 values (Expected: 2)
F2: 0.787800-0.035231  -->  2 values (Expected: 2)
F3: 0.321450-0.823208-0.409574  -->  3 values (Expected: 3)
F4: 0.350240-0.366425-0.396955-0.442771  -->  4 values (Expected: 4)
F5: 0.712434-0.449440-0.225710-0.405942  -->  4 values (Expected: 4)
F6: 0.638805-0.693836-0.473247-0.366139-0.482008  -->  5 values (Expected: 5)
F7: 0.538231-0.530784-0.595488-0.620346-0.413495-0.619624  -->  6 values (Expected: 6)
F8: 0.157267-0.128954-0.047746-0.033620-0.520267-0.741193-0.182530-0.458689  -->  8 values (Expected: 8)

✅ Submission file passed all strict validation checks (8 lines, correct dimensions, clean format).




In [None]:
## Round 11 Preparation

**Strategy Type (Planned):** Advanced Iterative Optimization with Final Surrogate Refinement
**Focus (Planned):** Leverage full Round 10 feedback to refine surrogates, enhance EI performance, enforce diversity, and finalize per-function strategies for the last rounds.
**Planned Methods:**
- **Retrain per-function surrogates using data from Rounds 0–10.**
- **Expected Improvement (EI) acquisition and diversity clustering for F2–F6, F8.**
- **Pure exploration with LHS and focused local sampling for F1, F7.**
- **Dimensional validation enforced at every step.**

### ✅ Next steps once Round 10 feedback arrives:
1. **Ingest Round 10 feedback and append to historical datasets per function.**
2. **Retrain surrogates for F1–F8 on the full data.**
3. **Run updated function-specific candidate proposal pipeline.**
4. **Validate and prepare final submission for Round 11.**

---

### Round 11 Observations Placeholder

```python
# Round 11 Observations Placeholder

# F1
queries_F1 = []
scores_F1 = []

# F2
queries_F2 = []
scores_F2 = []

# F3
queries_F3 = []
scores_F3 = []

# F4
queries_F4 = []
scores_F4 = []

# F5
queries_F5 = []
scores_F5 = []

# F6
queries_F6 = []
scores_F6 = []

# F7
queries_F7 = []
scores_F7 = []

# F8
queries_F8 = []
scores_F8 = []
```


SyntaxError: invalid character '–' (U+2013) (<ipython-input-32-89028bb5baa2>, line 6)

In [None]:
# Round 11 queries and scores
round11_queries = [
    [0.456851, 0.079824],  # F1
    [0.7878, 0.035231],    # F2
    [0.32145, 0.823208, 0.409574],  # F3
    [0.35024, 0.366425, 0.396955, 0.442771],  # F4
    [0.712434, 0.44944, 0.22571, 0.405942],  # F5
    [0.638805, 0.693836, 0.473247, 0.366139, 0.482008],  # F6
    [0.538231, 0.530784, 0.595488, 0.620346, 0.413495, 0.619624],  # F7
    [0.157267, 0.128954, 0.047746, 0.03362, 0.520267, 0.741193, 0.18253, 0.458689]  # F8
]

round11_scores = [
    -0.1459,   # F1
    0.0337,    # F2
    -0.0246,   # F3
    0.2822,    # F4
    13.3302,   # F5
    -1.4710,   # F6
    0.3841,    # F7
    9.8592     # F8
]

# Initialize or update each function's queries and scores.
# The history lists live in the notebook globals as queries_F1..queries_F8 and
# scores_F1..scores_F8; they are created on first use. An observation is only
# appended when that exact (query, score) pair is not stored yet, so running
# this cell again does not feed duplicate points to the surrogate models.
for i in range(8):
    fn = f"F{i+1}"
    fn_queries = globals().setdefault(f"queries_{fn}", [])
    fn_scores = globals().setdefault(f"scores_{fn}", [])
    new_query, new_score = round11_queries[i], round11_scores[i]
    already_stored = any(
        list(q) == new_query and s == new_score
        for q, s in zip(fn_queries, fn_scores)
    )
    if not already_stored:
        fn_queries.append(new_query)
        fn_scores.append(new_score)

In [None]:
!pip install cma

Collecting cma
  Downloading cma-4.0.0-py3-none-any.whl.metadata (8.0 kB)
Downloading cma-4.0.0-py3-none-any.whl (283 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m283.5/283.5 kB[0m [31m4.9 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: cma
Successfully installed cma-4.0.0


In [None]:
import numpy as np
import pandas as pd
from scipy.stats import norm, qmc
from sklearn.gaussian_process import GaussianProcessRegressor
from sklearn.gaussian_process.kernels import Matern, WhiteKernel

# 1. Your historical data (queries_F1...F8, scores_F1...F8) should already be loaded and appended
# 2. Define expected dimensions per function
expected_dims = {'F1': 2, 'F2': 2, 'F3': 3, 'F4': 4, 'F5': 4, 'F6': 5, 'F7': 6, 'F8': 8}

# 3. EI acquisition function
def suggest_ei(dim, X_hist, y_hist, n_suggest=1, samples=10000):
    """Return the ``n_suggest`` Latin-Hypercube candidates with the largest Expected Improvement.

    The surrogate is a Gaussian process (Matern nu=1.5 + white noise,
    normalised targets) fitted to ``X_hist``/``y_hist``. EI is computed for
    maximisation against ``max(y_hist)``. Rows come back in ascending EI
    order, so the last row is the strongest candidate.
    """
    gp_kernel = Matern(nu=1.5) + WhiteKernel(noise_level=1e-6)
    gp_model = GaussianProcessRegressor(kernel=gp_kernel, normalize_y=True)
    gp_model.fit(X_hist, y_hist)

    candidate_pool = qmc.LatinHypercube(d=dim).random(samples)
    mu, sigma = gp_model.predict(candidate_pool, return_std=True)

    target = np.max(y_hist)
    z = (mu - target) / (sigma + 1e-9)  # epsilon only protects the division
    exploit_term = (mu - target) * norm.cdf(z)
    explore_term = sigma * norm.pdf(z)
    ei = exploit_term + explore_term

    return candidate_pool[np.argsort(ei)[-n_suggest:]]

# 4. Format vectors into the expected dash-separated string
def format_query(vector):
    """Format a vector as ``v1-v2-...`` with each value fixed to six decimals."""
    parts = []
    for value in vector:
        parts.append(f"{value:.6f}")
    return '-'.join(parts)

# 5. Generate submission for Round 11
# The history is read from the notebook globals queries_F<i> / scores_F<i>
# for every function (a missing name raises KeyError), even though F1 and F7
# are sampled by pure LHS and do not use it.
final_queries_round11 = []

for func, dim in expected_dims.items():
    X = np.array(globals()[f'queries_{func}'])
    y = np.array(globals()[f'scores_{func}'])

    if func not in ['F1', 'F7']:
        # EI-guided
        picked = suggest_ei(dim, X, y, n_suggest=1)[0]
    else:
        # Pure LHS
        picked = qmc.LatinHypercube(d=dim).random(1)[0]
    final_queries_round11.append(format_query(picked))

# 6. Save to text file
with open("formatted_submission_round11.txt", "w") as f:
    f.writelines(q + '\n' for q in final_queries_round11)

# 7. Display for verification
for number, q in enumerate(final_queries_round11, start=1):
    print(f"F{number}: {q}")

F1: 0.192884-0.926413
F2: 0.005054-0.998337
F3: 0.992182-0.084736-0.999894
F4: 0.966750-0.982004-0.908933-0.873589
F5: 0.064860-0.941199-0.990350-0.927245
F6: 0.006714-0.013039-0.101960-0.868555-0.917215
F7: 0.037117-0.034564-0.896005-0.769715-0.537417-0.940525
F8: 0.982454-0.918714-0.621824-0.920226-0.973726-0.081911-0.932950-0.683774




In [None]:
import numpy as np
import pandas as pd
from scipy.stats import norm, qmc
from sklearn.gaussian_process import GaussianProcessRegressor
from sklearn.gaussian_process.kernels import Matern, WhiteKernel

# --- Function metadata ---
expected_dims = {'F1': 2, 'F2': 2, 'F3': 3, 'F4': 4, 'F5': 4, 'F6': 5, 'F7': 6, 'F8': 8}

# --- EI strategy ---
def suggest_ei(dim, X_hist, y_hist, n_suggest=1, samples=10000):
    """Expected-Improvement acquisition over a Latin-Hypercube candidate pool.

    Fits a GP (Matern nu=1.5 + white noise, normalised targets) to the
    history, evaluates EI for ``samples`` candidates of dimension ``dim``
    relative to the best score seen so far (maximisation), and returns the
    ``n_suggest`` top rows, sorted by ascending EI.
    """
    surrogate = GaussianProcessRegressor(
        kernel=Matern(nu=1.5) + WhiteKernel(noise_level=1e-6),
        normalize_y=True,
    )
    surrogate.fit(X_hist, y_hist)

    lhs_engine = qmc.LatinHypercube(d=dim)
    grid = lhs_engine.random(samples)
    post_mean, post_std = surrogate.predict(grid, return_std=True)

    best_seen = np.max(y_hist)
    # 1e-9 avoids dividing by a zero posterior std.
    z_vals = (post_mean - best_seen) / (post_std + 1e-9)
    acquisition = (post_mean - best_seen) * norm.cdf(z_vals) + post_std * norm.pdf(z_vals)

    winners = np.argsort(acquisition)[-n_suggest:]
    return grid[winners]

# --- Helper to format vector into dash-separated string ---
def format_query(vector):
    """Return the submission string for one query: six-decimal values joined by '-'."""
    six_decimals = "{:.6f}".format
    return '-'.join(six_decimals(v) for v in vector)

# --- Generate Round 11 queries ---
# History comes from the notebook globals queries_F<i> / scores_F<i>; it is
# looked up for every function, although F1 and F7 only use LHS.
final_queries_round11 = []

for func, dim in expected_dims.items():
    X = np.array(globals()[f'queries_{func}'])
    y = np.array(globals()[f'scores_{func}'])

    if func not in ['F1', 'F7']:
        # EI-guided
        chosen_point = suggest_ei(dim, X, y, n_suggest=1)[0]
    else:
        # Latin Hypercube Sampling
        chosen_point = qmc.LatinHypercube(d=dim).random(1)[0]
    final_queries_round11.append(format_query(chosen_point))

# --- Save submission ---
submission_file = "formatted_submission_round11.txt"
with open(submission_file, "w") as f:
    f.writelines(q + '\n' for q in final_queries_round11)

# --- Validation ---
# Checks the in-memory list (not the file): 8 entries, and each entry must
# split on '-' into exactly the number of values declared in expected_dims.
print(f"✅ Submission file '{submission_file}' created.\n")

n_lines = len(final_queries_round11)
valid = n_lines == 8
if not valid:
    print(f"❌ ERROR: Submission has {n_lines} lines (should be 8)")

for number, line in enumerate(final_queries_round11, start=1):
    dim_expected = expected_dims[f"F{number}"]
    n_values = len(line.strip().split("-"))
    if n_values == dim_expected:
        print(f"F{number}: {line} ✅ {dim_expected}D")
        continue
    print(f"❌ ERROR: F{number} has {n_values} values (expected {dim_expected}) → {line}")
    valid = False

if not valid:
    print("\n❌ Submission has formatting or dimensionality issues. Fix before uploading.")
else:
    print("\n✅ All validation checks passed — submission is ready.")



✅ Submission file 'formatted_submission_round11.txt' created.

F1: 0.091551-0.163071 ✅ 2D
F2: 0.001379-0.994497 ✅ 2D
F3: 0.992244-0.012868-0.880511 ✅ 3D
F4: 0.988164-0.958607-0.987289-0.138835 ✅ 4D
F5: 0.015062-0.991568-0.855815-0.961844 ✅ 4D
F6: 0.194185-0.012652-0.001263-0.973034-0.976119 ✅ 5D
F7: 0.953227-0.191686-0.726186-0.916246-0.443283-0.482373 ✅ 6D
F8: 0.987211-0.999823-0.737463-0.988837-0.760353-0.175770-0.956319-0.602546 ✅ 8D

✅ All validation checks passed — submission is ready.


In [None]:
# === ROUND 12 STRATEGY ===
import numpy as np, scipy.stats.qmc as qmc
from random import seed
seed(42)

def fmt(v):
    """Format a vector as dash-separated values with six decimal places."""
    return "-".join(map("{:.6f}".format, v))

# === Best vectors (from Round 10) ===
best = {
    "F1": np.array([0.944034, 0.625117]),  # needs explore
    "F2": np.array([0.768771, 0.105777]),  # stable
    "F3": np.array([0.145600, 0.944454, 0.638992]),
    "F4": np.array([0.999363, 0.440618, 0.632742, 0.573562]),
    "F5": np.array([0.290636, 0.824504, 0.580542, 0.368022]),
    "F6": np.array([0.384874, 0.441523, 0.115493, 0.592513, 0.477029]),
    "F7": np.array([0.304835, 0.615316, 0.890015, 0.497555, 0.892324, 0.798193]),  # needs explore
    "F8": np.array([0.236820, 0.160489, 0.275402, 0.810171, 0.652892, 0.267181, 0.893031, 0.762776])
}

# === Candidate Generator ===
def micro(vec, dim, delta):
    """Return a copy of ``vec`` with coordinate ``dim`` shifted by ``delta``.

    The shifted coordinate is clipped to [0, 1]: a value outside that range
    leaves the unit box the other samplers in this notebook draw from, and a
    negative value would add an extra '-' to the dash-separated submission
    line. ``vec`` itself is never modified; lists are accepted as well as
    arrays.
    """
    v = np.array(vec, dtype=float)  # np.array copies, so the caller's vector is untouched
    v[dim] = min(1.0, max(0.0, v[dim] + delta))
    return v
# Shared generator for random_vec. `random.seed(...)` has no effect on SciPy's
# samplers, so reproducibility needs an explicit NumPy generator.
_LHS_RNG = np.random.default_rng(42)

def random_vec(dim, seed=None):
    """Return one Latin-Hypercube point of dimension ``dim`` as a 1-D array.

    With ``seed=None`` (the default) the draw comes from the shared,
    fixed-seed generator ``_LHS_RNG``: successive calls differ, but
    re-running the defining cell and then the same calls reproduces them.
    Pass an int or a ``numpy.random.Generator`` as ``seed`` to control a
    single draw.
    """
    rng = _LHS_RNG if seed is None else seed
    return qmc.LatinHypercube(d=dim, seed=rng).random(1)[0]

# === Candidates per function ===
# `cands` is an ordered list of (function name, vector) pairs. Order matters:
# the export step submits the FIRST candidate listed for each function.

# F1: three random draws, then the baseline
cands = [("F1", random_vec(2)) for _ in range(3)]
cands.append(("F1", best["F1"]))

# F2..F5: lock in the best known vector
cands.extend((fn, best[fn]) for fn in ["F2", "F3", "F4", "F5"])

# F6: baseline, then +0.03 on the first and on the fifth coordinate
cands.append(("F6", best["F6"]))
cands.append(("F6", micro(best["F6"], 0, +0.03)))
cands.append(("F6", micro(best["F6"], 4, +0.03)))

# F7: three Latin Hypercube draws, then the baseline
cands.extend(("F7", random_vec(6)) for _ in range(3))
cands.append(("F7", best["F7"]))

# F8: baseline, then +0.03 on the first and on the second coordinate
cands.append(("F8", best["F8"]))
cands.append(("F8", micro(best["F8"], 0, +0.03)))
cands.append(("F8", micro(best["F8"], 1, +0.03)))

# === Export Files ===
# Submission: the first candidate listed for each function, F1..F8, one per line.
chosen = {}
for fn, vec in cands:
    chosen.setdefault(fn, vec)

with open("formatted_submission.txt", "w") as f:
    for i in range(1, 9):
        f.write(fmt(chosen[f"F{i}"]) + "\n")

# Review file: every candidate, grouped by function and numbered from cand0.
with open("candidate_review.txt", "w") as f:
    for i in range(1, 9):
        fn = f"F{i}"
        f.write(f"{fn}:\n")
        own_vectors = [vec for f_tag, vec in cands if f_tag == fn]
        for idx, vec in enumerate(own_vectors):
            f.write(f"cand{idx}: {fmt(vec)}\n")
        f.write("\n")

print("✅ Round 12: Exported formatted_submission.txt and candidate_review.txt")


✅ Round 12: Exported formatted_submission.txt and candidate_review.txt


In [None]:
!pip install -q cma

[?25l   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/288.2 kB[0m [31m?[0m eta [36m-:--:--[0m[2K   [91m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m[91m╸[0m [32m286.7/288.2 kB[0m [31m8.7 MB/s[0m eta [36m0:00:01[0m[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m288.2/288.2 kB[0m [31m6.1 MB/s[0m eta [36m0:00:00[0m
[?25h

In [None]:
import numpy as np
import pandas as pd
from scipy.stats import norm, qmc
from sklearn.gaussian_process import GaussianProcessRegressor
from sklearn.gaussian_process.kernels import Matern, WhiteKernel
import cma

# --- Configuration ---
expected_dims = {'F1': 2, 'F2': 2, 'F3': 3, 'F4': 4, 'F5': 4, 'F6': 5, 'F7': 6, 'F8': 8}
N_SAMPLES = 10000
K_TOP = 1

# --- Strategy Functions ---
def suggest_ei(X_hist, y_hist, dim, n_suggest=1, samples=N_SAMPLES):
    """Suggest points by Expected Improvement (maximisation).

    Note the argument order: history first, then ``dim``. The surrogate is a
    GP with a Matern nu=2.5 kernel plus a default ``WhiteKernel`` and
    normalised targets. ``samples`` Latin-Hypercube candidates are scored and
    the ``n_suggest`` best rows are returned in ascending EI order.
    """
    kernel = Matern(nu=2.5) + WhiteKernel()
    model = GaussianProcessRegressor(kernel=kernel, normalize_y=True)
    model.fit(X_hist, y_hist)

    pool = qmc.LatinHypercube(d=dim).random(samples)
    mean, std = model.predict(pool, return_std=True)

    incumbent = np.max(y_hist)
    # Epsilon keeps z finite where the predicted std is zero.
    z = (mean - incumbent) / (std + 1e-9)
    scores = (mean - incumbent) * norm.cdf(z) + std * norm.pdf(z)

    order = np.argsort(scores)
    return pool[order[-n_suggest:]]

def suggest_cma_es(gp, dim, sigma0=0.3, popsize=8, n_generations=20):
    """Search the surrogate's predicted mean with CMA-ES inside the [0, 1] bounds.

    The objective handed to CMA-ES is the negated GP prediction, so a lower
    objective value means a higher predicted score. The search starts from a
    uniformly random point and runs for ``n_generations`` iterations with
    population size ``popsize``. Returns ``es.result.xbest``.
    """
    def negated_prediction(x):
        point = np.array(x).reshape(1, -1)
        return -gp.predict(point)[0]

    start = np.random.uniform(0, 1, dim).tolist()
    options = {'popsize': popsize, 'bounds': [0, 1]}
    es = cma.CMAEvolutionStrategy(start, sigma0, options)
    es.optimize(negated_prediction, iterations=n_generations)
    return es.result.xbest

def suggest_lhs(dim, n=1):
    """Sample ``n`` points from a fresh ``dim``-dimensional Latin Hypercube."""
    sampler = qmc.LatinHypercube(d=dim)
    return sampler.random(n)

def fmt(vec):
    """Dash-join the entries of ``vec``, each written with six decimals."""
    return '-'.join(format(entry, ".6f") for entry in vec)

# --- Historical Data Input ---
queries_dict = {
    "F1": [
        [0.944034, 0.625117],
        [0.073140, 0.227123],
        [0.944034, 0.645117],
        [0.974034, 0.675117]
    ],
    "F2": [
        [0.768771, 0.105777],
        [0.778771, 0.105777],
        [0.601115, 0.708072],
        [0.712000, 0.489000]
    ],
    "F3": [
        [0.145600, 0.944454, 0.638992],
        [0.185600, 0.984454, 0.678992],
        [0.175600, 0.974454, 0.658992],
        [0.611852, 0.139493, 0.292144]
    ],
    "F4": [
        [0.999363, 0.440618, 0.632742, 0.573562],
        [0.989363, 0.430618, 0.632742, 0.583562],
        [0.304613, 0.097672, 0.684233, 0.440152],
        [0.726952, 0.189810, 0.835269, 0.100856]
    ],
    "F5": [
        [0.290636, 0.824504, 0.580542, 0.368022],
        [0.270636, 0.824504, 0.580542, 0.368022],
        [0.088492, 0.195982, 0.045227, 0.325330],
        [0.789315, 0.734624, 0.122222, 0.444444]
    ],
    "F6": [
        [0.384874, 0.441523, 0.115493, 0.592513, 0.477029],
        [0.404874, 0.441523, 0.135493, 0.612513, 0.497029],
        [0.433609, 0.430291, 0.199161, 0.583735, 0.454637],
        [0.562398, 0.627903, 0.300000, 0.450000, 0.123456]
    ],
    "F7": [
        [0.949760, 0.670137, 0.267596, 0.089014, 0.282662, 0.021611],
        [0.304835, 0.615316, 0.890015, 0.497555, 0.892324, 0.798193],
        [0.334835, 0.585316, 0.860015, 0.467555, 0.862324, 0.768193],
        [0.392748, 0.682337, 0.400000, 0.600000, 0.250000, 0.123000]
    ],
    "F8": [
        [0.236820, 0.160489, 0.275402, 0.810171, 0.652892, 0.267181, 0.893031, 0.762776],
        [0.256820, 0.180489, 0.295402, 0.830171, 0.672892, 0.287181, 0.913031, 0.782776],
        [0.276820, 0.180489, 0.305402, 0.810171, 0.652892, 0.267181, 0.923031, 0.792776],
        [0.688905, 0.968377, 0.100000, 0.200000, 0.300000, 0.400000, 0.500000, 0.600000]
    ]
}

scores_dict = {
    "F1": [
        1.3276592050304897e-69,
        4.6015626100136584e-113,
        6.933250699386668e-70,
        1.0331432167463326e-46
    ],
    "F2": [
        0.27308818432797793,
        0.09312933125476712,
        0.33828687086483444,
        0.5762427881544481
    ],
    "F3": [
        -0.10726379301566011,
        -0.05132522888449367,
        -0.01882082499050317,
        -0.14698930107649133
    ],
    "F4": [
        -20.949420794091107,
        -4.55517117590318,
        -22.018089179607596,
        0.28219639798857843
    ],
    "F5": [
        13.377756436400528,
        1105.1479642901527,
        46.39060566194315,
        13.330156169480178
    ],
    "F6": [
        -1.1857272137575898,
        -0.5399068214874156,
        -1.0581335024921092,
        -1.4710430538524402
    ],
    "F7": [
        0.023506147714804363,
        2.090579827192728,
        1.5474127567590061,
        0.38412848438494507
    ],
    "F8": [
        8.3316420728934,
        9.5659163853995,
        8.316440887821,
        9.8592499607924
    ]
}

# --- Strategy Map per Function ---
strategy_map = {
    "F1": "lhs",
    "F2": "ei",
    "F3": "ei",
    "F4": "ei",
    "F5": "best",  # already optimal
    "F6": "ei+cma",
    "F7": "lhs",
    "F8": "ei+cma+lhs"
}

# final_queries: one formatted line per function, in F1..F8 order (the
# submission is positional). review_queries: every candidate produced, kept
# as (function, vector) pairs for the optional review file.
final_queries = []
review_queries = []

for fn in [f"F{i}" for i in range(1, 9)]:
    X = np.array(queries_dict[fn])
    y = np.array(scores_dict[fn])
    d = expected_dims[fn]
    strat = strategy_map[fn]

    if strat == "lhs":
        sample = suggest_lhs(d, n=K_TOP)
        final_queries.append(fmt(sample[0]))
        review_queries.append((fn, sample[0]))

    elif strat == "ei":
        best = suggest_ei(X, y, d, n_suggest=K_TOP)
        final_queries.append(fmt(best[0]))
        review_queries.append((fn, best[0]))

    elif strat in ("ei+cma", "ei+cma+lhs"):
        # The EI pick is what gets submitted; the CMA-ES (and optional LHS)
        # points are only recorded as backup candidates for review.
        gp = GaussianProcessRegressor(kernel=Matern(nu=2.5) + WhiteKernel(), normalize_y=True)
        gp.fit(X, y)
        ei_vec = suggest_ei(X, y, d, n_suggest=1)[0]
        cma_vec = suggest_cma_es(gp, d)
        backups = [(fn, ei_vec), (fn, cma_vec)]
        if strat == "ei+cma+lhs":
            lhs_vec = suggest_lhs(d, 1)[0]
            backups.append((fn, lhs_vec))
        final_queries.append(fmt(ei_vec))
        review_queries.extend(backups)

    elif strat == "best":
        # Re-submit the historical query with the highest score.
        best_idx = np.argmax(y)
        best_vec = X[best_idx]
        final_queries.append(fmt(best_vec))
        review_queries.append((fn, best_vec))

    else:
        # Skipping a function silently would shift every later line of the
        # positional submission file, so fail loudly instead.
        raise ValueError(f"Unknown strategy {strat!r} for {fn}")

# --- Save Final Submission File ---
with open("formatted_submission.txt", "w") as f:
    f.writelines(q + "\n" for q in final_queries)

# --- Save Backup Candidates (Optional) ---
# Candidates are grouped by function name (sorted) and numbered from cand0
# in the order they were recorded.
with open("candidate_review.txt", "w") as f:
    for fn in sorted({tag for tag, _ in review_queries}):
        f.write(f"{fn}:\n")
        own_vectors = [vec for f_tag, vec in review_queries if f_tag == fn]
        for count, vec in enumerate(own_vectors):
            f.write(f"cand{count}: {fmt(vec)}\n")
        f.write("\n")

# Echo the submission file that was just written, one labelled line per function.
with open("formatted_submission.txt", "r") as f:
    lines = [raw.strip() for raw in f]
for number, line in enumerate(lines, start=1):
    print(f"F{number}: {line}")
print("✅ Round 13 files created: formatted_submission.txt + candidate_review.txt")



(4_w,8)-aCMA-ES (mu_w=2.6,w_1=52%) in dimension 5 (seed=362182, Mon May 26 04:09:50 2025)
Iterat #Fevals   function value  axis ratio  sigma  min&max std  t[m:s]
    1      8 1.063702647897389e+00 1.0e+00 2.51e-01  2e-01  3e-01 0:00.0
(4_w,8)-aCMA-ES (mu_w=2.6,w_1=52%) in dimension 8 (seed=257321, Mon May 26 04:09:51 2025)
Iterat #Fevals   function value  axis ratio  sigma  min&max std  t[m:s]
    1      8 -9.050075717069841e+00 1.0e+00 2.72e-01  3e-01  3e-01 0:00.0




    2     16 -9.138297502230099e+00 1.2e+00 2.57e-01  2e-01  3e-01 0:00.0
    3     24 -9.225789659149898e+00 1.2e+00 2.58e-01  2e-01  3e-01 0:00.0
F1: 0.879410-0.446353
F2: 0.820170-0.854216
F3: 0.179665-0.962743-0.643881
F4: 0.381484-0.672193-0.109540-0.465085
F5: 0.270636-0.824504-0.580542-0.368022
F6: 0.253449-0.564709-0.522052-0.814941-0.923631
F7: 0.221622-0.200272-0.950788-0.832244-0.934903-0.269186
F8: 0.696464-0.988667-0.065271-0.356464-0.165350-0.435606-0.661504-0.499543
✅ Round 13 files created: formatted_submission.txt + candidate_review.txt


In [None]:
# === Load Round 13 Submission ===
with open("formatted_submission.txt", "r") as f:
    new_queries = [line.strip() for line in f.readlines()]

# === Score Prediction and Comparison ===
print("🔍 Evaluating accuracy of Round 13 submission vs past scores:\n")

from sklearn.gaussian_process import GaussianProcessRegressor
from sklearn.gaussian_process.kernels import Matern, WhiteKernel
import numpy as np


def _fmt_score(value):
    """Format a score with four decimals, switching to scientific notation
    when fixed-point would round a non-zero value to 0.0000 (e.g. scores
    around 1e-47)."""
    value = float(value)
    if 0 < abs(value) < 5e-5:
        return f"{value:.4e}"
    return f"{value:.4f}"


for i in range(1, 9):
    fn = f"F{i}"
    dim = expected_dims[fn]

    # Parse historical data
    X = np.array(queries_dict[fn])
    y = np.array(scores_dict[fn])

    # Train surrogate model
    gp = GaussianProcessRegressor(kernel=Matern(nu=2.5) + WhiteKernel(), normalize_y=True)
    gp.fit(X, y)

    # Parse new query and make sure it has the dimensionality declared for this function
    q_vec = np.array([float(x) for x in new_queries[i-1].split("-")])
    assert q_vec.shape == (dim,), f"{fn}: expected {dim} values, got {q_vec.size}"
    y_pred, y_std = gp.predict(q_vec.reshape(1, -1), return_std=True)

    # Summary statistics of the scores seen so far
    best = np.max(y)
    mean = np.mean(y)
    worst = np.min(y)

    print(f"{fn}:")
    print(f"  🔢 Submitted vector: {new_queries[i-1]}")
    print(f"  📈 Predicted Score: {_fmt_score(y_pred[0])} ± {_fmt_score(y_std[0])}")
    print(f"  🥇 Best Past Score: {_fmt_score(best)}")
    print(f"  📊 Average Score:   {_fmt_score(mean)}")
    print(f"  🫠 Worst Past Score:{_fmt_score(worst)}\n")

🔍 Evaluating accuracy of Round 13 submission vs past scores:

F1:
  🔢 Submitted vector: 0.879410-0.446353
  📈 Predicted Score: 0.0000 ± 0.0000
  🥇 Best Past Score: 0.0000
  📊 Average Score:   0.0000
  🫠 Worst Past Score:0.0000





F2:
  🔢 Submitted vector: 0.820170-0.854216
  📈 Predicted Score: 0.3202 ± 0.1731
  🥇 Best Past Score: 0.5762
  📊 Average Score:   0.3202
  🫠 Worst Past Score:0.0931





F3:
  🔢 Submitted vector: 0.179665-0.962743-0.643881
  📈 Predicted Score: -0.0471 ± 0.0377
  🥇 Best Past Score: -0.0188
  📊 Average Score:   -0.0811
  🫠 Worst Past Score:-0.1470





F4:
  🔢 Submitted vector: 0.381484-0.672193-0.109540-0.465085
  📈 Predicted Score: -11.8101 ± 9.8399
  🥇 Best Past Score: 0.2822
  📊 Average Score:   -11.8101
  🫠 Worst Past Score:-22.0181

F5:
  🔢 Submitted vector: 0.270636-0.824504-0.580542-0.368022
  📈 Predicted Score: 294.5617 ± 568.0593
  🥇 Best Past Score: 1105.1480
  📊 Average Score:   294.5616
  🫠 Worst Past Score:13.3302

F6:
  🔢 Submitted vector: 0.253449-0.564709-0.522052-0.814941-0.923631
  📈 Predicted Score: -1.0637 ± 0.3376
  🥇 Best Past Score: -0.5399
  📊 Average Score:   -1.0637
  🫠 Worst Past Score:-1.4710

F7:
  🔢 Submitted vector: 0.221622-0.200272-0.950788-0.832244-0.934903-0.269186
  📈 Predicted Score: 1.2286 ± 0.8289
  🥇 Best Past Score: 2.0906
  📊 Average Score:   1.0114
  🫠 Worst Past Score:0.0235

F8:
  🔢 Submitted vector: 0.696464-0.988667-0.065271-0.356464-0.165350-0.435606-0.661504-0.499543
  📈 Predicted Score: 9.2809 ± 0.9177
  🥇 Best Past Score: 9.8592
  📊 Average Score:   9.0183
  🫠 Worst Past Score:8.316



In [None]:
!pip install cma



In [None]:
import numpy as np

# Replace with your actual top F6 vectors
top_f6_vectors = np.array([
    [0.433609, 0.430291, 0.199161, 0.583735, 0.454637],
    [0.414874, 0.421523, 0.115493, 0.592513, 0.477029],
    [0.460059, 0.426932, 0.132988, 0.650127, 0.537691],
    [0.384874, 0.441523, 0.115493, 0.592513, 0.477029],
    [0.404874, 0.441523, 0.135493, 0.612513, 0.497029],
])

In [None]:
def crossover(parent1, parent2):
    """Blend two parents: ``w * parent1 + (1 - w) * parent2`` with ``w`` drawn uniformly from [0.3, 0.7)."""
    weight = np.random.uniform(0.3, 0.7)  # one weight shared by all coordinates
    return weight * parent1 + (1 - weight) * parent2

def mutate(vector, mutation_rate=0.1, std_dev=0.02):
    """Occasionally jitter a whole vector with Gaussian noise.

    With probability ``mutation_rate`` zero-mean normal noise (standard
    deviation ``std_dev``) is added to every component and the result is
    clipped to [0, 1]. Otherwise the input object itself is returned.
    """
    if not (np.random.rand() < mutation_rate):
        return vector
    jitter = np.random.normal(0, std_dev, size=vector.shape)
    return np.clip(vector + jitter, 0, 1)

In [None]:
# Combine original and new candidates
# NOTE: evolve_population is not defined at this point in the notebook (its
# definition lives in a later cell), so running this cell in order on a fresh
# kernel raises NameError.
new_candidates = evolve_population(top_f6_vectors, num_offspring=20)

# Final submission list
F6_submission_candidates = np.vstack((top_f6_vectors, new_candidates))

NameError: name 'evolve_population' is not defined

In [None]:
import numpy as np

# STEP 1: Top-performing F6 vectors — update these with your real data
# Five 5-dimensional rows; they are the parent pool passed to evolve_population
# in STEP 5.
top_f6_vectors = np.array([
    [0.433609, 0.430291, 0.199161, 0.583735, 0.454637],
    [0.414874, 0.421523, 0.115493, 0.592513, 0.477029],
    [0.460059, 0.426932, 0.132988, 0.650127, 0.537691],
    [0.384874, 0.441523, 0.115493, 0.592513, 0.477029],
    [0.404874, 0.441523, 0.135493, 0.612513, 0.497029],
])

# STEP 2: Crossover — mix two parents
def crossover(parent1, parent2):
    """Return a convex combination of ``parent1`` and ``parent2``.

    The mixing coefficient is sampled uniformly from [0.3, 0.7) with NumPy's
    global random state and weights ``parent1``; ``parent2`` receives the
    complementary weight.
    """
    mix = np.random.uniform(0.3, 0.7)
    first_part = mix * parent1
    second_part = (1 - mix) * parent2
    return first_part + second_part

# STEP 3: Mutation — add small noise
def mutate(vector, mutation_rate=0.2, std_dev=0.02):
    """Perturb ``vector`` with Gaussian noise some of the time.

    A single uniform draw decides whether the vector is mutated. If it is,
    noise drawn from N(0, ``std_dev``) is added to every component and the
    result is clipped to [0, 1]. If not, the very same input object is
    returned.
    """
    should_mutate = np.random.rand() < mutation_rate
    if not should_mutate:
        return vector
    perturbation = np.random.normal(0, std_dev, size=vector.shape)
    return np.clip(vector + perturbation, 0, 1)

# STEP 4: Evolution — generate children
def evolve_population(parents, num_offspring=20):
    """Breed children from randomly chosen pairs of distinct parents.

    Each child is produced by ``crossover`` on two different parent rows and
    then passed through ``mutate``.

    Parameters
    ----------
    parents : array-like, shape (n_parents, n_dims)
        Pool of parent vectors. At least two rows are needed to draw a pair
        without replacement.
    num_offspring : int, default 20
        Number of children to generate.

    Returns
    -------
    numpy.ndarray
        Array with one row per child. When no children are requested the
        result has shape ``(0, n_dims)`` so it can still be stacked onto
        ``parents`` with ``np.vstack``.
    """
    parents = np.asarray(parents)
    if num_offspring <= 0:
        # np.array([]) would be 1-D and could not be vstack-ed with the parents.
        return np.empty((0,) + parents.shape[1:], dtype=float)

    offspring = []
    num_parents = len(parents)
    while len(offspring) < num_offspring:
        # replace=False guarantees the two parents are different rows.
        i, j = np.random.choice(num_parents, 2, replace=False)
        child = crossover(parents[i], parents[j])
        child = mutate(child)
        offspring.append(child)
    return np.array(offspring)

# STEP 5: Generate candidates and prepare for submission
# Stacks the 5 parent rows on top of the 20 generated children (25 rows total).
# The children depend on NumPy's global random state, which is not seeded here.
new_candidates = evolve_population(top_f6_vectors, num_offspring=20)
F6_submission_candidates = np.vstack((top_f6_vectors, new_candidates))

In [None]:
# List every F6 candidate with a 1-based label
for i in range(len(F6_submission_candidates)):
    vec = F6_submission_candidates[i]
    print(f"Candidate {i+1}: {vec}")

Candidate 1: [0.433609 0.430291 0.199161 0.583735 0.454637]
Candidate 2: [0.414874 0.421523 0.115493 0.592513 0.477029]
Candidate 3: [0.460059 0.426932 0.132988 0.650127 0.537691]
Candidate 4: [0.384874 0.441523 0.115493 0.592513 0.477029]
Candidate 5: [0.404874 0.441523 0.135493 0.612513 0.497029]
Candidate 6: [0.41854205 0.4361804  0.16577723 0.5988245  0.47686488]
Candidate 7: [0.41355452 0.43812994 0.15472638 0.60381949 0.48422286]
Candidate 8: [0.41749694 0.43474283 0.14494444 0.61066265 0.4797329 ]
Candidate 9: [0.39719717 0.441523   0.12781617 0.60483617 0.48935217]
Candidate 10: [0.4517245  0.42799043 0.15383938 0.62920661 0.51152035]
Candidate 11: [0.42651898 0.43580003 0.13451047 0.62726618 0.51297768]
Candidate 12: [0.43571093 0.43165718 0.12732238 0.63146916 0.51804609]
Candidate 13: [0.44813074 0.42844682 0.1628303  0.62018594 0.50023581]
Candidate 14: [0.43594766 0.43330708 0.13408248 0.63369275 0.51992502]
Candidate 15: [0.42101961 0.43521197 0.17126675 0.59634323 0.4732

**F6 best candidate, round 13**

In [None]:
import numpy as np

# Your 25 F6 candidate vectors
# Hard-coded 25 x 5 array. Rows 1-5 equal top_f6_vectors; the remaining rows
# appear to be pasted from an earlier evolve_population run — TODO confirm.
F6_submission_candidates = np.array([
    [0.433609, 0.430291, 0.199161, 0.583735, 0.454637],
    [0.414874, 0.421523, 0.115493, 0.592513, 0.477029],
    [0.460059, 0.426932, 0.132988, 0.650127, 0.537691],
    [0.384874, 0.441523, 0.115493, 0.592513, 0.477029],
    [0.404874, 0.441523, 0.135493, 0.612513, 0.497029],
    [0.41854205, 0.4361804, 0.16577723, 0.5988245, 0.47686488],
    [0.41355452, 0.43812994, 0.15472638, 0.60381949, 0.48422286],
    [0.41749694, 0.43474283, 0.14494444, 0.61066265, 0.4797329],
    [0.39719717, 0.441523, 0.12781617, 0.60483617, 0.48935217],
    [0.4517245, 0.42799043, 0.15383938, 0.62920661, 0.51152035],
    [0.42651898, 0.43580003, 0.13451047, 0.62726618, 0.51297768],
    [0.43571093, 0.43165718, 0.12732238, 0.63146916, 0.51804609],
    [0.44813074, 0.42844682, 0.1628303, 0.62018594, 0.50023581],
    [0.43594766, 0.43330708, 0.13408248, 0.63369275, 0.51992502],
    [0.42101961, 0.43521197, 0.17126675, 0.59634323, 0.4732098],
    [0.41518522, 0.43453715, 0.16753114, 0.58705344, 0.46310207],
    [0.44730445, 0.42855176, 0.16489752, 0.61811188, 0.49764123],
    [0.41137479, 0.43541534, 0.16098942, 0.58773976, 0.46485283],
    [0.44902909, 0.42833274, 0.1605828, 0.62244088, 0.50305666],
    [0.41135638, 0.43541958, 0.16095781, 0.58774307, 0.46486129],
    [0.4257082, 0.43601441, 0.13454728, 0.62671355, 0.51238027],
    [0.40657144, 0.43652237, 0.15274305, 0.58860492, 0.4670598],
    [0.41495714, 0.43568483, 0.12249313, 0.6155656, 0.50130117],
    [0.4114104, 0.4284502, 0.1224202, 0.5994402, 0.4839562],
    [0.43645178, 0.43317379, 0.1340596, 0.63403636, 0.52029647]
])

# Simulated scoring function (tweak if needed)
def mock_score(vec):
    """Heuristic stand-in score for an F6 vector.

    Starts at 1 and subtracts weighted absolute distances from fixed target
    values at indices 3, 4, 2 and 0 (index 1 is ignored). The result is
    rounded to six decimals.
    """
    # (index, target, weight) — subtracted in this order
    penalties = (
        (3, 0.63, 2),      # 4th dimension
        (4, 0.52, 1.5),    # 5th dimension
        (2, 0.13, 1),      # 3rd dimension
        (0, 0.435, 0.5),   # 1st dimension (less sensitive)
    )
    score = 1
    for index, target, weight in penalties:
        score = score - abs(vec[index] - target) * weight
    return round(score, 6)

# Score and print
# Start from -inf rather than -1: mock_score can fall below -1 for vectors far
# from its targets, and a fixed -1 sentinel would then leave best_index at -1
# and report "Candidate 0" together with the last row of the array.
best_score = float("-inf")
best_index = -1

for i, vec in enumerate(F6_submission_candidates):
    score = mock_score(vec)
    print(f"Candidate {i+1}: {vec} → Score: {score}")
    # Strict '>' keeps the earliest candidate when scores tie.
    if score > best_score:
        best_score = score
        best_index = i

# Print best candidate
if best_index < 0:
    # Reached only when there were no candidates or no comparable score.
    print("\nNo valid candidate score found.")
else:
    print("\n🏆 Best Candidate to Submit:")
    print(f"Candidate {best_index+1}: {F6_submission_candidates[best_index]} → Score: {best_score}")

Candidate 1: [0.433609 0.430291 0.199161 0.583735 0.454637] → Score: 0.739569
Candidate 2: [0.414874 0.421523 0.115493 0.592513 0.477029] → Score: 0.835999
Candidate 3: [0.460059 0.426932 0.132988 0.650127 0.537691] → Score: 0.917692
Candidate 4: [0.384874 0.441523 0.115493 0.592513 0.477029] → Score: 0.820999
Candidate 5: [0.404874 0.441523 0.135493 0.612513 0.497029] → Score: 0.910014
Candidate 6: [0.41854205 0.4361804  0.16577723 0.5988245  0.47686488] → Score: 0.82894
Candidate 7: [0.41355452 0.43812994 0.15472638 0.60381949 0.48422286] → Score: 0.858524
Candidate 8: [0.41749694 0.43474283 0.14494444 0.61066265 0.4797329 ] → Score: 0.877229
Candidate 9: [0.39719717 0.441523   0.12781617 0.60483617 0.48935217] → Score: 0.882615
Candidate 10: [0.4517245  0.42799043 0.15383938 0.62920661 0.51152035] → Score: 0.953492
Candidate 11: [0.42651898 0.43580003 0.13451047 0.62726618 0.51297768] → Score: 0.975248
Candidate 12: [0.43571093 0.43165718 0.12732238 0.63146916 0.51804609] → Score: 0

In [None]:
import numpy as np
from scipy.stats import qmc

def micro_perturbations(base_vec, delta=0.01):
    """Build axis-aligned neighbours of ``base_vec``.

    Returns a list whose first entry is ``base_vec`` itself, followed, for
    each coordinate in order, by one copy with that coordinate raised by
    ``delta`` (capped at 1) and one copy with it lowered by ``delta``
    (floored at 0).
    """
    variants = [base_vec]
    for dim in range(len(base_vec)):
        raised = base_vec.copy()
        raised[dim] = min(1, raised[dim] + delta)
        variants.append(raised)

        lowered = base_vec.copy()
        lowered[dim] = max(0, lowered[dim] - delta)
        variants.append(lowered)
    return variants

def latin_hypercube_samples(dim, n_samples, seed=None):
    """Draw a Latin hypercube sample with SciPy's QMC engine.

    Parameters
    ----------
    dim : int
        Number of dimensions of each sample point.
    n_samples : int
        Number of points to draw.
    seed : int or numpy.random.Generator, optional
        Forwarded to ``qmc.LatinHypercube`` so the sample can be reproduced.
        The default ``None`` keeps the previous unseeded behaviour.

    Returns
    -------
    numpy.ndarray, shape (n_samples, dim)
        The sampled points as returned by ``sampler.random``.
    """
    sampler = qmc.LatinHypercube(d=dim, seed=seed)
    return sampler.random(n_samples)

# Example base vectors (replace with your best known)
best_F7 = np.array([0.304835, 0.615316, 0.890015, 0.497555, 0.892324, 0.798193])
best_F4 = np.array([0.999363, 0.440618, 0.632742, 0.573562])
best_F8 = np.array([0.236820, 0.160489, 0.275402, 0.810171, 0.652892, 0.267181, 0.893031, 0.762776])

# Generate candidates
# F7: 10 Latin-hypercube points in 6 dimensions, as a list of lists; best_F7
# is not used when building them.
F7_candidates = latin_hypercube_samples(dim=6, n_samples=10).tolist()
# F4: the base vector plus a +/-0.005 step along each of its 4 coordinates.
F4_candidates = micro_perturbations(best_F4, delta=0.005)
F8_candidates = micro_perturbations(best_F8[:2], delta=0.03)  # perturb first two dims
# Combine with baseline for F8 full vector
# Every vector produced above has length 2, so the `else c` branch is never
# taken; each one is extended with the unchanged tail best_F8[2:].
F8_candidates = [np.concatenate([c, best_F8[2:]]) if len(c) == 2 else c for c in F8_candidates]

# Print sizes
print(f"Generated {len(F7_candidates)} candidates for Function 7")
print(f"Generated {len(F4_candidates)} candidates for Function 4")
print(f"Generated {len(F8_candidates)} candidates for Function 8")

Generated 10 candidates for Function 7
Generated 9 candidates for Function 4
Generated 5 candidates for Function 8


In [None]:
import numpy as np

# Your actual candidate vectors for each function
# Hand-entered arrays: F7 is 10 x 6, F4 is 9 x 4, F8 is 5 x 8. They replace the
# F7/F4/F8 candidate lists generated in the previous cell.

F7_candidates = np.array([
    [0.304835, 0.615316, 0.890015, 0.497555, 0.892324, 0.798193],
    [0.205, 0.625, 0.89, 0.5, 0.89, 0.8],
    [0.31, 0.6, 0.88, 0.49, 0.9, 0.79],
    [0.29, 0.62, 0.87, 0.51, 0.88, 0.81],
    [0.32, 0.61, 0.89, 0.5, 0.9, 0.8],
    [0.3, 0.62, 0.85, 0.5, 0.91, 0.77],
    [0.31, 0.615, 0.9, 0.495, 0.88, 0.79],
    [0.305, 0.61, 0.88, 0.505, 0.89, 0.8],
    [0.3, 0.62, 0.89, 0.5, 0.9, 0.81],
    [0.299, 0.613, 0.87, 0.498, 0.89, 0.79]
])

# NOTE(review): rows 2, 6 and 8 have a first coordinate above 1.0 (1.004,
# 1.002, 1.001), while format_vector_for_submission clips values to 0.999999 —
# confirm whether inputs outside [0, 1] are intended here.
F4_candidates = np.array([
    [0.999363, 0.440618, 0.632742, 0.573562],
    [1.004, 0.445, 0.635, 0.57],
    [0.995, 0.438, 0.63, 0.575],
    [1.0, 0.44, 0.63, 0.57],
    [0.999, 0.442, 0.631, 0.572],
    [1.002, 0.439, 0.629, 0.571],
    [0.998, 0.441, 0.633, 0.57],
    [1.001, 0.44, 0.632, 0.573],
    [0.997, 0.443, 0.63, 0.574]
])

# Only the first two coordinates vary between rows; the last six are identical.
F8_candidates = np.array([
    [0.236820, 0.160489, 0.275402, 0.810171, 0.652892, 0.267181, 0.893031, 0.762776],
    [0.266820, 0.190489, 0.275402, 0.810171, 0.652892, 0.267181, 0.893031, 0.762776],
    [0.206820, 0.130489, 0.275402, 0.810171, 0.652892, 0.267181, 0.893031, 0.762776],
    [0.236820, 0.130489, 0.275402, 0.810171, 0.652892, 0.267181, 0.893031, 0.762776],
    [0.236820, 0.190489, 0.275402, 0.810171, 0.652892, 0.267181, 0.893031, 0.762776]
])

# Mock scoring functions tuned to each function's sensitive dims

def score_F7(vec):
    """Mock F7 score: 1 minus weighted distances from fixed targets.

    Uses indices 1, 2 and 5 of ``vec`` with weights 1.5, 2 and 1.2; the
    result is rounded to six decimals.
    """
    score = 1
    for index, target, weight in ((1, 0.62, 1.5), (2, 0.89, 2), (5, 0.8, 1.2)):
        score = score - abs(vec[index] - target) * weight
    return round(score, 6)

def score_F4(vec):
    """Mock F4 score: 1 minus weighted distances from fixed targets.

    Uses indices 0, 1 and 2 of ``vec`` with weights 5, 3 and 2; index 3 is
    ignored. The result is rounded to six decimals.
    """
    remaining = 1
    remaining = remaining - abs(vec[0] - 0.999) * 5
    remaining = remaining - abs(vec[1] - 0.44) * 3
    remaining = remaining - abs(vec[2] - 0.63) * 2
    return round(remaining, 6)

def score_F8(vec):
    """Mock F8 score: 1 minus weighted distances from fixed targets.

    Uses indices 0, 1 and 7 of ``vec`` with weights 4, 4 and 1; the result is
    rounded to six decimals.
    """
    targets = ((0, 0.24, 4), (1, 0.16, 4), (7, 0.76, 1))
    score = 1
    for position, wanted, weight in targets:
        deviation = abs(vec[position] - wanted)
        score = score - deviation * weight
    return round(score, 6)

def evaluate_candidates(candidates, scoring_func, func_name):
    """Score, rank and print candidates, returning the top entry.

    Every candidate is scored with ``scoring_func`` and tagged with its
    1-based position. Entries are printed from highest to lowest score, with
    ties keeping their original order, under a ``func_name`` header.

    Returns the best ``(candidate_number, vector, score)`` tuple.
    """
    ranked = sorted(
        (
            (number, candidate, scoring_func(candidate))
            for number, candidate in enumerate(candidates, start=1)
        ),
        key=lambda entry: entry[2],
        reverse=True,
    )
    print(f"--- {func_name} ---")
    for c, v, s in ranked:
        print(f"Candidate {c}: Score={s} Vector={v}")
    best = ranked[0]
    print(f"Best candidate for {func_name}: Candidate {best[0]} with score {best[2]}\n")
    return best

# Run evaluations
# Each call prints a ranking and returns a (candidate_number, vector, score)
# tuple, so best_F7 / best_F4 / best_F8 are rebound here from the base-vector
# arrays defined in the earlier cell to tuples.
best_F7 = evaluate_candidates(F7_candidates, score_F7, "Function 7")
best_F4 = evaluate_candidates(F4_candidates, score_F4, "Function 4")
best_F8 = evaluate_candidates(F8_candidates, score_F8, "Function 8")

--- Function 7 ---
Candidate 2: Score=0.9925 Vector=[0.205 0.625 0.89  0.5   0.89  0.8  ]
Candidate 1: Score=0.990776 Vector=[0.304835 0.615316 0.890015 0.497555 0.892324 0.798193]
Candidate 9: Score=0.988 Vector=[0.3  0.62 0.89 0.5  0.9  0.81]
Candidate 5: Score=0.985 Vector=[0.32 0.61 0.89 0.5  0.9  0.8 ]
Candidate 8: Score=0.965 Vector=[0.305 0.61  0.88  0.505 0.89  0.8  ]
Candidate 7: Score=0.9605 Vector=[0.31  0.615 0.9   0.495 0.88  0.79 ]
Candidate 4: Score=0.948 Vector=[0.29 0.62 0.87 0.51 0.88 0.81]
Candidate 3: Score=0.938 Vector=[0.31 0.6  0.88 0.49 0.9  0.79]
Candidate 10: Score=0.9375 Vector=[0.299 0.613 0.87  0.498 0.89  0.79 ]
Candidate 6: Score=0.884 Vector=[0.3  0.62 0.85 0.5  0.91 0.77]
Best candidate for Function 7: Candidate 2 with score 0.9925

--- Function 4 ---
Candidate 4: Score=0.995 Vector=[1.   0.44 0.63 0.57]
Candidate 5: Score=0.992 Vector=[0.999 0.442 0.631 0.572]
Candidate 1: Score=0.990847 Vector=[0.999363 0.440618 0.632742 0.573562]
Candidate 7: Score=0

In [None]:
# Maps each function label ("F1".."F8") to a list holding one chosen input
# vector; vector length grows from 2 (F1, F2) to 8 (F8).
final_submission = {
    "F1": [
        # Insert your best known vectors for F1 here
        # Example placeholder:
        [0.999999, 0.999999]
    ],
    "F2": [
        # Best vectors for F2
        # Example placeholder:
        [0.994497, 0.105777]
    ],
    "F3": [
        # Best vectors for F3
        # Example placeholder:
        [0.992244, 0.012868, 0.880511]
    ],
    "F4": [
        [1.0, 0.44, 0.63, 0.57]  # Best candidate from recent test
    ],
    "F5": [
        # Best vectors for F5
        # Example placeholder:
        [0.991568, 0.824504, 0.580542, 0.368022]
    ],
    "F6": [
        [0.43571093, 0.43165718, 0.12732238, 0.63146916, 0.51804609]  # Best F6 vector
    ],
    "F7": [
        [0.205, 0.625, 0.89, 0.5, 0.89, 0.8]  # Best candidate from recent test
    ],
    "F8": [
        [0.23682, 0.160489, 0.275402, 0.810171, 0.652892, 0.267181, 0.893031, 0.762776]  # Best candidate
    ]
}

In [None]:
def format_vector_for_submission(vec):
    """Render a vector as the dash-separated submission string.

    Each value is clamped to the range [0, 0.999999] and written with exactly
    six decimals, e.g. ``[1.0, 0.44]`` becomes ``"0.999999-0.440000"``.

    Parameters
    ----------
    vec : iterable of numbers
        Values to format; each one must be convertible with ``float()``.

    Returns
    -------
    str
        The formatted values joined by ``"-"`` (empty string for empty input).

    Raises
    ------
    ValueError
        If a value is NaN. The previous version silently emitted ``0.999999``
        for NaN, which hid upstream parsing or optimisation failures.
    """
    import math  # local import so the function does not rely on other cells

    formatted_values = []
    for position, raw in enumerate(vec):
        value = float(raw)
        if math.isnan(value):
            raise ValueError(f"Cannot format NaN at position {position}")
        if value <= 0.0:
            # Also normalises -0.0, which would otherwise render as
            # "-0.000000" and add an extra dash to the dash-delimited string.
            value = 0.0
        elif value > 0.999999:
            # Stay just below 1 so every field starts with "0.".
            value = 0.999999
        formatted_values.append(f"{value:.6f}")
    return "-".join(formatted_values)

# Example: your best vectors for each function
# One vector per function label; the values repeat those stored in
# final_submission. F4's leading 1.0 is emitted as 0.999999 after clipping in
# format_vector_for_submission.
best_vectors = {
    "F1": [0.999999, 0.999999],
    "F2": [0.994497, 0.105777],
    "F3": [0.992244, 0.012868, 0.880511],
    "F4": [1.0, 0.44, 0.63, 0.57],
    "F5": [0.991568, 0.824504, 0.580542, 0.368022],
    "F6": [0.43571093, 0.43165718, 0.12732238, 0.63146916, 0.51804609],
    "F7": [0.205, 0.625, 0.89, 0.5, 0.89, 0.8],
    "F8": [0.23682, 0.160489, 0.275402, 0.810171, 0.652892, 0.267181, 0.893031, 0.762776],
}

# Build the submission string for every function label
formatted_submission = {
    func: format_vector_for_submission(vec)
    for func, vec in best_vectors.items()
}

# Emit one "<label>: <string>" line per function, ready to paste
for func, formatted_str in formatted_submission.items():
    print(f"{func}: {formatted_str}")

F1: 0.999999-0.999999
F2: 0.994497-0.105777
F3: 0.992244-0.012868-0.880511
F4: 0.999999-0.440000-0.630000-0.570000
F5: 0.991568-0.824504-0.580542-0.368022
F6: 0.435711-0.431657-0.127322-0.631469-0.518046
F7: 0.205000-0.625000-0.890000-0.500000-0.890000-0.800000
F8: 0.236820-0.160489-0.275402-0.810171-0.652892-0.267181-0.893031-0.762776
