In [None]:
# Install required packages
!pip install qiskit qiskit-aer qiskit-machine-learning scikit-optimize lazypredict --quiet

# === Imports ===
import warnings
warnings.filterwarnings("ignore")

import pandas as pd
import numpy as np

from sklearn.preprocessing import StandardScaler, MinMaxScaler
from sklearn.linear_model import Ridge
from sklearn.pipeline import make_pipeline
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error

# Quantum
from qiskit_aer import Aer
from qiskit.utils import algorithm_globals, QuantumInstance
from qiskit.circuit.library import ZZFeatureMap, TwoLocal
from qiskit_machine_learning.algorithms import VQC

# Bayesian Optimization
from skopt import gp_minimize
from skopt.space import Integer, Real
from skopt.utils import use_named_args

# LazyPredict (optional)
from lazypredict.Supervised import LazyRegressor

# === Load and preprocess dataset ===
# The separator is a regex, so it is written as a raw string: '\s' is not a
# valid Python string escape and the non-raw form triggers an invalid-escape
# warning on recent interpreters.
df_raw = pd.read_csv("train_FD001.txt", sep=r'\s+', header=None)
df_raw.columns = ['engine_id', 'time_in_cycles', 'op_setting_1', 'op_setting_2', 'op_setting_3'] + \
                 [f'sensor_measurement_{i}' for i in range(1, 22)]
# RUL = last recorded cycle of the engine minus the current cycle.
# The aggregation is named by string; passing the builtin `max` callable to
# groupby.transform is deprecated in pandas.
df_raw['RUL'] = df_raw.groupby('engine_id')['time_in_cycles'].transform('max') - df_raw['time_in_cycles']

# Standardize every column except the identifiers and the target, then
# re-attach those three columns unscaled.
features = df_raw.drop(columns=['engine_id', 'time_in_cycles', 'RUL'])
scaler = StandardScaler()
features_scaled = scaler.fit_transform(features)
df_scaled = pd.DataFrame(features_scaled, columns=features.columns)
df_scaled['engine_id'] = df_raw['engine_id']
df_scaled['time_in_cycles'] = df_raw['time_in_cycles']
df_scaled['RUL'] = df_raw['RUL']
df_scaled.to_csv("train_FD001_normalized.csv", index=False)

# === Interval training ===
def train_interval_models(df, num_intervals):
    """Split ``df`` into equal-width RUL intervals and fit one Ridge model per interval.

    Args:
        df: DataFrame containing 'engine_id', 'time_in_cycles' and 'RUL'
            columns; every other column is used as a model feature. The
            frame is not modified.
        num_intervals: Number of equal-width bins between the minimum and
            maximum RUL.

    Returns:
        A tuple ``(interval_groups, trained_models)`` of two equally long
        lists. ``interval_groups[i]`` is the sub-frame of one non-empty
        interval (including an 'interval_id' column); ``trained_models[i]``
        is ``('Ridge', fitted_pipeline)`` for that group, or ``(None, None)``
        when the group has fewer than 10 rows or fitting failed.
    """
    interval_edges = np.linspace(df['RUL'].min(), df['RUL'].max(), num_intervals + 1)
    # assign() builds a new frame, so the caller's DataFrame does not gain an
    # 'interval_id' column as a side effect.
    binned = df.assign(
        interval_id=pd.cut(df['RUL'], bins=interval_edges, labels=False, include_lowest=True)
    )
    # groupby skips empty bins, so a list position is not necessarily the bin id.
    interval_groups = [group for _, group in binned.groupby('interval_id')]
    trained_models = []
    for group in interval_groups:
        if len(group) < 10:
            trained_models.append((None, None))
            continue
        X = group.drop(columns=['engine_id', 'time_in_cycles', 'RUL', 'interval_id'])
        y = group['RUL']
        try:
            # Only the 80% training portion is fitted; the held-out part is
            # discarded here.
            X_train, _, y_train, _ = train_test_split(X, y, test_size=0.2, random_state=42)
            model = make_pipeline(StandardScaler(), Ridge())
            model.fit(X_train, y_train)
        except Exception as exc:
            # Best effort: a failing interval is reported and skipped, but
            # KeyboardInterrupt/SystemExit are no longer swallowed.
            print(f"Skipping interval model ({len(group)} rows): {exc}")
            trained_models.append((None, None))
        else:
            trained_models.append(('Ridge', model))
    return interval_groups, trained_models

# === Loss calculation ===
def compute_total_loss(df, num_intervals, lambda_weight, penalty_weight, cache):
    """Score how far a merged model is from two weighted per-interval models.

    The per-interval groups and models for ``num_intervals`` are taken from
    ``cache`` (and trained and stored there on a miss). A fresh Ridge pipeline
    is fitted on groups 1 and 2 combined; its predictions are compared with
    the group-1 model's predictions scaled by ``lambda_weight`` followed by
    the group-2 model's predictions scaled by ``1 - lambda_weight``.

    Args:
        df: Full dataset; only read (copied) on a cache miss.
        num_intervals: Number of RUL intervals, also the cache key.
        lambda_weight: Weight applied to the group-1 predictions.
        penalty_weight: Weight of the absolute mean-difference term.
        cache: Dict mapping ``num_intervals`` to ``(groups, models)``;
            updated in place.

    Returns:
        RMSE between the two prediction vectors plus the weighted absolute
        mean difference, or ``np.inf`` when group 1 or 2 is missing or has
        no trained model.
    """
    try:
        groups, models = cache[num_intervals]
    except KeyError:
        groups, models = train_interval_models(df.copy(), num_intervals)
        cache[num_intervals] = (groups, models)

    first_idx, second_idx = 1, 2
    if max(first_idx, second_idx) >= len(groups):
        return np.inf

    model_1 = models[first_idx][1]
    model_2 = models[second_idx][1]
    if any(m is None for m in (model_1, model_2)):
        return np.inf

    def feature_matrix(frame):
        # Everything except identifiers, target and bin label is a feature.
        return frame.drop(columns=['engine_id', 'time_in_cycles', 'RUL', 'interval_id'])

    group_1 = groups[first_idx]
    group_2 = groups[second_idx]

    # Reference model trained on both intervals together.
    merged_group = pd.concat([group_1, group_2])
    X_merged = feature_matrix(merged_group)
    merged_model = make_pipeline(StandardScaler(), Ridge())
    merged_model.fit(X_merged, merged_group['RUL'])
    merged_pred = merged_model.predict(X_merged)

    # Same row order as merged_group: group 1 first, then group 2.
    weighted_pred = np.concatenate([
        lambda_weight * model_1.predict(feature_matrix(group_1)),
        (1 - lambda_weight) * model_2.predict(feature_matrix(group_2)),
    ])

    rmse = np.sqrt(mean_squared_error(merged_pred, weighted_pred))
    mean_gap = np.abs(np.mean(merged_pred - weighted_pred))
    return rmse + penalty_weight * mean_gap

# === Bayesian Optimization ===
search_space = [
    Integer(3, 6, name='num_intervals'),
    Real(0.1, 0.9, name='lambda_weight'),
    Real(0.001, 0.1, prior='log-uniform', name='penalty_weight')
]

# Maps num_intervals -> (groups, models) so each interval count is trained once.
cache = {}

# Finite stand-in handed to the optimizer when the loss is inf/NaN. The
# Gaussian-process surrogate is fit on the observed losses, and scikit-learn's
# input validation rejects non-finite targets.
NON_FINITE_LOSS_PENALTY = 1e6


@use_named_args(search_space)
def objective(num_intervals, lambda_weight, penalty_weight):
    """Evaluate one hyper-parameter point for ``gp_minimize``.

    Prints the raw loss and returns it, except that a non-finite loss is
    replaced by ``NON_FINITE_LOSS_PENALTY`` so the optimizer keeps running.
    """
    loss = compute_total_loss(df_scaled, num_intervals, lambda_weight, penalty_weight, cache)
    print(f"Intervals: {num_intervals}, Lambda: {lambda_weight:.3f}, Penalty: {penalty_weight:.5f} => Loss: {loss:.4f}")
    if not np.isfinite(loss):
        return NON_FINITE_LOSS_PENALTY
    return loss

print("Starting Bayesian Optimization...")
# n_initial_points replaces the deprecated n_random_starts argument.
result = gp_minimize(objective, dimensions=search_space, n_calls=30, n_initial_points=10, random_state=42)

# === Quantum model training ===
def train_quantum_surrogate(X_train, y_train, X_test, y_test):
    """Train a VQC classifier on binned targets and score it as a regressor.

    ``y_train`` is discretized into 5 quantile bins; the classifier is fitted
    on those bin labels, each predicted label for ``X_test`` is mapped back to
    the midpoint of its bin, and the result is compared with ``y_test``.

    Args:
        X_train: 2-D training feature array; its column count sets the number
            of qubits of the feature map and the ansatz.
        y_train: 1-D continuous training targets.
        X_test: 2-D test feature array.
        y_test: 1-D continuous test targets, used only for scoring.

    Returns:
        RMSE between ``y_test`` and the bin-midpoint predictions.
    """
    n_classes = 5
    # A single qcut call yields both the class labels and the bin edges.
    # The test targets are deliberately not binned: their labels were never
    # used, and qcut on a small split can fail on duplicate edges.
    y_train_classes, bins = pd.qcut(y_train, n_classes, labels=False, retbins=True)
    bin_centers = (bins[:-1] + bins[1:]) / 2

    num_features = X_train.shape[1]
    feature_map = ZZFeatureMap(feature_dimension=num_features, reps=2)
    ansatz = TwoLocal(num_qubits=num_features, reps=1, rotation_blocks=['ry', 'rz'], entanglement='cz')

    # Fixed seeds for the simulator, transpiler and algorithm globals.
    algorithm_globals.random_seed = 42
    # NOTE(review): QuantumInstance and the `quantum_instance=` argument look
    # like the legacy Qiskit API - confirm the installed qiskit /
    # qiskit-machine-learning versions still provide them.
    quantum_instance = QuantumInstance(backend=Aer.get_backend('aer_simulator_statevector'), seed_simulator=42, seed_transpiler=42)
    vqc = VQC(feature_map=feature_map, ansatz=ansatz, quantum_instance=quantum_instance)
    vqc.fit(X_train, y_train_classes)

    y_pred_classes = vqc.predict(X_test)
    # Convert each predicted class back to a continuous value (bin midpoint).
    y_pred_rul = np.array([bin_centers[int(c)] for c in y_pred_classes])
    rmse = np.sqrt(mean_squared_error(y_test, y_pred_rul))
    return rmse

# === Compare with quantum surrogate ===
# Unpack the optimizer's best point once; order follows search_space.
best_num_intervals, best_lambda, best_penalty = result.x
interval_groups, trained_models = train_interval_models(df_scaled, best_num_intervals)

# Use the first interval group that has at least 50 rows, if any.
group = None
for candidate in interval_groups:
    if len(candidate) >= 50:
        group = candidate
        break

if group is None:
    print("No interval group large enough for quantum surrogate training demo.")
else:
    y = group['RUL']
    X = group.drop(columns=['engine_id', 'time_in_cycles', 'RUL', 'interval_id'])
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
    print("\nTraining quantum surrogate model...")
    # The surrogate works on plain arrays, not DataFrames/Series.
    quantum_rmse = train_quantum_surrogate(
        X_train.values,
        y_train.values,
        X_test.values,
        y_test.values,
    )
    print(f"Quantum surrogate RMSE: {quantum_rmse:.4f}")

# === Save Results ===
print("\nBest parameters:")
print(f"Intervals: {best_num_intervals}")
print(f"Lambda: {best_lambda:.4f}")
print(f"Penalty: {best_penalty:.6f}")
print(f"Lowest Loss: {result.fun:.4f}")