In [None]:
import numpy as np
from sklearn.ensemble import RandomForestRegressor
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, MinMaxScaler
from sklearn.utils import shuffle
from sklearn.metrics import r2_score
from qsharp.estimator import EstimatorParams, ErrorBudgetPartition, LogicalCounts
from qsharp.interop.qiskit import estimate, ResourceEstimatorBackend
import joblib
from sklearn.model_selection import cross_val_score
from sklearn.model_selection import GridSearchCV

### Load the Data, Train a RandomForest Model

In [None]:
# Build one reproducible 75/25 row split and write the held-out (test) rows of
# each dataset to disk. The same row indices are applied to every file, so the
# saved test sets refer to the same row positions in each dataset.
SPLIT_SEED = 142
TRAIN_FRACTION = 0.75

# (input file, output file for test features, output file for test targets)
DATASET_FILES = [
    ('data_1000_zero_point_one_percent_p.npy',
     'X_test_zero_point_one_percent.npy',
     'Y_test_zero_point_one_percent.npy'),
    ('data_1000_01_p.npy',
     'X_test_one_percent.npy',
     'Y_test_one_percent.npy'),
    ('data_1000_ten_percent_p.npy',
     'X_test_ten_percent.npy',
     'Y_test_ten_percent.npy'),
]

indices = None
for data_path, x_test_path, y_test_path in DATASET_FILES:
    data = np.load(data_path)

    # The last three columns are the targets; everything before them is a feature.
    X = data[:, :-3]
    Y = data[:, -3:]

    if indices is None:
        # Generate the random row permutation once, reproducibly, from the
        # first dataset's row count.
        np.random.seed(SPLIT_SEED)
        indices = np.random.permutation(len(X))

        # Split indices for training and testing sets
        train_size = int(TRAIN_FRACTION * len(X))
        train_indices = indices[:train_size]
        test_indices = indices[train_size:]
        print(test_indices[:3])
    elif len(X) != len(indices):
        # Reusing the permutation on a file with a different number of rows
        # would either index out of bounds or silently ignore some rows.
        raise ValueError(
            f"{data_path} has {len(X)} rows but the shared split was built "
            f"for {len(indices)} rows"
        )

    # Split the data into training and testing sets using the shared indices.
    # The training arrays are not written to disk; they are kept only so the
    # notebook namespace matches what later cells may expect.
    X_train, X_test = X[train_indices], X[test_indices]
    Y_train, Y_test = Y[train_indices], Y[test_indices]

    np.save(x_test_path, X_test)
    np.save(y_test_path, Y_test)

In [None]:
# Hyperparameter search for a random forest on the demonstration dataset.
data = np.load('demonstration_data_ten_percent.npy')
print(np.shape(data))
print(data[0])

# The last three columns are the targets; everything before them is a feature.
X = data[:, :-3]
Y = data[:, -3:]

# Seed the forest so that cross-validation scores, and therefore the selected
# hyperparameters, are the same on every Restart & Run All. GridSearchCV clones
# this estimator for each candidate, so the seed carries over to every fit.
model = RandomForestRegressor(random_state=142)
param_grid = {
    'n_estimators': [50, 100, 200],
    'max_depth': [None, 5, 10, 15, 20],
    'min_samples_split': [2, 5, 10, 20],
    'min_samples_leaf': [1, 2, 4, 8],
    'bootstrap': [True, False],
}

# 3 * 5 * 4 * 4 * 2 = 480 candidates, each scored with 4-fold CV on R^2.
# NOTE(review): the search is fit on every row of the file, including rows
# that a later cell holds out as a test split -- confirm this is intended.
grid_search = GridSearchCV(
    estimator=model,
    param_grid=param_grid,
    cv=4,
    scoring='r2',
    n_jobs=-1,
    verbose=1,
)
grid_search.fit(X, Y)

print("Best parameters found: ", grid_search.best_params_)
best_params = grid_search.best_params_
# With GridSearchCV's default refit=True, best_estimator_ has been refit on
# the whole of X, Y using the best parameters.
model = grid_search.best_estimator_

In [None]:
# Refit a random forest with the selected hyperparameters on a reproducible
# 75% training split, and save the remaining 25% as held-out test arrays.
data = np.load('demonstration_data_ten_percent.npy')

# The last three columns are the targets; everything before them is a feature.
X = data[:, :-3]
Y = data[:, -3:]

# `best_params` is produced by the grid-search cell. A default seed is merged
# in underneath it so the fitted forest is reproducible; if best_params ever
# carries its own random_state, that value takes precedence.
new_model = RandomForestRegressor(**{'random_state': 142, **best_params})

# Generate random indices reproducibly
np.random.seed(142)
indices = np.random.permutation(len(X))

# Split indices for training and testing sets
train_size = int(0.75 * len(X))
train_indices = indices[:train_size]
test_indices = indices[train_size:]
print(test_indices[:3])
print(len(test_indices))

# Split the data into training and testing sets using the generated indices
X_train, X_test = X[train_indices], X[test_indices]
Y_train, Y_test = Y[train_indices], Y[test_indices]

# Only the training rows are seen by new_model.
new_model.fit(X_train, Y_train)

np.save('demonstration_data_X_test_ten_percent.npy', X_test)
np.save('demonstration_data_Y_test_ten_percent.npy', Y_test)


In [None]:
# Persist the estimator that was fit on the training split only (`new_model`).
# `model` is the grid search's best_estimator_, which comes from a search fit on
# the full dataset, so it has already seen the rows saved as the held-out test
# files; evaluating it on those files would give optimistic scores.
# NOTE(review): if shipping a model trained on all rows is actually intended,
# dump `model` instead and do not evaluate it on the saved test files.
joblib.dump(new_model, 'model_1000_ten_percent_p.pkl')