In [1]:
import os
import json
import numpy as np
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split, cross_val_score
from sklearn.metrics import classification_report, confusion_matrix, accuracy_score

In [2]:
SIMULATION_DIRECTORY_1 = 'data/6_qubit_reservoir'
SIMULATION_DIRECTORY_2 = 'data/6_qubit_reservoir_2'

In [3]:
reservoir_dynamics = []
entangled_labels = []

for filename in os.listdir(SIMULATION_DIRECTORY_1):
    if filename.endswith('.json'):
        filepath = os.path.join(SIMULATION_DIRECTORY_1, filename)
        with open(filepath, 'r') as file:
            json_data = json.load(file)
            reservoir_dynamics.append(json_data['reservoir_dynamics'])
            entangled_labels.append(json_data['entangled_label'])

for filename in os.listdir(SIMULATION_DIRECTORY_2):
    if filename.endswith('.json'):
        filepath = os.path.join(SIMULATION_DIRECTORY_2, filename)
        with open(filepath, 'r') as file:
            json_data = json.load(file)
            reservoir_dynamics.append(json_data['reservoir_dynamics'])
            entangled_labels.append(json_data['entangled_label'])

In [4]:
# reservoir_dynamics = np.array(reservoir_dynamics)
# entangled_labels = np.array(entangled_labels)
# # Randomly sample 
# num_samples = 2390
# random_indices = np.random.choice(len(reservoir_dynamics), num_samples, replace=False)
# reservoir_dynamics = reservoir_dynamics[random_indices]
# entangled_labels = entangled_labels[random_indices]

In [5]:
reservoir_dynamics = np.array(reservoir_dynamics)
entangled_labels = np.array(entangled_labels)
entangled_labels = entangled_labels.reshape(entangled_labels.shape[0], 1)

print(f'Shape of reservoir dyanmics is {reservoir_dynamics.shape}')
print(f'Shape of entangled labels is {entangled_labels.shape}')

Shape of reservoir dyanmics is (2390, 6, 321)
Shape of entangled labels is (2390, 1)


In [6]:
n_datapoints = reservoir_dynamics.shape[0]
n_reservoir_qubits = reservoir_dynamics.shape[1]
n_timepoints = reservoir_dynamics.shape[2]

In [7]:
########## Dimensionality calculation ############
# We have to avoid the curse of dimensionality so will go with the 
# 10:1 ratio of datapoints:features for now
# TODO: Tune this as a hyperparameter
DIMENSIONALITY_RATIO = 50

required_features = int(n_datapoints // DIMENSIONALITY_RATIO)

allowed_n_timepoints = 20

# Calculate the indices to access from the original array 
time_sample_indices = np.linspace(0, n_reservoir_qubits - 1, allowed_n_timepoints, dtype=int)

sampled_reservoir_dynamics = reservoir_dynamics[:, :, time_sample_indices]

print(f"The new sampled reservoir dynmaics shape is {sampled_reservoir_dynamics.shape}")

The new sampled reservoir dynmaics shape is (2390, 6, 20)


In [8]:
# Flatten the training data column-wise
# This ensures that all readings of each particle in the reservoir 
# at a given time point are adjacent to each other in the data.
# Transpose the last two dimensions to prepare for column-wise flattening
transponsed_sampled_reservoir_dynamics = sampled_reservoir_dynamics.transpose(0, 2, 1)  

# Flatten to (n_datapoints, n_features)
reservoir_dynamics_flattened = transponsed_sampled_reservoir_dynamics.reshape(n_datapoints, -1)

print("The training data has been flattened and is now shape", reservoir_dynamics_flattened.shape)

The training data has been flattened and is now shape (2390, 120)


In [9]:
# Train the random forest!
X_train, X_test, y_train, y_test = train_test_split(reservoir_dynamics_flattened, entangled_labels, test_size=0.2, random_state=42, shuffle=True)

### Random Forest

In [10]:
rf_classifier = RandomForestClassifier(
    random_state=42,
    max_depth=10,
    n_jobs=-1
    )

In [11]:
# Step 4: Perform cross-validation on the training data
cv_scores = cross_val_score(rf_classifier, reservoir_dynamics_flattened, entangled_labels, cv=20)

print("Cross-validation scores:", cv_scores)
print("Average cross-validation score:", cv_scores.mean())

  estimator.fit(X_train, y_train, **fit_params)
  estimator.fit(X_train, y_train, **fit_params)
  estimator.fit(X_train, y_train, **fit_params)
  estimator.fit(X_train, y_train, **fit_params)
  estimator.fit(X_train, y_train, **fit_params)
  estimator.fit(X_train, y_train, **fit_params)
  estimator.fit(X_train, y_train, **fit_params)
  estimator.fit(X_train, y_train, **fit_params)
  estimator.fit(X_train, y_train, **fit_params)
  estimator.fit(X_train, y_train, **fit_params)
  estimator.fit(X_train, y_train, **fit_params)
  estimator.fit(X_train, y_train, **fit_params)
  estimator.fit(X_train, y_train, **fit_params)
  estimator.fit(X_train, y_train, **fit_params)
  estimator.fit(X_train, y_train, **fit_params)
  estimator.fit(X_train, y_train, **fit_params)
  estimator.fit(X_train, y_train, **fit_params)
  estimator.fit(X_train, y_train, **fit_params)
  estimator.fit(X_train, y_train, **fit_params)


Cross-validation scores: [0.59166667 0.58333333 0.64166667 0.6        0.675      0.7
 0.65       0.6        0.59166667 0.69166667 0.63865546 0.68067227
 0.68067227 0.61344538 0.60504202 0.57142857 0.66386555 0.63865546
 0.64705882 0.65546218]
Average cross-validation score: 0.6359978991596638


  estimator.fit(X_train, y_train, **fit_params)


In [12]:
rf_classifier.fit(X_train, y_train)

  rf_classifier.fit(X_train, y_train)


In [13]:
y_pred_train = rf_classifier.predict(X_train)
y_pred = rf_classifier.predict(X_test)

In [14]:
# Evaluate the model
train_accuracy = accuracy_score(y_train, y_pred_train)
accuracy = accuracy_score(y_test, y_pred)
conf_matrix = confusion_matrix(y_test, y_pred)
class_report = classification_report(y_test, y_pred)

print("Train Accuracy:", train_accuracy)
print("Test Accuracy:", accuracy)
print("Confusion Matrix:\n", conf_matrix)
print("Classification Report:\n", class_report)

Train Accuracy: 0.8169456066945606
Test Accuracy: 0.6276150627615062
Confusion Matrix:
 [[152  90]
 [ 88 148]]
Classification Report:
               precision    recall  f1-score   support

         0.0       0.63      0.63      0.63       242
         1.0       0.62      0.63      0.62       236

    accuracy                           0.63       478
   macro avg       0.63      0.63      0.63       478
weighted avg       0.63      0.63      0.63       478



### SVM

In [15]:
from sklearn.svm import SVC
SVC_classifier = SVC(kernel='rbf', C=1.0)  # Adjust C and gamma for regularization

In [16]:
cv_scores = cross_val_score(SVC_classifier, reservoir_dynamics_flattened, entangled_labels, cv=20)

print("Cross-validation scores:", cv_scores)
print("Average cross-validation score:", cv_scores.mean())

  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)


Cross-validation scores: [0.6        0.55833333 0.63333333 0.66666667 0.625      0.65833333
 0.60833333 0.60833333 0.625      0.66666667 0.66386555 0.61344538
 0.69747899 0.70588235 0.62184874 0.64705882 0.62184874 0.62184874
 0.68067227 0.62184874]
Average cross-validation score: 0.6372899159663865


In [17]:
SVC_classifier.fit(X_train, y_train)

  y = column_or_1d(y, warn=True)


In [18]:
y_pred_train = SVC_classifier.predict(X_train)
y_pred = SVC_classifier.predict(X_test)

In [19]:
# Evaluate the model
train_accuracy = accuracy_score(y_train, y_pred_train)
accuracy = accuracy_score(y_test, y_pred)
conf_matrix = confusion_matrix(y_test, y_pred)
class_report = classification_report(y_test, y_pred)

print("Train Accuracy:", train_accuracy)
print("Test Accuracy:", accuracy)
print("Confusion Matrix:\n", conf_matrix)
print("Classification Report:\n", class_report)

Train Accuracy: 0.6359832635983264
Test Accuracy: 0.6359832635983264
Confusion Matrix:
 [[190  52]
 [122 114]]
Classification Report:
               precision    recall  f1-score   support

         0.0       0.61      0.79      0.69       242
         1.0       0.69      0.48      0.57       236

    accuracy                           0.64       478
   macro avg       0.65      0.63      0.63       478
weighted avg       0.65      0.64      0.63       478



### XGBoost

In [43]:
from xgboost import XGBClassifier
XGB_classifier = XGBClassifier(learning_rate=0.01, max_depth=3, n_estimators=200, subsample=0.8, colsample_bytree=0.8)

In [44]:
cv_scores = cross_val_score(XGB_classifier, reservoir_dynamics_flattened, entangled_labels, cv=20)

print("Cross-validation scores:", cv_scores)
print("Average cross-validation score:", cv_scores.mean())

Cross-validation scores: [0.6        0.58333333 0.60833333 0.65       0.73333333 0.675
 0.65833333 0.63333333 0.66666667 0.69166667 0.65546218 0.67226891
 0.73109244 0.65546218 0.63865546 0.60504202 0.67226891 0.63865546
 0.72268908 0.6302521 ]
Average cross-validation score: 0.6560924369747898


In [45]:
XGB_classifier.fit(X_train, y_train)

In [46]:
y_pred_train = XGB_classifier.predict(X_train)
y_pred = XGB_classifier.predict(X_test)

In [47]:
# Evaluate the model
train_accuracy = accuracy_score(y_train, y_pred_train)
accuracy = accuracy_score(y_test, y_pred)
conf_matrix = confusion_matrix(y_test, y_pred)
class_report = classification_report(y_test, y_pred)

print("Train Accuracy:", train_accuracy)
print("Test Accuracy:", accuracy)
print("Confusion Matrix:\n", conf_matrix)
print("Classification Report:\n", class_report)

Train Accuracy: 0.6778242677824268
Test Accuracy: 0.6380753138075314
Confusion Matrix:
 [[152  90]
 [ 83 153]]
Classification Report:
               precision    recall  f1-score   support

         0.0       0.65      0.63      0.64       242
         1.0       0.63      0.65      0.64       236

    accuracy                           0.64       478
   macro avg       0.64      0.64      0.64       478
weighted avg       0.64      0.64      0.64       478

