In [20]:
import os
import json
import numpy as np
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report, confusion_matrix, accuracy_score

In [2]:
SIMULATION_DIRECTORY = 'data/5_qubit_reservoir_temporal_dynamics'

In [3]:
reservoir_dynamics = []
entangled_labels = []

for filename in os.listdir(SIMULATION_DIRECTORY):
    if filename.endswith('.json'):
        filepath = os.path.join(SIMULATION_DIRECTORY, filename)
        with open(filepath, 'r') as file:
            json_data = json.load(file)
            reservoir_dynamics.append(json_data['reservoir_dynamics'])
            entangled_labels.append(json_data['entangled_label'])

In [10]:
reservoir_dynamics = np.array(reservoir_dynamics)
entangled_labels = np.array(entangled_labels)
entangled_labels = entangled_labels.reshape(entangled_labels.shape[0], 1)

print(f'Shape of reservoir dyanmics is {reservoir_dynamics.shape}')
print(f'Shape of entangled labels is {entangled_labels.shape}')

Shape of reservoir dyanmics is (5000, 5, 321)
Shape of entangled labels is (5000, 1)


In [11]:
n_datapoints = reservoir_dynamics.shape[0]
n_reservoir_qubits = reservoir_dynamics.shape[1]
n_timepoints = reservoir_dynamics.shape[2]

In [18]:
########## Dimensionality calculation ############
# We have to avoid the curse of dimensionality so will go with the 
# 10:1 ratio of datapoints:features for now
# TODO: Tune this as a hyperparameter
DIMENSIONALITY_RATIO = 10

required_features = int(n_datapoints // DIMENSIONALITY_RATIO)

allowed_n_timepoints = int(required_features // n_reservoir_qubits)

# Calculate the indices to access from the original array 
time_sample_indices = np.linspace(0, n_reservoir_qubits - 1, 100, dtype=int)

sampled_reservoir_dynamics = reservoir_dynamics[:, :, time_sample_indices]

print(f"The new sampled reservoir dynmaics shape is {sampled_reservoir_dynamics.shape}")

The new sampled reservoir dynmaics shape is (5000, 5, 100)


In [19]:
# Flatten the training data column-wise
# This ensures that all readings of each particle in the reservoir 
# at a given time point are adjacent to each other in the data.
# Transpose the last two dimensions to prepare for column-wise flattening
transponsed_sampled_reservoir_dynamics = sampled_reservoir_dynamics.transpose(0, 2, 1)  

# Flatten to (n_datapoints, n_features)
reservoir_dynamics_flattened = transponsed_sampled_reservoir_dynamics.reshape(5000, -1)

print("The training data has been flattened and is now shape", reservoir_dynamics_flattened.shape)

The training data has been flattened and is now shape (5000, 500)


In [21]:
# Train the random forest!
X_train, X_test, y_train, y_test = train_test_split(reservoir_dynamics_flattened, entangled_labels, test_size=0.3, random_state=42, shuffle=True)

In [26]:
rf_classifier = RandomForestClassifier(n_estimators=100, random_state=42)
rf_classifier.fit(X_train, y_train)

  rf_classifier.fit(X_train, y_train)


In [27]:
y_pred_train = rf_classifier.predict(X_train)
y_pred = rf_classifier.predict(X_test)

In [28]:
# Evaluate the model
train_accuracy = accuracy_score(y_train, y_pred_train)
accuracy = accuracy_score(y_test, y_pred)
conf_matrix = confusion_matrix(y_test, y_pred)
class_report = classification_report(y_test, y_pred)

print("Train Accuracy:", train_accuracy)
print("Test Accuracy:", accuracy)
print("Confusion Matrix:\n", conf_matrix)
print("Classification Report:\n", class_report)

Train Accuracy: 0.9994285714285714
Test Accuracy: 0.5833333333333334
Confusion Matrix:
 [[442 319]
 [306 433]]
Classification Report:
               precision    recall  f1-score   support

         0.0       0.59      0.58      0.59       761
         1.0       0.58      0.59      0.58       739

    accuracy                           0.58      1500
   macro avg       0.58      0.58      0.58      1500
weighted avg       0.58      0.58      0.58      1500

