<a href="https://colab.research.google.com/github/aryanpolakhare/quantum_ml/blob/main/EnsembleQRC.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
!pip install numpy pandas yfinance pennylane pennylane-lightning scikit-learn tensorflow matplotlib


Collecting pennylane
  Downloading PennyLane-0.40.0-py3-none-any.whl.metadata (10 kB)
Collecting pennylane-lightning
  Downloading PennyLane_Lightning-0.40.0-cp311-cp311-manylinux_2_28_x86_64.whl.metadata (27 kB)
Collecting rustworkx>=0.14.0 (from pennylane)
  Downloading rustworkx-0.16.0-cp39-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (10 kB)
Collecting tomlkit (from pennylane)
  Downloading tomlkit-0.13.2-py3-none-any.whl.metadata (2.7 kB)
Collecting appdirs (from pennylane)
  Downloading appdirs-1.4.4-py2.py3-none-any.whl.metadata (9.0 kB)
Collecting autoray>=0.6.11 (from pennylane)
  Downloading autoray-0.7.0-py3-none-any.whl.metadata (5.8 kB)
Collecting diastatic-malt (from pennylane)
  Downloading diastatic_malt-2.15.2-py3-none-any.whl.metadata (2.6 kB)
Collecting scipy-openblas32>=0.3.26 (from pennylane-lightning)
  Downloading scipy_openblas32-0.3.29.0.0-py3-none-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (56 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━

In [None]:
import numpy as np
import pandas as pd
import yfinance as yf
import pennylane as qml
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import accuracy_score, roc_auc_score, classification_report
from sklearn.linear_model import LogisticRegression
import matplotlib.pyplot as plt
from datetime import datetime, timedelta

class QuantumReservoir:
    def __init__(self, n_qubits=6, n_reservoir_neurons=32, connectivity=0.7, seed=42):
        self.n_qubits = n_qubits
        self.n_reservoir_neurons = n_reservoir_neurons
        self.connectivity = connectivity
        self.dev = qml.device("default.qubit", wires=n_qubits)

        np.random.seed(seed)
        self.reservoir_weights = np.random.normal(
            0, 1/np.sqrt(n_qubits),
            size=(n_reservoir_neurons, n_qubits)
        ) * (np.random.rand(n_reservoir_neurons, n_qubits) < connectivity)

    def quantum_circuit(self, inputs):
        @qml.qnode(self.dev)
        def circuit(x):
            # Input encoding
            for i in range(min(len(x), self.n_qubits)):
                qml.RY(x[i] * np.pi, wires=i)
                qml.RZ(x[i] * np.pi / 2, wires=i)

            # Entanglement layers
            for _ in range(2):
                # All-to-all entanglement
                for i in range(self.n_qubits):
                    for j in range(i + 1, self.n_qubits):
                        qml.CNOT(wires=[i, j])
                        qml.RZ(np.pi / 4, wires=j)

                # Single qubit rotations
                for i in range(self.n_qubits):
                    qml.Hadamard(wires=i)
                    qml.RY(np.pi / 2, wires=i)

            # Measurements
            return [qml.expval(qml.PauliZ(i)) for i in range(self.n_qubits)]

        return circuit(inputs)

    def process_input(self, X):
        reservoir_states = []
        for sample in X:
            projected = np.tanh(np.dot(self.reservoir_weights, sample[:self.n_qubits]))
            quantum_state = self.quantum_circuit(projected)
            reservoir_states.append(quantum_state)
        return np.array(reservoir_states)

class StockPredictor:
    def __init__(self, ticker="AAPL", lookback_days=30):
        self.ticker = ticker
        self.lookback_days = lookback_days
        self.scaler = StandardScaler()

    def _calculate_technical_indicators(self, df):
        # Returns
        df['Returns'] = df['Close'].pct_change()

        # Moving averages
        for window in [5, 10, 20, 50]:
            df[f'SMA_{window}'] = df['Close'].rolling(window=window).mean()
            df[f'EMA_{window}'] = df['Close'].ewm(span=window).mean()

        # Volatility
        df['Daily_Std'] = df['Returns'].rolling(window=20).std()

        # Volume
        df['Volume_SMA20'] = df['Volume'].rolling(window=20).mean()
        df['Volume_Change'] = df['Volume'].pct_change()

        # Momentum
        df['ROC'] = df['Close'].pct_change(periods=10)

        # Clean NaN values
        return df.dropna()

    def prepare_data(self, start_date, end_date):
        # Get data
        stock = yf.Ticker(self.ticker)
        df = stock.history(start=start_date, end=end_date)

        if df.empty:
            raise ValueError(f"No data found for {self.ticker}")

        # Calculate indicators
        df = self._calculate_technical_indicators(df)

        # Prepare features
        feature_columns = ['Returns', 'SMA_5', 'SMA_20', 'Daily_Std',
                         'Volume_SMA20', 'ROC', 'Volume_Change']
        X = df[feature_columns].values

        # Create labels (1 if price goes up next day, 0 otherwise)
        y = (df['Returns'].shift(-1) > 0).astype(int).values[:-1]
        X = X[:-1]

        # Scale features
        X_scaled = self.scaler.fit_transform(X)

        return X_scaled, y

class QuantumEnsembleTrader:
    def __init__(self, ticker="AAPL", n_members=5, n_qubits=6):
        self.ticker = ticker
        self.n_members = n_members
        self.n_qubits = n_qubits
        self.stock_predictor = StockPredictor(ticker)
        self.ensemble = []
        self.readout_layers = []

    def train(self, start_date, end_date, return_metrics=True):
        # Prepare data
        X, y = self.stock_predictor.prepare_data(start_date, end_date)

        # Split data
        train_size = int(0.8 * len(X))
        X_train, X_test = X[:train_size], X[train_size:]
        y_train, y_test = y[:train_size], y[train_size:]

        # Train ensemble
        predictions = []
        for i in range(self.n_members):
            print(f"Training ensemble member {i+1}/{self.n_members}")

            # Create and add reservoir
            reservoir = QuantumReservoir(
                n_qubits=self.n_qubits,
                seed=42 + i
            )
            self.ensemble.append(reservoir)

            # Process data through reservoir
            X_train_reservoir = reservoir.process_input(X_train)
            X_test_reservoir = reservoir.process_input(X_test)

            # Train readout layer
            readout = LogisticRegression(max_iter=1000)
            readout.fit(X_train_reservoir, y_train)
            self.readout_layers.append(readout)

            # Get predictions
            pred = readout.predict_proba(X_test_reservoir)[:, 1]
            predictions.append(pred)

        # Combine predictions
        ensemble_pred = np.mean(predictions, axis=0)
        y_pred = (ensemble_pred > 0.5).astype(int)

        # Calculate metrics
        metrics = {
            'accuracy': accuracy_score(y_test, y_pred),
            'roc_auc': roc_auc_score(y_test, y_pred),
            'report': classification_report(y_test, y_pred),
            'report_dict': classification_report(y_test, y_pred, output_dict=True)
        }

        # Print results
        print("\nEnsemble Results:")
        print(f"Accuracy: {metrics['accuracy']:.4f}")
        print(f"ROC-AUC: {metrics['roc_auc']:.4f}")
        print("\nClassification Report:")
        print(metrics['report'])

        if return_metrics:
            return metrics

def analyze_ensemble_sizes(start_date="2022-01-01", end_date="2024-01-01", max_ensemble=13):
    results = {
        'ensemble_sizes': [],
        'accuracy': [],
        'roc_auc': [],
        'precision_0': [],
        'recall_0': [],
        'precision_1': [],
        'recall_1': []
    }

    print("\nTesting ensemble sizes from 1 to", max_ensemble)
    print("-" * 50)

    for n_members in range(1, max_ensemble + 1):
        print(f"\nTesting ensemble size: {n_members}")

        trader = QuantumEnsembleTrader(
            ticker="AAPL",
            n_members=n_members,
            n_qubits=6
        )

        try:
            metrics = trader.train(start_date, end_date, return_metrics=True)

            results['ensemble_sizes'].append(n_members)
            results['accuracy'].append(metrics['accuracy'])
            results['roc_auc'].append(metrics['roc_auc'])

            report_dict = metrics['report_dict']
            results['precision_0'].append(report_dict['0']['precision'])
            results['recall_0'].append(report_dict['0']['recall'])
            results['precision_1'].append(report_dict['1']['precision'])
            results['recall_1'].append(report_dict['1']['recall'])

        except Exception as e:
            print(f"Error with ensemble size {n_members}: {str(e)}")
            continue

    return results

def plot_ensemble_results(results):
    plt.figure(figsize=(15, 10))

    # Overall Performance
    plt.subplot(2, 1, 1)
    plt.plot(results['ensemble_sizes'], results['accuracy'], 'b-', label='Accuracy', marker='o')
    plt.plot(results['ensemble_sizes'], results['roc_auc'], 'r-', label='ROC-AUC', marker='s')
    plt.xlabel('Ensemble Size')
    plt.ylabel('Score')
    plt.title('Overall Performance vs Ensemble Size')
    plt.grid(True)
    plt.legend()

    # Class-wise Performance
    plt.subplot(2, 1, 2)
    plt.plot(results['ensemble_sizes'], results['precision_0'], 'g-', label='Precision (Class 0)', marker='o')
    plt.plot(results['ensemble_sizes'], results['recall_0'], 'g--', label='Recall (Class 0)', marker='s')
    plt.plot(results['ensemble_sizes'], results['precision_1'], 'b-', label='Precision (Class 1)', marker='o')
    plt.plot(results['ensemble_sizes'], results['recall_1'], 'b--', label='Recall (Class 1)', marker='s')
    plt.xlabel('Ensemble Size')
    plt.ylabel('Score')
    plt.title('Class-wise Performance vs Ensemble Size')
    plt.grid(True)
    plt.legend()

    plt.tight_layout()
    plt.savefig('ensemble_analysis.png')
    plt.close()

def main():
    # Parameters
    START_DATE = "2022-01-01"
    END_DATE = "2024-01-01"
    MAX_ENSEMBLE = 13

    # Run analysis
    print("Starting ensemble size analysis...")
    results = analyze_ensemble_sizes(START_DATE, END_DATE, MAX_ENSEMBLE)

    # Plot results
    plot_ensemble_results(results)

    # Find best configurations
    best_acc_idx = np.argmax(results['accuracy'])
    best_roc_idx = np.argmax(results['roc_auc'])

    print("\nBest Configurations:")
    print("=" * 50)
    print(f"\nBest Accuracy: {results['accuracy'][best_acc_idx]:.4f}")
    print(f"Achieved with ensemble size: {results['ensemble_sizes'][best_acc_idx]}")
    print(f"\nBest ROC-AUC: {results['roc_auc'][best_roc_idx]:.4f}")
    print(f"Achieved with ensemble size: {results['ensemble_sizes'][best_roc_idx]}")

if __name__ == "__main__":
    main()

Starting ensemble size analysis...

Testing ensemble sizes from 1 to 13
--------------------------------------------------

Testing ensemble size: 1
Training ensemble member 1/1

Ensemble Results:
Accuracy: 0.5824
ROC-AUC: 0.5223

Classification Report:
              precision    recall  f1-score   support

           0       0.50      0.16      0.24        38
           1       0.59      0.89      0.71        53

    accuracy                           0.58        91
   macro avg       0.55      0.52      0.48        91
weighted avg       0.56      0.58      0.51        91


Testing ensemble size: 2
Training ensemble member 1/2
Training ensemble member 2/2

Ensemble Results:
Accuracy: 0.5495
ROC-AUC: 0.4829

Classification Report:
              precision    recall  f1-score   support

           0       0.33      0.08      0.13        38
           1       0.57      0.89      0.70        53

    accuracy                           0.55        91
   macro avg       0.45      0.48      0.