<a href="https://colab.research.google.com/github/tarunprabhu45/Quantum-Text-Encoding/blob/main/quantum.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [3]:
!pip install qiskit qiskit-ibm-runtime datasets transformers scikit-learn matplotlib numpy nltk --quiet


[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m149.4/149.4 kB[0m [31m4.8 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m6.8/6.8 MB[0m [31m75.4 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m3.0/3.0 MB[0m [31m83.2 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m480.6/480.6 kB[0m [31m42.6 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m116.3/116.3 kB[0m [31m13.6 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m179.3/179.3 kB[0m [31m20.9 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m340.8/340.8 kB[0m [31m34.3 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m143.5/143.5 kB[0m [31m16.7 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━

In [2]:
import logging # import the logging module
from sklearn.decomposition import PCA
import matplotlib.pyplot as plt



In [5]:
from datasets import load_dataset
def load_common_corpus_metadata(sample_size=100):
    """
    Load metadata or a small subset of the Common Corpus dataset.
    :param sample_size: Number of samples to load for demonstration/testing purposes.
    :return: Small subset of the dataset for testing.
    """
    logging.info("Loading Common Corpus dataset metadata...")

    # Load a small subset of the dataset
    dataset = load_dataset("PleIAs/common_corpus", split=f"train[:{sample_size}]")

    logging.info(f"Loaded {len(dataset)} samples from the dataset.")
    return dataset

def preprocess_dataset_with_pca(dataset, model_name="distilbert-base-uncased", device="cpu", target_power=3):
    """
    Preprocess dataset:
    1) Extract embeddings using a Transformer model.
    2) Reduce dimensions using PCA for quantum encoding compatibility.
    """
    logging.info(f"Preprocessing dataset with model: {model_name} and PCA...")
    embedder = TransformerEmbedder(model_name=model_name, device=device)
    reducer = PCA(n_components=2 ** target_power)

    def process_text(text):
        embeddings = embedder.get_token_embeddings(text)  # Extract embeddings
        return reducer.fit_transform(embeddings)          # Apply PCA

    dataset = dataset.map(lambda batch: {"embeddings": [process_text(t) for t in batch["text"]]}, batched=True)
    return dataset

def plot_embeddings_pca(embeddings, title="PCA Embeddings"):
    """
    Visualize embeddings reduced to 2D space with PCA.
    """
    plt.figure(figsize=(8, 6))
    plt.scatter(embeddings[:, 0], embeddings[:, 1], alpha=0.7, edgecolor='k')
    plt.title(title)
    plt.xlabel("PCA Component 1")
    plt.ylabel("PCA Component 2")
    plt.grid(True)
    plt.show()

def plot_quantum_distribution(quantum_result, sample_id):
    """
    Plot the quantum probability distribution for a specific sample.
    """
    plt.figure(figsize=(10, 5))
    plt.bar(range(len(quantum_result)), quantum_result, alpha=0.7)
    plt.title(f"Quantum Probability Distribution for Sample {sample_id}")
    plt.xlabel("Quantum State Index")
    plt.ylabel("Probability")
    plt.grid(True)
    plt.show()

def classical_baseline(embeddings):
    """
    Example classical baseline function.
    Computes the average of embeddings as a simple baseline.
    """
    return np.mean(embeddings, axis=(0, 1)) if len(embeddings) else None

def compare_classical_quantum(classical_result, quantum_results):
    """
    Compare classical embeddings with quantum probability distributions.
    """
    avg_quantum = np.mean(quantum_results, axis=0)

    plt.figure(figsize=(10, 6))
    plt.plot(classical_result, label="Classical Representation", linestyle="--", marker="o")
    plt.plot(avg_quantum, label="Quantum Average Representation", linestyle="-", marker="x")
    plt.title("Classical vs Quantum Representation")
    plt.xlabel("Feature Dimension / Quantum State Index")
    plt.ylabel("Value / Probability")
    plt.legend()
    plt.grid(True)
    plt.show()

def main_pipeline_demo_with_visualizations(sample_size=10, use_ibm=False, target_power=3):
    """
    Refined pipeline with preprocessing, quantum encoding, and visualizations.
    """
    # Step 1: Load dataset
    dataset = load_common_corpus_metadata(sample_size=sample_size)

    # Step 2: Preprocess dataset with PCA
    logging.info("Preprocessing dataset with Transformers and PCA...")
    dataset = preprocess_dataset_with_pca(dataset, model_name="distilbert-base-uncased", device="cpu", target_power=target_power)

    # Step 3: Extract embeddings
    embeddings = np.vstack([sample["embeddings"] for sample in dataset])

    # Step 4: Visualize PCA-reduced embeddings
    plot_embeddings_pca(embeddings, title="PCA of Embeddings")

    # Step 5: Quantum backend and encoding
    backend = get_ibm_backend(min_qubits=5) if use_ibm else FakeAthens()
    quantum_results = process_batch_with_quantum_encoder(
        batch_embeddings=[sample["embeddings"] for sample in dataset],
        n_qubits=target_power,  # Number of qubits
        group_size=4,
        multi_level=True,
        backend=backend
    )

    # Step 6: Compute classical baseline
    classical_result = classical_baseline(embeddings)

    # Step 7: Visualize quantum results and compare
    for i, qr in enumerate(quantum_results[:3]):  # Show the first 3 samples
        if qr is not None:
            plot_quantum_distribution(qr, sample_id=i)
    compare_classical_quantum(classical_result, quantum_results)

    return quantum_results, classical_result

if __name__ == "__main__":
    logging.basicConfig(level=logging.INFO)
    quantum_dists, cls_baseline = main_pipeline_demo_with_visualizations(
        sample_size=100,
        use_ibm=False,  # Set to True if using IBM Quantum backend
        target_power=3  # Use 3 qubits (dimensionality = 2^3 = 8)
    )


Resolving data files:   0%|          | 0/10009 [00:00<?, ?it/s]

Downloading data:   0%|          | 0/10000 [00:00<?, ?files/s]

subset_33_2.parquet:   0%|          | 0.00/330M [00:00<?, ?B/s]

subset_33_3.parquet:   0%|          | 0.00/332M [00:00<?, ?B/s]

subset_33_4.parquet:   0%|          | 0.00/336M [00:00<?, ?B/s]

subset_33_5.parquet:   0%|          | 0.00/332M [00:00<?, ?B/s]

subset_33_6.parquet:   0%|          | 0.00/337M [00:00<?, ?B/s]

subset_33_7.parquet:   0%|          | 0.00/328M [00:00<?, ?B/s]

subset_33_8.parquet:   0%|          | 0.00/329M [00:00<?, ?B/s]

subset_33_9.parquet:   0%|          | 0.00/331M [00:00<?, ?B/s]

subset_34_1.parquet:   0%|          | 0.00/331M [00:00<?, ?B/s]

subset_34_10.parquet:   0%|          | 0.00/335M [00:00<?, ?B/s]

subset_34_2.parquet:   0%|          | 0.00/326M [00:00<?, ?B/s]

subset_34_3.parquet:   0%|          | 0.00/330M [00:00<?, ?B/s]

subset_34_4.parquet:   0%|          | 0.00/329M [00:00<?, ?B/s]

subset_34_5.parquet:   0%|          | 0.00/328M [00:00<?, ?B/s]

subset_34_6.parquet:   0%|          | 0.00/330M [00:00<?, ?B/s]

subset_34_7.parquet:   0%|          | 0.00/330M [00:00<?, ?B/s]

subset_34_8.parquet:   0%|          | 0.00/327M [00:00<?, ?B/s]

subset_34_9.parquet:   0%|          | 0.00/329M [00:00<?, ?B/s]

subset_35_1.parquet:   0%|          | 0.00/327M [00:00<?, ?B/s]

subset_35_10.parquet:   0%|          | 0.00/330M [00:00<?, ?B/s]

subset_35_2.parquet:   0%|          | 0.00/334M [00:00<?, ?B/s]

subset_35_3.parquet:   0%|          | 0.00/324M [00:00<?, ?B/s]

subset_35_4.parquet:   0%|          | 0.00/332M [00:00<?, ?B/s]

subset_35_5.parquet:   0%|          | 0.00/333M [00:00<?, ?B/s]

subset_35_6.parquet:   0%|          | 0.00/326M [00:00<?, ?B/s]

subset_35_7.parquet:   0%|          | 0.00/327M [00:00<?, ?B/s]

subset_35_8.parquet:   0%|          | 0.00/326M [00:00<?, ?B/s]

subset_35_9.parquet:   0%|          | 0.00/328M [00:00<?, ?B/s]

subset_36_1.parquet:   0%|          | 0.00/328M [00:00<?, ?B/s]

subset_36_10.parquet:   0%|          | 0.00/333M [00:00<?, ?B/s]

subset_36_2.parquet:   0%|          | 0.00/331M [00:00<?, ?B/s]

subset_36_3.parquet:   0%|          | 0.00/332M [00:00<?, ?B/s]

subset_36_4.parquet:   0%|          | 0.00/329M [00:00<?, ?B/s]

subset_36_5.parquet:   0%|          | 0.00/323M [00:00<?, ?B/s]

subset_36_6.parquet:   0%|          | 0.00/329M [00:00<?, ?B/s]

subset_36_7.parquet:   0%|          | 0.00/329M [00:00<?, ?B/s]

subset_36_8.parquet:   0%|          | 0.00/329M [00:00<?, ?B/s]

subset_36_9.parquet:   0%|          | 0.00/336M [00:00<?, ?B/s]

subset_37_1.parquet:   0%|          | 0.00/330M [00:00<?, ?B/s]

subset_37_10.parquet:   0%|          | 0.00/332M [00:00<?, ?B/s]

subset_37_2.parquet:   0%|          | 0.00/327M [00:00<?, ?B/s]

subset_37_3.parquet:   0%|          | 0.00/335M [00:00<?, ?B/s]

subset_37_4.parquet:   0%|          | 0.00/330M [00:00<?, ?B/s]

subset_37_5.parquet:   0%|          | 0.00/332M [00:00<?, ?B/s]

subset_37_6.parquet:   0%|          | 0.00/324M [00:00<?, ?B/s]

subset_37_7.parquet:   0%|          | 0.00/334M [00:00<?, ?B/s]

subset_37_8.parquet:   0%|          | 0.00/328M [00:00<?, ?B/s]

subset_37_9.parquet:   0%|          | 0.00/331M [00:00<?, ?B/s]

subset_38_1.parquet:   0%|          | 0.00/330M [00:00<?, ?B/s]

subset_38_10.parquet:   0%|          | 0.00/326M [00:00<?, ?B/s]

subset_38_2.parquet:   0%|          | 0.00/336M [00:00<?, ?B/s]

subset_38_3.parquet:   0%|          | 0.00/333M [00:00<?, ?B/s]

subset_38_4.parquet:   0%|          | 0.00/330M [00:00<?, ?B/s]

subset_38_5.parquet:   0%|          | 0.00/328M [00:00<?, ?B/s]

subset_38_6.parquet:   0%|          | 0.00/335M [00:00<?, ?B/s]

subset_38_7.parquet:   0%|          | 0.00/339M [00:00<?, ?B/s]

subset_38_8.parquet:   0%|          | 0.00/330M [00:00<?, ?B/s]

subset_38_9.parquet:   0%|          | 0.00/330M [00:00<?, ?B/s]

subset_39_1.parquet:   0%|          | 0.00/330M [00:00<?, ?B/s]

subset_39_10.parquet:   0%|          | 0.00/334M [00:00<?, ?B/s]

subset_39_2.parquet:   0%|          | 0.00/328M [00:00<?, ?B/s]

subset_39_3.parquet:   0%|          | 0.00/330M [00:00<?, ?B/s]

subset_39_4.parquet:   0%|          | 0.00/327M [00:00<?, ?B/s]

subset_39_5.parquet:   0%|          | 0.00/334M [00:00<?, ?B/s]

subset_39_6.parquet:   0%|          | 0.00/331M [00:00<?, ?B/s]

subset_39_7.parquet:   0%|          | 0.00/326M [00:00<?, ?B/s]

subset_39_8.parquet:   0%|          | 0.00/326M [00:00<?, ?B/s]

subset_39_9.parquet:   0%|          | 0.00/326M [00:00<?, ?B/s]

subset_3_1.parquet:   0%|          | 0.00/327M [00:00<?, ?B/s]

subset_3_10.parquet:   0%|          | 0.00/329M [00:00<?, ?B/s]

subset_3_2.parquet:   0%|          | 0.00/331M [00:00<?, ?B/s]

subset_3_3.parquet:   0%|          | 0.00/328M [00:00<?, ?B/s]

subset_3_4.parquet:   0%|          | 0.00/332M [00:00<?, ?B/s]

subset_3_5.parquet:   0%|          | 0.00/331M [00:00<?, ?B/s]

subset_3_6.parquet:   0%|          | 0.00/328M [00:00<?, ?B/s]

subset_3_7.parquet:   0%|          | 0.00/321M [00:00<?, ?B/s]

subset_3_8.parquet:   0%|          | 0.00/330M [00:00<?, ?B/s]

subset_3_9.parquet:   0%|          | 0.00/333M [00:00<?, ?B/s]

subset_40_1.parquet:   0%|          | 0.00/330M [00:00<?, ?B/s]

subset_40_10.parquet:   0%|          | 0.00/331M [00:00<?, ?B/s]

subset_40_2.parquet:   0%|          | 0.00/332M [00:00<?, ?B/s]

subset_40_3.parquet:   0%|          | 0.00/330M [00:00<?, ?B/s]

subset_40_4.parquet:   0%|          | 0.00/330M [00:00<?, ?B/s]

subset_40_5.parquet:   0%|          | 0.00/329M [00:00<?, ?B/s]

subset_40_6.parquet:   0%|          | 0.00/328M [00:00<?, ?B/s]

subset_40_7.parquet:   0%|          | 0.00/334M [00:00<?, ?B/s]

subset_40_8.parquet:   0%|          | 0.00/329M [00:00<?, ?B/s]

subset_40_9.parquet:   0%|          | 0.00/334M [00:00<?, ?B/s]

subset_41_1.parquet:   0%|          | 0.00/331M [00:00<?, ?B/s]

subset_41_10.parquet:   0%|          | 0.00/328M [00:00<?, ?B/s]

subset_41_2.parquet:   0%|          | 0.00/334M [00:00<?, ?B/s]

subset_41_3.parquet:   0%|          | 0.00/334M [00:00<?, ?B/s]

subset_41_4.parquet:   0%|          | 0.00/328M [00:00<?, ?B/s]

subset_41_5.parquet:   0%|          | 0.00/332M [00:00<?, ?B/s]

subset_41_6.parquet:   0%|          | 0.00/333M [00:00<?, ?B/s]

subset_41_7.parquet:   0%|          | 0.00/331M [00:00<?, ?B/s]

subset_41_8.parquet:   0%|          | 0.00/324M [00:00<?, ?B/s]

subset_41_9.parquet:   0%|          | 0.00/333M [00:00<?, ?B/s]

subset_42_1.parquet:   0%|          | 0.00/327M [00:00<?, ?B/s]

subset_42_10.parquet:   0%|          | 0.00/327M [00:00<?, ?B/s]

subset_42_2.parquet:   0%|          | 0.00/329M [00:00<?, ?B/s]

subset_42_3.parquet:   0%|          | 0.00/326M [00:00<?, ?B/s]

subset_42_4.parquet:   0%|          | 0.00/330M [00:00<?, ?B/s]

subset_42_5.parquet:   0%|          | 0.00/328M [00:00<?, ?B/s]

subset_42_6.parquet:   0%|          | 0.00/332M [00:00<?, ?B/s]

subset_42_7.parquet:   0%|          | 0.00/333M [00:00<?, ?B/s]

subset_42_8.parquet:   0%|          | 0.00/329M [00:00<?, ?B/s]

subset_42_9.parquet:   0%|          | 0.00/327M [00:00<?, ?B/s]

subset_43_1.parquet:   0%|          | 0.00/328M [00:00<?, ?B/s]

subset_43_10.parquet:   0%|          | 0.00/332M [00:00<?, ?B/s]

subset_43_2.parquet:   0%|          | 0.00/330M [00:00<?, ?B/s]

subset_43_3.parquet:   0%|          | 0.00/327M [00:00<?, ?B/s]

subset_43_4.parquet:   0%|          | 0.00/329M [00:00<?, ?B/s]

subset_43_5.parquet:   0%|          | 0.00/328M [00:00<?, ?B/s]

subset_43_6.parquet:   0%|          | 0.00/332M [00:00<?, ?B/s]

subset_43_7.parquet:   0%|          | 0.00/336M [00:00<?, ?B/s]

subset_43_8.parquet:   0%|          | 0.00/329M [00:00<?, ?B/s]

subset_43_9.parquet:   0%|          | 0.00/330M [00:00<?, ?B/s]

subset_44_1.parquet:   0%|          | 0.00/333M [00:00<?, ?B/s]

subset_44_10.parquet:   0%|          | 0.00/328M [00:00<?, ?B/s]

subset_44_2.parquet:   0%|          | 0.00/323M [00:00<?, ?B/s]

subset_44_3.parquet:   0%|          | 0.00/330M [00:00<?, ?B/s]

subset_44_4.parquet:   0%|          | 0.00/332M [00:00<?, ?B/s]

subset_44_5.parquet:   0%|          | 0.00/331M [00:00<?, ?B/s]

subset_44_6.parquet:   0%|          | 0.00/329M [00:00<?, ?B/s]

subset_44_7.parquet:   0%|          | 0.00/328M [00:00<?, ?B/s]

subset_44_8.parquet:   0%|          | 0.00/327M [00:00<?, ?B/s]

subset_44_9.parquet:   0%|          | 0.00/329M [00:00<?, ?B/s]

subset_45_1.parquet:   0%|          | 0.00/334M [00:00<?, ?B/s]

subset_45_10.parquet:   0%|          | 0.00/334M [00:00<?, ?B/s]

subset_45_2.parquet:   0%|          | 0.00/338M [00:00<?, ?B/s]

subset_45_3.parquet:   0%|          | 0.00/329M [00:00<?, ?B/s]

subset_45_4.parquet:   0%|          | 0.00/331M [00:00<?, ?B/s]

subset_45_5.parquet:   0%|          | 0.00/330M [00:00<?, ?B/s]

subset_45_6.parquet:   0%|          | 0.00/333M [00:00<?, ?B/s]

subset_45_7.parquet:   0%|          | 0.00/330M [00:00<?, ?B/s]

subset_45_8.parquet:   0%|          | 0.00/328M [00:00<?, ?B/s]

subset_45_9.parquet:   0%|          | 0.00/332M [00:00<?, ?B/s]

subset_46_1.parquet:   0%|          | 0.00/335M [00:00<?, ?B/s]

subset_46_10.parquet:   0%|          | 0.00/326M [00:00<?, ?B/s]

subset_46_2.parquet:   0%|          | 0.00/332M [00:00<?, ?B/s]

subset_46_3.parquet:   0%|          | 0.00/334M [00:00<?, ?B/s]

subset_46_4.parquet:   0%|          | 0.00/331M [00:00<?, ?B/s]

subset_46_5.parquet:   0%|          | 0.00/327M [00:00<?, ?B/s]

subset_46_6.parquet:   0%|          | 0.00/334M [00:00<?, ?B/s]

subset_46_7.parquet:   0%|          | 0.00/329M [00:00<?, ?B/s]

subset_46_8.parquet:   0%|          | 0.00/332M [00:00<?, ?B/s]

subset_46_9.parquet:   0%|          | 0.00/335M [00:00<?, ?B/s]

subset_47_1.parquet:   0%|          | 0.00/333M [00:00<?, ?B/s]

subset_47_10.parquet:   0%|          | 0.00/328M [00:00<?, ?B/s]

subset_47_2.parquet:   0%|          | 0.00/328M [00:00<?, ?B/s]

subset_47_3.parquet:   0%|          | 0.00/330M [00:00<?, ?B/s]

subset_47_4.parquet:   0%|          | 0.00/330M [00:00<?, ?B/s]

subset_47_5.parquet:   0%|          | 0.00/333M [00:00<?, ?B/s]

subset_47_6.parquet:   0%|          | 0.00/331M [00:00<?, ?B/s]

subset_47_7.parquet:   0%|          | 0.00/336M [00:00<?, ?B/s]

subset_47_8.parquet:   0%|          | 0.00/330M [00:00<?, ?B/s]

subset_47_9.parquet:   0%|          | 0.00/328M [00:00<?, ?B/s]

subset_48_1.parquet:   0%|          | 0.00/328M [00:00<?, ?B/s]

subset_48_10.parquet:   0%|          | 0.00/328M [00:00<?, ?B/s]

subset_48_2.parquet:   0%|          | 0.00/330M [00:00<?, ?B/s]

subset_48_3.parquet:   0%|          | 0.00/327M [00:00<?, ?B/s]

subset_48_4.parquet:   0%|          | 0.00/324M [00:00<?, ?B/s]

subset_48_5.parquet:   0%|          | 0.00/330M [00:00<?, ?B/s]

subset_48_6.parquet:   0%|          | 0.00/328M [00:00<?, ?B/s]

subset_48_7.parquet:   0%|          | 0.00/333M [00:00<?, ?B/s]

subset_48_8.parquet:   0%|          | 0.00/329M [00:00<?, ?B/s]

subset_48_9.parquet:   0%|          | 0.00/323M [00:00<?, ?B/s]

subset_49_1.parquet:   0%|          | 0.00/328M [00:00<?, ?B/s]

subset_49_10.parquet:   0%|          | 0.00/333M [00:00<?, ?B/s]

subset_49_2.parquet:   0%|          | 0.00/331M [00:00<?, ?B/s]

subset_49_3.parquet:   0%|          | 0.00/331M [00:00<?, ?B/s]

subset_49_4.parquet:   0%|          | 0.00/333M [00:00<?, ?B/s]

subset_49_5.parquet:   0%|          | 0.00/329M [00:00<?, ?B/s]

subset_49_6.parquet:   0%|          | 0.00/325M [00:00<?, ?B/s]

subset_49_7.parquet:   0%|          | 0.00/330M [00:00<?, ?B/s]

subset_49_8.parquet:   0%|          | 0.00/334M [00:00<?, ?B/s]

subset_49_9.parquet:   0%|          | 0.00/334M [00:00<?, ?B/s]

subset_4_1.parquet:   0%|          | 0.00/333M [00:00<?, ?B/s]

subset_4_10.parquet:   0%|          | 0.00/329M [00:00<?, ?B/s]

subset_4_2.parquet:   0%|          | 0.00/328M [00:00<?, ?B/s]

subset_4_3.parquet:   0%|          | 0.00/326M [00:00<?, ?B/s]

subset_4_4.parquet:   0%|          | 0.00/326M [00:00<?, ?B/s]

subset_4_5.parquet:   0%|          | 0.00/327M [00:00<?, ?B/s]

subset_4_6.parquet:   0%|          | 0.00/337M [00:00<?, ?B/s]

subset_4_7.parquet:   0%|          | 0.00/326M [00:00<?, ?B/s]

subset_4_8.parquet:   0%|          | 0.00/331M [00:00<?, ?B/s]

subset_4_9.parquet:   0%|          | 0.00/328M [00:00<?, ?B/s]

subset_50_1.parquet:   0%|          | 0.00/332M [00:00<?, ?B/s]

subset_50_10.parquet:   0%|          | 0.00/332M [00:00<?, ?B/s]

subset_50_2.parquet:   0%|          | 0.00/329M [00:00<?, ?B/s]

subset_50_3.parquet:   0%|          | 0.00/331M [00:00<?, ?B/s]

subset_50_4.parquet:   0%|          | 0.00/329M [00:00<?, ?B/s]

subset_50_5.parquet:   0%|          | 0.00/331M [00:00<?, ?B/s]

subset_50_6.parquet:   0%|          | 0.00/331M [00:00<?, ?B/s]

subset_50_7.parquet:   0%|          | 0.00/329M [00:00<?, ?B/s]

subset_50_8.parquet:   0%|          | 0.00/328M [00:00<?, ?B/s]

subset_50_9.parquet:   0%|          | 0.00/333M [00:00<?, ?B/s]

subset_51_1.parquet:   0%|          | 0.00/328M [00:00<?, ?B/s]

subset_51_10.parquet:   0%|          | 0.00/325M [00:00<?, ?B/s]

subset_51_2.parquet:   0%|          | 0.00/331M [00:00<?, ?B/s]

subset_51_3.parquet:   0%|          | 0.00/327M [00:00<?, ?B/s]

subset_51_4.parquet:   0%|          | 0.00/330M [00:00<?, ?B/s]

subset_51_5.parquet:   0%|          | 0.00/331M [00:00<?, ?B/s]

subset_51_6.parquet:   0%|          | 0.00/329M [00:00<?, ?B/s]

subset_51_7.parquet:   0%|          | 0.00/328M [00:00<?, ?B/s]

subset_51_8.parquet:   0%|          | 0.00/336M [00:00<?, ?B/s]

subset_51_9.parquet:   0%|          | 0.00/338M [00:00<?, ?B/s]

subset_52_1.parquet:   0%|          | 0.00/330M [00:00<?, ?B/s]

subset_52_10.parquet:   0%|          | 0.00/328M [00:00<?, ?B/s]

subset_52_2.parquet:   0%|          | 0.00/335M [00:00<?, ?B/s]

subset_52_3.parquet:   0%|          | 0.00/328M [00:00<?, ?B/s]

subset_52_4.parquet:   0%|          | 0.00/328M [00:00<?, ?B/s]

subset_52_5.parquet:   0%|          | 0.00/326M [00:00<?, ?B/s]

subset_52_6.parquet:   0%|          | 0.00/331M [00:00<?, ?B/s]

subset_52_7.parquet:   0%|          | 0.00/324M [00:00<?, ?B/s]

subset_52_8.parquet:   0%|          | 0.00/336M [00:00<?, ?B/s]

subset_52_9.parquet:   0%|          | 0.00/329M [00:00<?, ?B/s]

subset_53_1.parquet:   0%|          | 0.00/328M [00:00<?, ?B/s]

subset_53_10.parquet:   0%|          | 0.00/325M [00:00<?, ?B/s]

subset_53_2.parquet:   0%|          | 0.00/333M [00:00<?, ?B/s]

subset_53_3.parquet:   0%|          | 0.00/333M [00:00<?, ?B/s]

subset_53_4.parquet:   0%|          | 0.00/332M [00:00<?, ?B/s]

subset_53_5.parquet:   0%|          | 0.00/333M [00:00<?, ?B/s]

subset_53_6.parquet:   0%|          | 0.00/331M [00:00<?, ?B/s]

subset_53_7.parquet:   0%|          | 0.00/331M [00:00<?, ?B/s]

subset_53_8.parquet:   0%|          | 0.00/330M [00:00<?, ?B/s]

subset_53_9.parquet:   0%|          | 0.00/328M [00:00<?, ?B/s]

subset_54_1.parquet:   0%|          | 0.00/333M [00:00<?, ?B/s]

subset_54_10.parquet:   0%|          | 0.00/328M [00:00<?, ?B/s]

subset_54_2.parquet:   0%|          | 0.00/334M [00:00<?, ?B/s]

subset_54_3.parquet:   0%|          | 0.00/331M [00:00<?, ?B/s]

subset_54_4.parquet:   0%|          | 0.00/329M [00:00<?, ?B/s]

subset_54_5.parquet:   0%|          | 0.00/329M [00:00<?, ?B/s]

subset_54_6.parquet:   0%|          | 0.00/332M [00:00<?, ?B/s]

subset_54_7.parquet:   0%|          | 0.00/332M [00:00<?, ?B/s]

subset_54_8.parquet:   0%|          | 0.00/331M [00:00<?, ?B/s]

subset_54_9.parquet:   0%|          | 0.00/330M [00:00<?, ?B/s]

subset_55_1.parquet:   0%|          | 0.00/330M [00:00<?, ?B/s]

subset_55_10.parquet:   0%|          | 0.00/334M [00:00<?, ?B/s]

subset_55_2.parquet:   0%|          | 0.00/336M [00:00<?, ?B/s]

subset_55_3.parquet:   0%|          | 0.00/332M [00:00<?, ?B/s]

subset_55_4.parquet:   0%|          | 0.00/332M [00:00<?, ?B/s]

subset_55_5.parquet:   0%|          | 0.00/334M [00:00<?, ?B/s]

subset_55_6.parquet:   0%|          | 0.00/329M [00:00<?, ?B/s]

subset_55_7.parquet:   0%|          | 0.00/333M [00:00<?, ?B/s]

subset_55_8.parquet:   0%|          | 0.00/329M [00:00<?, ?B/s]

subset_55_9.parquet:   0%|          | 0.00/336M [00:00<?, ?B/s]

subset_56_1.parquet:   0%|          | 0.00/331M [00:00<?, ?B/s]

subset_56_10.parquet:   0%|          | 0.00/331M [00:00<?, ?B/s]

subset_56_2.parquet:   0%|          | 0.00/336M [00:00<?, ?B/s]

subset_56_3.parquet:   0%|          | 0.00/336M [00:00<?, ?B/s]

subset_56_4.parquet:   0%|          | 0.00/329M [00:00<?, ?B/s]

subset_56_5.parquet:   0%|          | 0.00/333M [00:00<?, ?B/s]

subset_56_6.parquet:   0%|          | 0.00/331M [00:00<?, ?B/s]

subset_56_7.parquet:   0%|          | 0.00/330M [00:00<?, ?B/s]

subset_56_8.parquet:   0%|          | 0.00/329M [00:00<?, ?B/s]

subset_56_9.parquet:   0%|          | 0.00/331M [00:00<?, ?B/s]

subset_57_1.parquet:   0%|          | 0.00/327M [00:00<?, ?B/s]

subset_57_10.parquet:   0%|          | 0.00/325M [00:00<?, ?B/s]

subset_57_2.parquet:   0%|          | 0.00/334M [00:00<?, ?B/s]

subset_57_3.parquet:   0%|          | 0.00/328M [00:00<?, ?B/s]

subset_57_4.parquet:   0%|          | 0.00/327M [00:00<?, ?B/s]

subset_57_5.parquet:   0%|          | 0.00/325M [00:00<?, ?B/s]

subset_57_6.parquet:   0%|          | 0.00/331M [00:00<?, ?B/s]

subset_57_7.parquet:   0%|          | 0.00/325M [00:00<?, ?B/s]

subset_57_8.parquet:   0%|          | 0.00/331M [00:00<?, ?B/s]

subset_57_9.parquet:   0%|          | 0.00/335M [00:00<?, ?B/s]

subset_58_1.parquet:   0%|          | 0.00/323M [00:00<?, ?B/s]

subset_58_10.parquet:   0%|          | 0.00/325M [00:00<?, ?B/s]

subset_58_2.parquet:   0%|          | 0.00/332M [00:00<?, ?B/s]

subset_58_3.parquet:   0%|          | 0.00/331M [00:00<?, ?B/s]

subset_58_4.parquet:   0%|          | 0.00/335M [00:00<?, ?B/s]

subset_58_5.parquet:   0%|          | 0.00/330M [00:00<?, ?B/s]

subset_58_6.parquet:   0%|          | 0.00/334M [00:00<?, ?B/s]

subset_58_7.parquet:   0%|          | 0.00/330M [00:00<?, ?B/s]

subset_58_8.parquet:   0%|          | 0.00/337M [00:00<?, ?B/s]

subset_58_9.parquet:   0%|          | 0.00/336M [00:00<?, ?B/s]

subset_59_1.parquet:   0%|          | 0.00/323M [00:00<?, ?B/s]

subset_59_10.parquet:   0%|          | 0.00/332M [00:00<?, ?B/s]

subset_59_2.parquet:   0%|          | 0.00/328M [00:00<?, ?B/s]

subset_59_3.parquet:   0%|          | 0.00/328M [00:00<?, ?B/s]

subset_59_4.parquet:   0%|          | 0.00/329M [00:00<?, ?B/s]

subset_59_5.parquet:   0%|          | 0.00/332M [00:00<?, ?B/s]

subset_59_6.parquet:   0%|          | 0.00/330M [00:00<?, ?B/s]

subset_59_7.parquet:   0%|          | 0.00/328M [00:00<?, ?B/s]

subset_59_8.parquet:   0%|          | 0.00/331M [00:00<?, ?B/s]

subset_59_9.parquet:   0%|          | 0.00/334M [00:00<?, ?B/s]

subset_5_1.parquet:   0%|          | 0.00/331M [00:00<?, ?B/s]

subset_5_10.parquet:   0%|          | 0.00/332M [00:00<?, ?B/s]

subset_5_2.parquet:   0%|          | 0.00/326M [00:00<?, ?B/s]

subset_5_3.parquet:   0%|          | 0.00/330M [00:00<?, ?B/s]

subset_5_4.parquet:   0%|          | 0.00/327M [00:00<?, ?B/s]

subset_5_5.parquet:   0%|          | 0.00/328M [00:00<?, ?B/s]

subset_5_6.parquet:   0%|          | 0.00/327M [00:00<?, ?B/s]

subset_5_7.parquet:   0%|          | 0.00/332M [00:00<?, ?B/s]

subset_5_8.parquet:   0%|          | 0.00/329M [00:00<?, ?B/s]

subset_5_9.parquet:   0%|          | 0.00/329M [00:00<?, ?B/s]

subset_60_1.parquet:   0%|          | 0.00/328M [00:00<?, ?B/s]

subset_60_10.parquet:   0%|          | 0.00/334M [00:00<?, ?B/s]

subset_60_2.parquet:   0%|          | 0.00/332M [00:00<?, ?B/s]

subset_60_3.parquet:   0%|          | 0.00/331M [00:00<?, ?B/s]

subset_60_4.parquet:   0%|          | 0.00/328M [00:00<?, ?B/s]

subset_60_5.parquet:   0%|          | 0.00/331M [00:00<?, ?B/s]

subset_60_6.parquet:   0%|          | 0.00/328M [00:00<?, ?B/s]

subset_60_7.parquet:   0%|          | 0.00/328M [00:00<?, ?B/s]

subset_60_8.parquet:   0%|          | 0.00/327M [00:00<?, ?B/s]

subset_60_9.parquet:   0%|          | 0.00/328M [00:00<?, ?B/s]

subset_61_1.parquet:   0%|          | 0.00/335M [00:00<?, ?B/s]

subset_61_10.parquet:   0%|          | 0.00/329M [00:00<?, ?B/s]

subset_61_2.parquet:   0%|          | 0.00/325M [00:00<?, ?B/s]

subset_61_3.parquet:   0%|          | 0.00/328M [00:00<?, ?B/s]

subset_61_4.parquet:   0%|          | 0.00/328M [00:00<?, ?B/s]

subset_61_5.parquet:   0%|          | 0.00/329M [00:00<?, ?B/s]

subset_61_6.parquet:   0%|          | 0.00/329M [00:00<?, ?B/s]

subset_61_7.parquet:   0%|          | 0.00/332M [00:00<?, ?B/s]

subset_61_8.parquet:   0%|          | 0.00/330M [00:00<?, ?B/s]

subset_61_9.parquet:   0%|          | 0.00/330M [00:00<?, ?B/s]

subset_62_1.parquet:   0%|          | 0.00/334M [00:00<?, ?B/s]

subset_62_10.parquet:   0%|          | 0.00/328M [00:00<?, ?B/s]

subset_62_2.parquet:   0%|          | 0.00/332M [00:00<?, ?B/s]

subset_62_3.parquet:   0%|          | 0.00/332M [00:00<?, ?B/s]

subset_62_4.parquet:   0%|          | 0.00/331M [00:00<?, ?B/s]

subset_62_5.parquet:   0%|          | 0.00/333M [00:00<?, ?B/s]

subset_62_6.parquet:   0%|          | 0.00/333M [00:00<?, ?B/s]

subset_62_7.parquet:   0%|          | 0.00/331M [00:00<?, ?B/s]

subset_62_8.parquet:   0%|          | 0.00/329M [00:00<?, ?B/s]

subset_62_9.parquet:   0%|          | 0.00/328M [00:00<?, ?B/s]

subset_63_1.parquet:   0%|          | 0.00/335M [00:00<?, ?B/s]

subset_63_10.parquet:   0%|          | 0.00/329M [00:00<?, ?B/s]

subset_63_2.parquet:   0%|          | 0.00/326M [00:00<?, ?B/s]

subset_63_3.parquet:   0%|          | 0.00/329M [00:00<?, ?B/s]

subset_63_4.parquet:   0%|          | 0.00/333M [00:00<?, ?B/s]

subset_63_5.parquet:   0%|          | 0.00/328M [00:00<?, ?B/s]

subset_63_6.parquet:   0%|          | 0.00/325M [00:00<?, ?B/s]

subset_63_7.parquet:   0%|          | 0.00/331M [00:00<?, ?B/s]

subset_63_8.parquet:   0%|          | 0.00/334M [00:00<?, ?B/s]

subset_63_9.parquet:   0%|          | 0.00/328M [00:00<?, ?B/s]

subset_64_1.parquet:   0%|          | 0.00/332M [00:00<?, ?B/s]

subset_64_10.parquet:   0%|          | 0.00/332M [00:00<?, ?B/s]

subset_64_2.parquet:   0%|          | 0.00/328M [00:00<?, ?B/s]

subset_64_3.parquet:   0%|          | 0.00/332M [00:00<?, ?B/s]

subset_64_4.parquet:   0%|          | 0.00/328M [00:00<?, ?B/s]

subset_64_5.parquet:   0%|          | 0.00/334M [00:00<?, ?B/s]

subset_64_6.parquet:   0%|          | 0.00/327M [00:00<?, ?B/s]

subset_64_7.parquet:   0%|          | 0.00/332M [00:00<?, ?B/s]

subset_64_8.parquet:   0%|          | 0.00/326M [00:00<?, ?B/s]

subset_64_9.parquet:   0%|          | 0.00/330M [00:00<?, ?B/s]

subset_65_1.parquet:   0%|          | 0.00/333M [00:00<?, ?B/s]

subset_65_10.parquet:   0%|          | 0.00/334M [00:00<?, ?B/s]

subset_65_2.parquet:   0%|          | 0.00/334M [00:00<?, ?B/s]

subset_65_3.parquet:   0%|          | 0.00/329M [00:00<?, ?B/s]

subset_65_4.parquet:   0%|          | 0.00/330M [00:00<?, ?B/s]

subset_65_5.parquet:   0%|          | 0.00/332M [00:00<?, ?B/s]

subset_65_6.parquet:   0%|          | 0.00/334M [00:00<?, ?B/s]

subset_65_7.parquet:   0%|          | 0.00/331M [00:00<?, ?B/s]

subset_65_8.parquet:   0%|          | 0.00/328M [00:00<?, ?B/s]

subset_65_9.parquet:   0%|          | 0.00/335M [00:00<?, ?B/s]

subset_66_1.parquet:   0%|          | 0.00/329M [00:00<?, ?B/s]

subset_66_10.parquet:   0%|          | 0.00/330M [00:00<?, ?B/s]

subset_66_2.parquet:   0%|          | 0.00/330M [00:00<?, ?B/s]

subset_66_3.parquet:   0%|          | 0.00/328M [00:00<?, ?B/s]

subset_66_4.parquet:   0%|          | 0.00/329M [00:00<?, ?B/s]

subset_66_5.parquet:   0%|          | 0.00/330M [00:00<?, ?B/s]

subset_66_6.parquet:   0%|          | 0.00/330M [00:00<?, ?B/s]

subset_66_7.parquet:   0%|          | 0.00/334M [00:00<?, ?B/s]

subset_66_8.parquet:   0%|          | 0.00/331M [00:00<?, ?B/s]

subset_66_9.parquet:   0%|          | 0.00/333M [00:00<?, ?B/s]

subset_67_1.parquet:   0%|          | 0.00/330M [00:00<?, ?B/s]

subset_67_10.parquet:   0%|          | 0.00/327M [00:00<?, ?B/s]

subset_67_2.parquet:   0%|          | 0.00/327M [00:00<?, ?B/s]

subset_67_3.parquet:   0%|          | 0.00/325M [00:00<?, ?B/s]

subset_67_4.parquet:   0%|          | 0.00/326M [00:00<?, ?B/s]

subset_67_5.parquet:   0%|          | 0.00/329M [00:00<?, ?B/s]

subset_67_6.parquet:   0%|          | 0.00/335M [00:00<?, ?B/s]

subset_67_7.parquet:   0%|          | 0.00/331M [00:00<?, ?B/s]

subset_67_8.parquet:   0%|          | 0.00/329M [00:00<?, ?B/s]

subset_67_9.parquet:   0%|          | 0.00/327M [00:00<?, ?B/s]

subset_68_1.parquet:   0%|          | 0.00/333M [00:00<?, ?B/s]

subset_68_10.parquet:   0%|          | 0.00/332M [00:00<?, ?B/s]

subset_68_2.parquet:   0%|          | 0.00/329M [00:00<?, ?B/s]

subset_68_3.parquet:   0%|          | 0.00/333M [00:00<?, ?B/s]

subset_68_4.parquet:   0%|          | 0.00/333M [00:00<?, ?B/s]

subset_68_5.parquet:   0%|          | 0.00/330M [00:00<?, ?B/s]

subset_68_6.parquet:   0%|          | 0.00/333M [00:00<?, ?B/s]

subset_68_7.parquet:   0%|          | 0.00/330M [00:00<?, ?B/s]

subset_68_8.parquet:   0%|          | 0.00/330M [00:00<?, ?B/s]

subset_68_9.parquet:   0%|          | 0.00/331M [00:00<?, ?B/s]

subset_69_1.parquet:   0%|          | 0.00/331M [00:00<?, ?B/s]

subset_69_10.parquet:   0%|          | 0.00/334M [00:00<?, ?B/s]

subset_69_2.parquet:   0%|          | 0.00/329M [00:00<?, ?B/s]

subset_69_3.parquet:   0%|          | 0.00/328M [00:00<?, ?B/s]

subset_69_4.parquet:   0%|          | 0.00/330M [00:00<?, ?B/s]

subset_69_5.parquet:   0%|          | 0.00/331M [00:00<?, ?B/s]

subset_69_6.parquet:   0%|          | 0.00/334M [00:00<?, ?B/s]

subset_69_7.parquet:   0%|          | 0.00/329M [00:00<?, ?B/s]

subset_69_8.parquet:   0%|          | 0.00/335M [00:00<?, ?B/s]

subset_69_9.parquet:   0%|          | 0.00/331M [00:00<?, ?B/s]

subset_6_1.parquet:   0%|          | 0.00/335M [00:00<?, ?B/s]

subset_6_10.parquet:   0%|          | 0.00/332M [00:00<?, ?B/s]

subset_6_2.parquet:   0%|          | 0.00/326M [00:00<?, ?B/s]



subset_6_3.parquet:   0%|          | 0.00/330M [00:00<?, ?B/s]



subset_6_4.parquet:   0%|          | 0.00/332M [00:00<?, ?B/s]

OSError: [Errno 28] No space left on device