In [None]:
import pandas as pd
import uuid
import numpy as np
import os
from multiprocessing import Pool
from pathlib import Path
from collections.abc import Iterable

class SampleGenerator:
    """Write synthetic per-client class-score samples to parquet files.

    Every generated frame has an ``id_client`` column of random UUID4 strings
    plus ``num_classes`` columns named ``class_0`` .. ``class_<num_classes-1>``
    filled with uniform random floats in ``[0, 1)``.

    Parameters
    ----------
    batch_size:
        Maximum number of rows per file written by ``generate_parallel``.
        Must be a positive integer.
    num_classes:
        Number of ``class_<i>`` columns in every frame.
    path:
        Output directory. It is created, together with any missing parent
        directories, when the generator is constructed.

    Raises
    ------
    ValueError
        If ``batch_size`` is not positive.
    """

    def __init__(self,
                 batch_size: int,
                 num_classes: int,
                 path: str) -> None:

        # A zero batch size would divide by zero later and a negative one
        # would silently produce no batches at all, so reject both up front.
        if batch_size <= 0:
            raise ValueError(
                f"batch_size must be a positive integer, got {batch_size!r}"
            )

        self.batch_size = batch_size
        self.num_classes = num_classes
        self.path = path

        # parents=True so a nested output path such as "out/run1" also works.
        Path(self.path).mkdir(parents=True, exist_ok=True)

    def _build_batches(self, num_rows) -> Iterable[int]:
        """Yield per-batch row counts that add up to ``num_rows``.

        All batches have ``batch_size`` rows except possibly the last one,
        which holds the remainder. Nothing is yielded for ``num_rows == 0``.

        Raises
        ------
        ValueError
            If ``num_rows`` is negative (raised when iteration starts).
        """
        if num_rows < 0:
            raise ValueError(f"num_rows must be non-negative, got {num_rows!r}")

        full_batches, remainder = divmod(num_rows, self.batch_size)

        for _ in range(full_batches):
            yield self.batch_size

        if remainder > 0:
            yield remainder

    def generate(self, num_rows: int) -> None:
        """Build one frame of ``num_rows`` rows and write it as a parquet file.

        The file is written to ``<path>/<num_rows>-<uuid4>.parquet``; the
        random suffix keeps repeated or concurrent calls from overwriting
        each other. Nothing is returned.
        """
        # Use a fresh, OS-entropy-seeded generator per call instead of the
        # global ``np.random`` state: forked pool workers inherit identical
        # copies of the global state and would otherwise emit the same values.
        rng = np.random.default_rng()

        columns = {"id_client": [str(uuid.uuid4()) for _ in range(num_rows)]}
        for c in range(self.num_classes):
            columns[f"class_{c}"] = rng.uniform(size=num_rows)

        df = pd.DataFrame(columns)

        df.to_parquet(f"{self.path}/{num_rows}-{uuid.uuid4()}.parquet")

    def generate_parallel(self,
                          num_rows: int) -> None:
        """Write ``num_rows`` rows as several parquet files using a process pool.

        The rows are split into batches of at most ``batch_size`` rows and
        each batch is handed to ``generate`` in a worker process, so one file
        is produced per batch. Nothing is returned.

        NOTE(review): with the ``spawn`` start method the workers must be able
        to import this class; confirm that holds when it is defined in a
        notebook cell rather than in a module.
        """
        batches = list(self._build_batches(num_rows))

        # Nothing to write: avoid starting a pool for no work.
        if not batches:
            return

        # Never start more workers than there are batches to process.
        workers = min(os.cpu_count() or 1, len(batches))

        with Pool(workers) as p:
            p.map(self.generate, batches)

In [None]:
# Benchmark configuration.
NUM_ROWS = 100_000    # total rows generated per timed call
BATCH_SIZE = 5_000    # rows per parquet file in the parallel path
NUM_CLASSES = 10      # number of class_<i> columns per frame

# Constructing the generator also creates the relative "samples" output directory.
generator = SampleGenerator(
    batch_size=BATCH_SIZE,
    num_classes=NUM_CLASSES,
    path="samples",
)

In [None]:
# Time the multi-process path: NUM_ROWS rows are split into batches and each
# batch is written to its own parquet file by a pool worker.
# %timeit repeats the call, and every repetition writes new files to the
# generator's output directory.
%timeit generator.generate_parallel(NUM_ROWS)

In [None]:
# Time the single-process path: all NUM_ROWS rows are built in one frame and
# written to a single parquet file per call.
# %timeit repeats the call, and every repetition writes another file to the
# generator's output directory.
%timeit generator.generate(NUM_ROWS)