To generate 1,000 subsets of the training set, we can use the ShuffleSplit class from Scikit-Learn. This class generates a user-defined number of independent train/test dataset splits. Here is how we can generate 1,000 subsets of the training set with 100 instances each:

In [1]:
from sklearn.datasets import make_moons
from sklearn.model_selection import train_test_split

# Step 1: Generate the moons dataset (10,000 samples, noise=0.4).
# The generator is seeded so the data is identical on every run; without a
# seed, the fixed random_state on the split below would not make the
# resulting train/test sets reproducible.
X, y = make_moons(n_samples=10000, noise=0.4, random_state=42)

# Step 2: Split the dataset into training and test sets, holding out 20% of
# the samples for testing.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

In [6]:
from sklearn.model_selection import ShuffleSplit

n_trees = 1000      # number of subsets to draw (one per tree)
n_instances = 100   # number of training instances in each subset

# ShuffleSplit sizes its *test* fold from test_size, while the code below
# keeps the *train* indices of each split. To end up with exactly
# n_instances rows per subset, the test fold must take everything else.
rs = ShuffleSplit(
    n_splits=n_trees,
    test_size=len(X_train) - n_instances,
    random_state=42,
)

# Each entry is an (X_subset, y_subset) pair built from the train indices of
# one split; the complementary test indices are not needed.
subsets = [
    (X_train[subset_index], y_train[subset_index])
    for subset_index, _ in rs.split(X_train)
]

len(subsets)

1000