In [2]:

import time

from sklearn.datasets import load_breast_cancer
from sklearn.linear_model import LogisticRegression

from dpyacl.core.stop_criteria import MaxIteration
from dpyacl.experiment.context import CrossValidationExperiment
from dpyacl.metrics import Accuracy
from dpyacl.metrics.evaluation import F1, HammingLoss
from dpyacl.oracle import SimulatedOracle
from dpyacl.scenario.scenario import PoolBasedSamplingScenario
from dpyacl.strategies.single_label import QueryMarginSampling


In [1]:

from dask.distributed import Client
# Connect to the Dask scheduler at the given address. The resulting `client`
# is passed to CrossValidationExperiment and to evaluate() in the benchmark
# cell, so this cell must be executed before that one.
# NOTE(review): the scheduler address is hard-coded to a private IP; anyone
# re-running the notebook on another cluster has to edit it here.
client = Client('tcp://192.168.2.100:8786')
# Bare trailing expression so the notebook displays the client summary.
client

0,1
Client  Scheduler: tcp://192.168.2.100:8786  Dashboard: http://192.168.2.100:8787/status,Cluster  Workers: 3  Cores: 6  Memory: 9.00 GB


In [13]:
# Benchmark: run the same cross-validated pool-based active-learning experiment
# once with multithread=False and once with multithread=True, and record how
# long each evaluate() call takes. The comparison is repeated N_REPETITIONS
# times; `iterations` ends up holding one {"sequential": s, "multithread": s}
# dict (elapsed seconds) per repetition.

N_REPETITIONS = 30          # how many times the sequential/multithread pair is timed
MAX_QUERY_ITERATIONS = 25   # value handed to the MaxIteration stopping criterion

# Key stored in each results dict -> `multithread` flag passed to evaluate().
# Insertion order is preserved, so "sequential" always runs first.
EXECUTION_MODES = {"sequential": False, "multithread": True}

iterations = []
for _ in range(N_REPETITIONS):
    # These components are built once per repetition and shared by both modes.
    ml_technique = LogisticRegression(solver='sag')
    stopping_criteria = MaxIteration(MAX_QUERY_ITERATIONS)
    query_strategy = QueryMarginSampling()
    performance_metrics = [Accuracy(), F1(average='weighted'), HammingLoss()]

    results = {}
    for item, multithread in EXECUTION_MODES.items():
        # Reload the dataset for every run so each experiment starts from its
        # own freshly loaded arrays.
        X, y = load_breast_cancer(return_X_y=True)

        # Both modes use an identically configured experiment; only the
        # `multithread` flag given to evaluate() differs.
        experiment = CrossValidationExperiment(
            client,
            X,
            y,
            scenario_type=PoolBasedSamplingScenario,
            ml_technique=ml_technique,
            performance_metrics=performance_metrics,
            query_strategy=query_strategy,
            oracle=SimulatedOracle(labels=y),
            stopping_criteria=stopping_criteria,
            self_partition=True,
            kfolds=5,
            test_ratio=0.3,
            initial_label_rate=0.05,
            all_class=True,
            rebalance=True,
            batch_size=1
        )

        # Only evaluate() is timed; dataset loading and experiment
        # construction are excluded. perf_counter() is monotonic, so the
        # measurement is not disturbed by system clock adjustments.
        start_time = time.perf_counter()
        experiment.evaluate(client=client, multithread=multithread, verbose=True)
        results[item] = time.perf_counter() - start_time

    iterations.append(results)



  label_pred = self._ml_technique.predict(da.rechunk(self._X[self._test_idx, :]))
  value = delayed(metric.compute(y_true=self._Y[self._test_idx], y_pred=label_pred))
  value = delayed(metric.compute(y_true=self._Y[self._test_idx], y_pred=label_pred))
  value = delayed(metric.compute(y_true=self._Y[self._test_idx], y_pred=label_pred))
  label_pred = self._ml_technique.predict(da.rechunk(self._X[self._test_idx, :]))
  value = delayed(metric.compute(y_true=self._Y[self._test_idx], y_pred=label_pred))
  value = delayed(metric.compute(y_true=self._Y[self._test_idx], y_pred=label_pred))
  value = delayed(metric.compute(y_true=self._Y[self._test_idx], y_pred=label_pred))
  label_pred = self._ml_technique.predict(da.rechunk(self._X[self._test_idx, :]))
  value = delayed(metric.compute(y_true=self._Y[self._test_idx], y_pred=label_pred))
  value = delayed(metric.compute(y_true=self._Y[self._test_idx], y_pred=label_pred))
  value = delayed(metric.compute(y_true=self._Y[self._test_idx], y_pred=la

Label: [0], Cost: [1]

| round | initially labeled data | number of queries | cost | accuracy_score: |  f1_score:   | hamming_loss: |
|   0   |    35 (5.00% of all)   |         1         |  0   |   0.570 ± 0.00  | 0.569 ± 0.00 |  0.430 ± 0.00 |Label: [0], Cost: [1]
|   0   |    35 (5.00% of all)   |         2         |  0   |   0.575 ± 0.01  | 0.574 ± 0.00 |  0.425 ± 0.01 |Label: [0], Cost: [1]
|   0   |    35 (5.00% of all)   |         3         |  0   |   0.576 ± 0.00  | 0.574 ± 0.00 |  0.424 ± 0.00 |Label: [1], Cost: [1]
|   0   |    35 (5.00% of all)   |         4         |  0   |   0.576 ± 0.00  | 0.575 ± 0.00 |  0.424 ± 0.00 |Label: [1], Cost: [1]
|   0   |    35 (5.00% of all)   |         5         |  0   |   0.575 ± 0.00  | 0.574 ± 0.00 |  0.425 ± 0.00 |Label: [0], Cost: [1]
|   0   |    35 (5.00% of all)   |         6         |  0   |   0.574 ± 0.00  | 0.573 ± 0.00 |  0.426 ± 0.00 |Label: [0], Cost: [1]
|   0   |    35 (5.00% of all)   |         7         |  0   |   0.574 ± 0.

In [14]:
# Dump the raw timings collected by the benchmark cell: one dict per
# repetition, keyed by execution mode ("sequential" / "multithread"), with the
# elapsed time of evaluate() in seconds as the value.
print(iterations)

[{'sequential': 269.9536166191101, 'multithread': 71.43061804771423}, {'sequential': 276.42609691619873, 'multithread': 90.49321675300598}, {'sequential': 285.89510440826416, 'multithread': 64.82392835617065}, {'sequential': 288.82084703445435, 'multithread': 81.95073795318604}, {'sequential': 288.6633687019348, 'multithread': 91.96501731872559}, {'sequential': 286.99408984184265, 'multithread': 93.45700263977051}, {'sequential': 295.03978872299194, 'multithread': 111.25710940361023}, {'sequential': 289.3507270812988, 'multithread': 105.24348258972168}, {'sequential': 291.99275946617126, 'multithread': 74.48514938354492}, {'sequential': 292.4395639896393, 'multithread': 49.70874619483948}, {'sequential': 294.6522171497345, 'multithread': 82.28424882888794}, {'sequential': 292.9196720123291, 'multithread': 87.26326417922974}, {'sequential': 292.1298477649689, 'multithread': 94.92160677909851}, {'sequential': 276.5257089138031, 'multithread': 74.15293097496033}, {'sequential': 281.697107