In [None]:
%pip install labtech

In [None]:
import labtech as lt
from sklearn.base import clone
from sklearn.datasets import fetch_20newsgroups
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.ensemble import RandomForestClassifier
from sklearn.naive_bayes import ComplementNB
from sklearn.metrics import accuracy_score

In [None]:
class ClassifierTask:
    """Marker type for tasks whose result is a classifier.

    It carries no behaviour of its own; it is used as the annotation of
    ``ClassificationTask.classifier_task``.
    """

@lt.task
class RandomForestClassifierTask(ClassifierTask):
    """Task that configures a random forest classifier.

    Subclasses ``ClassifierTask`` so that instances match the
    ``classifier_task: ClassifierTask`` annotation on ``ClassificationTask``,
    in the same way the data tasks subclass ``DataTask``.

    The result is an unfitted estimator; ``ClassificationTask`` clones it
    before fitting.
    """
    # Forwarded to RandomForestClassifier as ``max_leaf_nodes``.
    leaf_max: int

    def run(self):
        """Return an unfitted ``RandomForestClassifier`` for ``leaf_max``."""
        # random_state is pinned so repeated runs build the same forest.
        return RandomForestClassifier(
            max_leaf_nodes=self.leaf_max,
            random_state=1,
        )

@lt.task
class NaiveBayesTask(ClassifierTask):
    """Task that configures a Complement Naive Bayes classifier.

    Subclasses ``ClassifierTask`` so that instances match the
    ``classifier_task: ClassifierTask`` annotation on ``ClassificationTask``.

    The result is an unfitted estimator; ``ClassificationTask`` clones it
    before fitting.
    """
    # Smoothing parameter forwarded to ComplementNB. The default of 1.0 is
    # ComplementNB's own default, so ``NaiveBayesTask()`` yields the same
    # classifier as a bare ``ComplementNB()``.
    alpha: float = 1.0

    def run(self):
        """Return an unfitted ``ComplementNB`` using this task's ``alpha``."""
        # Pass alpha through: previously the parameter was declared but
        # ignored, so every alpha value produced the same classifier.
        return ComplementNB(alpha=self.alpha)

class DataTask:
    """Marker base class for tasks that prepare the dataset.

    It carries no behaviour of its own; ``BowTask`` and ``EmbeddingsTask``
    subclass it, and it is used as the annotation of
    ``ClassificationTask.data_task``.
    """

@lt.task
class BowTask(DataTask):
    """Data task producing binary bag-of-words features for 20 newsgroups."""

    def run(self):
        """Fetch both dataset splits and vectorise their documents.

        Returns:
            A tuple ``(train_features, test_features, train_targets,
            test_targets)``.
        """
        splits = {
            subset: fetch_20newsgroups(subset=subset)
            for subset in ('train', 'test')
        }

        # binary=True records presence/absence of a term rather than counts.
        encoder = CountVectorizer(binary=True)
        # The vocabulary is learnt from the training split only and then
        # reused unchanged for the test split.
        train_features = encoder.fit_transform(splits['train'].data)
        test_features = encoder.transform(splits['test'].data)

        return (
            train_features,
            test_features,
            splits['train'].target,
            splits['test'].target,
        )

@lt.task
class EmbeddingsTask(DataTask):
    """Data task producing document features for 20 newsgroups.

    NOTE(review): despite the name, no embedding model is applied here. The
    features are the same binary bag-of-words matrix that ``BowTask``
    builds, so both data tasks currently yield identical inputs. Presumably
    a placeholder for a real embedding step; confirm intent.
    """

    def run(self):
        """Fetch both dataset splits and vectorise their documents.

        Returns:
            A tuple ``(train_features, test_features, train_targets,
            test_targets)``.
        """
        train_split = fetch_20newsgroups(subset='train')
        test_split = fetch_20newsgroups(subset='test')

        # Fit on the training documents only; the test split is transformed
        # with the vocabulary learnt from training.
        encoder = CountVectorizer(binary=True)
        train_features = encoder.fit_transform(train_split.data)
        test_features = encoder.transform(test_split.data)

        targets = (train_split.target, test_split.target)
        return (train_features, test_features, *targets)

@lt.task
class ClassificationTask:
    """Fits a classifier on the training split and predicts the test split."""
    # Task whose result is (train_features, test_features, train_targets,
    # test_targets).
    data_task: DataTask
    # Task whose result is an unfitted estimator.
    classifier_task: ClassifierTask

    def run(self):
        """Train a copy of the classifier and predict on the test features.

        Returns:
            A tuple ``(predicted_targets, true_test_targets)``.
        """
        (
            train_features,
            test_features,
            train_targets,
            test_targets,
        ) = self.data_task.result

        # Work on a clone so the estimator held by the classifier task is
        # never fitted itself.
        model = clone(self.classifier_task.result)
        model.fit(train_features, train_targets)

        predictions = model.predict(test_features)
        return (predictions, test_targets)

@lt.task
class EvaluationTask:
    """Scores the predictions produced by a classification task."""
    # Task whose result is (predicted_targets, true_test_targets).
    classification_task: ClassificationTask

    def run(self):
        """Return the accuracy of the predictions against the true targets."""
        predicted, expected = self.classification_task.result
        return accuracy_score(y_true=expected, y_pred=predicted)

In [None]:
# One instance of each feature-preparation task; every classifier is
# evaluated against each of them.
data_tasks = [BowTask(), EmbeddingsTask()]

# Classifier configurations to compare: a random forest at three leaf-node
# limits, plus one Complement Naive Bayes classifier.
classifier_tasks = [
    *[
        RandomForestClassifierTask(leaf_max=leaf_max)
        for leaf_max in [10, 50, 90]
    ],
    # alpha is a declared field of NaiveBayesTask, so it is passed explicitly;
    # 1.0 is ComplementNB's default smoothing value.
    NaiveBayesTask(alpha=1.0),
]

# One evaluation per (data task, classifier task) pair, ordered with the
# data task as the outer loop.
experiments = [
    EvaluationTask(classification_task=classification_task)
    for classification_task in [
        ClassificationTask(data_task=data_task, classifier_task=classifier_task)
        for data_task in data_tasks
        for classifier_task in classifier_tasks
    ]
]

In [None]:
from labtech.diagram import display_task_diagram

# Show a diagram of the experiment's tasks before running anything
# (presumably task types and their dependencies; see labtech.diagram docs).
display_task_diagram(experiments)

In [None]:
# NOTE(review): storage=None presumably means results are not persisted
# between runs; confirm against the labtech.Lab documentation.
lab = lt.Lab(storage=None)
# Run every evaluation task in the experiment list.
results = lab.run_tasks(experiments)
# Bare expression as the last line of the cell so the notebook displays it.
results