In [None]:
# Uncomment to install codecarbon into the kernel's environment (e.g. on Colab):
# %pip install codecarbon

In [None]:
import pandas as pd
from sklearn.datasets import fetch_20newsgroups
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.svm import LinearSVC
from sklearn.metrics import classification_report, f1_score
from sklearn.model_selection import train_test_split
from sklearn.pipeline import Pipeline
from sklearn.multiclass import OneVsRestClassifier
from sklearn.naive_bayes import MultinomialNB

from codecarbon import OfflineEmissionsTracker


In [None]:
# Emissions tracker wrapped around the model training step below.
# It is configured for the GCP "europe-west1" region and given a dedicated
# output file and project name for the training phase.
train_tracker = OfflineEmissionsTracker(
    project_name="ml_classifier_training",
    output_file="emissions_training_ml.csv",
    cloud_provider="gcp",
    cloud_region="europe-west1",
)

# Load dataset

In [None]:
# Restrict the 20 Newsgroups corpus to four topics and fetch both the
# training and the test split with the same category filter.
categories = [
    'alt.atheism',
    'talk.religion.misc',
    'comp.graphics',
    'sci.space',
]
newsgroups_train, newsgroups_test = [
    fetch_20newsgroups(subset=split_name, categories=categories)
    for split_name in ('train', 'test')
]

# Create and train model

In [None]:
# TF-IDF features feeding a one-vs-rest linear SVM (one binary classifier per
# newsgroup, with class weights balanced).
pipeline = Pipeline([
    ('tfidf', TfidfVectorizer()),
    ('clf', OneVsRestClassifier(LinearSVC(class_weight="balanced"), n_jobs=-1)),
])

# Only the fit itself is measured. The try/finally guarantees the tracker is
# stopped even if training raises, so it is never left running in the kernel.
train_tracker.start()
try:
    # Compute intensive code goes here
    pipeline.fit(newsgroups_train.data, newsgroups_train.target)
finally:
    train_tracker.stop()

# `energy_consumed` is the total (CPU + GPU + RAM) energy; codecarbon reports
# energy in kWh, not kW.
print(f"Total energy consumed for the training: {train_tracker.final_emissions_data.energy_consumed} kWh")

# Evaluate on both splits to compare fit quality with generalisation.
train_predictions = pipeline.predict(newsgroups_train.data)
test_predictions = pipeline.predict(newsgroups_test.data)

print("Train classification report:")
print(classification_report(newsgroups_train.target, train_predictions, target_names=newsgroups_train.target_names))
print("Test classification report:")
print(classification_report(newsgroups_test.target, test_predictions, target_names=newsgroups_test.target_names))

# Benchmark inference

In [1]:
# Emissions tracker wrapped around the inference benchmark; same GCP region as
# training, with its own output file and project name.
inference_tracker = OfflineEmissionsTracker(
    cloud_provider="gcp", 
    cloud_region="europe-west1", 
    output_file="emissions_predict_ml.csv",
    project_name="ml_classifier_prediction",
)

# Number of single-sentence predictions to benchmark; matches the "1M" in the
# message printed below. Must be an int: range() rejects floats such as 10e6.
N_PREDICTIONS = 1_000_000

# The pipeline starts with a TfidfVectorizer, so it must be fed raw text
# (`.data`), not the integer labels in `.target`. Slicing yields a fresh
# one-element list, so no explicit copy is needed.
predicted_sentence = newsgroups_test.data[:1]

# The try/finally guarantees the tracker is stopped even if a prediction raises.
inference_tracker.start()
try:
    for _ in range(N_PREDICTIONS):
        pipeline.predict(predicted_sentence)
finally:
    inference_tracker.stop()

# `energy_consumed` is the total (CPU + GPU + RAM) energy; codecarbon reports
# energy in kWh, not kW.
print(f"Total energy consumed for a prediction of 1M sentences: {inference_tracker.final_emissions_data.energy_consumed} kWh")

NameError: name 'OfflineEmissionsTracker' is not defined