In [1]:
!pip install codecarbon

Collecting codecarbon
  Using cached codecarbon-2.1.4-py3-none-any.whl (174 kB)
Collecting py-cpuinfo
  Using cached py_cpuinfo-9.0.0-py3-none-any.whl (22 kB)
Collecting fuzzywuzzy
  Using cached fuzzywuzzy-0.18.0-py2.py3-none-any.whl (18 kB)
Collecting pynvml
  Using cached pynvml-11.4.1-py3-none-any.whl (46 kB)
Installing collected packages: py-cpuinfo, fuzzywuzzy, pynvml, codecarbon
Successfully installed codecarbon-2.1.4 fuzzywuzzy-0.18.0 py-cpuinfo-9.0.0 pynvml-11.4.1


In [7]:
import pandas as pd
from sklearn.datasets import fetch_20newsgroups
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.svm import LinearSVC
from sklearn.metrics import classification_report, f1_score
from sklearn.model_selection import train_test_split
from sklearn.pipeline import Pipeline
from sklearn.multiclass import OneVsRestClassifier
from sklearn.naive_bayes import MultinomialNB

from codecarbon import OfflineEmissionsTracker
from copy import deepcopy

In [None]:
# Emissions tracker that will be wrapped around the model training step.
# It is created here but not started; the training cell calls start()/stop().
train_tracker = OfflineEmissionsTracker(
    project_name="ml_classifier_training",
    output_file="emissions_training_ml.csv",
    # Cloud provider / region pair passed to the offline tracker.
    cloud_provider="gcp",
    cloud_region="europe-west1",
    # Power sampling interval in seconds (one hour).
    measure_power_secs=3600,
)

# Load dataset

In [4]:
# Restrict the 20 newsgroups corpus to four topics.
categories = ['alt.atheism', 'talk.religion.misc', 'comp.graphics', 'sci.space']

# Fetch the predefined train and test subsets (in that order) for those topics.
newsgroups_train, newsgroups_test = (
    fetch_20newsgroups(subset=split_name, categories=categories)
    for split_name in ('train', 'test')
)

# Create and train model

In [5]:
# TF-IDF features fed into a one-vs-rest linear SVM with balanced class weights.
pipeline = Pipeline([
    ('tfidf', TfidfVectorizer()),
    ('clf', OneVsRestClassifier(LinearSVC(class_weight="balanced"), n_jobs=-1)),
])

# Only the fit is measured. try/finally guarantees the tracker is stopped even
# if training raises, so it is never left running in the kernel.
train_tracker.start()
try:
    pipeline.fit(newsgroups_train.data, newsgroups_train.target)
finally:
    train_tracker.stop()

# cpu_energy is the CPU share only (the tracker logs RAM and GPU energy
# separately) and is an energy in kWh, not a power in kW.
print(f"Total CPU energy consumed for the training: {train_tracker.final_emissions_data.cpu_energy} kWh")

# Evaluate on both splits; the train report shows how closely the model fits
# the data it was trained on, the test report shows generalisation.
train_predictions = pipeline.predict(newsgroups_train.data)
test_predictions = pipeline.predict(newsgroups_test.data)

print("Train classification report:")
print(classification_report(newsgroups_train.target, train_predictions, target_names=newsgroups_train.target_names))
print("Test classification report:")
print(classification_report(newsgroups_test.target, test_predictions, target_names=newsgroups_test.target_names))

[codecarbon INFO @ 22:53:58] Energy consumed for RAM : 0.000003 kWh. RAM Power : 5.5055108070373535 W
[codecarbon INFO @ 22:53:58] Energy consumed for all GPUs : 0.000017 kWh. All GPUs Power : 32.675 W
[codecarbon INFO @ 22:53:58] Energy consumed for all CPUs : 0.000022 kWh. All CPUs Power : 42.5 W
[codecarbon INFO @ 22:53:58] 0.000041 kWh of electricity used since the begining.


Total energy consumed for the training: 2.1576739682091607e-05 KW
Train classification report:
                    precision    recall  f1-score   support

       alt.atheism       1.00      1.00      1.00       480
     comp.graphics       1.00      1.00      1.00       584
         sci.space       1.00      1.00      1.00       593
talk.religion.misc       1.00      1.00      1.00       377

          accuracy                           1.00      2034
         macro avg       1.00      1.00      1.00      2034
      weighted avg       1.00      1.00      1.00      2034

Test classification report:
                    precision    recall  f1-score   support

       alt.atheism       0.86      0.82      0.84       319
     comp.graphics       0.92      0.96      0.94       389
         sci.space       0.95      0.95      0.95       394
talk.religion.misc       0.79      0.78      0.78       251

          accuracy                           0.89      1353
         macro avg       0.88   

# Benchmark inference

In [None]:
# Separate tracker for the inference benchmark, with its own output file and
# project name so its results are not mixed with the training measurements.
inference_tracker = OfflineEmissionsTracker(
    measure_power_secs=3600,
    cloud_provider="gcp",
    cloud_region="europe-west1",
    output_file="emissions_predict_ml.csv",
    project_name="ml_classifier_prediction",
)

# Number of passes over the test set; raise it for a longer benchmark.
n_repeats = 10

# Benchmark on an independent copy of the test documents.
predicted_sentence = deepcopy(newsgroups_test.data)
# Actual number of documents predicted, so the report below matches the run.
n_sentences = n_repeats * len(predicted_sentence)

# try/finally guarantees the tracker is stopped even if a prediction raises.
inference_tracker.start()
try:
    for _ in range(n_repeats):
        pipeline.predict(predicted_sentence)
finally:
    inference_tracker.stop()

# cpu_energy is the CPU share only (the tracker logs RAM and GPU energy
# separately) and is an energy in kWh, not a power in kW.
print(f"Total CPU energy consumed for a prediction of {n_sentences} sentences: {inference_tracker.final_emissions_data.cpu_energy} kWh")

[codecarbon INFO @ 23:49:05] Energy consumed for RAM : 0.004649 kWh. RAM Power : 5.5055108070373535 W
[codecarbon INFO @ 23:49:05] Energy consumed for all GPUs : 0.027583 kWh. All GPUs Power : 32.675 W
[codecarbon INFO @ 23:49:05] Energy consumed for all CPUs : 0.035940 kWh. All CPUs Power : 42.5 W
[codecarbon INFO @ 23:49:05] 0.068173 kWh of electricity used since the begining.
[codecarbon INFO @ 23:49:05] Energy consumed for RAM : 0.004672 kWh. RAM Power : 5.5055108070373535 W
[codecarbon INFO @ 23:49:05] Energy consumed for all GPUs : 0.027719 kWh. All GPUs Power : 32.675 W
[codecarbon INFO @ 23:49:05] Energy consumed for all CPUs : 0.036117 kWh. All CPUs Power : 42.5 W
[codecarbon INFO @ 23:49:05] 0.068508 kWh of electricity used since the begining.
