# CSCN8010 Final Project - Energy Estimator

## Group #8
* Eris Leksi
* Erica Holden
* Reham Abuarquob

In [1]:
import pandas as pd
import numpy as np
from sklearn.feature_extraction.text import TfidfVectorizer
from xgboost import XGBRegressor
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error
from util.Vectorizer import Vectorizer

# Load the raw measurements and discard the two 'Unnamed' columns, which are
# not used anywhere in this notebook (presumably index columns left over from
# earlier CSV exports — verify against the data source).
DATA_PATH = './data/alpaca_llama3_70b_server.csv'
UNUSED_COLUMNS = ['Unnamed: 0', 'Unnamed: 0.1']

df = pd.read_csv(DATA_PATH).drop(columns=UNUSED_COLUMNS)

# Regression target used by the first modelling experiment below.
y = df['energy_consumption_llm_total']


In [2]:
# Experiment 1: features from the project's Vectorizer, regressor = XGBoost.
# The Vectorizer is constructed from the full list of prompts.
vectorizer = Vectorizer(df['prompt'].tolist())

# Turn every prompt into its vector and stack them into a single 2-D matrix
# with one row per prompt.
df['prompt_vector'] = df['prompt'].apply(vectorizer.sentence_vector)
X_vec = np.vstack(df['prompt_vector'].to_numpy())  # shape: (num_samples, vector_size)

# Hold out 20% of the rows for evaluation; the fixed seed keeps the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X_vec,
    y,
    test_size=0.2,
    random_state=42,
)

model = XGBRegressor(
    objective='reg:squarederror',
    n_estimators=100,
    learning_rate=0.1,
)
model.fit(X_train, y_train)
y_pred = model.predict(X_test)

mse = mean_squared_error(y_test, y_pred)

# "Accuracy" here is the fraction of test predictions whose relative error
# against the actual value is below 10%.
relative_error = abs(y_test - y_pred) / y_test
accuracy = (relative_error < 0.1).mean()

print(f'Mean Squared Error: {mse}')
print(f'R^2 Score: {model.score(X_test, y_test)}')
print(f'Accuracy: {accuracy}')


Mean Squared Error: 3.031265295843615e-06
R^2 Score: 0.13862684937826575
Accuracy: 0.0970873786407767


In [3]:
from sentence_transformers import SentenceTransformer

# Experiment 2: features from a pretrained sentence-transformer, regressor = XGBoost.
# The encoder is kept under its own name (`embedder`) so that it is not
# overwritten when `model` is bound to the regressor below and remains
# available for embedding new prompts at prediction time.
embedder = SentenceTransformer('all-MiniLM-L6-v2')
prompt_vectors = embedder.encode(df['prompt'].tolist())

X_vec = prompt_vectors  # shape: (num_samples, embedding_dim)

# NOTE(review): this target column differs from 'energy_consumption_llm_total',
# which the first experiment predicts, so the metrics printed by the two
# experiments are not directly comparable — confirm which target is intended.
y = df['energy_consumption_llm']

# train_test_split, XGBRegressor and mean_squared_error are already imported
# in the first cell, so they are not imported again here.
X_train, X_test, y_train, y_test = train_test_split(X_vec, y, test_size=0.2, random_state=42)

# Default hyper-parameters, unlike the explicitly configured regressor in the
# first experiment.
model = XGBRegressor()
model.fit(X_train, y_train)
y_pred = model.predict(X_test)

mse = mean_squared_error(y_test, y_pred)
# Calculate accuracy as the percentage of predictions within 10% of actual values
accuracy = (abs(y_test - y_pred) / y_test < 0.1).mean()

print(f'Mean Squared Error: {mse}')
print(f'R^2 Score: {model.score(X_test, y_test)}')
print(f'Accuracy: {accuracy}')

  from .autonotebook import tqdm as notebook_tqdm
  return forward_call(*args, **kwargs)


Mean Squared Error: 2.2930986353188646e-06
R^2 Score: 0.34838643160024074
Accuracy: 0.11650485436893204


In [None]:
# Save the trained artifacts for future use.
import joblib

# When the notebook is run top to bottom, `model` is the XGBRegressor that was
# fitted on the 'all-MiniLM-L6-v2' sentence embeddings in the previous cell.
joblib.dump(model, 'xgboost_model.pkl')

# `vectorizer` is the project Vectorizer built for the first experiment. It did
# NOT produce the features that the saved `model` was trained on; it is still
# written so that anything already reading this file keeps working.
joblib.dump(vectorizer, 'text_vectorizer.pkl')

# Record which encoder actually matches the saved regressor, plus the feature
# width it was fitted with, so prompts can be embedded consistently when the
# model is loaded elsewhere.
joblib.dump(
    {
        'embedding_model': 'all-MiniLM-L6-v2',
        'n_features': int(X_train.shape[1]),
    },
    'embedding_config.pkl',
)