# CSCN8010 Final Project - Energy Estimator

## Group #8
* Eris Leksi
* Erica Holden
* Reham Abuarquob

In [13]:
import pandas as pd

# Load the server measurements and, in the same chained expression, discard
# the two 'Unnamed' columns that the analysis does not use (presumably stale
# index columns left over from earlier CSV exports -- confirm).
df = (
    pd.read_csv('./data/alpaca_llama3_70b_server.csv')
    .drop(columns=['Unnamed: 0', 'Unnamed: 0.1'])
)


In [None]:
# Use random forest regressor for energy estimation
from sklearn.ensemble import RandomForestRegressor
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error, r2_score

class EnergyEstimator:
    """Wrap a regressor used to estimate energy consumption.

    Attributes:
        model: The underlying estimator. When none is supplied, a
            ``RandomForestRegressor`` with 100 trees and ``random_state=42``.
        metrics_: ``None`` until ``train`` has run; afterwards a dict with the
            keys ``'mse'`` and ``'r2'`` holding the hold-out scores of the
            most recent ``train`` call.
    """

    def __init__(self, model=None):
        """Store ``model``, or build the default random forest when omitted.

        Args:
            model: Optional estimator object. This class only calls its
                ``fit(X, y)`` and ``predict(X)`` methods.
        """
        if model is None:
            model = RandomForestRegressor(n_estimators=100, random_state=42)
        self.model = model
        self.metrics_ = None

    def train(self, X, y, test_size=0.2, random_state=42):
        """Fit the model on a training split and report hold-out metrics.

        The data is split with ``train_test_split``; the model is fitted on
        the training part only and scored on the held-out part. The scores
        are printed and also kept on ``self.metrics_``.

        Note that after this call the model is fitted on the training split
        only, not on all of ``X``/``y``.

        Args:
            X: Feature matrix accepted by ``train_test_split`` and the model.
            y: Target values aligned with ``X``.
            test_size: Forwarded to ``train_test_split``; defaults to the
                previously hard-coded 0.2.
            random_state: Seed forwarded to ``train_test_split``; defaults to
                the previously hard-coded 42.

        Returns:
            None. The scores are available via ``self.metrics_``.
        """
        X_train, X_test, y_train, y_test = train_test_split(
            X, y, test_size=test_size, random_state=random_state
        )
        self.model.fit(X_train, y_train)
        predictions = self.model.predict(X_test)

        mse = mean_squared_error(y_test, predictions)
        r2 = r2_score(y_test, predictions)

        # Keep the scores on the instance so callers can inspect or log them
        # instead of having to parse the printed text.
        self.metrics_ = {'mse': mse, 'r2': r2}

        print(f'Mean Squared Error: {mse}')
        print(f'R^2 Score: {r2}')

In [17]:
# Separate the regression target from the predictors. get_dummies one-hot
# encodes the categorical predictor columns so that every feature is numeric.
y = df['energy_consumption_llm']
X = pd.get_dummies(df.drop(columns=['energy_consumption_llm']))

# Fit a default estimator; train() prints the MSE and R^2 it measures.
estimator = EnergyEstimator()
estimator.train(X, y)

Mean Squared Error: 9.618702535403386e-10
R^2 Score: 0.9997266721550511


## Conclusion

Using a Random Forest Regressor, we trained a model that estimates energy usage with an R^2 of ~0.9997 on the held-out 20% test split, so we save the fitted model to disk so that it can be loaded elsewhere.

In [20]:
# Save the trained model
import os

import joblib

# joblib.dump does not create missing parent directories, so make sure the
# output folder exists; otherwise a fresh checkout fails with FileNotFoundError.
os.makedirs('./models', exist_ok=True)
joblib.dump(estimator.model, './models/energy_estimator_model.pkl')

['./models/energy_estimator_model.pkl']