### 4 bit quantized Llama-3.1-70B-Instruct: Few-shot prompting given domain context

In [None]:
from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig
from utils import *
from openai import RateLimitError
import random
import numpy as np
import pandas as pd
from sklearn.metrics import r2_score, mean_absolute_error, mean_squared_error
import matplotlib.pyplot as plt
import re
import os
import time

# model name and/or local path to model
model_name = "Llama-3.1-70B-Instruct-bnb-4bit"
# Change this to local path or model name on huggingface
model_path = "/mimer/NOBACKUP/Datasets/LLM/huggingface/hub/models--unsloth--Meta-Llama-3.1-70B-Instruct-bnb-4bit/snapshots/d401984d962a6cbeba8514c16409f9631e72d2c1"

bnb_config = BitsAndBytesConfig(
    load_in_4bit = True,
    bnb_4bit_use_double_quant = True,
    bnb_4bit_quant_type = "nf4",
    bnb_4bit_compute_dtype=torch.bfloat16
)
tokenizer = AutoTokenizer.from_pretrained(model_path)
model = AutoModelForCausalLM.from_pretrained(model_path,
                                             dtype = torch.bfloat16,
                                             use_cache = True,
                                             device_map = "auto",
                                             quantization_config = bnb_config
)

# Create messages

data_path = os.path.join('data', 'transformed_data.txt')
with open(data_path, 'r') as f:
    data = f.readlines()

system_message_path = os.path.join("data", "system_message_context.txt")
with open(system_message_path, "r") as f:
    system_txt = f.read().strip()

system_message = {"role": "system", "content": system_txt}

# Randomly sample n lines for training and N lines for testing
n = 10
N = 25

# Initialize empty lists to store results
result_list = []
indices = list(range(len(data)))
# Repeat the process 10 times
for i in range(10):
    random.seed(i)
    #np.random.seed(i)
    test_prompts, true_values, predictions = gather_LLM_results(data,
                                                                n,
                                                                N,
                                                                None,
                                                                model_name,
                                                                indices,
                                                                system_message,
                                                                None,
                                                                model,
                                                                tokenizer)

    append_to_result_list(test_prompts, true_values, predictions, result_list)


In [3]:
# Save results
save_results_to_csv(model_name, "ICL_finetuned",  result_list)

Results for 250 iterations are saved to a single CSV file.


### Llama-3.2-3B-Instruct: few-shot prompting without domain context

In [None]:
from transformers import AutoModelForCausalLM, AutoTokenizer
from utils import *
from openai import RateLimitError
import random
import numpy as np
import pandas as pd
from sklearn.metrics import r2_score, mean_absolute_error, mean_squared_error
import matplotlib.pyplot as plt
import re
import os
import time

# model name and/or local path to model
model_name = "Llama-3.1-70B-Instruct-bnb-4bit"
# Change this to local path or model name on huggingface
model_path = "/mimer/NOBACKUP/Datasets/LLM/huggingface/hub/models--unsloth--Meta-Llama-3.1-70B-Instruct-bnb-4bit/snapshots/d401984d962a6cbeba8514c16409f9631e72d2c1"

bnb_config = BitsAndBytesConfig(
    load_in_4bit = True,
    bnb_4bit_use_double_quant = True,
    bnb_4bit_quant_type = "nf4",
    bnb_4bit_compute_dtype=torch.bfloat16
)
tokenizer = AutoTokenizer.from_pretrained(model_path)
model = AutoModelForCausalLM.from_pretrained(model_path,
                                             dtype = torch.bfloat16,
                                             use_cache = True,
                                             device_map = "auto",
                                             quantization_config = bnb_config
)

# Create messages

data_path = os.path.join('data', 'transformed_data.txt')
with open(data_path, 'r') as f:
    data = f.readlines()

system_message_path = os.path.join("data", "system_message.txt")
with open(system_message_path, "r") as f:
    system_txt = f.read().strip()

system_message = {"role": "system", "content": system_txt}

# Randomly sample n lines for training and N lines for testing
n = 10
N = 25

# Initialize empty lists to store results
result_list = []
indices = list(range(len(data)))
# Repeat the process 10 times
for i in range(10):
    random.seed(i)
    #np.random.seed(i)
    test_prompts, true_values, predictions = gather_LLM_results(data,
                                                                n,
                                                                N,
                                                                None,
                                                                model_name,
                                                                indices,
                                                                system_message,
                                                                None,
                                                                model,
                                                                tokenizer)

    append_to_result_list(test_prompts, true_values, predictions, result_list)


In [5]:
# Save results

save_results_to_csv(model_name, "ICL", result_list)

Results for 250 iterations are saved to a single CSV file.


### Gaussian Process Regression (GPR)

In this section, we will benchmark the performance of the Gaussian Process Regression (GPR) model using the `scikit-learn` library. GPR is a non-parametric, Bayesian approach to regression that provides uncertainty estimates of the predictions. It is based on the assumption that any finite set of data points can be modeled by a multivariate Gaussian distribution.

All training and test sets used for the experiments will be stored in the `results` folder, allowing for easy access and reproducibility of the study.

In [6]:
import random
import os
import numpy as np
import pandas as pd
from sklearn.metrics import r2_score, mean_absolute_error, mean_squared_error
from sklearn.gaussian_process import GaussianProcessRegressor
from sklearn.gaussian_process.kernels import ConstantKernel, RBF
from sklearn.preprocessing import StandardScaler, MinMaxScaler
import re
import matplotlib.pyplot as plt
from sklearn.gaussian_process.kernels import ConstantKernel, Matern


#data = pd.read_csv(r'data/numeric_data.csv')
file_path = os.path.join('data', 'numeric_data.csv')
data = pd.read_csv(file_path)

kernel = ConstantKernel(1.0, (1e-3, 1e3)) * Matern(length_scale=10, nu=1.5)

gpr = GaussianProcessRegressor(kernel=kernel)

# DataFrames to store results
train_results_df = pd.DataFrame()
test_results_df = pd.DataFrame(columns=['Iteration', 'Idx_Sample', 'Input Features', 'True Values', 'Predicted Values'])

n = 10
N = 25

indices = list(range(len(data)))
for i in range(10):
    random.seed(i)
    #np.random.seed(i)
    #random_indices = np.random.choice(len(data), n+N, False)
    random.shuffle(indices)
    # Sample the data based on the provided indices
    train_data = data.iloc[indices[:n]]
    test_data = data.iloc[indices[n:n+N]]

    target_column = 'fc_28dGroundTruth'
    idx_column = 'Idx_Sample'
    X_train = train_data.drop(columns=[target_column, idx_column], axis=1)

    # Normalize input features
    X_scaler = StandardScaler()
    X_train = X_scaler.fit_transform(X_train)

    # Scale the target variable for training
    y_scaler = MinMaxScaler()
    y_train = y_scaler.fit_transform(train_data[target_column].copy().to_numpy().reshape(-1, 1))

    gpr.fit(X_train, y_train)

    # Test data
    X_test = test_data.drop(columns=[target_column, idx_column], axis=1)
    X_test = X_scaler.transform(X_test)

    # Predict on test data
    predictions = gpr.predict(X_test)
    predictions = predictions.reshape(-1, 1)
    predictions = y_scaler.inverse_transform(predictions)

    # Store true and predicted values
    true_values = test_data[target_column].copy().to_numpy().reshape(-1, 1)
    idx_sample = test_data[idx_column].copy().to_numpy()

    # Store train data
    
    train_results_df = pd.concat([train_results_df, train_data], ignore_index=True)

    # Store test data
    iteration_df = pd.DataFrame({
        'Iteration': i+1,
        'Idx_Sample': idx_sample,
        'Input Features': list(X_test),
        'True Values': true_values.flatten(),
        'Predicted Values': predictions.flatten()
    })

    test_results_df = pd.concat([test_results_df, iteration_df], ignore_index=True)

    # Calculate R2 score and mean absolute error
    r2 = r2_score(true_values, predictions)
    mae = mean_absolute_error(true_values, predictions)   
    mse = mean_squared_error(true_values, predictions)

    # Evaluation
    print(f"Iteration: {i+1}")
    print(f"R-squared: {r2:.2f}")
    print(f"MAE: {mae:.2f}")
    print(f"MSE: {mse:.2f}")


train_results_file = os.path.join('results', model_name, 'GPR', 'train.csv')

# Make needed directories
dir_name = os.path.dirname(train_results_file)
os.makedirs(dir_name, exist_ok=True)

train_results_df.to_csv(train_results_file, index=False)

test_results_file = os.path.join('results', model_name, 'GPR', 'test.csv')
test_results_df.to_csv(test_results_file, index=False)



Iteration: 1
R-squared: 0.78
MAE: 4.35
MSE: 29.76
Iteration: 2
R-squared: 0.01
MAE: 8.30
MSE: 100.57
Iteration: 3
R-squared: 0.62
MAE: 4.34
MSE: 28.64
Iteration: 4
R-squared: 0.35
MAE: 6.41
MSE: 59.51
Iteration: 5
R-squared: 0.28
MAE: 7.41
MSE: 80.64
Iteration: 6
R-squared: 0.64
MAE: 4.89
MSE: 31.87
Iteration: 7
R-squared: 0.78
MAE: 4.45
MSE: 30.08
Iteration: 8
R-squared: 0.69
MAE: 4.66
MSE: 30.84
Iteration: 9
R-squared: 0.48
MAE: 6.17
MSE: 59.95
Iteration: 10
R-squared: 0.65
MAE: 5.70
MSE: 45.49


  test_results_df = pd.concat([test_results_df, iteration_df], ignore_index=True)


### Random Forest (M5-Tree with Linear Tree Models)

In this section, we will benchmark the performance of the Random Forest (RF) model using an M5-Tree with linear tree models and well-calibrated uncertainty estimates, implemented in the `lolopy` library. RF is an ensemble learning method that constructs multiple decision trees and combines their output for improved prediction accuracy and reduced overfitting. The M5-Tree with linear tree models enhances the standard RF by incorporating linear regression models in the tree leaves, providing better performance on certain types of data. 

EDIT: No longer uses 'lolopy' library

All training and test sets used for the experiments will be stored in the `results` folder, allowing for easy access and reproducibility of the study.

In [7]:
import random
import numpy as np
import pandas as pd
from sklearn.metrics import r2_score, mean_absolute_error, mean_squared_error
from sklearn.ensemble import RandomForestRegressor
from sklearn.preprocessing import StandardScaler, MinMaxScaler
import re
import matplotlib.pyplot as plt


file_path = os.path.join('data', 'numeric_data.csv')
data = pd.read_csv(file_path)  
        

# DataFrames to store results
train_results_df = pd.DataFrame()
test_results_df = pd.DataFrame(columns=['Iteration', 'Idx_Sample', 'Input Features', 'True Values', 'Predicted Values'])

n = 10
N = 25

indices = list(range(len(data)))

for i in range(10):
    random.seed(i)
    # np.random.seed(i)

    #random_indices = np.random.choice(len(data), n+N, False)
    random.shuffle(indices)
    # Sample the data based on the provided indices
    train_data = data.iloc[indices[:n]]
    test_data = data.iloc[indices[n:n+N]]

    target_column = 'fc_28dGroundTruth'
    idx_column = 'Idx_Sample'
    X_train = train_data.drop(columns=[target_column, idx_column], axis=1)

    # Normalize input features
    X_scaler = StandardScaler()
    X_train = X_scaler.fit_transform(X_train)

    # Scale the target variable for training
    y_scaler = MinMaxScaler()
    y_train = y_scaler.fit_transform(train_data[target_column].copy().to_numpy().reshape(-1, 1))

    rf = RandomForestRegressor()
    rf.fit(X_train, y_train)

    # Test data
    X_test = test_data.drop(columns=[target_column, idx_column], axis=1)
    X_test = X_scaler.transform(X_test)

    # Predict on test data
    predictions = rf.predict(X_test)
    predictions = predictions.reshape(-1, 1)
    predictions = y_scaler.inverse_transform(predictions)

    # Store true and predicted values
    true_values = test_data[target_column].copy().to_numpy().reshape(-1, 1)
    idx_sample = test_data[idx_column].copy().to_numpy()

    # Store train data
    #train_results_df = train_results_df.append(train_data)
    train_results_df = pd.concat([train_results_df, train_data], ignore_index=True)

    # Store test data
    iteration_df = pd.DataFrame({
        'Iteration': i+1,
        'Idx_Sample': idx_sample,
        'Input Features': list(X_test),
        'True Values': true_values.flatten(),
        'Predicted Values': predictions.flatten()
    })

    test_results_df = pd.concat([test_results_df, iteration_df], ignore_index=True)

    # Calculate R2 score and mean absolute error
    r2 = r2_score(true_values, predictions)
    mae = mean_absolute_error(true_values, predictions)   
    mse = mean_squared_error(true_values, predictions)

    # Evaluation
    print(f"Iteration: {i+1}")
    print(f"R-squared: {r2:.2f}")
    print(f"MAE: {mae:.2f}")
    print(f"MSE: {mse:.2f}")


train_results_file = os.path.join('results', model_name, 'RF', 'train.csv')

dir_name = os.path.dirname(train_results_file)
os.makedirs(dir_name, exist_ok=True)

train_results_df.to_csv(train_results_file, index=False)

test_results_file = os.path.join('results', model_name, 'RF', 'test.csv')
test_results_df.to_csv(test_results_file, index=False)

  return fit_method(estimator, *args, **kwargs)
  test_results_df = pd.concat([test_results_df, iteration_df], ignore_index=True)
  return fit_method(estimator, *args, **kwargs)
  return fit_method(estimator, *args, **kwargs)


Iteration: 1
R-squared: 0.63
MAE: 5.76
MSE: 50.06
Iteration: 2
R-squared: 0.66
MAE: 5.04
MSE: 34.32
Iteration: 3
R-squared: 0.49
MAE: 4.68
MSE: 38.53


  return fit_method(estimator, *args, **kwargs)
  return fit_method(estimator, *args, **kwargs)
  return fit_method(estimator, *args, **kwargs)


Iteration: 4
R-squared: 0.46
MAE: 5.91
MSE: 49.23
Iteration: 5
R-squared: 0.63
MAE: 5.27
MSE: 41.04
Iteration: 6
R-squared: 0.62
MAE: 4.55
MSE: 33.86


  return fit_method(estimator, *args, **kwargs)
  return fit_method(estimator, *args, **kwargs)
  return fit_method(estimator, *args, **kwargs)


Iteration: 7
R-squared: 0.71
MAE: 5.10
MSE: 39.54
Iteration: 8
R-squared: 0.59
MAE: 5.13
MSE: 40.12
Iteration: 9
R-squared: 0.45
MAE: 6.13
MSE: 63.85
Iteration: 10
R-squared: 0.54
MAE: 6.21
MSE: 59.58


  return fit_method(estimator, *args, **kwargs)
