# Imports and Mounting

In [2]:
import statsmodels.api as sm
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
import os
from scipy.stats import beta

from google.colab import drive
drive.mount("/content/gdrive", force_remount=True)

Mounted at /content/gdrive


# Downloading all files

## Iterating through A100

In [None]:
%cd '/content/gdrive/MyDrive/benchmarks_bs1/A100/chat'

/content/gdrive/MyDrive/benchmarks_bs1/A100/chat


In [None]:
A100_subfolders = [f.path for f in os.scandir() if f.is_dir()]

In [None]:
A100_subfolders

['./h2oai--h2ogpt-gm-oasst1-en-2048-open-llama-7b-preview-300bt-v2',
 './tatsu-lab--alpaca-7B',
 './project-baize--baize-v2-7B',
 './metaai--llama-13B',
 './openaccess-ai-collective--manticore-13b-chat-pyg',
 './BAIR--koala-7b',
 './lmsys--vicuna-7B',
 './Neutralzz--BiLLa-7B-SFT',
 './BAIR--koala-13b',
 './nomic-ai--gpt4all-13b-snoozy',
 './togethercomputer--RedPajama-INCITE-7B-Chat',
 './OpenAssistant--oasst-sft-1-pythia-12b',
 './camel-ai--CAMEL-13B-Combined-Data',
 './metaai--llama-7B',
 './Salesforce--xgen-7b-8k-inst',
 './FreedomIntelligence--phoenix-inst-chat-7b',
 './StabilityAI--stablelm-tuned-alpha-7b',
 './metaai--Llama-2-7b-chat-hf',
 './metaai--Llama-2-13b-chat-hf',
 './databricks--dolly-v2-12b',
 './lmsys--vicuna-13B',
 './lmsys--fastchat-t5-3b-v1.0']

In [None]:
A100_files = {}

for folder in A100_subfolders:
    files = os.listdir(folder)
    for file in files:
        file_path = os.path.join(folder, file)
        print(f"Processing file: {file_path}")
        # Do something with the file, e.g., open and read its contents
        with open(file_path, 'r') as f:
            A100_files[folder] = {}
            A100_files[folder]['model'] = pd.read_json(f)
            # Process the content as needed

Processing file: ./h2oai--h2ogpt-gm-oasst1-en-2048-open-llama-7b-preview-300bt-v2/benchmark_batch_1.json
Processing file: ./tatsu-lab--alpaca-7B/benchmark_batch_1.json
Processing file: ./project-baize--baize-v2-7B/benchmark_batch_1.json
Processing file: ./metaai--llama-13B/benchmark_batch_1.json
Processing file: ./openaccess-ai-collective--manticore-13b-chat-pyg/benchmark_batch_1.json
Processing file: ./BAIR--koala-7b/benchmark_batch_1.json
Processing file: ./lmsys--vicuna-7B/benchmark_batch_1.json
Processing file: ./Neutralzz--BiLLa-7B-SFT/benchmark_batch_1.json
Processing file: ./BAIR--koala-13b/benchmark_batch_1.json
Processing file: ./nomic-ai--gpt4all-13b-snoozy/benchmark_batch_1.json
Processing file: ./togethercomputer--RedPajama-INCITE-7B-Chat/benchmark_batch_1.json
Processing file: ./OpenAssistant--oasst-sft-1-pythia-12b/benchmark_batch_1.json
Processing file: ./camel-ai--CAMEL-13B-Combined-Data/benchmark_batch_1.json
Processing file: ./metaai--llama-7B/benchmark_batch_1.json
P

## Iterating through A40

In [None]:
%cd '/content/gdrive/MyDrive/benchmarks_bs1/A40/chat'

/content/gdrive/MyDrive/benchmarks_bs1/A40/chat


In [None]:
A40_subfolders = [f.path for f in os.scandir() if f.is_dir()]

In [None]:
A40_files = {}

for folder in A40_subfolders:
    files = os.listdir(folder)
    for file in files:
        file_path = os.path.join(folder, file)
        print(f"Processing file: {file_path}")
        # Do something with the file, e.g., open and read its contents
        with open(file_path, 'r') as f:
            A40_files[folder] = {}
            A40_files[folder]['model'] = pd.read_json(f)
            # Process the content as needed

Processing file: ./BAIR--koala-7b/benchmark_batch_1.json
Processing file: ./h2oai--h2ogpt-gm-oasst1-en-2048-open-llama-7b-preview-300bt-v2/benchmark_batch_1.json
Processing file: ./openaccess-ai-collective--manticore-13b-chat-pyg/benchmark_batch_1.json
Processing file: ./tatsu-lab--alpaca-7B/benchmark_batch_1.json
Processing file: ./metaai--llama-13B/benchmark_batch_1.json
Processing file: ./project-baize--baize-v2-7B/benchmark_batch_1.json
Processing file: ./lmsys--vicuna-7B/benchmark_batch_1.json
Processing file: ./Neutralzz--BiLLa-7B-SFT/benchmark_batch_1.json
Processing file: ./togethercomputer--RedPajama-INCITE-7B-Chat/benchmark_batch_1.json
Processing file: ./BAIR--koala-13b/benchmark_batch_1.json
Processing file: ./Salesforce--xgen-7b-8k-inst/benchmark_batch_1.json
Processing file: ./nomic-ai--gpt4all-13b-snoozy/benchmark_batch_1.json
Processing file: ./metaai--llama-7B/benchmark_batch_1.json
Processing file: ./FreedomIntelligence--phoenix-inst-chat-7b/benchmark_batch_1.json
Pro

In [None]:
A40_files.keys()

dict_keys(['./BAIR--koala-7b', './h2oai--h2ogpt-gm-oasst1-en-2048-open-llama-7b-preview-300bt-v2', './openaccess-ai-collective--manticore-13b-chat-pyg', './tatsu-lab--alpaca-7B', './metaai--llama-13B', './project-baize--baize-v2-7B', './lmsys--vicuna-7B', './Neutralzz--BiLLa-7B-SFT', './togethercomputer--RedPajama-INCITE-7B-Chat', './BAIR--koala-13b', './Salesforce--xgen-7b-8k-inst', './nomic-ai--gpt4all-13b-snoozy', './metaai--llama-7B', './FreedomIntelligence--phoenix-inst-chat-7b', './camel-ai--CAMEL-13B-Combined-Data', './StabilityAI--stablelm-tuned-alpha-7b', './BlinkDL--RWKV-4-Raven-7B-v12-Eng98%-Other2%-20230521-ctx8192.pth', './metaai--Llama-2-13b-chat-hf', './OpenAssistant--oasst-sft-1-pythia-12b', './metaai--Llama-2-7b-chat-hf', './databricks--dolly-v2-12b', './lmsys--vicuna-13B', './lmsys--fastchat-t5-3b-v1.0'])

## Iterating through V100

In [None]:
%cd '/content/gdrive/MyDrive/benchmarks_bs1/V100/chat'

/content/gdrive/MyDrive/benchmarks_bs1/V100/chat


In [None]:
V100_subfolders = [f.path for f in os.scandir() if f.is_dir()]

In [None]:
V100_files = {}

for folder in V100_subfolders:
    files = os.listdir(folder)
    for file in files:
        file_path = os.path.join(folder, file)
        print(f"Processing file: {file_path}")
        # Do something with the file, e.g., open and read its contents
        with open(file_path, 'r') as f:
            V100_files[folder] = {}
            V100_files[folder]['model'] = pd.read_json(f)
            # Process the content as needed

Processing file: ./project-baize--baize-v2-7B/benchmark_batch_1.json
Processing file: ./tatsu-lab--alpaca-7B/benchmark_batch_1.json
Processing file: ./lmsys--vicuna-7B/benchmark_batch_1.json
Processing file: ./h2oai--h2ogpt-gm-oasst1-en-2048-open-llama-7b-preview-300bt-v2/benchmark_batch_1.json
Processing file: ./metaai--llama-13B/benchmark_batch_1.json
Processing file: ./openaccess-ai-collective--manticore-13b-chat-pyg/benchmark_batch_1.json
Processing file: ./BAIR--koala-7b/benchmark_batch_1.json
Processing file: ./Neutralzz--BiLLa-7B-SFT/benchmark_batch_1.json
Processing file: ./metaai--Llama-2-13b-chat-hf/benchmark_batch_1.json
Processing file: ./StabilityAI--stablelm-tuned-alpha-7b/benchmark_batch_1.json
Processing file: ./OpenAssistant--oasst-sft-1-pythia-12b/benchmark_batch_1.json
Processing file: ./nomic-ai--gpt4all-13b-snoozy/benchmark_batch_1.json
Processing file: ./BAIR--koala-13b/benchmark_batch_1.json
Processing file: ./togethercomputer--RedPajama-INCITE-7B-Chat/benchmark_

# How to use this notebook

- file names are stored in ```{GPU}_Subfolders```
- to access the associated model use the format ```{GPU}_files[{file name}]['model']```

# getting standard errors

In [None]:
def make_cubic_model(dictionary, model_name):
  model_data = dictionary[model_name]['model']

  X = model_data["response_length"]
  y = model_data["energy"]

  df = pd.DataFrame({'X': X, 'X_squared': X**2, 'X_cubed': X**3,'y': y})

  model = sm.OLS(df['y'], df[['X', 'X_squared', 'X_cubed']]).fit()

  dictionary[model_name]['cubic X'] = model.params[0]
  dictionary[model_name]['cubic X2'] = model.params[1]
  dictionary[model_name]['cubic X2'] = model.params[2]

  return model

In [None]:
name = V100_subfolders[1]

model_data = V100_files[name]['model']

In [None]:
model_data

Unnamed: 0,model,throughput,response_length,latency,energy,input,output
0,tatsu-lab--alpaca-7B,30.364889,214,7.047613,1382.811,[A chat between a human user (prompter) and an...,[The final reconciliation process for calculat...
1,tatsu-lab--alpaca-7B,19.659229,20,1.017334,206.124,[A chat between a human user (prompter) and an...,[10\n9\n8\n7\n6\n5\n4\n3\n2\n1]
2,tatsu-lab--alpaca-7B,28.711321,72,2.507722,511.901,[A chat between a human user (prompter) and an...,[The Relying Party SHOULD respond with a succe...
3,tatsu-lab--alpaca-7B,28.234386,66,2.337575,479.590,[A chat between a human user (prompter) and an...,[This story is about Jonathan Livingston Seagu...
4,tatsu-lab--alpaca-7B,30.472511,147,4.824020,965.459,[A chat between a human user (prompter) and an...,[The Truman Show is a 1998 movie that tells th...
...,...,...,...,...,...,...,...
2973,tatsu-lab--alpaca-7B,33.153446,17,0.512767,92.244,[A chat between a human user (prompter) and an...,"[Hello, I'm glad you reached out. How can I he..."
2974,tatsu-lab--alpaca-7B,33.241416,17,0.511410,93.379,[A chat between a human user (prompter) and an...,[Hi there! I'm here to help. How can I help yo...
2975,tatsu-lab--alpaca-7B,31.239944,276,8.834843,1450.034,[A chat between a human user (prompter) and an...,[Hello there! Thanks for reaching out! Here ar...
2976,tatsu-lab--alpaca-7B,30.748046,9,0.292702,52.749,[A chat between a human user (prompter) and an...,"[Hi, how can I help you today?]"


In [None]:
import statsmodels.api as sm
import statsmodels.formula.api as smf
import pandas as pd

# Assuming you have a pandas DataFrame df with columns 'response_length' and 'energy_use'

# Fit an OLS model
model = make_cubic_model(V100_files, name)

# Get summary of the model with OLS standard errors
print(model.summary())

# Calculate robust standard errors (using HC3 estimator by default)
robust_model = model.get_robustcov_results(cov_type='HC2')

# Get summary of the model with robust standard errors
print(robust_model.summary())

                                 OLS Regression Results                                
Dep. Variable:                      y   R-squared (uncentered):                   0.997
Model:                            OLS   Adj. R-squared (uncentered):              0.997
Method:                 Least Squares   F-statistic:                          3.938e+05
Date:                Sat, 09 Dec 2023   Prob (F-statistic):                        0.00
Time:                        18:22:20   Log-Likelihood:                         -15732.
No. Observations:                2978   AIC:                                  3.147e+04
Df Residuals:                    2975   BIC:                                  3.149e+04
Df Model:                           3                                                  
Covariance Type:            nonrobust                                                  
                 coef    std err          t      P>|t|      [0.025      0.975]
-----------------------------------------

In [None]:
frame['mean_ci_upper']

0       1139.906876
1        105.214642
2        377.823076
3        346.374046
4        774.468356
           ...     
2973      89.454906
2974      89.454906
2975    1488.930811
2976      47.392797
2977     105.214642
Name: mean_ci_upper, Length: 2978, dtype: float64

In [None]:
frame['mean_ci_lower']

0       1131.361903
1        103.292692
2        374.162026
3        342.724135
4        770.814600
           ...     
2973      87.769797
2974      87.769797
2975    1475.661737
2976      46.426209
2977     103.292692
Name: mean_ci_lower, Length: 2978, dtype: float64

In [None]:
frame['mean']

0       1135.634390
1        104.253667
2        375.992551
3        344.549091
4        772.641478
           ...     
2973      88.612352
2974      88.612352
2975    1482.296274
2976      46.909503
2977     104.253667
Name: mean, Length: 2978, dtype: float64

In [None]:
X = model_data["response_length"]
y = model_data["energy"]

X = [i for i]

df = pd.DataFrame({'X': X, 'X_squared': X**2, 'X_cubed': X**3,'y': y})

# Generate predicted values and confidence intervals
predictions = robust_model.get_prediction(df[['X', 'X_squared', 'X_cubed']])
frame = predictions.summary_frame(alpha=0.05) # 95% confidence interval

# Plot the data
plt.scatter(X, y, label='Data', s=1, alpha=0.1) # Assuming the first column is the constant

# Plot the confidence interval
plt.fill_between(X, frame['mean_ci_lower'], frame['mean_ci_upper'], color='r', alpha=0.1, label='95% CI')
#plt.scatter(X, frame['mean'], label='Regression Line', s=1, color='g')

plt.xlabel('Independent Variable')
plt.ylabel('Dependent Variable')
plt.legend()
plt.show()

ValueError: ignored

# Functions for modeling and graphing

In [None]:
def plot_length_energy(dictionary, model_name, color='c'):
  model_data = dictionary[model_name]['model']

  plt.scatter(model_data["response_length"], model_data["energy"], alpha=0.2, s=5, color=color)
  plt.legend([model_name], loc='upper left')
  plt.show()

In [None]:
def make_linear_model(dictionary, model_name):
  model_data = dictionary[model_name]['model']

  X = model_data["response_length"]
  y = model_data["energy"]

  #X = sm.add_constant(X)

  model = sm.OLS(y, X).fit()

  dictionary[model_name]['linear X'] = model.params[0]

  return model

In [None]:
def make_quadratic_model(dictionary, model_name):
  model_data = dictionary[model_name]['model']

  X = model_data["response_length"]
  y = model_data["energy"]

  df = pd.DataFrame({'X': X, 'X_squared': X**2, 'y': y})
  #X_quad = sm.add_constant(df[['X', 'X_squared']])

  model = sm.OLS(df['y'], df[['X', 'X_squared']]).fit()

  dictionary[model_name]['quad X'] = model.params[0]
  dictionary[model_name]['quad X2'] = model.params[1]

  return model

In [None]:
def plot_resid(model):
  # Obtain residuals
  residuals = model.resid

  # Plot residuals
  plt.scatter(model.fittedvalues, residuals)
  plt.xlabel('Fitted values')
  plt.ylabel('Residuals')
  plt.title('Residual Plot')
  plt.axhline(y=0, color='r', linestyle='--')  # Add a horizontal line at y=0 for reference
  plt.show()

In [None]:
# plots all positive residuals which are above 1.5 * std of residuals

def plot_outliers(model, plot=True, std=1.5, intercept=False):
  residuals = model.resid

  outliers = residuals > std * np.std(residuals) # You can adjust the threshold as needed

  # Subset the second series based on the first series
  subset_values = residuals[outliers]

  if plot:
    # Plot residuals
    plt.scatter(model.fittedvalues[outliers], subset_values)
    plt.xlabel('Fitted values')
    plt.ylabel('Residuals')
    plt.title('Residual Plot')
    plt.axhline(y=0, color='r', linestyle='--')  # Add a horizontal line at y=0 for reference
    plt.show()

  X = model.fittedvalues[outliers]
  y = subset_values

  resid_model = sm.OLS(y, X).fit()

  return model.fittedvalues[outliers], subset_values, resid_model

In [None]:
def plot_nonoutliers(model, plot=True, std=1.5):
  residuals = model.resid

  nonoutliers = np.abs(residuals) < std * np.std(residuals)  # You can adjust the threshold as needed

  # Subset the second series based on the first series
  subset_values = residuals[nonoutliers]

  return model.fittedvalues[nonoutliers], subset_values

# predicting slope from architecture

## extracting coefficients from linear and quadratic models

In [None]:
for name in V100_subfolders:
  make_quadratic_model(V100_files, name)
  make_linear_model(V100_files, name)

In [None]:
for name in A100_subfolders:
  make_quadratic_model(A100_files, name)
  make_linear_model(A100_files, name)

In [None]:
for name in A40_subfolders:
  make_quadratic_model(A40_files, name)
  make_linear_model(A40_files, name)

## importing dataframe

In [None]:
llm_data = pd.read_csv("/content/gdrive/MyDrive/benchmarks_bs1/243 model data - Sheet1.csv")
llm_data

Unnamed: 0,Model name,# of parameters (billions),# of transformer layers,# of attention heads,hidden dimension,ffn intermediate dimension,type of attention operation,vocab size,model type,torch_dtype,gpu model
0,BAIR--koala-7b,7,32,32,4096.0,11008.0,silu,32000,llama,float16,
1,BAIR--koala-13b,13,40,40,5120.0,13824.0,silu,32000,llama,float16,
2,FreedomIntelligence--phoenix-inst-chat-7b,7,30,32,4096.0,,,250880,bloom,float16,
3,Neutralzz--BiLLa-7B-SFT,7,32,32,4096.0,11008.0,silu,46943,llama,float16??,
4,OpenAssistant--oasst-sft-1-pythia-12b,12,36,40,5120.0,20480.0,gelu,50288,gpt_neox,float16,
5,Salesforce--xgen-7b-8k-inst,7,32,32,4096.0,11008.0,silu,51200,llama,float32,
6,StabilityAI--stablelm-tuned-alpha-7b,7,16,48,6144.0,24576.0,gelu,50432,gpt_neox,float32,
7,camel-ai--CAMEL-13B-Combined-Data,13,40,40,5120.0,13824.0,silu,32000,llama,float32,
8,databricks--dolly-v2-12b,12,36,40,5120.0,20480.0,gelu,50280,gpt_neox,bfloat16,
9,h2oai--h2ogpt-gm-oasst1-en-2048-open-llama-7b-...,7,32,32,4096.0,11008.0,silu,32000,llama,float16,


In [None]:
llm_A40 = llm_data.copy()
llm_A40['gpu model'] = "A40"

In [None]:
llm_A100 = llm_data.copy()
llm_A100['gpu model'] = "A100"

In [None]:
llm_V100 = llm_data.copy()
llm_V100['gpu model'] = "V100"

## combining dataframes

In [None]:
def combine_dfs(dictionary, df):
  dict_df = pd.DataFrame(dictionary).T
  dict_df.reset_index(inplace=True)
  dict_df['index'] = dict_df['index'].str[2:]
  dict_df = dict_df.drop(columns=['model'])

  dict_df.rename(columns={'index': 'Model name'}, inplace=True)

  return pd.merge(dict_df, df, on='Model name', how='inner')

In [None]:
A100_res = combine_dfs(A100_files, llm_A100)
A40_res = combine_dfs(A40_files, llm_A40)
V100_res = combine_dfs(V100_files, llm_V100)

In [None]:
result_df = pd.concat([A100_res, A40_res, V100_res], ignore_index=True)

In [None]:
result_df

Unnamed: 0,Model name,quad X,quad X2,linear X,# of parameters (billions),# of transformer layers,# of attention heads,hidden dimension,ffn intermediate dimension,type of attention operation,vocab size,model type,torch_dtype,gpu model
0,h2oai--h2ogpt-gm-oasst1-en-2048-open-llama-7b-...,4.609858,0.001045,5.041432,7,32,32,4096.0,11008.0,silu,32000,llama,float16,A100
1,tatsu-lab--alpaca-7B,8.726825,0.003731,10.005352,7,32,32,4096.0,11008.0,silu,32001,llama,float32,A100
2,project-baize--baize-v2-7B,4.630994,0.001655,5.358599,7,32,32,4096.0,11008.0,silu,32000,llama,float16,A100
3,metaai--llama-13B,8.456063,0.00028,8.58316,13,40,40,5120.0,13824.0,silu,32000,llama,float16,A100
4,openaccess-ai-collective--manticore-13b-chat-pyg,7.878409,0.001793,8.649268,13,40,40,5120.0,13824.0,silu,32000,llama,float64,A100
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
60,FreedomIntelligence--phoenix-inst-chat-7b,5.028359,0.00053,5.245387,7,30,32,4096.0,,,250880,bloom,float16,V100
61,metaai--Llama-2-7b-chat-hf,4.960225,0.000151,5.034588,7,32,32,4096.0,11008.0,silu,32000,llama,float16,V100
62,lmsys--fastchat-t5-3b-v1.0,1.69876,0.008535,5.654666,3,24,32,,,gelu,32110,t5,float16,V100
63,databricks--dolly-v2-12b,9.59995,0.000779,9.906658,12,36,40,5120.0,20480.0,gelu,50280,gpt_neox,bfloat16,V100


## split into features and response + transformations

In [None]:
X = result_df.iloc[:, -10:]
X = pd.get_dummies(X, columns = ['model type', 'type of attention operation', 'torch_dtype', 'gpu model'])

y = result_df['linear X']
result_df

Unnamed: 0,Model name,quad X,quad X2,linear X,# of parameters (billions),# of transformer layers,# of attention heads,hidden dimension,ffn intermediate dimension,type of attention operation,vocab size,model type,torch_dtype,gpu model
0,h2oai--h2ogpt-gm-oasst1-en-2048-open-llama-7b-...,4.609858,0.001045,5.041432,7,32,32,4096.0,11008.0,silu,32000,llama,float16,A100
1,tatsu-lab--alpaca-7B,8.726825,0.003731,10.005352,7,32,32,4096.0,11008.0,silu,32001,llama,float32,A100
2,project-baize--baize-v2-7B,4.630994,0.001655,5.358599,7,32,32,4096.0,11008.0,silu,32000,llama,float16,A100
3,metaai--llama-13B,8.456063,0.00028,8.58316,13,40,40,5120.0,13824.0,silu,32000,llama,float16,A100
4,openaccess-ai-collective--manticore-13b-chat-pyg,7.878409,0.001793,8.649268,13,40,40,5120.0,13824.0,silu,32000,llama,float64,A100
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
60,FreedomIntelligence--phoenix-inst-chat-7b,5.028359,0.00053,5.245387,7,30,32,4096.0,,,250880,bloom,float16,V100
61,metaai--Llama-2-7b-chat-hf,4.960225,0.000151,5.034588,7,32,32,4096.0,11008.0,silu,32000,llama,float16,V100
62,lmsys--fastchat-t5-3b-v1.0,1.69876,0.008535,5.654666,3,24,32,,,gelu,32110,t5,float16,V100
63,databricks--dolly-v2-12b,9.59995,0.000779,9.906658,12,36,40,5120.0,20480.0,gelu,50280,gpt_neox,bfloat16,V100


In [None]:
# from sklearn.impute import KNNImputer

# imputer = KNNImputer(n_neighbors=2, weights="uniform")
# imputer.fit_transform(X)

In [None]:
sm.OLS(y, imputer.fit_transform(X)).fit()

NameError: ignored

In [None]:
import pandas as pd
import numpy as np
import statsmodels.api as sm
from sklearn.impute import SimpleImputer
X = pd.get_dummies(result_df.iloc[:, -10:], columns=['model type', 'type of attention operation', 'torch_dtype', 'gpu model'])
y = pd.to_numeric(result_df['linear X'], errors='coerce')
if y.isnull().any():
    valid_indices = ~y.isnull()
    y = y[valid_indices]
    X = X.loc[valid_indices]
    # maybe y.fillna(y.mean(), inplace=True) is better
imputer = SimpleImputer(strategy='mean')
X_imputed = imputer.fit_transform(X)
X_imputed = np.asarray(X_imputed, dtype=float)
X_imputed = sm.add_constant(X_imputed)
model = sm.OLS(y, X_imputed).fit()
print(model.summary())


                            OLS Regression Results                            
Dep. Variable:               linear X   R-squared:                       0.716
Model:                            OLS   Adj. R-squared:                  0.636
Method:                 Least Squares   F-statistic:                     9.001
Date:                Fri, 08 Dec 2023   Prob (F-statistic):           2.36e-09
Time:                        05:21:02   Log-Likelihood:                -125.09
No. Observations:                  65   AIC:                             280.2
Df Residuals:                      50   BIC:                             312.8
Df Model:                          14                                         
Covariance Type:            nonrobust                                         
                 coef    std err          t      P>|t|      [0.025      0.975]
------------------------------------------------------------------------------
const          0.2248      0.476      0.472      0.6

In [1]:
from sklearn.ensemble import RandomForestRegressor
from xgboost import XGBRegressor
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error

X = pd.get_dummies(result_df.iloc[:, -10:], columns=['model type', 'type of attention operation', 'torch_dtype', 'gpu model'])
y = pd.to_numeric(result_df['linear X'], errors='coerce')

if y.isnull().any():
    valid_indices = ~y.isnull()
    y = y[valid_indices]
    X = X.loc[valid_indices]

imputer = SimpleImputer(strategy='mean')
X_imputed = imputer.fit_transform(X)
X_imputed = np.asarray(X_imputed, dtype=float)
X_train, X_test, y_train, y_test = train_test_split(X_imputed, y, test_size=0.2, random_state=42)
# Random Forest
rf_model = RandomForestRegressor(n_estimators=100, random_state=42)
rf_model.fit(X_train, y_train)
rf_predictions = rf_model.predict(X_test)
rf_mse = mean_squared_error(y_test, rf_predictions)
print("Random Forest MSE:", rf_mse)


xgb_model = XGBRegressor(n_estimators=100, random_state=42)
xgb_model.fit(X_train, y_train)
xgb_predictions = xgb_model.predict(X_test)
xgb_mse = mean_squared_error(y_test, xgb_predictions)
print("XGBoost MSE:", xgb_mse)



NameError: ignored