In [9]:
from pathlib import Path

import numpy as np
import pandas as pd

In [10]:
# Define paths to the individual CSV files.
# Keys have the form '<size>B_<n>GPUs'; each one maps to the CSV file of that
# model-size / GPU-count configuration. Insertion order matters: later cells
# match rows to configurations by position.
csv_files_output = {
    f'{size}B_{gpus}GPUs': (
        'emission_data/vLLM_meta-llama/'
        f'CodeLlama-{size}b-Instruct-hf_{gpus}GPUs_emission_data.csv'
    )
    for size, gpus in [
        (70, 8), (34, 8), (13, 8), (7, 8),
        (34, 4), (13, 4), (7, 4),
        (7, 1),
    ]
}

# Load the emissions table. Later cells read its 'emissions', 'cpu_energy',
# 'gpu_energy', 'ram_energy' and 'energy_consumed' columns by row position.
emissions_data = pd.read_csv(filepath_or_buffer='emissions_params.csv')

In [11]:
# Accumulators filled by the following cells: one entry per configuration.
# Every name gets its own, independent empty list.

# Timing / throughput
total_time, time_per_prompt, tok_per_sec = [], [], []

# Configuration description
setup, parameters, num_gpus, num_prompts = [], [], [], []

# Emissions and energy
total_emissions, cpu_energy, gpu_energy, ram_energy, total_energy = [], [], [], [], []

# Token counts
total_output_tokens, total_input_tokens = [], []
avg_input_tokens, avg_output_tokens = [], []

In [12]:
# Read every per-configuration CSV, pull out its summary metrics and append
# them to the accumulator lists (one entry per configuration).
# NOTE: this cell only appends, so re-running it without re-running the
# list-initialisation cell duplicates every entry.
for setup_name, csv_path in csv_files_output.items():
    data = pd.read_csv(csv_path)

    def read_metric(metric_name):
        """Return the first 'Value' of the rows whose 'Metric' equals `metric_name`."""
        return data.loc[data['Metric'] == metric_name, 'Value'].values[0]

    # All lookups happen before any list is touched, so a missing metric
    # fails before partial data is appended.
    run_time = read_metric('Total Time')
    prompt_time = read_metric('AVG. Time / Prompt') / 1000  # Time is in ms
    throughput = read_metric('AVG. Tokens / Second')
    prompt_count = read_metric('Total Prompts')
    out_tokens = read_metric('Total Output Tokens')
    in_tokens = read_metric('Total Input Tokens')
    mean_in_tokens = read_metric('AVG. Input Tokens / Prompt')
    mean_out_tokens = read_metric('AVG. Output Tokens / Prompt')

    # The key encodes the configuration, e.g. '70B_8GPUs' -> 70 and 8.
    size_label, gpu_label = setup_name.split('_')
    param_count = int(size_label[:-1])  # drop the trailing 'B'
    gpu_count = int(gpu_label.partition('GPUs')[0])

    parameters.append(param_count)
    num_gpus.append(gpu_count)

    total_time.append(float(run_time))
    time_per_prompt.append(float(prompt_time))
    tok_per_sec.append(float(throughput))

    num_prompts.append(int(prompt_count))
    setup.append(setup_name)
    total_output_tokens.append(float(out_tokens))
    total_input_tokens.append(float(in_tokens))
    avg_input_tokens.append(float(mean_in_tokens))
    avg_output_tokens.append(float(mean_out_tokens))

In [13]:
# Copy the emission / energy columns into the accumulator lists.
# Rows are matched to configurations purely by position: row i of
# emissions_data is used for the i-th key of csv_files_output, so the CSV
# must list the configurations in the same order as the dict.
emission_targets = (
    (total_emissions, 'emissions'),
    (cpu_energy, 'cpu_energy'),
    (gpu_energy, 'gpu_energy'),
    (ram_energy, 'ram_energy'),
    (total_energy, 'energy_consumed'),
)

for row in range(len(csv_files_output)):
    for target, column in emission_targets:
        target.append(emissions_data[column].values[row])


In [14]:
# Show the collected emission / energy lists, one list per line.
print(
    total_emissions,
    cpu_energy,
    gpu_energy,
    ram_energy,
    total_energy,
    sep='\n',
)

[0.8697600057623235, 0.4509315464112079, 0.2388394208735168, 0.1640960954110507, 0.1898612653772712, 0.1109668080551995, 0.0756439920633987, 0.0756439920633987]
[0.077260215018938, 0.0422043971629308, 0.0223365311098363, 0.0154791704585982, 0.0330105411355627, 0.0192068019948866, 0.013490448976222, 0.013490448976222]
[0.7352734845905394, 0.3652718549561389, 0.1936305076487266, 0.1320215299282504, 0.199628528919362, 0.1169004674258546, 0.0786555925022014, 0.0786555925022014]
[0.4954778315939251, 0.2706690507018804, 0.1432179255290137, 0.0992795419801035, 0.0528888029181205, 0.0307730924669254, 0.0216131716719476, 0.0216131716719476]
[1.3080115312034024, 0.6781453028209502, 0.3591849642875767, 0.2467802423669523, 0.2855278729730455, 0.1668803618876667, 0.1137592131503711, 0.1137592131503711]


In [15]:
# Turn every accumulator list into a NumPy array so the following cells can
# use element-wise arithmetic. Each group rebinds the same names it reads.

# Timing / throughput
total_time, time_per_prompt, tok_per_sec = (
    np.array(values) for values in (total_time, time_per_prompt, tok_per_sec)
)

# Configuration description
parameters, num_gpus, num_prompts, setup = (
    np.array(values) for values in (parameters, num_gpus, num_prompts, setup)
)

# Token counts
total_output_tokens, total_input_tokens, avg_input_tokens, avg_output_tokens = (
    np.array(values)
    for values in (
        total_output_tokens, total_input_tokens, avg_input_tokens, avg_output_tokens
    )
)

# Emissions and energy
total_emissions, cpu_energy, gpu_energy, ram_energy, total_energy = (
    np.array(values)
    for values in (total_emissions, cpu_energy, gpu_energy, ram_energy, total_energy)
)

In [16]:
# Inspect the 'Total Time' value collected for each configuration.
print(total_time)

[5364.2286284  2930.25584149 1550.81419516 1074.69416142 2291.93837547
 1333.53209519  936.62999272  936.62999272]


In [17]:
# Split the measured GPU energy into an idle share and the remainder.
# The idle share is an estimate: constant idle draw per GPU times run time.
idle_watts_per_gpu = 28  # 28W per GPU
idle_gpu_power = idle_watts_per_gpu * num_gpus

idle_gpu_power_kw = idle_gpu_power / 1000  # W -> kW
run_time_hours = total_time / 3600         # s -> h
total_idle_gpu_energy = idle_gpu_power_kw * run_time_hours
idle_gpu_energy_per_million_prompts = total_idle_gpu_energy / num_prompts * 1_000_000

# Whatever is left of the measured GPU energy after removing the idle share.
# NOTE(review): assumes gpu_energy uses the same unit as the kW*h estimate
# above - confirm against the emissions CSV.
gpu_energy_without_idle = gpu_energy - total_idle_gpu_energy
gpu_energy_without_idle_per_million_prompts = gpu_energy_without_idle / num_prompts * 1_000_000

print(
    total_idle_gpu_energy,
    idle_gpu_energy_per_million_prompts,
    gpu_energy,
    gpu_energy_without_idle_per_million_prompts,
    sep='\n',
)

[0.33377423 0.18232703 0.09649511 0.06686986 0.07130475 0.04148767
 0.0291396  0.0072849 ]
[44.5032301  24.31027068 12.86601406  8.91598119  9.50729993  5.53168869
  3.88527997  0.97131999]
[0.73527348 0.36527185 0.19363051 0.13202153 0.19962853 0.11690047
 0.07865559 0.07865559]
[53.53323451 24.39264331 12.95138696  8.68688947 17.10983726 10.0550403
  6.60213236  9.51609234]


In [18]:
# Calculate emissions per 1,000,000 prompts
def per_million_prompts(series):
    """Scale a per-run series to 1,000,000 prompts using `num_prompts`."""
    return series / num_prompts * 1_000_000


# The two GPU idle / non-idle entries were already scaled in the previous cell.
emissions_per_million_prompts = {
    'Total Emissions': per_million_prompts(total_emissions),
    'CPU Energy': per_million_prompts(cpu_energy),
    'GPU Energy': per_million_prompts(gpu_energy),
    'GPU Energy (without idle)': gpu_energy_without_idle_per_million_prompts,
    'GPU Energy (idle)': idle_gpu_energy_per_million_prompts,
    'RAM Energy': per_million_prompts(ram_energy),
    'Total Energy': per_million_prompts(total_energy),
}

In [19]:
# Compare the total GPU energy with its idle / non-idle split, all scaled to
# 1,000,000 prompts. The first line reports the TOTAL GPU energy, so it must
# not carry the "Idle" label used by the second line.
print(f"GPU Energy per 1.000.000 prompts: {emissions_per_million_prompts['GPU Energy']}")
print(f"Idle GPU Energy per 1.000.000 prompts: {idle_gpu_energy_per_million_prompts}")
print(f"GPU Energy without idle per 1.000.000 prompts: {gpu_energy_without_idle_per_million_prompts}")

Idle GPU Energy per 1.000.000 prompts: [98.03646461 48.70291399 25.81740102 17.60287066 26.61713719 15.58672899
 10.48741233 10.48741233]
Idle GPU Energy per 1.000.000 prompts: [44.5032301  24.31027068 12.86601406  8.91598119  9.50729993  5.53168869
  3.88527997  0.97131999]
GPU Energy without idle per 1.000.000 prompts: [53.53323451 24.39264331 12.95138696  8.68688947 17.10983726 10.0550403
  6.60213236  9.51609234]


In [20]:
# Dump the whole per-1,000,000-prompts mapping to inspect every series at once.
print(emissions_per_million_prompts)

{'Total Emissions': array([115.96800077,  60.12420619,  31.84525612,  21.87947939,
        25.31483538,  14.79557441,  10.08586561,  10.08586561]), 'CPU Energy': array([10.301362  ,  5.62725296,  2.97820415,  2.06388939,  4.40140548,
        2.56090693,  1.79872653,  1.79872653]), 'GPU Energy': array([98.03646461, 48.70291399, 25.81740102, 17.60287066, 26.61713719,
       15.58672899, 10.48741233, 10.48741233]), 'GPU Energy (without idle)': array([53.53323451, 24.39264331, 12.95138696,  8.68688947, 17.10983726,
       10.0550403 ,  6.60213236,  9.51609234]), 'GPU Energy (idle)': array([44.5032301 , 24.31027068, 12.86601406,  8.91598119,  9.50729993,
        5.53168869,  3.88527997,  0.97131999]), 'RAM Energy': array([66.06371088, 36.08920676, 19.0957234 , 13.23727226,  7.05184039,
        4.103079  ,  2.88175622,  2.88175622]), 'Total Energy': array([174.40153749,  90.41937371,  47.89132857,  32.90403232,
        38.07038306,  22.25071492,  15.16789509,  15.16789509])}


In [21]:
# Create the dataframe: one row per configuration.
# Columns taken directly from the per-configuration arrays.
base_columns = {
    'model_setup': setup,
    'parameters': parameters,
    'num_gpus': num_gpus,
    'num_prompts': num_prompts,
    'total_time': total_time,
    'time_per_prompt': time_per_prompt,
    'tok_per_sec': tok_per_sec,
    'total_out_tok': total_output_tokens,
    'total_in_tok': total_input_tokens,
    'avg_out_tok': avg_output_tokens,
    'avg_in_tok': avg_input_tokens,
}

# Output column name -> key of the series in emissions_per_million_prompts.
per_million_sources = {
    'emissions_per_1M_prompts': 'Total Emissions',
    'total_energy_per_1M_prompts': 'Total Energy',
    'cpu_energy_per_1M_prompts': 'CPU Energy',
    'gpu_energy_per_1M_prompts': 'GPU Energy',
    'ram_energy_per_1M_prompts': 'RAM Energy',
    'idle_gpu_energy_per_1M_prompts': 'GPU Energy (idle)',
    'non_idle_gpu_energy_per_1M_prompts': 'GPU Energy (without idle)',
}
per_million_columns = {
    column: emissions_per_million_prompts[source]
    for column, source in per_million_sources.items()
}

df = pd.DataFrame({**base_columns, **per_million_columns})

df

Unnamed: 0,model_setup,parameters,num_gpus,num_prompts,total_time,time_per_prompt,tok_per_sec,total_out_tok,total_in_tok,avg_out_tok,avg_in_tok,emissions_per_1M_prompts,total_energy_per_1M_prompts,cpu_energy_per_1M_prompts,gpu_energy_per_1M_prompts,ram_energy_per_1M_prompts,idle_gpu_energy_per_1M_prompts,non_idle_gpu_energy_per_1M_prompts
0,70B_8GPUs,70,8,7500,5364.228628,0.71523,215.326206,1155059.0,1905000.0,154.007867,254.0,115.968001,174.401537,10.301362,98.036465,66.063711,44.50323,53.533235
1,34B_8GPUs,34,8,7500,2930.255841,0.390701,334.550651,980319.0,1920000.0,130.7092,256.0,60.124206,90.419374,5.627253,48.702914,36.089207,24.310271,24.392643
2,13B_8GPUs,13,8,7500,1550.814195,0.206775,554.112158,859325.0,1920000.0,114.576667,256.0,31.845256,47.891329,2.978204,25.817401,19.095723,12.866014,12.951387
3,7B_8GPUs,7,8,7500,1074.694161,0.143293,921.779456,990631.0,1920000.0,132.084133,256.0,21.879479,32.904032,2.063889,17.602871,13.237272,8.915981,8.686889
4,34B_4GPUs,34,4,7500,2291.938375,0.305592,427.230073,979185.0,1920000.0,130.558,256.0,25.314835,38.070383,4.401405,26.617137,7.05184,9.5073,17.109837
5,13B_4GPUs,13,4,7500,1333.532095,0.177804,648.803282,865200.0,1920000.0,115.36,256.0,14.795574,22.250715,2.560907,15.586729,4.103079,5.531689,10.05504
6,7B_4GPUs,7,4,7500,936.629993,0.124884,1057.411152,990403.0,1920000.0,132.053733,256.0,10.085866,15.167895,1.798727,10.487412,2.881756,3.88528,6.602132
7,7B_1GPUs,7,1,7500,936.629993,0.124884,1057.411152,990403.0,1920000.0,132.053733,256.0,10.085866,15.167895,1.798727,10.487412,2.881756,0.97132,9.516092


In [22]:
# Store the results in a CSV file.
# DataFrame.to_csv does not create missing parent directories, so make sure
# the output folder exists first (e.g. on a fresh checkout).
results_path = Path('results/data/params_test.csv')
results_path.parent.mkdir(parents=True, exist_ok=True)
df.to_csv(results_path, index=False)