In [56]:
import pandas as pd
import numpy as np
import altair as alt
from sklearn.linear_model import LinearRegression

In [57]:
# Define paths to the individual CSV files
csv_files_output = {
    '1': 'emission_data/vllm_output_tok_meta-llama/Meta-Llama-3-8B-Instruct_emission_data_1_examples.csv',
    '2': 'emission_data/vllm_output_tok_meta-llama/Meta-Llama-3-8B-Instruct_emission_data_2_examples.csv',
    '3': 'emission_data/vllm_output_tok_meta-llama/Meta-Llama-3-8B-Instruct_emission_data_3_examples.csv',
    '5': 'emission_data/vllm_output_tok_meta-llama/Meta-Llama-3-8B-Instruct_emission_data_5_examples.csv',
    '10': 'emission_data/vllm_output_tok_meta-llama/Meta-Llama-3-8B-Instruct_emission_data_10_examples.csv',
    '15': 'emission_data/vllm_output_tok_meta-llama/Meta-Llama-3-8B-Instruct_emission_data_15_examples.csv',
    '20': 'emission_data/vllm_output_tok_meta-llama/Meta-Llama-3-8B-Instruct_emission_data_20_examples.csv',
    '30': 'emission_data/vllm_output_tok_meta-llama/Meta-Llama-3-8B-Instruct_emission_data_30_examples.csv',
    '40': 'emission_data/vllm_output_tok_meta-llama/Meta-Llama-3-8B-Instruct_emission_data_40_examples.csv',
    '60': 'emission_data/vllm_output_tok_meta-llama/Meta-Llama-3-8B-Instruct_emission_data_60_examples.csv',
    '90': 'emission_data/vllm_output_tok_meta-llama/Meta-Llama-3-8B-Instruct_emission_data_90_examples.csv'
}

# Read the emissions data
emissions_data = pd.read_csv('emissions_output_tok_vllm.csv')

In [58]:
# Initialize lists to store metadata
total_time = []
time_per_prompt = []
tok_per_sec = []
parameters_output = []
num_examples_output = []
num_prompts_output = []
total_emissions_output = []
cpu_energy_output = []
gpu_energy_output = []
ram_energy_output = []
total_energy_output = []
total_output_tokens_output = []
total_input_tokens_output = []
avg_input_tokens_output = []
avg_output_tokens_output = []

In [59]:
# Read and extract metadata from each CSV file
for model, file in csv_files_output.items():
    data = pd.read_csv(file)
    time = data.loc[data['Metric'] == 'Total Time', 'Value'].values[0]
    time_p_prompt = data.loc[data['Metric'] == 'AVG. Time / Prompt', 'Value'].values[0] / 1000 #Time is in ms
    tok_p_sec = data.loc[data['Metric'] == 'AVG. Tokens / Second', 'Value'].values[0]
    prompts = data.loc[data['Metric'] == 'Total Prompts', 'Value'].values[0]
    output_tokens = data.loc[data['Metric'] == 'Total Output Tokens', 'Value'].values[0]
    input_tokens = data.loc[data['Metric'] == 'Total Input Tokens', 'Value'].values[0]
    avg_i_tok = data.loc[data['Metric'] == 'AVG. Input Tokens / Prompt', 'Value'].values[0]
    avg_o_tok = data.loc[data['Metric'] == 'AVG. Output Tokens / Prompt', 'Value'].values[0]
    total_time.append(float(time))
    time_per_prompt.append(float(time_p_prompt))
    tok_per_sec.append(float(tok_p_sec))
    parameters_output.append(8)
    num_examples_output.append(int(model))
    num_prompts_output.append(int(prompts))
    total_output_tokens_output.append(float(output_tokens))
    total_input_tokens_output.append(float(input_tokens))
    avg_input_tokens_output.append(float(avg_i_tok))
    avg_output_tokens_output.append(float(avg_o_tok))    

In [60]:
# Extract emissions data
for model in csv_files_output.keys():
    model_emissions = emissions_data[emissions_data['project_name'].str.contains("vLLM_Inference_1000_prompts_output_tok_" + model + "_")]
    total_emissions_output.append(model_emissions['emissions'].values[0])
    cpu_energy_output.append(model_emissions['cpu_energy'].values[0])
    gpu_energy_output.append(model_emissions['gpu_energy'].values[0])
    ram_energy_output.append(model_emissions['ram_energy'].values[0])
    total_energy_output.append(model_emissions['energy_consumed'].values[0])


In [61]:
print(avg_input_tokens_output)
print(avg_output_tokens_output)
print(total_output_tokens_output)
print(total_emissions_output)

[186.66, 186.66, 186.66, 186.66, 186.66, 186.66, 186.66, 186.66, 186.66, 186.66, 186.66]
[18.173, 21.523, 40.863, 69.858, 182.237, 288.208, 398.264, 594.771, 781.428, 1202.476, 1796.304]
[18173.0, 21523.0, 40863.0, 69858.0, 182237.0, 288208.0, 398264.0, 594771.0, 781428.0, 1202476.0, 1796304.0]
[0.0034967513216419, 0.0038288557217628, 0.0047209939368813, 0.0058326642843592, 0.0107183823003618, 0.0155925593968694, 0.0204828999682617, 0.0303342998617103, 0.0402502108596118, 0.0663906878731937, 0.1103408511731546]


In [62]:
# Prepare data for regression and visualization
total_time = np.array(total_time)
time_per_prompt = np.array(time_per_prompt)
tok_per_sec = np.array(tok_per_sec)
parameters_output = np.array(parameters_output)
num_examples_output = np.array(num_examples_output)
num_prompts_output = np.array(num_prompts_output)
total_output_tokens_output = np.array(total_output_tokens_output)
total_input_tokens_output = np.array(total_input_tokens_output)
avg_input_tokens_output = np.array(avg_input_tokens_output)
avg_output_tokens_output = np.array(avg_output_tokens_output)
total_emissions_output = np.array(total_emissions_output)
cpu_energy_output = np.array(cpu_energy_output)
gpu_energy_output = np.array(gpu_energy_output)
ram_energy_output = np.array(ram_energy_output)
total_energy_output = np.array(total_energy_output)

In [63]:
print(total_time)

[  49.20257998   51.66708994   63.47096586   80.3412075   144.78470874
  211.3589766   276.60654068  408.54310036  541.17958927  882.30840707
 1447.26580811]


In [64]:
# Calculate emissions per 10,000 prompts
emissions_per_thousand_prompts = {
    'Total Emissions Output Tok': total_emissions_output / num_prompts_output * 10_000,
    'CPU Energy Output Tok': cpu_energy_output / num_prompts_output * 10_000,
    'GPU Energy Output Tok': gpu_energy_output / num_prompts_output * 10_000,
    'RAM Energy Output Tok': ram_energy_output / num_prompts_output * 10_000,
    'Total Energy Output Tok': total_energy_output / num_prompts_output * 10_000
}

In [65]:
print(emissions_per_thousand_prompts)

{'Total Emissions Output Tok': array([0.03496751, 0.03828856, 0.04720994, 0.05832664, 0.10718382,
       0.15592559, 0.204829  , 0.303343  , 0.40250211, 0.66390688,
       1.10340851]), 'CPU Energy Output Tok': array([0.00708905, 0.00744669, 0.00914967, 0.01157489, 0.02085703,
       0.03044518, 0.03984242, 0.05884579, 0.07794874, 0.12708085,
       0.20845374]), 'GPU Energy Output Tok': array([0.03413982, 0.03822099, 0.04719003, 0.05759958, 0.10691888,
       0.15527704, 0.20436698, 0.30308119, 0.40250862, 0.66785381,
       1.11722544]), 'RAM Energy Output Tok': array([0.01135794, 0.01191357, 0.01465821, 0.01854159, 0.03341531,
       0.04877059, 0.06382809, 0.09426339, 0.12485606, 0.20349921,
       0.33371082]), 'Total Energy Output Tok': array([0.05258682, 0.05758126, 0.07099791, 0.08771606, 0.16119122,
       0.23449282, 0.3080375 , 0.45619037, 0.60531342, 0.99843388,
       1.65939   ])}


In [66]:
print(parameters_output)

[8 8 8 8 8 8 8 8 8 8 8]


In [67]:
# Perform regression analysis
def perform_regression(x, y):
    x = x.reshape(-1, 1)
    model = LinearRegression()
    model.fit(x, y)
    predicted = model.predict(x)
    return model, predicted

In [68]:
models = {}
predictions = {}
for name, y in emissions_per_thousand_prompts.items():
    model, predicted = perform_regression(avg_output_tokens_output, y)
    models[name] = model
    predictions[name] = predicted
    print(f"{name} - Intercept: {model.intercept_}, Coefficient: {model.coef_[0]}")

Total Emissions Output Tok - Intercept: -0.0007795195459148752, Coefficient: 0.0005799787512447661
CPU Energy Output Tok - Intercept: 0.0005904601720052721, Coefficient: 0.00010979374634339546
GPU Energy Output Tok - Intercept: -0.0027304291004494052, Coefficient: 0.0005866435892202829
RAM Energy Output Tok - Intercept: 0.0009676678935606475, Coefficient: 0.00017577901084810286
Total Energy Output Tok - Intercept: -0.0011723010348828056, Coefficient: 0.0008722163464117812


In [69]:
idle_gpu_power = 28*4 # 28W per GPU, 4 GPUs

total_idle_gpu_energy = (idle_gpu_power/1000)*(total_time/3600) # Convert W into kw and s into h
idle_gpu_energy_per_thousand_prompts = total_idle_gpu_energy / num_prompts_output * 10_000

print(f"Idle GPU Energy per 10,000 prompts: {idle_gpu_energy_per_thousand_prompts}")

Idle GPU Energy per 10,000 prompts: [0.01530747 0.01607421 0.01974652 0.02499504 0.04504413 0.06575613
 0.08605537 0.1271023  0.16836698 0.27449595 0.45026047]


In [70]:
# Define the test types and model types
test_types = ['Output-tok-vllm']
model_types = ['llama3']

# Define the parameters
parameters = np.concatenate([parameters_output])
num_examples = np.concatenate([num_examples_output])
num_prompts = np.concatenate([num_prompts_output])
total_out_tok = np.concatenate([total_output_tokens_output])
total_in_tok = np.concatenate([total_input_tokens_output])
avg_out_tok = np.concatenate([avg_output_tokens_output])
avg_in_tok = np.concatenate([avg_input_tokens_output])

pred_emissions_per_10k_prompts = predictions['Total Emissions Output Tok']

pred_cpu_energy_per_10k_prompts = predictions['CPU Energy Output Tok']

pred_gpu_energy_per_10k_prompts = predictions['GPU Energy Output Tok']

pred_ram_energy_per_10k_prompts = predictions['RAM Energy Output Tok']

pred_total_energy_per_10k_prompts = predictions['Total Energy Output Tok']

actual_emissions_per_10k_prompts = emissions_per_thousand_prompts['Total Emissions Output Tok']

actual_cpu_energy_per_10k_prompts = emissions_per_thousand_prompts['CPU Energy Output Tok']

actual_gpu_energy_per_10k_prompts = emissions_per_thousand_prompts['GPU Energy Output Tok']

actual_ram_energy_per_10k_prompts = emissions_per_thousand_prompts['RAM Energy Output Tok']

actual_total_energy_per_10k_prompts = emissions_per_thousand_prompts['Total Energy Output Tok']

idle_gpu_energy_per_10k_prompts = idle_gpu_energy_per_thousand_prompts

In [71]:
# Repeat test types and model types for each data point
test_type_column = np.concatenate([
    np.repeat(test_types[0], len(parameters_output))
])

model_type_column = np.concatenate([
    np.repeat(model_types[0], len(parameters_output))
])

In [72]:
# Create the dataframe
df = pd.DataFrame({
    'test_type': test_type_column,
    'model_type': model_type_column,
    'parameters': parameters,
    'num_examples': num_examples,
    'num_prompts': num_prompts,
    'total_time': total_time,
    'time_per_prompt': time_per_prompt,
    'tok_per_sec': tok_per_sec,
    'total_out_tok': total_out_tok,
    'total_in_tok': total_in_tok,
    'avg_out_tok': avg_out_tok,
    'avg_in_tok': avg_in_tok,
    'actual_emissions_per_10k_prompts': actual_emissions_per_10k_prompts,
    'actual_total_energy_per_10k_prompts': actual_total_energy_per_10k_prompts,
    'actual_cpu_energy_per_10k_prompts': actual_cpu_energy_per_10k_prompts,
    'actual_gpu_energy_per_10k_prompts': actual_gpu_energy_per_10k_prompts,
    'actual_ram_energy_per_10k_prompts': actual_ram_energy_per_10k_prompts,
    'pred_emissions_per_10k_prompts': pred_emissions_per_10k_prompts,
    'pred_total_energy_per_10k_prompts': pred_total_energy_per_10k_prompts,
    'pred_cpu_energy_per_10k_prompts': pred_cpu_energy_per_10k_prompts,
    'pred_gpu_energy_per_10k_prompts': pred_gpu_energy_per_10k_prompts,
    'pred_ram_energy_per_10k_prompts': pred_ram_energy_per_10k_prompts,
    'idle_gpu_energy_per_10k_prompts': idle_gpu_energy_per_10k_prompts
})

df

Unnamed: 0,test_type,model_type,parameters,num_examples,num_prompts,total_time,time_per_prompt,tok_per_sec,total_out_tok,total_in_tok,...,actual_total_energy_per_10k_prompts,actual_cpu_energy_per_10k_prompts,actual_gpu_energy_per_10k_prompts,actual_ram_energy_per_10k_prompts,pred_emissions_per_10k_prompts,pred_total_energy_per_10k_prompts,pred_cpu_energy_per_10k_prompts,pred_gpu_energy_per_10k_prompts,pred_ram_energy_per_10k_prompts,idle_gpu_energy_per_10k_prompts
0,Output-tok-vllm,llama3,8,1,1000,49.20258,0.049203,369.350551,18173.0,186660.0,...,0.052587,0.007089,0.03414,0.011358,0.00976,0.014678,0.002586,0.007931,0.004162,0.015307
1,Output-tok-vllm,llama3,8,2,1000,51.66709,0.051667,416.570781,21523.0,186660.0,...,0.057581,0.007447,0.038221,0.011914,0.011703,0.0176,0.002954,0.009896,0.004751,0.016074
2,Output-tok-vllm,llama3,8,3,1000,63.470966,0.063471,643.806179,40863.0,186660.0,...,0.070998,0.00915,0.04719,0.014658,0.02292,0.034469,0.005077,0.021242,0.008151,0.019747
3,Output-tok-vllm,llama3,8,5,1000,80.341208,0.080341,869.516431,69858.0,186660.0,...,0.087716,0.011575,0.0576,0.018542,0.039737,0.059759,0.00826,0.038251,0.013247,0.024995
4,Output-tok-vllm,llama3,8,10,1000,144.784709,0.144785,1258.675737,182237.0,186660.0,...,0.161191,0.020857,0.106919,0.033415,0.104914,0.157778,0.020599,0.104178,0.033001,0.045044
5,Output-tok-vllm,llama3,8,15,1000,211.358977,0.211359,1363.594793,288208.0,186660.0,...,0.234493,0.030445,0.155277,0.048771,0.166375,0.250207,0.032234,0.166345,0.051629,0.065756
6,Output-tok-vllm,llama3,8,20,1000,276.606541,0.276607,1439.82134,398264.0,186660.0,...,0.308037,0.039842,0.204367,0.063828,0.230205,0.3462,0.044317,0.230909,0.070974,0.086055
7,Output-tok-vllm,llama3,8,30,1000,408.5431,0.408543,1455.834157,594771.0,186660.0,...,0.45619,0.058846,0.303081,0.094263,0.344175,0.517597,0.065893,0.346188,0.105516,0.127102
8,Output-tok-vllm,llama3,8,40,1000,541.179589,0.54118,1443.934722,781428.0,186660.0,...,0.605313,0.077949,0.402509,0.124856,0.452432,0.680402,0.086386,0.455689,0.138326,0.168367
9,Output-tok-vllm,llama3,8,60,1000,882.308407,0.882308,1362.87492,1202476.0,186660.0,...,0.998434,0.127081,0.667854,0.203499,0.696631,1.047647,0.132615,0.702694,0.212338,0.274496


In [73]:
# Define chart width and height
chart_width = 600
chart_height = 350

x_title = 'Average Output Tokens per Prompt'
x_data = 'avg_out_tok'
test_type = 'Output-tok-vllm'

scatter = alt.Chart(df).mark_circle(size=100).encode(
    x=alt.X(x_data, title=x_title),
    y=alt.Y('actual_emissions_per_10k_prompts', title='Actual Emissions per 10,000 Prompts'),
    color = alt.Color('test_type:N', title='Test Type').sort(df['test_type'].unique()),
    tooltip=[
        alt.Tooltip('parameters', title='Parameters (billions)'),
        alt.Tooltip('actual_emissions_per_10k_prompts', title='Actual Emissions per 10,000 Prompts'),
        alt.Tooltip('pred_emissions_per_10k_prompts', title='Predicted Emissions per 10,000 Prompts'),
        alt.Tooltip('avg_out_tok', title='Average Output Tokens per Prompt'),
        alt.Tooltip('avg_in_tok', title='Average Input Tokens per Prompt'),
        alt.Tooltip('num_examples', title='Number of Examples'),
        alt.Tooltip('num_prompts', title='Number of Prompts'),
        alt.Tooltip('model_type', title='Model Type'),
        alt.Tooltip('test_type', title='Test Type'),
        alt.Tooltip('test_type', title='Test Type'),
    ]
).properties(
    title=f'Actual Emissions for {test_type} per 10,000 Prompts',
    width=chart_width,
    height=chart_height
)

# Create line plots for predicted emissions
line = alt.Chart(df).mark_line().encode(
    x=alt.X(x_data, title=x_title),
    y=alt.Y('pred_emissions_per_10k_prompts', title='Predicted Emissions per 10,000 Prompts'),
    color = alt.Color('test_type:N', title='Test Type').sort(df['test_type'].unique()),
    tooltip=[
        alt.Tooltip('parameters', title='Parameters (billions)'),
        alt.Tooltip('actual_emissions_per_10k_prompts', title='Actual Emissions per 10,000 Prompts'),
        alt.Tooltip('pred_emissions_per_10k_prompts', title='Predicted Emissions per 10,000 Prompts'),
        alt.Tooltip('avg_out_tok', title='Average Output Tokens per Prompt'),
        alt.Tooltip('avg_in_tok', title='Average Input Tokens per Prompt'),
        alt.Tooltip('num_examples', title='Number of Examples'),
        alt.Tooltip('num_prompts', title='Number of Prompts'),
        alt.Tooltip('model_type', title='Model Type'),
        alt.Tooltip('test_type', title='Test Type'),
    ]
).properties(
    width=chart_width,
    height=chart_height
)

# Create a combined chart with overlays for all emission types
combined_chart = alt.layer(scatter+line).resolve_scale(
    x='independent'
).properties(
    title='Emissions per Ten-Thousand Prompts by Test Type',
    width=1300,  # Adjusted width for combined chart
    height=700  # Adjusted height for combined chart
)

combined_chart.show()

In [253]:
# Define chart width and height
chart_width = 600
chart_height = 350

llama2_note = 'Note: Emissions normalized to number of output tokens for Llama2 because the Llama2 and Llama3 output differed drastically'

# Function to create charts for each test type
def create_chart(df, test_type, color, remove_x_title=False):
    chart_data = df[df['test_type'] == test_type]

    if test_type == 'Output-tok': 
        x_title = 'Average Output Tokens per Prompt'
        x_data = 'avg_out_tok'
    elif test_type == 'Input-tok':
        x_title = 'Average Input Tokens per Prompt'
        x_data = 'avg_in_tok'
    elif test_type == 'Llama2 Params':
        x_title = 'Parameters (billions)'
        x_data = 'parameters'
    elif test_type == 'Llama3 Params':
        x_title = 'Parameters (billions)'
        x_data = 'parameters'
    
    scatter = alt.Chart(chart_data).mark_circle(size=100).encode(
        x=alt.X(x_data, title=x_title),
        y=alt.Y('actual_emissions_per_10k_prompts', title='Actual Emissions per 10,000 Prompts'),
        color = alt.Color('test_type:N', title='Test Type').sort(df['test_type'].unique()),
        tooltip=[
            alt.Tooltip('parameters', title='Parameters (billions)'),
            alt.Tooltip('actual_emissions_per_10k_prompts', title='Actual Emissions per 10,000 Prompts'),
            alt.Tooltip('pred_emissions_per_10k_prompts', title='Predicted Emissions per 10,000 Prompts'),
            alt.Tooltip('avg_out_tok', title='Average Output Tokens per Prompt'),
            alt.Tooltip('avg_in_tok', title='Average Input Tokens per Prompt'),
            alt.Tooltip('num_examples', title='Number of Examples'),
            alt.Tooltip('num_prompts', title='Number of Prompts'),
            alt.Tooltip('model_type', title='Model Type'),
            alt.Tooltip('test_type', title='Test Type'),
            alt.Tooltip('test_type', title='Test Type'),
        ]
    ).properties(
        title=f'Actual Emissions for {test_type} per 10,000 Prompts',
        width=chart_width,
        height=chart_height
    )

    # Create line plots for predicted emissions
    line = alt.Chart(chart_data).mark_line().encode(
        x=alt.X(x_data, title=x_title),
        y=alt.Y('pred_emissions_per_10k_prompts', title='Predicted Emissions per 10,000 Prompts'),
        color = alt.Color('test_type:N', title='Test Type').sort(df['test_type'].unique()),
        tooltip=[
            alt.Tooltip('parameters', title='Parameters (billions)'),
            alt.Tooltip('actual_emissions_per_10k_prompts', title='Actual Emissions per 10,000 Prompts'),
            alt.Tooltip('pred_emissions_per_10k_prompts', title='Predicted Emissions per 10,000 Prompts'),
            alt.Tooltip('avg_out_tok', title='Average Output Tokens per Prompt'),
            alt.Tooltip('avg_in_tok', title='Average Input Tokens per Prompt'),
            alt.Tooltip('num_examples', title='Number of Examples'),
            alt.Tooltip('num_prompts', title='Number of Prompts'),
            alt.Tooltip('model_type', title='Model Type'),
            alt.Tooltip('test_type', title='Test Type'),
        ]
    ).properties(
        width=chart_width,
        height=chart_height
    )
    
    # Add note for Llama2
    if test_type == 'Llama2 Params':
        chart_data['Note'] = llama2_note
        #print(chart_data)
        scatter = scatter.encode(
            tooltip=[
                alt.Tooltip('parameters', title='Parameters (billions)'),
                alt.Tooltip('actual_emissions_per_10k_prompts', title='Actual Emissions per 10,000 Prompts'),
                alt.Tooltip('pred_emissions_per_10k_prompts', title='Predicted Emissions per 10,000 Prompts'),
                alt.Tooltip('avg_out_tok', title='Average Output Tokens per Prompt'),
                alt.Tooltip('avg_in_tok', title='Average Input Tokens per Prompt'),
                alt.Tooltip('num_examples', title='Number of Examples'),
                alt.Tooltip('num_prompts', title='Number of Prompts'),
                alt.Tooltip('model_type', title='Model Type'),
                alt.Tooltip('test_type', title='Test Type'),
                alt.Tooltip('Note', title='Normalization Note')
            ]
        )
    
    if remove_x_title:
        scatter = scatter.encode(
            x=alt.X(x_data, title=None, axis=None),
        )
        line = line.encode(
            x=alt.X(x_data, title=None, axis=None),
        )


    return scatter + line

In [258]:
# Create charts for each test type
charts = []
colors = ['blue', 'green', 'red', 'purple']
for test_type, color in zip(df['test_type'].unique(), colors):
    charts.append(create_chart(df, test_type, color))

# Arrange the charts in a grid
grid_chart = alt.vconcat(*[alt.hconcat(*charts[i:i+2]) for i in range(0, len(charts), 2)]).resolve_scale(
    y='independent'
)

grid_chart.show()

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  chart_data['Note'] = llama2_note


In [261]:
# Create charts for each test type
stacked_charts = []
stacked_df = df[df.num_examples != 90]
for test_type, color in zip(stacked_df['test_type'].unique(), colors):
    stacked_charts.append(create_chart(stacked_df, test_type, color, remove_x_title=True))


# Create a combined chart with overlays for all emission types
combined_chart = alt.layer(*stacked_charts).resolve_scale(
    x='independent'
).properties(
    title='Emissions per Ten-Thousand Prompts by Test Type',
    width=1300,  # Adjusted width for combined chart
    height=700  # Adjusted height for combined chart
)

combined_chart.show()

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  chart_data['Note'] = llama2_note


In [33]:
# Store the results in a CSV file
result_df.to_csv('results/emission_regression.csv', index=False)