# README

This notebook contains analysis for different local LLMs each beoing prompted with the task generating an emails with different output length requirements.

## Links

repo: https://github.com/envite-consulting/ollama-llm-energy-measurement/tree/main

OpenAPI GMT: https://api.green-coding.io/docs#/

<hr>

## TODO

- [ ] import runs from GMT

- [ ] add interactive element to select run_ids

# Notebook

## Imports

In [1]:
import requests
import pandas as pd
from IPython.display import HTML

## Input Parameters

In [2]:
API_KEY = ''

In [3]:
REPOSITORY = 'https://github.com/envite-consulting/ollama-llm-energy-measurement'

## Getting Run_IDs

In [4]:
def get_run_ids():
    url = "https://api.green-coding.io/v2/runs"  # Replace with the actual endpoint from the OpenAPI spec
    headers = {
        "x-authentication": API_KEY
    }
    params = {
        "uri": REPOSITORY
    }

    response = requests.get(url, headers=headers, params=params)
    # Print the status of the request and the number of runs
    print(f"Request Status: {response.status_code} {response.reason}")
    response_json = response.json()
    print(f"Number of Runs: {len(response_json.get('data', []))}")
    return response

response = get_run_ids()

Request Status: 200 OK
Number of Runs: 20


### Saving in Dataframe

In [5]:
def display_gmt_data_as_dataframe(response):
    # Extract the 'data' property from the response
    data = response.json().get('data', [])

    # Create a DataFrame with the first 7 entries of each row
    df = pd.DataFrame(data, columns=['Run_ID', 'Name', 'Repo', 'Branch', 'Time', 'unknown', 'Usage_Scenario'] + [f'Extra_{i}' for i in range(len(data[0]) - 7)])
    df = df[['Run_ID', 'Name', 'Repo', 'Branch', 'Time', 'Usage_Scenario']]  # Keep only the required columns, excluding 'unknown'

    return df

gmt_dataframe = display_gmt_data_as_dataframe(response)

# Uncomment to see the dataframe
gmt_dataframe

Unnamed: 0,Run_ID,Name,Repo,Branch,Time,Usage_Scenario
0,72fe8e85-e503-4756-a840-41889e561288,EMail Ollama Measurement Deepseek 1.5B,https://github.com/envite-consulting/ollama-ll...,emailmeasurement,2025-06-27T16:25:34.204084+02:00,usage_scenario_email_deepseekr1_1_5b.yml
1,e14e9506-75ce-4a10-bc5a-e0a87748d702,EMail Ollama Measurement Deepseek 1.5B,https://github.com/envite-consulting/ollama-ll...,emailmeasurement,2025-06-27T16:16:35.031520+02:00,usage_scenario_email_deepseekr1_1_5b.yml
2,bd9415bf-9664-4ad5-b739-b593e7d3ff8d,EMail Ollama Measurement Deepseek 1.5B,https://github.com/envite-consulting/ollama-ll...,emailmeasurement,2025-06-27T16:08:34.851600+02:00,usage_scenario_email_deepseekr1_1_5b.yml
3,099086d9-793e-437d-86a0-e113679ec78e,EMail Ollama Measurement Llama 3.2 3B,https://github.com/envite-consulting/ollama-ll...,emailmeasurement,2025-06-27T15:59:36.678543+02:00,usage_scenario_email_llama32_3b.yml
4,d10d0b34-18b1-47c0-b2e8-1ffd52aea9c8,EMail Ollama Measurement Llama 3.2 3B,https://github.com/envite-consulting/ollama-ll...,emailmeasurement,2025-06-27T15:50:47.698796+02:00,usage_scenario_email_llama32_3b.yml
5,a0253185-b2bb-4b4e-8fee-cb0894536646,EMail Ollama Measurement Llama 3.2 3B,https://github.com/envite-consulting/ollama-ll...,emailmeasurement,2025-06-27T15:42:00.154661+02:00,usage_scenario_email_llama32_3b.yml
6,f751405c-a81a-4642-b8a4-321c135d8331,EMail Ollama Measurement Llama 3.2 1B,https://github.com/envite-consulting/ollama-ll...,emailmeasurement,2025-06-27T15:34:38.301686+02:00,usage_scenario_email_llama32_1b.yml
7,bc6e44f4-9e65-49dc-8ce7-50b22c95b796,EMail Ollama Measurement Llama 3.2 1B,https://github.com/envite-consulting/ollama-ll...,emailmeasurement,2025-06-27T15:27:16.846380+02:00,usage_scenario_email_llama32_1b.yml
8,f4bc5ea5-3a8e-4ee0-920b-d65c79e56e11,EMail Ollama Measurement Llama 3.2 1B,https://github.com/envite-consulting/ollama-ll...,emailmeasurement,2025-06-27T15:19:55.832297+02:00,usage_scenario_email_llama32_1b.yml
9,d5c758ac-89dd-497f-822a-f76b65984e39,EMail Ollama Measurement Gemma 3 4b,https://github.com/envite-consulting/ollama-ll...,emailmeasurement,2025-06-27T12:53:01.361964+02:00,usage_scenario_email_gemma3_4b.yml


## Select Runs

In [6]:
# this is hardcoded for now, a select element should be added later

# hard-coding DeepSeek runs for now
selected_ids = [
    '72fe8e85-e503-4756-a840-41889e561288',
    'e14e9506-75ce-4a10-bc5a-e0a87748d702',
    'bd9415bf-9664-4ad5-b739-b593e7d3ff8d'
]

selected_id= '72fe8e85-e503-4756-a840-41889e561288'

### Fetch Measurement Data for Runs

In [7]:
def fetch_measurement_data(run_id):
    url = f"https://api.green-coding.io/v1/measurements/single/{run_id}"
    headers = {
        "x-authentication": API_KEY
    }
    response = requests.get(url, headers=headers)
    return response.json()

# Fetch data for each selected Run_ID
    measurement_data = fetch_measurement_data(selected_id)[data]
   

### Fetch Measurement Data for Phases

In [8]:
def fetch_phase_stats(selected_id):
    url = f"https://api.green-coding.io/v1/phase_stats/single/{selected_id}"
    headers = {
        "x-authentication": API_KEY
    }
    response = requests.get(url, headers=headers)
    if response.status_code == 200:
        return response.json().get('data', {}).get('data', {})
    else:
        print(f"Failed to fetch phase stats for Run ID {selected_id}: {response.status_code} {response.reason}")
        return null


# Example usage
phase_stats = fetch_phase_stats(selected_id)
phase_stats['[BASELINE]']

{'phase_time_syscall_system': {'type': 'TOTAL',
  'unit': 'us',
  'data': {'[SYSTEM]': {'name': '[SYSTEM]',
    'p_value': None,
    'is_significant': None,
    'data': {'72fe8e85-e503-4756-a840-41889e561288': {'mean': 60158757,
      'max': None,
      'min': None,
      'max_mean': None,
      'min_mean': None,
      'stddev': None,
      'sr_avg_avg': None,
      'sr_max_max': None,
      'sr_95p_max': None,
      'ci': None,
      'p_value': None,
      'is_significant': None,
      'values': [60158757]}}}}},
 'cpu_energy_rapl_msr_component': {'type': 'TOTAL',
  'unit': 'uJ',
  'data': {'Package_0': {'name': 'Package_0',
    'p_value': None,
    'is_significant': None,
    'data': {'72fe8e85-e503-4756-a840-41889e561288': {'mean': 495558540,
      'max': None,
      'min': None,
      'max_mean': None,
      'min_mean': None,
      'stddev': None,
      'sr_avg_avg': 99247,
      'sr_max_max': 100826,
      'sr_95p_max': 99446,
      'ci': None,
      'p_value': None,
      'is_sign

> create a dataframe for each phase in phase_stats giving an overview of the contained measurements. include the name, the type, the unit  and the value of the mesaurement

In [15]:
# Adjusting the code to handle the structure of phase_stats correctly

phase_dataframes = {}

for phase, measurements in phase_stats.items():
    rows = []
    for measurement_name, measurement_details in measurements.items():
        if 'data' in measurement_details:
            for component_name, component_details in measurement_details['data'].items():
                for run_id, run_data in component_details['data'].items():
                    rows.append({
                        'Name': measurement_name,
                        'Component': component_name,
                        'Type': measurement_details['type'],
                        'Unit': measurement_details['unit'],
                        'Value': run_data['mean']
                    })
    phase_dataframes[phase] = pd.DataFrame(rows)

phase_dataframes['[BASELINE]']

Unnamed: 0,Name,Component,Type,Unit,Value
0,phase_time_syscall_system,[SYSTEM],TOTAL,us,60158757
1,cpu_energy_rapl_msr_component,Package_0,TOTAL,uJ,495558540
2,cpu_power_rapl_msr_component,Package_0,MEAN,mW,8238
3,cpu_utilization_cgroup_system,GMT Overhead,MEAN,Ratio,55
4,cpu_utilization_procfs_system,[SYSTEM],MEAN,Ratio,56
5,gpu_energy_nvidia_nvml_component,NVIDIA GeForce GTX 1080-0,TOTAL,uJ,1081319915
6,gpu_power_nvidia_nvml_component,NVIDIA GeForce GTX 1080-0,MEAN,mW,17974
7,lmsensors_temperature_component,coretemp-isa-0000_Package-id-0,MEAN,centi°C,4011
8,lmsensors_temperature_component,coretemp-isa-0000_Core-0,MEAN,centi°C,4007
9,memory_energy_rapl_msr_component,DRAM_0,TOTAL,uJ,40810513


The dataframes for each phase in `phase_stats` have been created, providing an overview of the measurements, including the name, type, unit, and value. Let me know if you need further processing or analysis.

In [18]:
relevant_measurements = [
    'phase_time_syscall_system',
    'cpu_energy_rapl_msr_component',
    'cpu_utilization_procfs_system',
    'gpu_energy_nvidia_nvml_component',
    'memory_energy_rapl_msr_component',
]

> for each phase, create a new dataframe only containing the rows that have the names given in relevant_measurements

In [21]:
filtered_phase_dataframes = {}

for phase, df in phase_dataframes.items():
    filtered_phase_dataframes[phase] = df[df['Name'].isin(relevant_measurements)]

filtered_phase_dataframes['[BASELINE]']

Unnamed: 0,Name,Component,Type,Unit,Value
0,phase_time_syscall_system,[SYSTEM],TOTAL,us,60158757
1,cpu_energy_rapl_msr_component,Package_0,TOTAL,uJ,495558540
4,cpu_utilization_procfs_system,[SYSTEM],MEAN,Ratio,56
5,gpu_energy_nvidia_nvml_component,NVIDIA GeForce GTX 1080-0,TOTAL,uJ,1081319915
9,memory_energy_rapl_msr_component,DRAM_0,TOTAL,uJ,40810513


The filtered dataframes for each phase now only contain rows with names matching the relevant measurements. Let me know if you need further analysis or processing.

> Generate a single dataframe combining the info from all dataframes in filtered_phase_dataframes. each column should be a phase (key name in filtered_phase_dataframes), the rows are the same as the rows in the dataframes in filtered_phase_dataframes. for the value of each Cell, concatenate value and unit 

In [27]:
# Create a combined dataframe
combined_df = pd.DataFrame()

# Iterate through each phase and its dataframe
for phase, df in filtered_phase_dataframes.items():
    # Create a new column with concatenated value and unit
    df[phase] = df['Value'].astype(str) + ' ' + df['Unit']
    # Set the index to 'Name' for alignment
    df.set_index('Name', inplace=True)
    # Add the phase column to the combined dataframe
    if combined_df.empty:
        combined_df = df[[phase]]
    else:
        combined_df = combined_df.join(df[[phase]], how='outer')

# Reset the index for the final dataframe
combined_df.reset_index(inplace=True)
combined_df

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df[phase] = df['Value'].astype(str) + ' ' + df['Unit']
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df[phase] = df['Value'].astype(str) + ' ' + df['Unit']
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df[phase] = df['Value'].astype(str) + ' ' + df['Unit']
A value is trying to be set on a copy of

Unnamed: 0,Name,[BASELINE],[INSTALLATION],[BOOT],[IDLE],Download deepseek-r1 1.5b,Load gemma3-4b into memory,Run test run,Run second test run,Run email generation prompt with length condition 16 words,Run email generation prompt with length 32 words,Run email generation prompt with length condition 64 words,Run email generation prompt with length condition 128 words,Run email generation prompt with length condition 256 words,Run email generation prompt with length condition 512 words,Run email generation prompt with length condition 1024 words,Run email generation prompt with length condition 2056 words,[REMOVE],[RUNTIME]
0,cpu_energy_rapl_msr_component,495558540 uJ,11169489 uJ,10458127 uJ,493079296 uJ,2702118672 uJ,47187120 uJ,713506395 uJ,187200586 uJ,63460128 uJ,81716232 uJ,921579956 uJ,113845987 uJ,180158228 uJ,173179534 uJ,175720187 uJ,197360194 uJ,4986200 uJ,5557033219 uJ
1,cpu_utilization_procfs_system,56 Ratio,168 Ratio,1078 Ratio,68 Ratio,441 Ratio,1262 Ratio,1777 Ratio,1772 Ratio,1757 Ratio,1761 Ratio,1770 Ratio,1802 Ratio,1783 Ratio,1794 Ratio,1788 Ratio,1785 Ratio,82 Ratio,1119 Ratio
2,gpu_energy_nvidia_nvml_component,1081319915 uJ,23708959 uJ,18238181 uJ,1084687414 uJ,1894811880 uJ,169496182 uJ,4917219170 uJ,1260283866 uJ,432649250 uJ,542742565 uJ,6406672429 uJ,776188374 uJ,1197507918 uJ,1161763793 uJ,1186500545 uJ,1339810186 uJ,50449005 uJ,21285646158 uJ
3,memory_energy_rapl_msr_component,40810513 uJ,893913 uJ,763850 uJ,40784300 uJ,74176694 uJ,2674607 uJ,22154273 uJ,5910669 uJ,2090376 uJ,2576466 uJ,28531201 uJ,3544720 uJ,5608237 uJ,5333589 uJ,5396577 uJ,6075647 uJ,740472 uJ,164073056 uJ
4,phase_time_syscall_system,60158757 us,1294496 us,1020475 us,60104053 us,105051748 us,2887885 us,27434513 us,7272998 us,2625339 us,3091447 us,35231346 us,4398668 us,6785437 us,6552374 us,6636758 us,7457584 us,1100198 us,215426097 us


The combined dataframe has been successfully created, with each column representing a phase and the values concatenated with their respective units. Let me know if you need further adjustments or analysis.

<a style='text-decoration:none;line-height:16px;display:flex;color:#5B5B62;padding:10px;justify-content:end;' href='https://deepnote.com?utm_source=created-in-deepnote-cell&projectId=7e5ef0c2-3ed5-4773-825f-1c7babd264b0' target="_blank">
 </img>
Created in <span style='font-weight:600;margin-left:4px;'>Deepnote</span></a>