<a href="https://colab.research.google.com/github/antalvdb/mblm/blob/main/MBLM_graphics.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# MBLM Graphics

In [None]:
import pandas as pd

# Benchmark measurements grouped by model. Every run is a tuple of
# (training set size, token prediction accuracy, tokens per second, kWh usage),
# listed in the row order the rest of the notebook relies on.
measurements = [
    ('IB1-IG', [
        (51327, 0.0940576134962002, 40.84, 0.381481),
    ]),
    ('TRIBL2', [
        (51327, 0.101171766203795, 59.57, 0.261655),
        (486655, 0.132626427406199, 42.5, 0.358419),
        (4890203, 0.170538733935463, 68.03, 0.22401),
        (100000000, 0.218024111725619, 118.21, 0.131488),
        (500000000, 0.246489617542861, 100.42, 0.190552),
    ]),
    ('IGTree', [
        (51327, 0.110462340349341, 244.43, 0.063788),
        (486655, 0.137273703895277, 318.71, 0.048881),
        (4890203, 0.174762264751521, 783.5, 0.019922),
        (100000000, 0.22028408864839, 739.04, 0.021115),
        (500000000, 0.24814460448398, 504.42, 0.038263),
    ]),
    ('GPT-2', [
        (8892000000, 0.311731587952094, 30.68, 0.503662),
        (8892000000, 0.336613217681932, 10.72, 1.438911),
        (8892000000, 0.349602116738948, 5.86, 2.6263),
        (8892000000, 0.357832331993793, 3.43, 4.702076),
    ]),
    ('GPT-Neo', [
        (196735500000, 0.362163291290335, 3.9, 3.941196),
        (196735500000, 0.372329208610193, 1.99, 7.780539),
    ]),
]

column_names = ['Model', 'Training set size', 'Token prediction accuracy',
                'Tokens per second', 'kWh usage']

# Flatten to one record per run, then transpose into a column -> list mapping.
records = [(model, *run) for model, runs in measurements for run in runs]
data = {name: list(values) for name, values in zip(column_names, zip(*records))}

# Build the DataFrame used by every later cell.
df = pd.DataFrame(data)

print(df)

In [None]:
import altair as alt

# Accuracy as a function of training set size: one line with point markers per
# model, x on a log scale, drawn on an 800x450 (16:9) canvas.
# No chart title is set here.
size_axis = alt.X('Training set size', scale=alt.Scale(type='log'))
hover_fields = ['Model', 'Training set size', 'Token prediction accuracy']

chart = (
    alt.Chart(df)
    .mark_line(point=True)
    .encode(
        x=size_axis,
        y='Token prediction accuracy',
        color='Model',  # one colour per value of the 'Model' column
        tooltip=hover_fields,
    )
    .properties(width=800, height=450)
    .interactive()
)

chart.display()

# Task
Generate a plot using the dataframe `df` with 'Training set size' on the x-axis (logarithmic scale) and 'Token prediction accuracy' on the y-axis. Plot the 'IB1-IG' series (a single point in `df`) and label it in the legend. Add a line for 'GPT-2' and label its four points as 'small', 'medium', 'large', and 'xl'; all four share the same 'Training set size' value, so the labels are assigned in row order, starting with 'small'. Set the aspect ratio of the plot to 16:9.

## Prepare data

### Subtask:
Add new columns to the dataframe to store the size labels for the GPT-2 and GPT-Neo data points, plus a combined label column.


**Reasoning**:
Create new columns in the dataframe holding the size labels for the GPT-2 and GPT-Neo rows, assigned in row order, and merge them into a single 'Combined Label' column.



In [None]:
def _ordered_labels(models, family, names):
    """Return one label per entry of `models`.

    Rows equal to `family` receive the next unused entry of `names`, in row
    order; every other row, and any `family` row after `names` runs out,
    receives None.
    """
    unused = iter(names)
    return [next(unused, None) if model == family else None for model in models]


# Size labels follow the order in which each family's rows appear in `df`.
gpt2_labels = _ordered_labels(df['Model'], 'GPT-2', ['small', 'medium', 'large', 'xl'])
gptneo_labels = _ordered_labels(df['Model'], 'GPT-Neo', ['1.3B', '2.7B'])

df['GPT-2 Label'] = gpt2_labels
df['GPT-Neo Label'] = gptneo_labels
# A row matches at most one family, so filling the GPT-2 gaps with the GPT-Neo
# labels yields a single column that carries whichever label exists.
df['Combined Label'] = df['GPT-2 Label'].fillna(df['GPT-Neo Label'])


display(df)

## Add labels to chart

### Subtask:
Modify the chart code to include a text layer that displays the labels for the GPT-2 and GPT-Neo data points.


**Reasoning**:
Create a text layer for the chart to display the GPT-2 and GPT-Neo labels.



In [None]:
# Text layer: draw each row's 'Combined Label' left-aligned, 5 px to the right
# of its data point.
label_mark = alt.Chart(df).mark_text(align='left', baseline='middle', dy=0, dx=5)
label_tooltip = ['Model', 'Training set size', 'Token prediction accuracy', 'GPT-2 Label', 'GPT-Neo Label']

text = label_mark.encode(
    x=alt.X('Training set size', scale=alt.Scale(type='log')),
    y='Token prediction accuracy',
    text='Combined Label',
    tooltip=label_tooltip,
).transform_filter(
    # Keep only the GPT-2 and GPT-Neo rows in this layer.
    alt.FieldOneOfPredicate(field='Model', oneOf=['GPT-2', 'GPT-Neo'])
)

# Overlay the labels on the accuracy chart.
chart_with_text = chart + text

chart_with_text.display()

## Compute and Display Regression Lines

Now, compute the log-regression lines for the 'IGTree' and 'TRIBL2' models and add them to the chart with a separate legend.

In [None]:
import numpy as np
from scipy.stats import linregress
import altair as alt

def _fit_log_regression(model_df):
    """Regress token prediction accuracy on ln(training set size) for one model.

    Adds a 'log_training_size' column (natural log of 'Training set size') to
    `model_df` and returns the five values unpacked from `linregress`:
    (slope, intercept, r_value, p_value, std_err).
    """
    model_df['log_training_size'] = np.log(model_df['Training set size'])
    slope, intercept, r_value, p_value, std_err = linregress(
        model_df['log_training_size'], model_df['Token prediction accuracy']
    )
    return slope, intercept, r_value, p_value, std_err


def _report_regression(name, slope, intercept, r_value):
    """Print the fitted formula and fit quality for model `name`.

    Returns the accuracy gain per 10-fold increase in training set size.
    """
    print(f"{name} Regression:")
    print(f"  Formula: Token Prediction Accuracy = {intercept:.4f} + {slope:.4f} * log(Training set size)")
    print(f"  R-value: {r_value:.4f}")
    print(f"  R-squared: {r_value**2:.4f}")
    # The slope is per unit of ln(size); a 10-fold size increase adds ln(10) units.
    increase_per_decade = slope * np.log(10)
    print(f"  Average increase per 10-fold increase in training set size: {increase_per_decade:.4f}")
    print("-" * 20)
    return increase_per_decade


def _estimate_accuracy(intercept, slope, training_size):
    """Evaluate the fitted line at `training_size` (natural-log model)."""
    return intercept + slope * np.log(training_size)


# Filter data for IGTree and TRIBL2 models (copies, so `df` itself is untouched)
igtree_df = df[df['Model'] == 'IGTree'].copy()
tribl2_df = df[df['Model'] == 'TRIBL2'].copy()

# Fit and report IGTree
slope_igtree, intercept_igtree, r_value_igtree, p_value_igtree, std_err_igtree = _fit_log_regression(igtree_df)
average_increase_igtree = _report_regression('IGTree', slope_igtree, intercept_igtree, r_value_igtree)


# Fit and report TRIBL2
slope_tribl2, intercept_tribl2, r_value_tribl2, p_value_tribl2, std_err_tribl2 = _fit_log_regression(tribl2_df)
average_increase_tribl2 = _report_regression('TRIBL2', slope_tribl2, intercept_tribl2, r_value_tribl2)

# Estimate token prediction accuracy at 1e+11 and 1e+12 training set sizes
estimated_accuracy_igtree_1e11 = _estimate_accuracy(intercept_igtree, slope_igtree, 1e11)
estimated_accuracy_igtree_1e12 = _estimate_accuracy(intercept_igtree, slope_igtree, 1e12)

estimated_accuracy_tribl2_1e11 = _estimate_accuracy(intercept_tribl2, slope_tribl2, 1e11)
estimated_accuracy_tribl2_1e12 = _estimate_accuracy(intercept_tribl2, slope_tribl2, 1e12)

print("Estimated Token Prediction Accuracy:")
print(f"  IGTree at 1e+11: {estimated_accuracy_igtree_1e11:.4f}")
print(f"  IGTree at 1e+12: {estimated_accuracy_igtree_1e12:.4f}")
print(f"  TRIBL2 at 1e+11: {estimated_accuracy_tribl2_1e11:.4f}")
print(f"  TRIBL2 at 1e+12: {estimated_accuracy_tribl2_1e12:.4f}")
print("-" * 20)


def _log_size_axis():
    """Return a fresh x encoding: 'Training set size' on a log scale."""
    return alt.X('Training set size', scale=alt.Scale(type='log'))


def _regression_frame(intercept, slope, label):
    """Evaluate a fitted line at every training set size in `df`.

    The 'Model' column is set to `label` on every row so the line gets its own
    legend entry.
    """
    sizes = df['Training set size']
    return pd.DataFrame({
        'Training set size': sizes,
        'Token prediction accuracy': intercept + slope * np.log(sizes),
        'Model': label
    })


def _regression_line(frame, color, dash):
    """Return a line layer for one regression frame, drawn with dash pattern `dash`."""
    return alt.Chart(frame).mark_line().encode(
        x=_log_size_axis(),
        y='Token prediction accuracy',
        color=color,
        strokeDash=alt.value(dash),
        tooltip=['Training set size', 'Token prediction accuracy', 'Model']
    )


# Regression values over the full range of training set sizes present in `df`
regression_df_igtree = _regression_frame(intercept_igtree, slope_igtree, 'IGTree Regression')
regression_df_tribl2 = _regression_frame(intercept_tribl2, slope_tribl2, 'TRIBL2 Regression')

# Measured data: one line with point markers per model, legend titled 'Models'
base_chart = (
    alt.Chart(df)
    .mark_line(point=True)
    .encode(
        x=_log_size_axis(),
        y='Token prediction accuracy',
        color=alt.Color('Model', legend=alt.Legend(title='Models')),
        tooltip=['Model', 'Training set size', 'Token prediction accuracy']
    )
    .properties(
        title='Token Prediction Accuracy vs. Training Set Size',
        width=800,
        height=450
    )
    .interactive()
)

# Size labels for the GPT-2 and GPT-Neo points only
text = alt.Chart(df).mark_text(align='left', baseline='middle', dy=0, dx=5).encode(
    x=_log_size_axis(),
    y='Token prediction accuracy',
    text='Combined Label',
    tooltip=['Model', 'Training set size', 'Token prediction accuracy', 'GPT-2 Label', 'GPT-Neo Label']
).transform_filter(
    alt.FieldOneOfPredicate(field='Model', oneOf=['GPT-2', 'GPT-Neo'])
)


# Regression lines. Only the IGTree layer sets a legend title ('Regression');
# the TRIBL2 layer uses the plain 'Model' shorthand.
# NOTE(review): no scale resolution is set on the layered chart, so whether the
# 'Regression' title appears as a separate legend or is merged with 'Models'
# should be confirmed in the rendered output.
regression_line_igtree = _regression_line(
    regression_df_igtree,
    alt.Color('Model', legend=alt.Legend(title='Regression')),
    [5, 5],  # dashed
)
regression_line_tribl2 = _regression_line(
    regression_df_tribl2,
    'Model',
    [2, 2],  # dotted
)

# Layer order: measured data, labels, then the two regression lines
final_chart = base_chart + text + regression_line_igtree + regression_line_tribl2

final_chart.display()

## Plot Token Generation Latency vs. Training Set Size

Generate a plot showing the relationship between Token Generation Latency and Training Set Size.

In [None]:
import altair as alt

# Latency is the reciprocal of throughput: seconds per generated token.
seconds_per_token = 1 / df['Tokens per second']
df['Token generation latency'] = seconds_per_token

# Legend order for the models
model_order = ['IB1-IG', 'TRIBL2', 'IGTree', 'GPT-2', 'GPT-Neo']

# Encodings shared by the point and line layers
latency_encodings = dict(
    x=alt.X('Training set size', scale=alt.Scale(type='log')),
    y=alt.Y('Token generation latency', title='Token generation latency (s)'),
    color=alt.Color('Model', sort=model_order, legend=alt.Legend(title='Models')),
    tooltip=['Model', 'Training set size', 'Token generation latency', 'Tokens per second'],
)

base_chart_latency = alt.Chart(df).encode(**latency_encodings).properties(
    title='Token Generation Latency vs. Training Set Size',
    width=800,
    height=450
)

# Filled circles at full opacity; colour comes from the base chart.
latency_points = base_chart_latency.mark_circle().encode(opacity=alt.value(1))

# Lines: every model is mapped to the same strokeDash value (0); the original
# comment states the intent is solid lines for all models.
latency_dash_scale = alt.Scale(domain=model_order, range=[0, 0, 0, 0, 0])
latency_lines = base_chart_latency.mark_line().encode(
    strokeDash=alt.StrokeDash('Model', scale=latency_dash_scale)
)

chart_latency = latency_points + latency_lines

# Size labels for the GPT-2 and GPT-Neo points, positioned on the latency axis
text_latency = alt.Chart(df).mark_text(align='left', baseline='middle', dy=0, dx=5).encode(
    x=alt.X('Training set size', scale=alt.Scale(type='log')),
    y='Token generation latency',
    text='Combined Label',
    tooltip=['Model', 'Training set size', 'Token generation latency', 'GPT-2 Label', 'GPT-Neo Label']
).transform_filter(
    alt.FieldOneOfPredicate(field='Model', oneOf=['GPT-2', 'GPT-Neo'])
)

# Points and lines first, labels on top
final_chart_latency = chart_latency + text_latency

final_chart_latency.display()

## Plot kWh Usage vs. Training Set Size

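Generate a plot showing the relationship between energy use and Training Set Size. The measured kWh usage is converted to CO2 emissions equivalent (in grams) using a carbon intensity of 344 g/kWh for Germany, and horizontal reference lines (car, washing machine, tumble dryer, milk production, steel production) are drawn for comparison.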

In [None]:
import altair as alt

def _reference_layers(level, caption):
    """Build the (rule, label) layer pair for one comparison level.

    `level` is a y-value on the chart's CO2 axis and `caption` is the text drawn
    next to it. Both layers take `level` from this one argument, so a rule and
    its label always sit at the same height.
    """
    rule = alt.Chart(pd.DataFrame({'y': [level]})).mark_rule(color='gray').encode(
        y='y',
        tooltip=['y']
    )
    label = alt.Chart(pd.DataFrame({'y': [level], 'text': [caption]})).mark_text(
        align='right', baseline='bottom', dx=-5, dy=-5
    ).encode(
        y='y',
        text='text',
        color=alt.value('gray')
    )
    return rule, label


# Carbon intensity for Germany in g/kWh
carbon_intensity_germany = 344

# Calculate CO2 emissions equivalent in grams
df['CO2 emissions equivalent (g)'] = df['kWh usage'] * carbon_intensity_germany

# Define the order of models for the legend
model_order = ['IB1-IG', 'TRIBL2', 'IGTree', 'GPT-2', 'GPT-Neo']

# Base chart: encodings shared by the point and line layers
base_chart_co2 = alt.Chart(df).encode(
    x=alt.X('Training set size', scale=alt.Scale(type='log')),
    y=alt.Y('CO2 emissions equivalent (g)', title='CO2 emissions equivalent (g)'),
    color=alt.Color('Model', sort=model_order, legend=alt.Legend(title='Models')),
    tooltip=['Model', 'Training set size', 'CO2 emissions equivalent (g)', 'kWh usage']
).properties(
    title='CO2 Emissions Equivalent vs. Training Set Size',
    width=800,
    height=450
)

# Filled circles at full opacity, plus lines; colour comes from the base chart.
# Every model is mapped to the same strokeDash value (0); the original comment
# states the intent is solid lines for all models.
chart_co2 = base_chart_co2.mark_circle().encode(
    opacity=alt.value(1)
) + base_chart_co2.mark_line().encode(
    strokeDash=alt.StrokeDash('Model',
                              scale=alt.Scale(domain=model_order,
                                              range=[0, 0, 0, 0, 0]))
)

# Size labels for the GPT-2 and GPT-Neo points, positioned on the CO2 axis
text_co2 = alt.Chart(df).mark_text(align='left', baseline='middle', dy=0, dx=5).encode(
    x=alt.X('Training set size', scale=alt.Scale(type='log')),
    y='CO2 emissions equivalent (g)',
    text='Combined Label',
    tooltip=['Model', 'Training set size', 'CO2 emissions equivalent (g)', 'kWh usage', 'GPT-2 Label', 'GPT-Neo Label']
).transform_filter(
    alt.FieldOneOfPredicate(field='Model', oneOf=['GPT-2', 'GPT-Neo'])
)

# Comparison levels drawn as gray horizontal rules with a caption each.
# NOTE(review): the source of these reference values is not recorded in the
# notebook; confirm them before reuse.
car_line, car_text = _reference_layers(560, 'Car, 10m')
washing_machine_line, washing_machine_text = _reference_layers(275, 'Washing machine')
tumble_dryer_line, tumble_dryer_text = _reference_layers(1000, 'Tumble dryer')
milk_line, milk_text = _reference_layers(2400, 'Milk 1l production')
steel_line, steel_text = _reference_layers(1891, 'Steel 1kg production (furnace)')


# Layer order: data, size labels, all reference rules, then all reference captions
final_chart_co2 = chart_co2 + text_co2 + car_line + washing_machine_line + tumble_dryer_line + milk_line + steel_line + car_text + washing_machine_text + tumble_dryer_text + milk_text + steel_text

final_chart_co2.display()