In [19]:
import os
import subprocess
import re
import csv
import plotly.express as px
import plotly.graph_objects as go
from plotly.subplots import make_subplots
import plotly.io as pio
import pandas as pd
import numpy as np
import time
import plotly.figure_factory as ff
from PIL import Image

In [20]:
def regexParse2(content, videoType):
    # Declare local dataframes
    dfPower, dfFrequency, dfUsage = pd.DataFrame(), pd.DataFrame(), pd.DataFrame()

    # Regex findall returns matches which are all strings
    # Used list(map(int, regexMatches))) to convert the list of strings to a list of int or float so that in Excel it's an integer
    dfPower['Efficiency Cluster'] = pd.Series(map(int, re.findall(r'E-Cluster Power:\s*([\d.]+)\s*mW', content)))
    dfFrequency['Efficiency Cluster'] = pd.Series(map(int, re.findall(r'E-Cluster HW active frequency:\s*([\d.]+)\s*MHz', content)))
    dfUsage['Efficiency Cluster'] = pd.Series(map(float, re.findall(r'E-Cluster HW active residency:\s*([\d.]+)%', content)))

    dfPower['Performance Cluster'] = pd.Series(map(int, re.findall(r'P-Cluster Power:\s*([\d.]+)\s*mW', content)))
    dfFrequency['Performance Cluster'] = pd.Series(map(int, re.findall(r'P-Cluster HW active frequency:\s*([\d.]+)\s*MHz', content)))
    dfUsage['Performance Cluster'] = pd.Series(map(float, re.findall(r'P-Cluster HW active residency:\s*([\d.]+)%', content)))

    dfPower['DRAM'] = pd.Series(map(int, re.findall(r'DRAM Power:\s*([\d.]+)\s*mW', content)))
    dfPower['Cluster'] = pd.Series(map(int, re.findall(r'Clusters Total Power:\s*([\d.]+)\s*mW', content)))
    dfPower['Package'] = pd.Series(map(int, re.findall(r'Package Power:\s*([\d.]+)\s*mW', content)))

    dfPower['GPU'] = pd.Series(map(int, re.findall(r'GPU Power:\s*([\d.]+)\s*mW\nPackage Power', content)))
    dfFrequency['GPU'] = pd.Series(map(int, re.findall(r'GPU active frequency:\s*([\d.]+)\s*MHz', content)))
    dfUsage['GPU'] = pd.Series(map(float, re.findall(r'GPU active residency:\s*([\d.]+)%', content)))

    # Other components power needs to be extracted out of total package power
    # e.g. result from logs
        # DRAM Power: 14 mW
        # Clusters Total Power: 30 mW
        # GPU Power: 14 mW
        # Package Power: 99 mW
    dfPower['Other'] = dfPower['Package'] - (dfPower['Cluster'] + dfPower['DRAM'] + dfPower['GPU'])

    # Check if the number of datapoints from all videos are equal
    if((len(dfPower) != len(dfFrequency)) or (len(dfPower) != len(dfUsage)) or (len(dfUsage) != len(dfFrequency))):
        print("The lengths of the dataframes are not equal. Check the regexes.") 
    else:
        dataPoints = len(dfPower)
        dfPower['time'] = dfFrequency['time'] = dfUsage['time'] = list(range(1, dataPoints + 1))
        dfPower['Video Type'] = dfFrequency['Video Type'] = dfUsage['Video Type'] = [videoType] * dataPoints

    return dfPower, dfFrequency, dfUsage

def regexParse(content, model):
    # Declare local dataframes
    dfPower, dfFrequency, dfUsage = pd.DataFrame(), pd.DataFrame(), pd.DataFrame()

    # Efficiency Cluster Power, Frequency, and Usage
    dfPower['Efficiency Cluster'] = pd.Series(map(int, re.findall(r'E-Cluster HW active frequency:\s*([\d]+)\s*MHz', content)))
    dfFrequency['Efficiency Cluster'] = pd.Series(map(float, re.findall(r'E-Cluster HW active residency:\s*([\d.]+)%', content)))

    # Performance Cluster Power, Frequency, and Usage
    dfPower['Performance Cluster'] = pd.Series(map(int, re.findall(r'P-Cluster HW active frequency:\s*([\d]+)\s*MHz', content)))
    dfFrequency['Performance Cluster'] = pd.Series(map(float, re.findall(r'P-Cluster HW active residency:\s*([\d.]+)%', content)))

    # DRAM, Cluster, and Package Power
    dfPower['CPU Power'] = pd.Series(map(int, re.findall(r'CPU Power:\s*([\d.]+)\s*mW', content)))
    dfPower['GPU Power'] = pd.Series(map(int, re.findall(r'GPU Power:\s*([\d.]+)\s*mW', content)))
    dfPower['ANE Power'] = pd.Series(map(int, re.findall(r'ANE Power:\s*([\d.]+)\s*mW', content)))
    dfPower['Combined Power'] = pd.Series(map(int, re.findall(r'Combined Power \(CPU \+ GPU \+ ANE\):\s*([\d.]+)\s*mW', content)))

    # GPU Frequency and Usage
    dfFrequency['GPU'] = pd.Series(map(int, re.findall(r'GPU HW active frequency:\s*([\d]+)\s*MHz', content)))
    dfUsage['GPU'] = pd.Series(map(float, re.findall(r'GPU HW active residency:\s*([\d.]+)%', content)))

    # Ensure all data points have the same length
    min_length = min(len(dfPower), len(dfFrequency), len(dfUsage))
    dfPower = dfPower[:min_length]
    dfFrequency = dfFrequency[:min_length]
    dfUsage = dfUsage[:min_length]

    # Create time and videoType columns
    dataPoints = len(dfPower)
    dfPower['time'] = dfFrequency['time'] = dfUsage['time'] = list(range(1, dataPoints + 1))
    dfPower['Model'] = dfFrequency['Model'] = dfUsage['Model'] = [model] * dataPoints

    return dfPower, dfFrequency, dfUsage

In [47]:
directory_path = os.getcwd()

# Current directory should have a folder named powermetric-logs which contains the output logs of powermetric runs
powerLogsFolderName = "logs"

# Build the full path to the logs folder
pathLogsFolder = directory_path + '/' + powerLogsFolderName + '/'

# Get the list of all log files in the logs folder
powerLogsList = os.listdir(pathLogsFolder)

# Create local dataframes
dfPower, dfFrequency, dfUsage = pd.DataFrame(), pd.DataFrame(), pd.DataFrame()

# Parse each log file
for logsFile in powerLogsList:

    if not os.path.isfile(pathLogsFolder + logsFile): 
        print('File does not exist.')
    else:
        file = open(pathLogsFolder + logsFile, 'r', encoding="utf8", errors='ignore')
        content = file.read()
    
    if (logsFile.find('mp4') >= 0) or (logsFile.find('webm') >= 0):
        # Transform 4K-AV1.mp4.txt -> 4K-AV1 because that's what we want in the charts
        f_name = os.path.splitext(logsFile)[0]
        videoType = str.split(f_name, '.')[0]
    else:
        # Used for file paths like Safari-VP9-HW.txt i.e. without the video container (mp4, webm)
        videoType = os.path.splitext(logsFile)[0]

    # Parse the content and build Data Frames
    dfPowerTemp, dfFrequencyTemp, dfUsageTemp = regexParse(content, videoType)
    dfPower     = pd.concat([dfPower, dfPowerTemp], ignore_index=True)
    dfFrequency = pd.concat([dfFrequency, dfFrequencyTemp], ignore_index=True)
    dfUsage     = pd.concat([dfUsage, dfUsageTemp], ignore_index=True)

dfPower[['Model', 'Output Length', 'Input Length']] = dfPower['Model'].str.extract(r'(\w+)-(\d+)-(short|long)')


print(dfPower.head())
print(dfFrequency.head())
print(dfUsage.head())

   Efficiency Cluster  Performance Cluster  CPU Power  GPU Power  ANE Power  \
0                1287                  747        153         24          0   
1                1050                  611         39         24          0   
2                1094                  652         86          6          0   
3                1227                  618        106          6          0   
4                1052                  608         36         24          0   

   Combined Power  time   Model Output Length Input Length  
0             177     1  FlanT5           100         long  
1              45     2  FlanT5           100         long  
2             110     3  FlanT5           100         long  
3             112     4  FlanT5           100         long  
4              42     5  FlanT5           100         long  
   Efficiency Cluster  Performance Cluster  GPU  time            Model
0               55.80                 5.21  707     1  FlanT5-100-long
1               2

In [106]:
import pandas as pd
import plotly.express as px

def plot_average_power_bar(dataframe, title):
    # Calculate the average Combined Power for each model
    barDf = dataframe.groupby('Model')['Combined Power'].mean().reset_index()

    # Create a horizontal bar chart
    fig = px.bar(barDf, y='Model', x='Combined Power', template='plotly_dark',
                 orientation='h', hover_name='Model', 
                 width=700, height=250, 
                 color_discrete_sequence=["#57FFBC"],
                 labels={"Combined Power": "Power Consumption (mW)"})

    fig.update_xaxes(zeroline=True, title_font=dict(size=12), color="#707070",
                     title_font_color="#707070", tickfont=dict(size=9), 
                     gridcolor='#242424', zerolinecolor='#242424')
    fig.update_yaxes(zeroline=True, showgrid=False, color="#FFF", 
                     title_font_color="#707070", tickfont=dict(size=11), title_text='')
    fig.update_traces(hovertemplate='%{x:.0f} (mW)', texttemplate='%{x:.0f} mW', 
                      textfont=dict(size=11))
    
    fig.update_layout(autosize=True, hovermode=False, legend_title_text='',
                      showlegend=False, font=dict(family="SF Pro Display, Roboto, Droid Sans, Arial"),
                      title={
                          'text': "<b>Average Power Consumption</b> <br> <sup> Apple Mac Mini M1 | VLC 3.0.12.1 (local files) | MacOS 11.2.2 </sup>",
                          'y': 0.90,
                          'x': 0.54,
                          'xanchor': 'center',
                          'yanchor': 'top',
                          'font': dict(size=18, color='#FFF')},
                      margin=dict(r=30, b=15, t=60),
                      margin_pad=10,
                      modebar=dict(orientation='v'),
                      plot_bgcolor='#181F26',
                      paper_bgcolor='#181F26'
                     )

    # Save the figure as SVG and show
    fig.write_image("./power-average-bar.svg")  # Save as svg
    fig.show()

# Example usage with your DataFrame
print(dfPower.head())
plot_average_power_bar(dfPower, "Power Consumption")


   Efficiency Cluster  Performance Cluster  CPU Power  GPU Power  ANE Power  \
0                1287                  747        153         24          0   
1                1050                  611         39         24          0   
2                1094                  652         86          6          0   
3                1227                  618        106          6          0   
4                1052                  608         36         24          0   

   Combined Power  time   Model Output Length Input Length  
0             177     1  FlanT5           100         long  
1              45     2  FlanT5           100         long  
2             110     3  FlanT5           100         long  
3             112     4  FlanT5           100         long  
4              42     5  FlanT5           100         long  


In [89]:
# Group the dataframe by Model and Input/Output Length, taking the mean of Combined Power for the other settings
power_avg_output = (dfPower.groupby(['Model', 'Output Length'])['Combined Power']
                   .mean()
                   .reset_index())

power_avg_input = (dfPower.groupby(['Model', 'Input Length'])['Combined Power']
                   .mean()
                   .reset_index())

# Plot the average power consumption for different output lengths of each model
def plot_average_power_output(dataframe, title):

    if title == "Input":
        color_discrete_map={"0-60 tokens": "#73A4FF", "60-110 tokens": "#01F0B0"}
        length_labels = {'short': '0-60 tokens', 'long': '60-110 tokens'}
        category_order = {"Input Length": ["0-60 tokens", "60-110 tokens"]}
    elif title == "Output":
        color_discrete_map={"50 tokens": "#FF715A","100 tokens": "#73A4FF", "200 tokens": "#01F0B0"}
        length_labels = {'50': '50 tokens', '100': '100 tokens', '200': '200 tokens'}
        category_order = {"Output Length": ["50 tokens", "100 tokens", "200 tokens"]}
    
    # Map the new labels to the appropriate column in the dataframe
    dataframe[f'{title} Length'] = dataframe[f'{title} Length'].map(length_labels)

    fig = px.bar(dataframe, 
                 x='Model', 
                 y='Combined Power', 
                 color=f'{title} Length',  # Different bars for each output length
                 barmode='group',  # Group bars by model
                 title=title, 
                 color_discrete_map=color_discrete_map,
                 category_orders=category_order,
                 labels={'Combined Power': 'Mean Average Combined Power (mW)', f'{title} Length': f'{title} Length'},
                 template='plotly_dark', 
                 width=800, 
                 height=500,
                 text_auto=True)

    fig.update_yaxes(title_font = dict(size=12), title_font_color = "#707070", color="#707070",  tickfont = dict(size = 9), gridcolor='#242424', zerolinecolor = '#242424') #, range=[0, 1100])
    fig.update_xaxes(zeroline = True, showgrid=False, color="#FFF", title_font_color = "#707070", tickfont = dict(size = 11), title_text='')
    fig.update_traces(textfont= dict(size=8), width=[0.16, 0.16, 0.16])
    fig.update_layout(autosize=True, hovermode=False, 
                      legend_title_text='', 
                      legend=dict(orientation="h", yanchor="bottom", y=1, 
                                  xanchor="center", x=0.5, title=f"{title} Length"),
                      font=dict(family="SF Pro Display, Roboto, Droid Sans, Arial"),
                      title={
                          'text': f"<b>Mean Average Power Consumption by {title} Length</b> <br>",
                          'y': 0.93,
                          'x': 0.54,
                          'xanchor': 'center',
                          'yanchor': 'top',
                          'font': dict(size=18, color='#FFF')},
                      margin=dict(r=30, b=0, t=80), margin_pad=10,
                      modebar=dict(orientation='v'),
                      plot_bgcolor='#191C1F',
                      paper_bgcolor='#191C1F')

    fig.write_image("./power-average-bar-output.png")  # Save as PNG
    fig.show()

# Call the function to plot for all models
plot_average_power_output(power_avg_output, "Output")
plot_average_power_output(power_avg_input, "Input")



In [67]:
def plot_power_over_time(dataframe, title):
    # Create the area plot with facets for each model
    fig = px.area(dfPower, x='time', y=['Combined Power'], template='plotly_dark', 
    width = 1700, height = 350, facet_col='Model', line_shape="spline", facet_col_wrap=5,
    labels={"value": "Power Consumption (mW)", "time": "Time (s)"}, color_discrete_map={"Combined Power": "#57FFBC"},
    category_orders={"Model": ["Bloom", "FlanT5", "OPT"]})  # Ensure correct model ordering
    
    # Remove the facet annotation texts ("Model=Bloom", etc.)
    fig.for_each_annotation(lambda a: a.update(text=a.text.split("=")[-1]))
    
    # Customize annotations' font
    for annotation in fig['layout']['annotations']:
        annotation['font'] = dict(family="SF Pro Display, Roboto, Droid Sans, Arial", size=11)
    
    # Update y-axis settings
    fig.update_yaxes(type='linear', title_font=dict(size=12), color="#707070", title_font_color="#707070", 
                     tickfont=dict(size=9), gridcolor='#242424', zerolinecolor='#242424')
    
    # Update x-axis settings
    fig.update_xaxes(showgrid=False, title_font=dict(size=10), color="#707070", title_font_color="#707070", 
                     tickfont=dict(size=9))
    
    # Customize the hover template and trace smoothing
    fig.update_traces(hovertemplate='%{y} (mW)', line_smoothing=1.3)
    
    # Dynamically adjust the x-axis range for each model using Plotly's facet mechanism
    for i, model in enumerate(["Bloom", "FlanT5", "OPT"]):
        # Get the maximum 'time' for the current model
        max_time = dataframe[dataframe['Model'] == model]['time'].max()
        
        # Update the x-axis range for the subplot corresponding to the current model
        fig.update_xaxes(matches=None, range=[0, max_time], row=1, col=i+1)  # Using col as the facet position
        
    # Customize layout and title
    fig.update_layout(autosize=True, hovermode="x", showlegend=False, 
                      font=dict(family="SF Pro Display, Roboto, Droid Sans, Arial"),
                      title={
                          'text': "<b>Combined Power Consumption over Time</b> <br> <sup> Apple Mac M1 | MacOS 14.2.1 </sup>",
                          'y': 0.92,
                          'x': 0.54,
                          'xanchor': 'center',
                          'yanchor': 'top',
                          'font': dict(size=18, color='#FFF')},
                      margin=dict(r=50, t=80), margin_pad=10,
                      modebar=dict(orientation='v'), plot_bgcolor='#191C1F', paper_bgcolor='#191C1F')

    # Save the figure as a PNG and show it
    fig.write_image("./power-combined-time.png")
    fig.show()

# Call the function to generate the plot
plot_power_over_time(dfPower, "title")



In [66]:
def plot_power_over_time(dataframe, title):#changed
    # Line chart for Total Power Over Time, faceted by Video Type
    fig = px.line(dataframe,#changed
                  x='time',
                  y=['Efficiency Cluster', 'Performance Cluster', 'CPU Power', 'GPU Power', 'ANE Power', 'Combined Power'],
                  width=700,
                  height=350 * len(dataframe['Model'].unique()),  # Adjust height based on number of models
                  facet_row='Model',#changed
                  render_mode="svg",
                  labels={"value": f"{title} (mW)", "time": "Time (s)"}, #changed title
                  category_orders={"Model": sorted(dataframe['Model'].unique())}  #changed
                  )

    # Update layout settings for the figure
    fig.update_yaxes(type='linear', title_font=dict(size=12), color="#707070", 
                     title_font_color="#707070", tickfont=dict(size=9), 
                     gridcolor='#242424', zerolinecolor='#242424')
    fig.update_xaxes(showgrid=False, title_font=dict(size=10), color="#707070", 
                     title_font_color="#707070", tickfont=dict(size=9))
    fig.update_traces(hovertemplate='%{y} MHz', line_smoothing=1.3)

    # Update layout for the total power plot
    fig.update_layout(
        title={
            'text': f"<b>{title} over Time</b> <br> <sup> Apple Mac M1 | MacOS 14.2.1 </sup>",
            'y': 0.92, 'x': 0.54, 'xanchor': 'center', 'yanchor': 'top',
            'font': dict(size=18, color='#FFF')},
        legend_title_text='',
        autosize=True,
        hovermode="x",
        font=dict(family="SF Pro Display, Roboto, Droid Sans, Arial"),
        margin=dict(r=30, b=0, t=80),
        plot_bgcolor='#191C1F',
        paper_bgcolor='#191C1F',
    )

    # Save the figure as HTML and SVG
    fig.write_image("./power-over-time.svg")  # Save as svg
    fig.show()

# Example usage:
print(dfPower.shape)
print(dfPower.head)
print(dfFrequency.head)
plot_power_over_time(dfPower, "Power Consumption")


(22957, 10)
<bound method NDFrame.head of        Efficiency Cluster  Performance Cluster  CPU Power  GPU Power  \
0                    1287                  747        153         24   
1                    1050                  611         39         24   
2                    1094                  652         86          6   
3                    1227                  618        106          6   
4                    1052                  608         36         24   
...                   ...                  ...        ...        ...   
22952                1039                  637         40        210   
22953                1007                  624         38        210   
22954                1006                  627         33        198   
22955                1007                  629         37        201   
22956                1008                 1724       1361        190   

       ANE Power  Combined Power  time   Model Output Length Input Length  
0              0 