In [1]:
import os
import subprocess
import re
import csv
import plotly.express as px
import plotly.graph_objects as go
from plotly.subplots import make_subplots
import plotly.io as pio
import pandas as pd
import numpy as np
import time
import plotly.figure_factory as ff
from PIL import Image

In [2]:
def regexParse2(content, videoType):
    # Declare local dataframes
    dfPower, dfFrequency, dfUsage = pd.DataFrame(), pd.DataFrame(), pd.DataFrame()

    # Regex findall returns matches which are all strings
    # Used list(map(int, regexMatches))) to convert the list of strings to a list of int or float so that in Excel it's an integer
    dfPower['Efficiency Cluster'] = pd.Series(map(int, re.findall(r'E-Cluster Power:\s*([\d.]+)\s*mW', content)))
    dfFrequency['Efficiency Cluster'] = pd.Series(map(int, re.findall(r'E-Cluster HW active frequency:\s*([\d.]+)\s*MHz', content)))
    dfUsage['Efficiency Cluster'] = pd.Series(map(float, re.findall(r'E-Cluster HW active residency:\s*([\d.]+)%', content)))

    dfPower['Performance Cluster'] = pd.Series(map(int, re.findall(r'P-Cluster Power:\s*([\d.]+)\s*mW', content)))
    dfFrequency['Performance Cluster'] = pd.Series(map(int, re.findall(r'P-Cluster HW active frequency:\s*([\d.]+)\s*MHz', content)))
    dfUsage['Performance Cluster'] = pd.Series(map(float, re.findall(r'P-Cluster HW active residency:\s*([\d.]+)%', content)))

    dfPower['DRAM'] = pd.Series(map(int, re.findall(r'DRAM Power:\s*([\d.]+)\s*mW', content)))
    dfPower['Cluster'] = pd.Series(map(int, re.findall(r'Clusters Total Power:\s*([\d.]+)\s*mW', content)))
    dfPower['Package'] = pd.Series(map(int, re.findall(r'Package Power:\s*([\d.]+)\s*mW', content)))

    dfPower['GPU'] = pd.Series(map(int, re.findall(r'GPU Power:\s*([\d.]+)\s*mW\nPackage Power', content)))
    dfFrequency['GPU'] = pd.Series(map(int, re.findall(r'GPU active frequency:\s*([\d.]+)\s*MHz', content)))
    dfUsage['GPU'] = pd.Series(map(float, re.findall(r'GPU active residency:\s*([\d.]+)%', content)))

    # Other components power needs to be extracted out of total package power
    # e.g. result from logs
        # DRAM Power: 14 mW
        # Clusters Total Power: 30 mW
        # GPU Power: 14 mW
        # Package Power: 99 mW
    dfPower['Other'] = dfPower['Package'] - (dfPower['Cluster'] + dfPower['DRAM'] + dfPower['GPU'])

    # Check if the number of datapoints from all videos are equal
    if((len(dfPower) != len(dfFrequency)) or (len(dfPower) != len(dfUsage)) or (len(dfUsage) != len(dfFrequency))):
        print("The lengths of the dataframes are not equal. Check the regexes.") 
    else:
        dataPoints = len(dfPower)
        dfPower['time'] = dfFrequency['time'] = dfUsage['time'] = list(range(1, dataPoints + 1))
        dfPower['Video Type'] = dfFrequency['Video Type'] = dfUsage['Video Type'] = [videoType] * dataPoints

    return dfPower, dfFrequency, dfUsage

def regexParse(content, model):
    # Declare local dataframes
    dfPower, dfFrequency, dfUsage = pd.DataFrame(), pd.DataFrame(), pd.DataFrame()

    # Efficiency Cluster Power, Frequency, and Usage
    dfPower['Efficiency Cluster'] = pd.Series(map(int, re.findall(r'E-Cluster HW active frequency:\s*([\d]+)\s*MHz', content)))
    dfFrequency['Efficiency Cluster'] = pd.Series(map(float, re.findall(r'E-Cluster HW active residency:\s*([\d.]+)%', content)))

    # Performance Cluster Power, Frequency, and Usage
    dfPower['Performance Cluster'] = pd.Series(map(int, re.findall(r'P-Cluster HW active frequency:\s*([\d]+)\s*MHz', content)))
    dfFrequency['Performance Cluster'] = pd.Series(map(float, re.findall(r'P-Cluster HW active residency:\s*([\d.]+)%', content)))

    # DRAM, Cluster, and Package Power
    dfPower['CPU Power'] = pd.Series(map(int, re.findall(r'CPU Power:\s*([\d.]+)\s*mW', content)))
    dfPower['GPU Power'] = pd.Series(map(int, re.findall(r'GPU Power:\s*([\d.]+)\s*mW', content)))
    dfPower['ANE Power'] = pd.Series(map(int, re.findall(r'ANE Power:\s*([\d.]+)\s*mW', content)))
    dfPower['Combined Power'] = pd.Series(map(int, re.findall(r'Combined Power \(CPU \+ GPU \+ ANE\):\s*([\d.]+)\s*mW', content)))

    # GPU Frequency and Usage
    dfFrequency['GPU'] = pd.Series(map(int, re.findall(r'GPU HW active frequency:\s*([\d]+)\s*MHz', content)))
    dfUsage['GPU'] = pd.Series(map(float, re.findall(r'GPU HW active residency:\s*([\d.]+)%', content)))

    # Ensure all data points have the same length
    min_length = min(len(dfPower), len(dfFrequency), len(dfUsage))
    dfPower = dfPower[:min_length]
    dfFrequency = dfFrequency[:min_length]
    dfUsage = dfUsage[:min_length]

    # Create time and videoType columns
    dataPoints = len(dfPower)
    dfPower['time'] = dfFrequency['time'] = dfUsage['time'] = list(range(1, dataPoints + 1))
    dfPower['Model'] = dfFrequency['Model'] = dfUsage['Model'] = [model] * dataPoints

    return dfPower, dfFrequency, dfUsage

In [3]:
def plot_over_time(dataframe, title):
    # Line chart for CPU, GPU Frequency over time, faceted by Model
    fig = px.line(dataframe, 
                  x='time', 
                  y=['Efficiency Cluster', 'Performance Cluster', 'GPU'], 
                  template='plotly_dark', 
                  width=700, height=350 * len(dataframe['Model'].unique()),  # Adjust height based on number of models
                  line_shape="spline", render_mode="svg",
                  facet_row='Model',  # Create a new row for each model
                  color_discrete_map={
                      "Efficiency Cluster": "#73A4FF",
                      "Performance Cluster": "#FF715A",
                      "GPU": "#01F0B0"
                  },
                  labels={"value": f"{title} (MHz)", "time": "Time (s)", "Model": "Device Model"},
                  category_orders={"Model": sorted(dataframe['Model'].unique())}  # Ensure consistent order of models
                 )

    # Update layout settings for the figure
    fig.update_yaxes(type='linear', title_font=dict(size=12), color="#707070", 
                     title_font_color="#707070", tickfont=dict(size=9), 
                     gridcolor='#242424', zerolinecolor='#242424')
    fig.update_xaxes(showgrid=False, title_font=dict(size=10), color="#707070", 
                     title_font_color="#707070", tickfont=dict(size=9))
    fig.update_traces(hovertemplate='%{y} MHz', line_smoothing=1.3)

    # Customize overall layout and titles
    fig.update_layout(
        legend_title_text='', 
        autosize=True, 
        hovermode="x", 
        legend=dict(orientation="h", yanchor="top", y=-0.3, xanchor="center", x=0.5),
        font=dict(family="SF Pro Display, Roboto, Droid Sans, Arial"),
        title={
            'text': f"<b>{title} over Time</b> <br> <sup> Apple Mac M1 |  MacOS 14.2.1 </sup>",
            'y': 0.92, 'x': 0.54, 'xanchor': 'center', 'yanchor': 'top',
            'font': dict(size=18, color='#FFF')},
        margin=dict(r=50, t=80), 
        modebar=dict(orientation='v'),
        plot_bgcolor='#191C1F', 
        paper_bgcolor='#191C1F',
    )

    fig.write_image("./plotly-frequency.png")  # Save as PNG
    fig.show()
    # fig.write_image("./plotly-frequency.svg")  # Save as PNG

In [4]:
start_time = time.time()
print("Starting at = ", time.ctime(start_time))
directory_path = os.getcwd()

# Current directory should have a folder named powermetric-logs which contains the output logs of powermetric runs
powerLogsFolderName = "logs"

# Build the full path to the logs folder
pathLogsFolder = directory_path + '/' + powerLogsFolderName + '/'

# Get the list of all log files in the logs folder
powerLogsList = os.listdir(pathLogsFolder)

# Create local dataframes
dfPower, dfFrequency, dfUsage = pd.DataFrame(), pd.DataFrame(), pd.DataFrame()

# Parse each log file
for logsFile in powerLogsList:

    if not os.path.isfile(pathLogsFolder + logsFile): 
        print('File does not exist.')
    else:
        file = open(pathLogsFolder + logsFile, 'r', encoding="utf8", errors='ignore')
        content = file.read()
    
    if (logsFile.find('mp4') >= 0) or (logsFile.find('webm') >= 0):
        # Transform 4K-AV1.mp4.txt -> 4K-AV1 because that's what we want in the charts
        f_name = os.path.splitext(logsFile)[0]
        videoType = str.split(f_name, '.')[0]
    else:
        # Used for file paths like Safari-VP9-HW.txt i.e. without the video container (mp4, webm)
        videoType = os.path.splitext(logsFile)[0]

    # Parse the content and build Data Frames
    dfPowerTemp, dfFrequencyTemp, dfUsageTemp = regexParse(content, videoType)
    dfPower     = pd.concat([dfPower, dfPowerTemp], ignore_index=True)
    dfFrequency = pd.concat([dfFrequency, dfFrequencyTemp], ignore_index=True)
    dfUsage     = pd.concat([dfUsage, dfUsageTemp], ignore_index=True)

print(dfPower.head())
print(dfFrequency.head())
print(dfUsage.head())

plot_over_time(dfFrequency, "Frequency")

end_time = time.time()
print("Ending at = ", time.ctime(end_time))
print(f"It took {end_time-start_time:.2f} Time (s) to compute")

Starting at =  Wed Oct 23 11:55:18 2024
   Efficiency Cluster  Performance Cluster  CPU Power  GPU Power  ANE Power  \
0                1172                  697         83         17          0   
1                1037                  632         41         17          0   
2                1535                 1094       1471          9          0   
3                1419                 1724       1669          9          0   
4                1290                  910        569          3          0   

   Combined Power  time         Model  
0             100     1  opt200-short  
1              50     2  opt200-short  
2            1474     3  opt200-short  
3            1673     4  opt200-short  
4             572     5  opt200-short  
   Efficiency Cluster  Performance Cluster  GPU  time         Model
0               49.37                 3.79  707     1  opt200-short
1               24.98                 3.08  708     2  opt200-short
2               90.32                42.0

Ending at =  Wed Oct 23 11:55:28 2024
It took 9.90 Time (s) to compute
