In [1]:
import pandas as pd
import os
import re

In [2]:
def parse_log_file(log_file_path):
    model_size = None
    inference_time = None
    
    with open(log_file_path, 'r') as file:
        for line in file:
            # Extract model size
            size_match = re.search(r'Size \(MB\): ([\d\.]+)', line)
            if size_match:
                model_size = float(size_match.group(1))
            
            # Extract average inference time
            time_match = re.search(r'Average inference time: ([\d\.]+)', line)
            if time_match:
                inference_time = float(time_match.group(1))
                
    return model_size, inference_time

In [24]:
df = pd.read_csv('../../analysis/experiment_values.csv', sep=';')
df.head()

Unnamed: 0,Task,Compression Method,Parameter,Value
0,Defect Prediction,No One,Avg Inference Time CUDA,0.01
1,Defect Prediction,Distillation (distilbert),Avg Inference Time CUDA,0.005
2,Defect Prediction,Quantization (quanto-qfloat8),Avg Inference Time CUDA,0.021
3,Defect Prediction,Quantization (quanto-qint8),Avg Inference Time CUDA,0.019
4,Defect Prediction,Quantization (quanto-qint4),Avg Inference Time CUDA,0.03


In [25]:
for logs in os.listdir('logs_graph'):
  model_size, inf_time = parse_log_file(f'logs_graph/{logs}')
  task = 'Defect Prediction Graph'
  if 'prune4' in logs:
    compression = 'Pruning 0.4'
  elif 'prune6' in logs:
    compression = 'Pruning 0.6'
  elif 'prune' in logs:
    compression = 'Pruning 0.2'
  elif 'quantf8' in logs:
    compression = 'Quantization (quanto-qfloat8)'
  elif 'quant4' in logs:
    compression = 'Quantization (quanto-qint4)'
  elif 'quant' in logs:
    compression = 'Quantization (quanto-qint8)'
  else:
    compression = 'No One'
  if 'nocuda' in logs:
    df = pd.concat([df, pd.DataFrame({
      'Task': [task], 'Compression Method': [compression], 'Parameter': ["Avg Inference Time CPU"], 'Value': [inf_time]})])
    df = pd.concat([df, pd.DataFrame({
      'Task': [task], 'Compression Method': [compression], 'Parameter': ["Model Size"], 'Value': [model_size]})])
  else:
    df = pd.concat([df, pd.DataFrame({
      'Task': [task], 'Compression Method': [compression], 'Parameter': ["Avg Inference Time CUDA"], 'Value': [inf_time]})])  
 

In [26]:
df.tail(n=25)

Unnamed: 0,Task,Compression Method,Parameter,Value
202,Defect Prediction Graph,No One,Model Size,498.655532
203,Defect Prediction Graph,No One,Accuracy,0.621157
204,Defect Prediction Graph,No One,F1,0.572137
205,Defect Prediction Graph,No One,MCC,0.23363
0,Defect Prediction Graph,Pruning 0.2,Avg Inference Time CUDA,0.011872
0,Defect Prediction Graph,Pruning 0.6,Avg Inference Time CPU,13.021046
0,Defect Prediction Graph,Pruning 0.6,Model Size,498.64844
0,Defect Prediction Graph,No One,Avg Inference Time CPU,12.902478
0,Defect Prediction Graph,No One,Model Size,498.64844
0,Defect Prediction Graph,Quantization (quanto-qint4),Avg Inference Time CPU,14.108944


In [27]:
df.to_csv('../../analysis/experiment_values.csv', index=False)