# Imports

In [None]:
from pathlib import Path
from glob import glob
import re
import pandas as pd
import numpy as np

# Paths

In [None]:
# Directory that is searched for plotter log files.
# If a plot was generated by the GUI, a logfile for each plot will be present in this directory

log_dir = r'C:\Users\user\.chia\mainnet\plotter'

In [None]:
# Paths of all *.txt files directly inside log_dir (glob already returns a list).
logs = glob(log_dir + '/*.txt')

# Helper Funcs

In [None]:
# Bytes per unit for the binary prefixes printed in the log ("MiB", "GiB", ...).
# These are powers of 1024, not powers of 1000.
size_lut = {
    'M': 2 ** 20,
    'G': 2 ** 30,
    'T': 2 ** 40,
    'P': 2 ** 50,
}


class ChiaLogException(Exception):
    """Raised when a plotter log reports a failure or has no total time yet."""


class ChiaLogParser:
    """Read one plotter log file and extract its parameters and timings.

    The constructor loads the whole file into ``self.text`` and rejects logs
    that mention a plotting error or that lack the final "Total time" line.
    """

    def __init__(self, path):
        """Load the log at *path*.

        Raises:
            ChiaLogException: if the text contains "Caught plotting error" or
                "RuntimeError", or if no "Total time" line is present.
        """
        with open(path, 'r') as fp:
            self.text = fp.read()

        log_name = Path(path).name

        if re.search(r'(Caught plotting error|RuntimeError)', self.text):
            raise ChiaLogException(f'Log "{log_name}" contains an error.')

        # No total time means the log stops before the end of the plot; that
        # is a different situation from a reported error, so say so.
        if not re.search(self._timing('Total time'), self.text):
            raise ChiaLogException(
                f'Log "{log_name}" is incomplete (no total time found).'
            )

    @staticmethod
    def _timing(label):
        """Return the regex for a '<label> = <seconds> seconds. CPU (<pct>%)' line."""
        return label + r' = (\d+\.\d+) seconds\. CPU \((\d+\.\d+)%\)'

    def _groups(self, pattern, flags=0):
        """Return the captured groups of the first match of *pattern* in the log.

        Raises AttributeError when the pattern does not occur, because
        ``re.search`` then returns ``None``.
        """
        return re.search(pattern, self.text, flags).groups()

    @property
    def get_info(self):
        """Return a dict with the values parsed from the log text.

        Keys: ``temp_dir``, ``id``, ``plot_size``, ``memory``, ``buckets``,
        ``threads``, ``strip_size``, ``phase_N_time`` / ``phase_N_cpu`` for
        N in 1..4, ``workin_space_size``, ``final_file_size``, ``total_time``,
        ``total_cpu`` and, only when the log has a "Copy time" line,
        ``copy_time`` / ``copy_cpu``.

        All values except ``temp_dir`` and ``id`` are floats. CPU percentages
        are divided by 100; the two sizes are converted to bytes via
        ``size_lut``.

        Raises:
            AttributeError: if any line other than "Copy time" is missing.
        """
        d = {}

        # Capture everything up to " and " (the separator before the second
        # temporary dir) or the end of the line, so paths containing '-',
        # '.', spaces or '/' are not cut short.
        d['temp_dir'] = self._groups(
            r'Starting plotting progress into temporary dirs: (.+?)(?: and |$)',
            re.MULTILINE,
        )[0]
        d['id'] = self._groups(r'ID: ([0-9a-fA-F]+)')[0]
        d['plot_size'] = float(self._groups(r'Plot size is: (\d+)')[0])
        d['memory'] = float(self._groups(r'Buffer size is: (\d+)MiB')[0])
        d['buckets'] = float(self._groups(r'Using (\d+) buckets')[0])

        thread_count, stripe_size = self._groups(
            r'Using (\d+) threads of stripe size (\d+)'
        )
        d['threads'] = float(thread_count)
        d['strip_size'] = float(stripe_size)

        for phase in range(1, 5):
            seconds, cpu_percent = self._groups(
                self._timing(f'Time for phase {phase}')
            )
            d[f'phase_{phase}_time'] = float(seconds)
            d[f'phase_{phase}_cpu'] = float(cpu_percent) / 100

        amount, unit = self._groups(
            r'Approximate working space used \(without final file\): (\d+\.\d+) ([MGTP])iB'
        )
        d['workin_space_size'] = float(amount) * size_lut[unit]
        amount, unit = self._groups(r'Final File size: (\d+\.\d+) ([MGTP])iB')
        d['final_file_size'] = float(amount) * size_lut[unit]

        seconds, cpu_percent = self._groups(self._timing('Total time'))
        d['total_time'] = float(seconds)
        d['total_cpu'] = float(cpu_percent) / 100

        # The copy line is optional; its keys are simply left out when absent.
        copy_match = re.search(self._timing('Copy time'), self.text)
        if copy_match is not None:
            seconds, cpu_percent = copy_match.groups()
            d['copy_time'] = float(seconds)
            d['copy_cpu'] = float(cpu_percent) / 100

        return d


# Parsing

# Stats

In [None]:
# Build one parser per log file; logs rejected by ChiaLogParser are reported
# and left out of the list.
cps = []
for log in logs:
    try:
        parsed = ChiaLogParser(log)
    except ChiaLogException:
        print(f'Could not read "{Path(log).stem}.txt".')
    else:
        cps.append(parsed)

In [None]:
# Map 'log_<index>' to the parsed info dict of each accepted log.
# NOTE: the name `all` shadows the builtin; it is kept because the next cell reads it.
all = {
    f'log_{position}': parser.get_info
    for position, parser in enumerate(cps)
}

In [None]:
# Build a table from the dict of dicts: one column per log, one row per parsed field.
df = pd.DataFrame(all)

In [None]:
# Transpose so that each log becomes a row and each parsed field a column.
df = df.T

In [None]:
# Show the column names of the table.
df.columns

In [None]:
# Show the distinct final_file_size values that occur in the table.
df.final_file_size.unique()

In [None]:
# Keep only rows whose final_file_size is greater than 1e10.
# NOTE(review): presumably this drops small test plots — confirm the intent of the threshold.
df = df.query("final_file_size > 1e10")

In [None]:
# Display the filtered table.
df

In [None]:
# Numeric view of the table: the selected columns, copied and cast to float
# (the textual 'temp_dir' and 'id' columns are not selected).
df2 = df.loc[:, [
    'plot_size',
    'memory',
    'buckets',
    'threads',
    'strip_size',
    'phase_1_time',
    'phase_1_cpu',
    'phase_2_time',
    'phase_2_cpu',
    'phase_3_time',
    'phase_3_cpu',
    'phase_4_time',
    'phase_4_cpu',
    'workin_space_size',
    'final_file_size',
    'total_time',
    'total_cpu',
    'copy_time',
    'copy_cpu',
]].copy().astype(float)

In [None]:
# Summary statistics for every column of the numeric table.
df2.describe()

# debug.log

The following code assumes that logging is enabled at INFO level. You can change this in "C:\Users\user\.chia\mainnet\config\config.yaml" by setting log_level: INFO

A restart of the chia node is required (plotting can continue).

In [None]:
# Folder whose files are read as debug logs below.
debug_folder = r"C:\Users\user\.chia\mainnet\log"

In [None]:
# Every entry directly inside the debug-log folder.
files = glob(debug_folder + '/*')

In [None]:
# Concatenate the contents of all debug logs into one string. The pieces are
# collected in a list and joined once, because repeated `+=` on a growing
# string may re-copy the accumulated text on every iteration.
text_parts = []
for file in files:
    # A '/*' glob also returns sub-directories, which cannot be opened as files.
    if not Path(file).is_file():
        continue
    with open(file, 'r') as fp:
        text_parts.append(fp.read())
text = ''.join(text_parts)

In [None]:
# Find every "<n> plots ... Found <m> proofs. Time: <t>" entry and capture
# (n, m, t) as strings.
# The plot count must be captured as (\d+): a repeated group such as (\d)+
# keeps only its last repetition, i.e. the final digit. The dots are escaped
# so they match literal '.' characters only.
# This can take a while

x = re.findall(r'(\d+) plots[\w ]+\.\.\. Found (\d+) proofs\. Time: (\d+\.\d+)', text)

In [None]:
# Convert the captured strings: plot count and proof count to int, time to float.
data = [
    [int(plot_count), int(proof_count), float(elapsed)]
    for plot_count, proof_count, elapsed in x
]

In [None]:
# One row per matched log entry. Note that this rebinds `df`, replacing the
# plot-log table built earlier.
df = pd.DataFrame(data, columns=['eligable', 'proofs', 'time'])

In [None]:
# Plot a histogram for each column of the table.
df.hist()

# Play

Deep dive into the DataFrame (some examples)

In [None]:
# Summary statistics for the 'eligable', 'proofs' and 'time' columns.
df.describe()

In [None]:
# Number of rows for each distinct value of 'eligable'.
df.groupby('eligable').size()

In [None]:
# Rows whose 'time' value exceeds 30.
# If it takes more than 30s, the challenge will not be counted even if a proof is found.

df[df['time'] > 30]

In [None]:
# Rows whose 'eligable' value is greater than 2.
df[df['eligable'] > 2]