In [None]:
import os
import re
import pandas as pd
import matplotlib.pyplot as plt

def extract_number_from_filename(filepath):
    """Return the digits found between ``--`` and a trailing ``.log``.

    Only the base name of *filepath* is inspected; directory components
    are ignored. The digits are returned as a string, or ``None`` when the
    base name does not end in ``--<digits>.log``.
    """
    base_name = os.path.basename(filepath)
    step_match = re.search(r'--(\d+)\.log$', base_name)
    if step_match is None:
        return None
    return step_match.group(1)

def find_last_percentage_in_log(filepath):
    """Return the last percentage written in a log file as a float.

    The file is read as UTF-8 with undecodable bytes dropped. A percentage
    wrapped in parentheses, e.g. ``(87.5%)``, takes priority; only when the
    file has none is any bare ``<number>%`` considered. In both cases the
    final occurrence in the file is the one returned.

    Returns ``None`` when the file contains no percentage, or when reading
    it fails (the error is printed rather than raised).
    """
    # Ordered from most to least specific; the first form that occurs wins.
    parenthesised = r'\((\d+\.?\d*)%\)'
    bare = r'(\d+\.?\d*)%'

    try:
        with open(filepath, 'r', encoding='utf-8', errors='ignore') as log_file:
            text = log_file.read()

        for regex in (parenthesised, bare):
            found = re.findall(regex, text)
            if not found:
                continue
            return float(found[-1])
    except Exception as e:
        print(f"Error reading file {filepath}: {e}")
        return None

    return None

# Confirmation message emitted after the two helper definitions above.
print("Functions defined successfully!")


In [None]:

# Walk the log folder and collect one (training steps, success rate) pair
# per usable log file into `results`, sorted by training steps.

# Default log folder; edit this path to analyse a different run.
folder_path = "/home/user1/workspace/juyi/lisaopenvla/eval_logs/libero_goal"
# Prompt for a path only when `folder_path` is not defined yet. Because of the
# assignment above this never prompts; comment that assignment out to be asked
# for the path (a value left over from an earlier run is then reused instead).
folder_path = input("Please enter log folder path: ") if 'folder_path' not in locals() else folder_path

# Reset the outputs before checking the folder so that a failed lookup cannot
# leave data from a previous run behind for the summary and plotting code.
results = []
processed_files = 0

# isdir (rather than exists) also rejects a path that points at a regular
# file, which os.walk would otherwise silently treat as an empty folder.
if not os.path.isdir(folder_path):
    print(f"Error: Folder {folder_path} does not exist or is not a directory")
else:
    print(f"Processing folder: {folder_path}")

    for root, dirs, files in os.walk(folder_path):
        for file in files:
            if not file.endswith('.log'):
                continue

            filepath = os.path.join(root, file)

            # Skip logs whose name carries no step count.
            file_number = extract_number_from_filename(filepath)
            if file_number is None:
                continue

            # Skip logs that contain no percentage or could not be read.
            last_percentage = find_last_percentage_in_log(filepath)
            if last_percentage is None:
                continue

            results.append((int(file_number), last_percentage))
            processed_files += 1

    # Sort by steps
    results.sort(key=lambda x: x[0])

    print(f"Successfully processed {processed_files} log files")
    print(f"Valid data points: {len(results)}")

# Print the collected data points as a table, followed by the minimum and
# maximum success rate and the step count at which the maximum occurs.
if not ('results' in locals() and results):
    print("No valid data found")
else:
    df = pd.DataFrame(results, columns=['Training Steps', 'Success Rate (%)'])
    success_column = df['Success Rate (%)']

    print("Training Progress Data:")
    print("=" * 40)
    print(df.to_string(index=False))

    print("\nStatistics:")
    print(f"Minimum success rate: {success_column.min():.1f}%")
    print(f"Maximum success rate: {success_column.max():.1f}%")

    # idxmax gives the row label of the first maximum; use it to look up
    # the matching step count.
    max_rate_idx = success_column.idxmax()
    max_rate_steps = df.loc[max_rate_idx, 'Training Steps']
    print(f"Training steps with maximum success rate: {max_rate_steps}")

# Plot success rate against training steps, label every point with its
# value, save the figure as "<log folder name>.png" in the working directory
# and display it.
if 'results' in locals() and results:
    steps = [x[0] for x in results]
    success_rates = [x[1] for x in results]

    plt.figure(figsize=(14, 8))

    plt.plot(steps, success_rates, 'b-o', linewidth=2.5, markersize=8,
             markerfacecolor='lightblue', markeredgecolor='blue', markeredgewidth=1.5)

    # Set title and labels
    plt.title('Training Steps vs Success Rate', fontsize=18, fontweight='bold', pad=20)
    plt.xlabel('Training Steps', fontsize=14, fontweight='bold')
    plt.ylabel('Success Rate (%)', fontsize=14, fontweight='bold')

    plt.grid(True, alpha=0.3, linestyle='--')

    # Value label slightly above each data point.
    for step, rate in zip(steps, success_rates):
        plt.annotate(f'{rate}%', (step, rate),
                     textcoords="offset points", xytext=(0, 12),
                     ha='center', fontsize=10, fontweight='bold',
                     bbox=dict(boxstyle='round,pad=0.3', facecolor='yellow', alpha=0.7))

    # Pad the y-axis by 15% of the data range on each side.
    min_rate = min(success_rates)
    max_rate = max(success_rates)
    margin = (max_rate - min_rate) * 0.15
    if margin == 0:
        # A single point (or all-equal rates) has no range; use a fixed
        # margin so the lower and upper limits are not identical.
        margin = 5.0
    plt.ylim(min_rate - margin, max_rate + margin)

    plt.xticks(fontsize=12)
    plt.yticks(fontsize=12)
    plt.tight_layout()

    # Name the image after the last component of the log folder. normpath
    # strips a trailing separator, which would otherwise give an empty name.
    chart_stem = os.path.basename(os.path.normpath(folder_path)) or 'chart'
    chart_file = chart_stem + '.png'
    plt.savefig(chart_file, dpi=300, bbox_inches='tight')
    print(f"Chart saved as: {chart_file}")

    plt.show()

else:
    print("No data available to plot chart")
