In [2]:
import os
import re
import ast

def _split_top_level(text, respect_quotes=True):
    """Split ``text`` on commas that are not nested inside brackets or quotes.

    Args:
        text: The flattened ``key=value, key=value`` string.
        respect_quotes: When True, commas and brackets inside ``'...'`` or
            ``"..."`` are treated as part of the string. When False, quotes
            are ordinary characters and only bracket depth is tracked.

    Returns:
        A list of stripped, non-empty pieces.
    """
    pieces = []
    current = []
    depth = 0
    quote = None      # the quote character currently open, if any
    escaped = False   # True right after a backslash inside a quoted string

    for ch in text:
        if quote is not None:
            # Inside a string literal: copy verbatim until the closing quote.
            current.append(ch)
            if escaped:
                escaped = False
            elif ch == '\\':
                escaped = True
            elif ch == quote:
                quote = None
            continue

        if respect_quotes and ch in ('"', "'"):
            quote = ch
        elif ch in '[({':
            depth += 1
        elif ch in '])}':
            depth -= 1
        elif ch == ',' and depth == 0:
            piece = ''.join(current).strip()
            if piece:
                pieces.append(piece)
            current = []
            continue
        current.append(ch)

    if quote is not None:
        # A quote was never closed (e.g. a stray apostrophe in an unquoted
        # value). Quote tracking would swallow the rest of the text, so redo
        # the split with bracket tracking only.
        return _split_top_level(text, respect_quotes=False)

    tail = ''.join(current).strip()
    if tail:
        pieces.append(tail)
    return pieces


def _parse_value(text):
    """Convert the textual right-hand side of ``key=value`` to a Python value.

    Handles ``None``/``True``/``False``, quoted strings, bracketed containers
    and numbers (including exponent notation such as ``2e-05``). Anything
    that cannot be interpreted is returned unchanged as a string.
    """
    if text == 'None':
        return None
    if text == 'True':
        return True
    if text == 'False':
        return False

    if text[:1] in ('"', "'") and text.endswith(text[0]):
        # literal_eval resolves escape sequences (e.g. '\\' in Windows paths);
        # plain slicing is kept as the fallback for malformed literals.
        try:
            parsed = ast.literal_eval(text)
            if isinstance(parsed, str):
                return parsed
        except Exception:
            pass
        return text[1:-1]

    if text[:1] + text[-1:] in ('[]', '()', '{}'):
        # Lists, tuples, dicts and sets; keep the raw text if it is not a literal.
        try:
            return ast.literal_eval(text)
        except Exception:
            return text

    # int first so "200" stays an int; float second so "0.1", "2e-05" and
    # "inf" become floats instead of being left as strings.
    for convert in (int, float):
        try:
            return convert(text)
        except ValueError:
            continue
    return text


def convert_args_to_dict(dir_path):
    """Parse the first ``.txt`` file in ``dir_path`` into a config dictionary.

    The file is expected to hold ``key=value`` pairs separated by commas,
    optionally wrapped in ``{ ... }`` and/or ``Namespace( ... )`` /
    ``namespace( ... )``, and optionally ending with a ``**{...}`` dictionary
    whose entries are merged on top of the parsed pairs.

    The returned dictionary always has a ``'sampling'`` key: it is copied from
    ``'sampling method'`` or ``'sampling_method'`` when present, and set to
    ``'unknown'`` when none of the three keys exists.

    Args:
        dir_path: Directory to search. Files are visited in sorted order so
            the choice is deterministic when several ``.txt`` files exist.

    Returns:
        The parsed dictionary, or
        ``{'data_augment': 'unknown', 'sampling': 'unknown'}`` when no
        ``.txt`` file exists or the first one cannot be parsed.

    Raises:
        OSError: If ``dir_path`` cannot be listed.
    """
    for file in sorted(os.listdir(dir_path)):
        if not file.endswith('.txt'):
            continue
        try:
            with open(os.path.join(dir_path, file), 'r') as f:
                content = f.read().strip()

            # Remove outer braces { }
            if content.startswith("{") and content.endswith("}"):
                content = content[1:-1]

            # Remove Namespace( ... ) or namespace( ... )
            for wrapper in ("Namespace(", "namespace("):
                if content.startswith(wrapper) and content.endswith(")"):
                    content = content[len(wrapper):-1]
                    break

            # Handle a trailing **{...} dictionary.
            extra_dict = {}
            if ', **{' in content:
                main_content, extra_content = content.rsplit(', **{', 1)
                # Add back the opening brace and ensure it ends with }
                extra_content = '{' + extra_content
                if not extra_content.endswith('}'):
                    extra_content += '}'

                try:
                    parsed_extra = ast.literal_eval(extra_content)
                    if isinstance(parsed_extra, dict):
                        extra_dict = parsed_extra
                        print(f"üìã Found extra dict: {extra_dict}")
                    else:
                        # e.g. a set literal: update() would reject it and
                        # the whole config would be lost.
                        print(f"‚ö†Ô∏è Could not parse extra dict: not a dict ({type(parsed_extra).__name__})")
                except Exception as e:
                    print(f"‚ö†Ô∏è Could not parse extra dict: {e}")

                # Use the main content for regular parsing
                content = main_content

            config_dict = {}
            for part in _split_top_level(content):
                if '=' not in part:
                    continue
                key, value = part.split('=', 1)
                config_dict[key.strip()] = _parse_value(value.strip())

            # Entries from the **{...} tail win over the plain pairs.
            config_dict.update(extra_dict)

            # Map common sampling field names
            if 'sampling method' in config_dict:
                config_dict['sampling'] = config_dict['sampling method']
            elif 'sampling_method' in config_dict:
                config_dict['sampling'] = config_dict['sampling_method']
            elif 'sampling' not in config_dict:
                config_dict['sampling'] = 'unknown'

            return config_dict

        except Exception as e:
            print(f"Failed to parse {file}: {e}")
            # Return default instead of None
            return {'data_augment': 'unknown', 'sampling': 'unknown'}

    # Return default if no .txt file found
    return {'data_augment': 'unknown', 'sampling': 'unknown'}

In [8]:
def _strategy_from_dirname(dirname):
    """Return the strategy token of a run-directory name, or None.

    The name is split on ``_`` and the fourth token is the strategy, except
    that ``Mixup`` followed by ``DRW`` is reported as ``'Mixup_DRW'``
    (e.g. ``cifar10_exp_0.01_Mixup_DRW_200_...``).
    """
    tokens = dirname.split('_')
    if len(tokens) > 4 and tokens[3] == 'Mixup' and tokens[4] == 'DRW':
        return 'Mixup_DRW'
    if len(tokens) > 3:
        return tokens[3]
    return None


def _dataset_from_root(root_dir):
    """Derive the dataset label from the last path component of ``root_dir``.

    ``'log_cinic10'`` gives ``'cinic10'``. Only the final component is
    inspected, so underscores in parent directories do not interfere, and a
    name without any underscore is returned as-is instead of raising.
    """
    base = os.path.basename(os.path.normpath(root_dir))
    tokens = base.split('_')
    return tokens[1] if len(tokens) > 1 else base


def _best_prec_in_log(log_path):
    """Scan one log file for ``Best Prec@1: <number>`` entries.

    Returns:
        ``(best, hits)``: the largest number found (None if there was no
        match) and the count of matching lines.
    """
    best = None
    hits = 0
    with open(log_path, 'r') as f:
        for line in f:
            found = re.search(r"Best Prec@1:\s+([\d.]+)", line)
            if found:
                value = float(found.group(1))
                hits += 1
                if best is None or value > best:
                    best = value
    return best, hits


def analyze_strategy_results(root_dir, strategy, target_sampling):
    """Collect and summarise the best test precision of matching runs.

    Every sub-directory of ``root_dir`` whose name encodes ``strategy`` and
    whose parsed arguments report ``target_sampling`` is scanned for
    ``log_test*.csv`` files. For each file, the maximum ``Best Prec@1`` value
    and the number of matching lines (stored as ``epochs``) are recorded.

    Side effects: prints progress, and when at least one result exists writes
    ``latex/<dataset>/<strategy>/<target_sampling>_<strategy>_results.csv``
    relative to the current working directory.

    Args:
        root_dir: Directory containing one sub-directory per run; the dataset
            label is taken from its name (``log_<dataset>``).
        strategy: Strategy token to select, e.g. ``'ERM'`` or ``'Mixup_DRW'``.
        target_sampling: Required value of the runs' ``'sampling'`` argument.

    Returns:
        A DataFrame of mean/std/count/min/max of ``max_training_result`` per
        ``augmentation`` over runs with at least 200 recorded epochs, or None
        when no run matches or none reaches 200 epochs.
    """
    MIN_EPOCHS = 200  # runs with fewer recorded epochs are left out of the statistics

    results = []
    dataset = _dataset_from_root(root_dir)

    # Sorted so the row order of the saved CSV is reproducible.
    for subdir in sorted(os.listdir(root_dir)):
        run_dir = os.path.join(root_dir, subdir)
        # Stray files (e.g. archives named like a run) cannot be listed as directories.
        if not os.path.isdir(run_dir):
            continue

        actual_strategy = _strategy_from_dirname(subdir)
        if actual_strategy != strategy:
            continue

        print(f"üìÅ Processing {strategy}: {subdir}")

        arguments = convert_args_to_dict(run_dir)
        if arguments is None:
            print(f"‚ö†Ô∏è Could not parse arguments for {subdir}")
            continue

        data_augment = arguments.get('data_augment', 'unknown')
        file_sampling = arguments.get('sampling', 'unknown')

        # Print debug info
        print(f"    üìã Found sampling: {file_sampling}")
        print(f"    üéØ Target sampling: {target_sampling}")

        if file_sampling != target_sampling:
            print(f"    ‚è≠Ô∏è Skipping - sampling mismatch: {file_sampling} != {target_sampling}")
            continue

        print(f"    ‚úÖ Sampling matches! Processing files...")

        for file in sorted(os.listdir(run_dir)):
            if not (file.startswith('log_test') and file.endswith('.csv')):
                continue

            print(f"üìÑ Processing: {subdir}/{file}")

            try:
                max_training_result, epochs = _best_prec_in_log(os.path.join(run_dir, file))

                results.append({
                    "file": file,
                    "subdir": subdir,
                    "dataset": dataset,
                    "strategy": actual_strategy,
                    "augmentation": data_augment,
                    "max_training_result": max_training_result,
                    "epochs": epochs,
                    "sampling": file_sampling
                })

                print(f"    ‚úÖ Max result: {max_training_result}")

            except Exception as e:
                # Best effort: one unreadable log must not abort the whole scan.
                print(f"    ‚ùå Error processing {file}: {e}")

    # Return results and create summary
    print(f"\nüéâ PROCESSING COMPLETE!")
    print(f"üìä Found {len(results)} {strategy} experiments with {target_sampling} sampling")

    if not results:
        print(f"‚ö†Ô∏è No {strategy} experiments found with {target_sampling} sampling!")
        return None

    # Imported here so the function has no pandas requirement when nothing matches.
    import pandas as pd
    df = pd.DataFrame(results)
    print(f"\nüìã {strategy.upper()} RESULTS SUMMARY:")
    print(df)

    # Create directory and save results
    os.makedirs(f'latex/{dataset}/{strategy}', exist_ok=True)
    output_file = f'latex/{dataset}/{strategy}/{target_sampling}_{strategy}_results.csv'
    df.to_csv(output_file, index=False)
    print(f"‚úÖ Saved to: {output_file}")

    # The former "(epochs == 300) | (epochs >= 200)" reduces to this single test.
    filtered_df = df[df['epochs'] >= MIN_EPOCHS]
    if filtered_df.empty:
        print("‚ö†Ô∏è No results with 200 or 300 epochs found!")
        return None

    stats = filtered_df.groupby('augmentation', dropna=True)['max_training_result'].agg([
        'mean',
        'std',
        'count',
        'min',
        'max'
    ]).round(2)
    print(stats)
    return stats

# Strategy and sampler analysed by this cell. The banner is built from STRATEGY
# so the header cannot disagree with the experiments actually requested
# (it used to announce "DRW" while the call asked for 'ERM').
STRATEGY = 'ERM'
TARGET_SAMPLING = 'WeightedFixedBatchSampler'

print("\n" + "="*60)
print(f"üîç ANALYZING {STRATEGY} RESULTS")
print("="*60)
# NOTE(review): the name `df_drw` is kept in case other cells reference it;
# it holds the per-augmentation statistics for STRATEGY, or None.
df_drw = analyze_strategy_results(root_dir='log_cinic10', strategy=STRATEGY, target_sampling=TARGET_SAMPLING)


üîç ANALYZING DRW RESULTS

üéâ PROCESSING COMPLETE!
üìä Found 0 ERM experiments with WeightedFixedBatchSampler sampling
‚ö†Ô∏è No ERM experiments found with WeightedFixedBatchSampler sampling!
