In [None]:
# Import required libraries
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from utils import (
    load_ranking_dict, 
    get_text_ranks, 
    analyze_text_compression,
    compare_rankings,
    find_ranking_files,
    extract_reference_word_from_filename,
    batch_analyze_texts
)
import os
from typing import List, Dict
import warnings
# Suppress every warning category for the remainder of the kernel session.
# NOTE(review): this is a blanket filter, so deprecation and numerical
# warnings raised by later cells are hidden as well.
warnings.filterwarnings(action="ignore")

# Plot appearance: apply the matplotlib style sheet first, then let seaborn
# set the colour palette so the palette choice is the one that takes effect.
plt.style.use("seaborn-v0_8")
sns.set_palette(palette="husl")

# Confirmation message so the reader can see the setup cell finished.
print("✓ Libraries imported successfully")


In [None]:
# Discover the ranking files under the current working directory and build a
# one-row-per-file summary table (model label, reference word, on-disk size).
# NOTE(review): assumes find_ranking_files returns a sized sequence of file
# paths -- confirm against utils.
ranking_files = find_ranking_files('.')

print(f"Found {len(ranking_files)} ranking files:")

ranking_info = []
for i, filepath in enumerate(ranking_files):
    filename = os.path.basename(filepath)
    ref_word = extract_reference_word_from_filename(filepath)
    # Filename heuristic: anything containing "bert" (case-insensitive) is
    # labelled BERT; every other file is labelled OpenAI.
    model_type = 'BERT' if 'bert' in filename.lower() else 'OpenAI'
    file_size = os.path.getsize(filepath) / (1024 * 1024)  # bytes -> MB

    ranking_info.append({
        'Index': i,
        'Filename': filename,
        'Model': model_type,
        'Reference Word': ref_word,
        # Stored as pre-formatted text (two decimals), not as a number.
        'Size (MB)': f"{file_size:.2f}",
        'Path': filepath
    })

    print(f"  {i}: {filename} | {model_type} | Ref: '{ref_word}' | {file_size:.2f} MB")

# Convert to DataFrame for easier manipulation.
# The column list is passed explicitly so the table keeps its schema even when
# no ranking file was found; without it an empty list produces a frame with no
# columns and later lookups such as ranking_df['Path'] raise KeyError.
ranking_df = pd.DataFrame(
    ranking_info,
    columns=['Index', 'Filename', 'Model', 'Reference Word', 'Size (MB)', 'Path'],
)
ranking_df
