In [2]:
!pip install pandas chromadb sentence-transformers tqdm



In [5]:
# Import necessary libraries
import pandas as pd
import numpy as np
import glob
import os
import re
from tqdm.notebook import tqdm
import chromadb
from sentence_transformers import SentenceTransformer

# Configure pandas and tqdm
pd.set_option('display.max_colwidth', None)
tqdm.pandas()

In [6]:
# ===================================================================
# MEMORY-EFFICIENT FAERS DATA PROCESSOR (CRASH-RESISTANT VERSION)
# ===================================================================
import pandas as pd
import glob
import os
import re
from tqdm.notebook import tqdm
import psutil
import gc
import time

# --- Configuration ---
base_directory = "/Users/deliciamagdaline/Desktop/Project/faers_menstrual_rag_project/data/raw"
CHUNK_SIZE = 10  # Process files in chunks of 10
MEMORY_THRESHOLD = 85  # Stop if memory usage exceeds 85%

def check_memory():
    """Check current memory usage"""
    memory_percent = psutil.virtual_memory().percent
    print(f"💾 Memory usage: {memory_percent:.1f}%")
    return memory_percent

def find_files(pattern):
    """Find files with case-insensitive search"""
    search_patterns = [
        f"{base_directory}/*/ascii/{pattern}",
        f"{base_directory}/*/ASCII/{pattern}",
    ]
    
    all_files = []
    for search_path in search_patterns:
        files = glob.glob(search_path)
        all_files.extend(files)
    
    # Remove duplicates and sort by year/quarter for consistent processing
    unique_files = sorted(list(set(all_files)))
    print(f"Found {len(unique_files)} unique files for pattern {pattern}")
    return unique_files

def process_single_file(file_path):
    """Process a single file with memory management"""
    try:
        # Extract metadata
        dir_name = os.path.basename(os.path.dirname(os.path.dirname(file_path)))
        match = re.match(r'(\d{4})(q\d)', dir_name)
        year, quarter = (match.groups() if match else (None, None))
        
        # Try multiple delimiters and encodings
        for delimiter in ['$', '\t', '|']:
            for encoding in ['ISO-8859-1', 'utf-8', 'cp1252']:
                try:
                    df = pd.read_csv(
                        file_path, 
                        delimiter=delimiter, 
                        encoding=encoding,
                        on_bad_lines="skip", 
                        low_memory=False,
                        dtype=str  # Read everything as string to save memory
                    )
                    
                    # Check if we got reasonable data
                    if len(df) > 0 and len(df.columns) > 1:
                        df['year'] = year
                        df['quarter'] = quarter
                        df['source_file'] = os.path.basename(file_path)
                        
                        print(f"  ✅ {os.path.basename(file_path)}: {len(df)} rows, {len(df.columns)} cols")
                        return df
                        
                except Exception as e:
                    continue
        
        print(f"  ❌ Failed to read: {os.path.basename(file_path)}")
        return None
        
    except Exception as e:
        print(f"  ❌ Error processing {file_path}: {e}")
        return None

def combine_files_chunked(file_list, output_filename, chunk_size=CHUNK_SIZE):
    """Combine files in chunks to avoid memory crashes"""
    if not file_list:
        print(f"ERROR: No files found for {output_filename}")
        return None
    
    print(f"\n🔄 Processing {len(file_list)} files in chunks of {chunk_size}")
    print(f"📁 Output: {output_filename}")
    
    # Remove existing output file if it exists
    if os.path.exists(output_filename):
        os.remove(output_filename)
        print(f"🗑️  Removed existing {output_filename}")
    
    total_rows = 0
    header_written = False
    
    # Process files in chunks
    for i in range(0, len(file_list), chunk_size):
        chunk_files = file_list[i:i+chunk_size]
        chunk_num = (i // chunk_size) + 1
        total_chunks = (len(file_list) + chunk_size - 1) // chunk_size
        
        print(f"\n📦 Processing chunk {chunk_num}/{total_chunks} ({len(chunk_files)} files)")
        
        # Check memory before processing chunk
        memory_percent = check_memory()
        if memory_percent > MEMORY_THRESHOLD:
            print(f"⚠️  Memory usage too high ({memory_percent:.1f}%). Running garbage collection...")
            gc.collect()
            time.sleep(2)
        
        chunk_dfs = []
        
        # Process each file in the chunk
        for file_path in tqdm(chunk_files, desc=f"Chunk {chunk_num}"):
            df = process_single_file(file_path)
            if df is not None:
                chunk_dfs.append(df)
        
        # Combine chunk dataframes
        if chunk_dfs:
            print(f"  🔗 Combining {len(chunk_dfs)} dataframes from chunk {chunk_num}")
            chunk_combined = pd.concat(chunk_dfs, ignore_index=True, sort=False)
            
            # Append to output file
            mode = 'w' if not header_written else 'a'
            header = not header_written
            
            chunk_combined.to_csv(output_filename, mode=mode, header=header, index=False)
            
            chunk_rows = len(chunk_combined)
            total_rows += chunk_rows
            header_written = True
            
            print(f"  ✅ Chunk {chunk_num}: {chunk_rows} rows appended (Total: {total_rows})")
            
            # Clear memory
            del chunk_combined
            del chunk_dfs
            gc.collect()
        
        print(f"  💾 Memory after chunk {chunk_num}: {psutil.virtual_memory().percent:.1f}%")
    
    if total_rows > 0:
        print(f"\n🎉 Successfully created {output_filename} with {total_rows:,} total rows")
        return pd.read_csv(output_filename, nrows=0)  # Return just headers for validation
    else:
        print(f"\n❌ No data processed for {output_filename}")
        return None

def process_data_type(data_type, patterns, resume_from=None):
    """Process a single data type with resume capability"""
    print(f"\n{'='*80}")
    print(f"🚀 PROCESSING: {data_type.upper()}")
    print(f"{'='*80}")
    
    # Check if already processed
    output_files = {
        'demo_df': 'faers_demographics_combined.csv',
        'drug_df': 'faers_drugs_combined.csv',
        'reac_df': 'faers_reactions_combined.csv',
        'indi_df': 'faers_indications_combined.csv',
        'outc_df': 'faers_outcomes_combined.csv',
        'rpsr_df': 'faers_reports_combined.csv',
        'ther_df': 'faers_therapy_combined.csv'
    }
    
    output_filename = output_files.get(data_type, f'faers_{data_type}_combined.csv')
    
    if os.path.exists(output_filename) and resume_from != data_type:
        print(f"✅ {output_filename} already exists. Skipping...")
        try:
            df = pd.read_csv(output_filename, nrows=100)  # Just check first 100 rows
            print(f"   File appears valid with {len(df.columns)} columns")
            return df
        except:
            print(f"   File appears corrupted. Will reprocess...")
    
    # Find files
    all_files = []
    for pattern in patterns:
        files = find_files(pattern)
        all_files.extend(files)
    
    unique_files = sorted(list(set(all_files)))
    
    if not unique_files:
        print(f"❌ No files found for {data_type}")
        return None
    
    print(f"📊 Found {len(unique_files)} files to process")
    
    # Process files
    return combine_files_chunked(unique_files, output_filename)

# --- MAIN EXECUTION WITH RESUME CAPABILITY ---
def main(resume_from=None):
    """
    Main function with resume capability
    resume_from: Skip to this data type (e.g., 'drug_df' to resume from drugs)
    """
    print("🚀 MEMORY-EFFICIENT FAERS PROCESSOR")
    print("="*80)
    check_memory()
    
    file_types = {
        'demo_df': ['demo*.txt', 'DEMO*.txt'],
        'drug_df': ['drug*.txt', 'DRUG*.txt'],
        'reac_df': ['reac*.txt', 'REAC*.txt'],
        'indi_df': ['indi*.txt', 'INDI*.txt'],
        'outc_df': ['outc*.txt', 'OUTC*.txt'],
        'rpsr_df': ['rpsr*.txt', 'RPSR*.txt'],
        'ther_df': ['ther*.txt', 'THER*.txt']
    }
    
    data_frames = {}
    processing_order = list(file_types.keys())
    
    # Find start point if resuming
    start_idx = 0
    if resume_from and resume_from in processing_order:
        start_idx = processing_order.index(resume_from)
        print(f"🔄 RESUMING from {resume_from}")
    
    # Process each data type
    for data_type in processing_order[start_idx:]:
        patterns = file_types[data_type]
        
        try:
            result = process_data_type(data_type, patterns, resume_from)
            if result is not None:
                data_frames[data_type] = result
            
        except KeyboardInterrupt:
            print(f"\n⚠️  Processing interrupted at {data_type}")
            print(f"To resume, run: main(resume_from='{data_type}')")
            return data_frames
            
        except Exception as e:
            print(f"\n❌ Error processing {data_type}: {e}")
            print(f"To resume, run: main(resume_from='{data_type}')")
            continue
    
    print(f"\n🎉 ALL PROCESSING COMPLETE!")
    return data_frames

# Run the main function
if __name__ == "__main__":
    # To resume from a specific point, use: main(resume_from='drug_df')
    data_frames = main()
    
    print("\n📊 FINAL SUMMARY:")
    print("="*80)
    for name, df in data_frames.items():
        if df is not None:
            # Get actual file size
            output_files = {
                'demo_df': 'faers_demographics_combined.csv',
                'drug_df': 'faers_drugs_combined.csv',
                'reac_df': 'faers_reactions_combined.csv',
                'indi_df': 'faers_indications_combined.csv',
                'outc_df': 'faers_outcomes_combined.csv',
                'rpsr_df': 'faers_reports_combined.csv',
                'ther_df': 'faers_therapy_combined.csv'
            }
            filename = output_files[name]
            if os.path.exists(filename):
                size_mb = os.path.getsize(filename) / 1024 / 1024
                print(f"✅ {name}: {filename} ({size_mb:.1f} MB)")
        else:
            print(f"❌ {name}: Not processed")

🚀 MEMORY-EFFICIENT FAERS PROCESSOR
💾 Memory usage: 62.9%

🚀 PROCESSING: DEMO_DF
Found 96 unique files for pattern demo*.txt
Found 0 unique files for pattern DEMO*.txt
📊 Found 96 files to process

🔄 Processing 96 files in chunks of 10
📁 Output: faers_demographics_combined.csv

📦 Processing chunk 1/10 (10 files)
💾 Memory usage: 62.9%


Chunk 1:   0%|          | 0/10 [00:00<?, ?it/s]

  ✅ demo13q1.txt: 223016 rows, 25 cols
  ✅ demo13q1.txt: 223016 rows, 25 cols
  ✅ demo13q2.txt: 171764 rows, 25 cols
  ✅ demo13q2.txt: 171764 rows, 25 cols
  ✅ demo13q3.txt: 185569 rows, 25 cols
  ✅ demo13q3.txt: 185569 rows, 25 cols
  ✅ demo13q4.txt: 232247 rows, 25 cols
  ✅ demo13q4.txt: 232247 rows, 25 cols
  ✅ demo14q1.txt: 260057 rows, 25 cols
  ✅ demo14q1.txt: 260057 rows, 25 cols
  🔗 Combining 10 dataframes from chunk 1
  ✅ Chunk 1: 2145306 rows appended (Total: 2145306)
  💾 Memory after chunk 1: 62.5%

📦 Processing chunk 2/10 (10 files)
💾 Memory usage: 62.5%


Chunk 2:   0%|          | 0/10 [00:00<?, ?it/s]

  ✅ demo14q2.txt: 223845 rows, 25 cols
  ✅ demo14q2.txt: 223845 rows, 25 cols
  ✅ demo14q3.txt: 211308 rows, 28 cols
  ✅ demo14q3.txt: 211308 rows, 28 cols
  ✅ demo14q4.txt: 207964 rows, 28 cols
  ✅ demo14q4.txt: 207964 rows, 28 cols
  ✅ demo15q1.txt: 317071 rows, 28 cols
  ✅ demo15q1.txt: 317071 rows, 28 cols
  ✅ demo15q2.txt: 289270 rows, 28 cols
  ✅ demo15q2.txt: 289270 rows, 28 cols
  🔗 Combining 10 dataframes from chunk 2
  ✅ Chunk 2: 2498916 rows appended (Total: 4644222)
  💾 Memory after chunk 2: 60.8%

📦 Processing chunk 3/10 (10 files)
💾 Memory usage: 60.8%


Chunk 3:   0%|          | 0/10 [00:00<?, ?it/s]

  ✅ demo15q3.txt: 398860 rows, 28 cols
  ✅ demo15q3.txt: 398860 rows, 28 cols
  ✅ demo15q4.txt: 314704 rows, 28 cols
  ✅ demo15q4.txt: 314704 rows, 28 cols
  ✅ demo16q1.txt: 365682 rows, 28 cols
  ✅ demo16q1.txt: 365682 rows, 28 cols
  ✅ demo16q2.txt: 316056 rows, 28 cols
  ✅ demo16q2.txt: 316056 rows, 28 cols
  ✅ demo16q3.txt: 313613 rows, 28 cols
  ✅ demo16q3.txt: 313613 rows, 28 cols
  🔗 Combining 10 dataframes from chunk 3
  ✅ Chunk 3: 3417830 rows appended (Total: 8062052)
  💾 Memory after chunk 3: 54.7%

📦 Processing chunk 4/10 (10 files)
💾 Memory usage: 54.7%


Chunk 4:   0%|          | 0/10 [00:00<?, ?it/s]

  ✅ demo16q4.txt: 309534 rows, 28 cols
  ✅ demo16q4.txt: 309534 rows, 28 cols
  ✅ demo17q1.txt: 352913 rows, 28 cols
  ✅ demo17q1.txt: 352913 rows, 28 cols
  ✅ demo17q2.txt: 337398 rows, 28 cols
  ✅ demo17q2.txt: 337398 rows, 28 cols
  ✅ demo17q3.txt: 351388 rows, 28 cols
  ✅ demo17q3.txt: 351388 rows, 28 cols
  ✅ demo17q4.txt: 327848 rows, 28 cols
  ✅ demo17q4.txt: 327848 rows, 28 cols
  🔗 Combining 10 dataframes from chunk 4
  ✅ Chunk 4: 3358162 rows appended (Total: 11420214)
  💾 Memory after chunk 4: 54.8%

📦 Processing chunk 5/10 (10 files)
💾 Memory usage: 54.8%


Chunk 5:   0%|          | 0/10 [00:00<?, ?it/s]

  ✅ demo18q1_new.txt: 412702 rows, 28 cols
  ✅ demo18q1_new.txt: 412702 rows, 28 cols
  ✅ demo18q2.txt: 457169 rows, 28 cols
  ✅ demo18q2.txt: 457169 rows, 28 cols
  ✅ demo18q3.txt: 420915 rows, 28 cols
  ✅ demo18q3.txt: 420915 rows, 28 cols
  ✅ demo18q4.txt: 394066 rows, 28 cols
  ✅ demo18q4.txt: 394066 rows, 28 cols
  ✅ demo19q1.txt: 413734 rows, 28 cols
  ✅ demo19q1.txt: 413734 rows, 28 cols
  🔗 Combining 10 dataframes from chunk 5
  ✅ Chunk 5: 4197172 rows appended (Total: 15617386)
  💾 Memory after chunk 5: 57.3%

📦 Processing chunk 6/10 (10 files)
💾 Memory usage: 57.3%


Chunk 6:   0%|          | 0/10 [00:00<?, ?it/s]

  ✅ demo19q2.txt: 441108 rows, 28 cols
  ✅ demo19q2.txt: 441108 rows, 28 cols
  ✅ demo19q3.txt: 452873 rows, 28 cols
  ✅ demo19q3.txt: 452873 rows, 28 cols
  ✅ demo19q4.txt: 419581 rows, 28 cols
  ✅ demo19q4.txt: 419581 rows, 28 cols
  ✅ demo20q1.txt: 460327 rows, 28 cols
  ✅ demo20q1.txt: 460327 rows, 28 cols
  ✅ demo20q2.txt: 429227 rows, 28 cols
  ✅ demo20q2.txt: 429227 rows, 28 cols
  🔗 Combining 10 dataframes from chunk 6
  ✅ Chunk 6: 4406232 rows appended (Total: 20023618)
  💾 Memory after chunk 6: 56.6%

📦 Processing chunk 7/10 (10 files)
💾 Memory usage: 56.6%


Chunk 7:   0%|          | 0/10 [00:00<?, ?it/s]

  ✅ demo20q3.txt: 431667 rows, 28 cols
  ✅ demo20q3.txt: 431667 rows, 28 cols
  ✅ demo20q4.txt: 436148 rows, 28 cols
  ✅ demo20q4.txt: 436148 rows, 28 cols
  ✅ demo21q1.txt: 463741 rows, 28 cols
  ✅ demo21q1.txt: 463741 rows, 28 cols
  ✅ demo21q2.txt: 479945 rows, 28 cols
  ✅ demo21q2.txt: 479945 rows, 28 cols
  ✅ demo21q3.txt: 504160 rows, 28 cols
  ✅ demo21q3.txt: 504160 rows, 28 cols
  🔗 Combining 10 dataframes from chunk 7
  ✅ Chunk 7: 4631322 rows appended (Total: 24654940)
  💾 Memory after chunk 7: 58.7%

📦 Processing chunk 8/10 (10 files)
💾 Memory usage: 58.7%


Chunk 8:   0%|          | 0/10 [00:00<?, ?it/s]

  ✅ demo21q4.txt: 412542 rows, 28 cols
  ✅ demo21q4.txt: 412542 rows, 28 cols
  ✅ demo22q1.txt: 461623 rows, 28 cols
  ✅ demo22q1.txt: 461623 rows, 28 cols
  ✅ demo22q2.txt: 435618 rows, 28 cols
  ✅ demo22q2.txt: 435618 rows, 28 cols
  ✅ demo22q3.txt: 446511 rows, 28 cols
  ✅ demo22q3.txt: 446511 rows, 28 cols
  ✅ demo22q4.txt: 483643 rows, 28 cols
  ✅ demo22q4.txt: 483643 rows, 28 cols
  🔗 Combining 10 dataframes from chunk 8
  ✅ Chunk 8: 4479874 rows appended (Total: 29134814)
  💾 Memory after chunk 8: 55.9%

📦 Processing chunk 9/10 (10 files)
💾 Memory usage: 55.9%


Chunk 9:   0%|          | 0/10 [00:00<?, ?it/s]

  ✅ demo23q1.txt: 432144 rows, 28 cols
  ✅ demo23q1.txt: 432144 rows, 28 cols
  ✅ demo23q2.txt: 418592 rows, 28 cols
  ✅ demo23q2.txt: 418592 rows, 28 cols
  ✅ demo23q3.txt: 407522 rows, 28 cols
  ✅ demo23q3.txt: 407522 rows, 28 cols
  ✅ demo23q4.txt: 415379 rows, 28 cols
  ✅ demo23q4.txt: 415379 rows, 28 cols
  ✅ demo24q1.txt: 406184 rows, 28 cols
  ✅ demo24q1.txt: 406184 rows, 28 cols
  🔗 Combining 10 dataframes from chunk 9
  ✅ Chunk 9: 4159642 rows appended (Total: 33294456)
  💾 Memory after chunk 9: 54.5%

📦 Processing chunk 10/10 (6 files)
💾 Memory usage: 54.5%


Chunk 10:   0%|          | 0/6 [00:00<?, ?it/s]

  ✅ demo24q2.txt: 397119 rows, 28 cols
  ✅ demo24q2.txt: 397119 rows, 28 cols
  ✅ demo24q3.txt: 405513 rows, 28 cols
  ✅ demo24q3.txt: 405513 rows, 28 cols
  ✅ demo24q4.txt: 410849 rows, 28 cols
  ✅ demo24q4.txt: 410849 rows, 28 cols
  🔗 Combining 6 dataframes from chunk 10
  ✅ Chunk 10: 2426962 rows appended (Total: 35721418)
  💾 Memory after chunk 10: 58.4%

🎉 Successfully created faers_demographics_combined.csv with 35,721,418 total rows

🚀 PROCESSING: DRUG_DF
Found 96 unique files for pattern drug*.txt
Found 0 unique files for pattern DRUG*.txt
📊 Found 96 files to process

🔄 Processing 96 files in chunks of 10
📁 Output: faers_drugs_combined.csv

📦 Processing chunk 1/10 (10 files)
💾 Memory usage: 57.5%


Chunk 1:   0%|          | 0/10 [00:00<?, ?it/s]

  ✅ drug13q1.txt: 737725 rows, 22 cols
  ✅ drug13q1.txt: 737725 rows, 22 cols
  ✅ drug13q2.txt: 564548 rows, 22 cols
  ✅ drug13q2.txt: 564548 rows, 22 cols
  ✅ drug13q3.txt: 607921 rows, 22 cols
  ✅ drug13q3.txt: 607921 rows, 22 cols
  ✅ drug13q4.txt: 738501 rows, 22 cols
  ✅ drug13q4.txt: 738501 rows, 22 cols
  ✅ drug14q1.txt: 901608 rows, 22 cols
  ✅ drug14q1.txt: 901608 rows, 22 cols
  🔗 Combining 10 dataframes from chunk 1
  ✅ Chunk 1: 7100606 rows appended (Total: 7100606)
  💾 Memory after chunk 1: 51.8%

📦 Processing chunk 2/10 (10 files)
💾 Memory usage: 51.8%


Chunk 2:   0%|          | 0/10 [00:00<?, ?it/s]

  ✅ drug14q2.txt: 683829 rows, 22 cols
  ✅ drug14q2.txt: 683829 rows, 22 cols
  ✅ drug14q3.txt: 688140 rows, 23 cols
  ✅ drug14q3.txt: 688140 rows, 23 cols
  ✅ drug14q4.txt: 682292 rows, 23 cols
  ✅ drug14q4.txt: 682292 rows, 23 cols
  ✅ drug15q1.txt: 1082833 rows, 23 cols
  ✅ drug15q1.txt: 1082833 rows, 23 cols
  ✅ drug15q2.txt: 967120 rows, 23 cols
  ✅ drug15q2.txt: 967120 rows, 23 cols
  🔗 Combining 10 dataframes from chunk 2
  ✅ Chunk 2: 8208428 rows appended (Total: 15309034)
  💾 Memory after chunk 2: 53.0%

📦 Processing chunk 3/10 (10 files)
💾 Memory usage: 53.0%


Chunk 3:   0%|          | 0/10 [00:00<?, ?it/s]

  ✅ drug15q3.txt: 1294625 rows, 23 cols
  ✅ drug15q3.txt: 1294625 rows, 23 cols
  ✅ drug15q4.txt: 1064332 rows, 23 cols
  ✅ drug15q4.txt: 1064332 rows, 23 cols
  ✅ drug16q1.txt: 1202390 rows, 23 cols
  ✅ drug16q1.txt: 1202390 rows, 23 cols
  ✅ drug16q2.txt: 1144148 rows, 23 cols
  ✅ drug16q2.txt: 1144148 rows, 23 cols
  ✅ drug16q3.txt: 1257054 rows, 23 cols
  ✅ drug16q3.txt: 1257054 rows, 23 cols
  🔗 Combining 10 dataframes from chunk 3
  ✅ Chunk 3: 11925098 rows appended (Total: 27234132)
  💾 Memory after chunk 3: 51.1%

📦 Processing chunk 4/10 (10 files)
💾 Memory usage: 51.1%


Chunk 4:   0%|          | 0/10 [00:00<?, ?it/s]

  ✅ drug16q4.txt: 1147547 rows, 23 cols
  ✅ drug16q4.txt: 1147547 rows, 23 cols
  ✅ drug17q1.txt: 1234393 rows, 23 cols
  ✅ drug17q1.txt: 1234393 rows, 23 cols
  ✅ drug17q2.txt: 1188078 rows, 23 cols
  ✅ drug17q2.txt: 1188078 rows, 23 cols
  ✅ drug17q3.txt: 1220118 rows, 23 cols
  ✅ drug17q3.txt: 1220118 rows, 23 cols
  ✅ drug17q4.txt: 1213842 rows, 23 cols
  ✅ drug17q4.txt: 1213842 rows, 23 cols
  🔗 Combining 10 dataframes from chunk 4
  ✅ Chunk 4: 12007956 rows appended (Total: 39242088)
  💾 Memory after chunk 4: 51.6%

📦 Processing chunk 5/10 (10 files)
💾 Memory usage: 51.5%


Chunk 5:   0%|          | 0/10 [00:00<?, ?it/s]

  ✅ drug18q1.txt: 1686461 rows, 23 cols
  ✅ drug18q1.txt: 1686461 rows, 23 cols
  ✅ drug18q2.txt: 1987872 rows, 23 cols
  ✅ drug18q2.txt: 1987872 rows, 23 cols
  ✅ drug18q3.txt: 1651966 rows, 23 cols
  ✅ drug18q3.txt: 1651966 rows, 23 cols
  ✅ drug18q4.txt: 1546835 rows, 23 cols
  ✅ drug18q4.txt: 1546835 rows, 23 cols
  ✅ drug19q1.txt: 1648987 rows, 23 cols
  ✅ drug19q1.txt: 1648987 rows, 23 cols
  🔗 Combining 10 dataframes from chunk 5
  ✅ Chunk 5: 17044242 rows appended (Total: 56286330)
  💾 Memory after chunk 5: 43.2%

📦 Processing chunk 6/10 (10 files)
💾 Memory usage: 43.2%


Chunk 6:   0%|          | 0/10 [00:00<?, ?it/s]

  ✅ drug19q2.txt: 1863010 rows, 23 cols
  ✅ drug19q2.txt: 1863010 rows, 23 cols
  ✅ drug19q3.txt: 1982442 rows, 23 cols
  ✅ drug19q3.txt: 1982442 rows, 23 cols
  ✅ drug19q4.txt: 1714177 rows, 23 cols
  ✅ drug19q4.txt: 1714177 rows, 23 cols
  ✅ drug20q1.txt: 1943532 rows, 23 cols
  ✅ drug20q1.txt: 1943532 rows, 23 cols
  ✅ drug20q2.txt: 1825414 rows, 23 cols
  ✅ drug20q2.txt: 1825414 rows, 23 cols
  🔗 Combining 10 dataframes from chunk 6
  ✅ Chunk 6: 18657150 rows appended (Total: 74943480)
  💾 Memory after chunk 6: 45.0%

📦 Processing chunk 7/10 (10 files)
💾 Memory usage: 45.0%


Chunk 7:   0%|          | 0/10 [00:00<?, ?it/s]

  ✅ drug20q3.txt: 1895153 rows, 23 cols
  ✅ drug20q3.txt: 1895153 rows, 23 cols
  ✅ drug20q4.txt: 1918927 rows, 23 cols
  ✅ drug20q4.txt: 1918927 rows, 23 cols
  ✅ drug21q1.txt: 2208416 rows, 23 cols
  ✅ drug21q1.txt: 2208416 rows, 23 cols
  ✅ drug21q2.txt: 2291903 rows, 23 cols
  ✅ drug21q2.txt: 2291903 rows, 23 cols
  ✅ drug21q3.txt: 2260570 rows, 23 cols
  ✅ drug21q3.txt: 2260570 rows, 23 cols
  🔗 Combining 10 dataframes from chunk 7
  ✅ Chunk 7: 21149938 rows appended (Total: 96093418)
  💾 Memory after chunk 7: 46.2%

📦 Processing chunk 8/10 (10 files)
💾 Memory usage: 46.1%


Chunk 8:   0%|          | 0/10 [00:00<?, ?it/s]

  ✅ drug21q4.txt: 1778675 rows, 23 cols
  ✅ drug21q4.txt: 1778675 rows, 23 cols
  ✅ drug22q1.txt: 1994171 rows, 23 cols
  ✅ drug22q1.txt: 1994171 rows, 23 cols
  ✅ drug22q2.txt: 1828103 rows, 23 cols
  ✅ drug22q2.txt: 1828103 rows, 23 cols
  ✅ drug22q3.txt: 1835461 rows, 23 cols
  ✅ drug22q3.txt: 1835461 rows, 23 cols
  ✅ drug22q4.txt: 2006967 rows, 23 cols
  ✅ drug22q4.txt: 2006967 rows, 23 cols
  🔗 Combining 10 dataframes from chunk 8
  ✅ Chunk 8: 18886754 rows appended (Total: 114980172)
  💾 Memory after chunk 8: 56.2%

📦 Processing chunk 9/10 (10 files)
💾 Memory usage: 56.2%


Chunk 9:   0%|          | 0/10 [00:00<?, ?it/s]

  ✅ drug23q1.txt: 1899503 rows, 23 cols
  ✅ drug23q1.txt: 1899503 rows, 23 cols
  ✅ drug23q2.txt: 1885096 rows, 23 cols
  ✅ drug23q2.txt: 1885096 rows, 23 cols
  ✅ drug23q3.txt: 1768391 rows, 23 cols
  ✅ drug23q3.txt: 1768391 rows, 23 cols
  ✅ drug23q4.txt: 1920732 rows, 23 cols
  ✅ drug23q4.txt: 1920732 rows, 23 cols
  ✅ drug24q1.txt: 1909327 rows, 23 cols
  ✅ drug24q1.txt: 1909327 rows, 23 cols
  🔗 Combining 10 dataframes from chunk 9
  ✅ Chunk 9: 18766098 rows appended (Total: 133746270)
  💾 Memory after chunk 9: 49.6%

📦 Processing chunk 10/10 (6 files)
💾 Memory usage: 49.5%


Chunk 10:   0%|          | 0/6 [00:00<?, ?it/s]

  ✅ drug24q2.txt: 1888937 rows, 23 cols
  ✅ drug24q2.txt: 1888937 rows, 23 cols
  ✅ drug24q3.txt: 1907293 rows, 23 cols
  ✅ drug24q3.txt: 1907293 rows, 23 cols
  ✅ drug24q4.txt: 2030938 rows, 23 cols
  ✅ drug24q4.txt: 2030938 rows, 23 cols
  🔗 Combining 6 dataframes from chunk 10
  ✅ Chunk 10: 11654336 rows appended (Total: 145400606)
  💾 Memory after chunk 10: 44.6%

🎉 Successfully created faers_drugs_combined.csv with 145,400,606 total rows

🚀 PROCESSING: REAC_DF
Found 96 unique files for pattern reac*.txt
Found 0 unique files for pattern REAC*.txt
📊 Found 96 files to process

🔄 Processing 96 files in chunks of 10
📁 Output: faers_reactions_combined.csv

📦 Processing chunk 1/10 (10 files)
💾 Memory usage: 41.4%


Chunk 1:   0%|          | 0/10 [00:00<?, ?it/s]

  ✅ reac13q1.txt: 658002 rows, 6 cols
  ✅ reac13q1.txt: 658002 rows, 6 cols
  ✅ reac13q2.txt: 544835 rows, 6 cols
  ✅ reac13q2.txt: 544835 rows, 6 cols
  ✅ reac13q3.txt: 584833 rows, 6 cols
  ✅ reac13q3.txt: 584833 rows, 6 cols
  ✅ reac13q4.txt: 690690 rows, 6 cols
  ✅ reac13q4.txt: 690690 rows, 6 cols
  ✅ reac14q1.txt: 754668 rows, 6 cols
  ✅ reac14q1.txt: 754668 rows, 6 cols
  🔗 Combining 10 dataframes from chunk 1
  ✅ Chunk 1: 6466056 rows appended (Total: 6466056)
  💾 Memory after chunk 1: 44.0%

📦 Processing chunk 2/10 (10 files)
💾 Memory usage: 43.9%


Chunk 2:   0%|          | 0/10 [00:00<?, ?it/s]

  ✅ reac14q2.txt: 652959 rows, 6 cols
  ✅ reac14q2.txt: 652959 rows, 6 cols
  ✅ reac14q3.txt: 667778 rows, 7 cols
  ✅ reac14q3.txt: 667778 rows, 7 cols
  ✅ reac14q4.txt: 635465 rows, 7 cols
  ✅ reac14q4.txt: 635465 rows, 7 cols
  ✅ reac15q1.txt: 872848 rows, 7 cols
  ✅ reac15q1.txt: 872848 rows, 7 cols
  ✅ reac15q2.txt: 845134 rows, 7 cols
  ✅ reac15q2.txt: 845134 rows, 7 cols
  🔗 Combining 10 dataframes from chunk 2
  ✅ Chunk 2: 7348368 rows appended (Total: 13814424)
  💾 Memory after chunk 2: 48.8%

📦 Processing chunk 3/10 (10 files)
💾 Memory usage: 48.7%


Chunk 3:   0%|          | 0/10 [00:00<?, ?it/s]

  ✅ reac15q3.txt: 1133196 rows, 7 cols
  ✅ reac15q3.txt: 1133196 rows, 7 cols
  ✅ reac15q4.txt: 968664 rows, 7 cols
  ✅ reac15q4.txt: 968664 rows, 7 cols
  ✅ reac16q1.txt: 1013744 rows, 7 cols
  ✅ reac16q1.txt: 1013744 rows, 7 cols
  ✅ reac16q2.txt: 914638 rows, 7 cols
  ✅ reac16q2.txt: 914638 rows, 7 cols
  ✅ reac16q3.txt: 936744 rows, 7 cols
  ✅ reac16q3.txt: 936744 rows, 7 cols
  🔗 Combining 10 dataframes from chunk 3
  ✅ Chunk 3: 9933972 rows appended (Total: 23748396)
  💾 Memory after chunk 3: 59.6%

📦 Processing chunk 4/10 (10 files)
💾 Memory usage: 59.6%


Chunk 4:   0%|          | 0/10 [00:00<?, ?it/s]

  ✅ reac16q4.txt: 916308 rows, 7 cols
  ✅ reac16q4.txt: 916308 rows, 7 cols
  ✅ reac17q1.txt: 1041509 rows, 7 cols
  ✅ reac17q1.txt: 1041509 rows, 7 cols
  ✅ reac17q2.txt: 987728 rows, 7 cols
  ✅ reac17q2.txt: 987728 rows, 7 cols
  ✅ reac17q3.txt: 1025153 rows, 7 cols
  ✅ reac17q3.txt: 1025153 rows, 7 cols
  ✅ reac17q4.txt: 961640 rows, 7 cols
  ✅ reac17q4.txt: 961640 rows, 7 cols
  🔗 Combining 10 dataframes from chunk 4
  ✅ Chunk 4: 9864676 rows appended (Total: 33613072)
  💾 Memory after chunk 4: 57.7%

📦 Processing chunk 5/10 (10 files)
💾 Memory usage: 57.7%


Chunk 5:   0%|          | 0/10 [00:00<?, ?it/s]

  ✅ reac18q1.txt: 1206259 rows, 7 cols
  ✅ reac18q1.txt: 1206259 rows, 7 cols
  ✅ reac18q2.txt: 1392439 rows, 7 cols
  ✅ reac18q2.txt: 1392439 rows, 7 cols
  ✅ reac18q3.txt: 1329530 rows, 7 cols
  ✅ reac18q3.txt: 1329530 rows, 7 cols
  ✅ reac18q4.txt: 1250978 rows, 7 cols
  ✅ reac18q4.txt: 1250978 rows, 7 cols
  ✅ reac19q1.txt: 1303532 rows, 7 cols
  ✅ reac19q1.txt: 1303532 rows, 7 cols
  🔗 Combining 10 dataframes from chunk 5
  ✅ Chunk 5: 12965476 rows appended (Total: 46578548)
  💾 Memory after chunk 5: 65.5%

📦 Processing chunk 6/10 (10 files)
💾 Memory usage: 65.4%


Chunk 6:   0%|          | 0/10 [00:00<?, ?it/s]

  ✅ reac19q2.txt: 1408482 rows, 7 cols
  ✅ reac19q2.txt: 1408482 rows, 7 cols
  ✅ reac19q3.txt: 1504790 rows, 7 cols
  ✅ reac19q3.txt: 1504790 rows, 7 cols
  ✅ reac19q4.txt: 1359922 rows, 7 cols
  ✅ reac19q4.txt: 1359922 rows, 7 cols
  ✅ reac20q1.txt: 1517264 rows, 7 cols
  ✅ reac20q1.txt: 1517264 rows, 7 cols
  ✅ reac20q2.txt: 1437285 rows, 7 cols
  ✅ reac20q2.txt: 1437285 rows, 7 cols
  🔗 Combining 10 dataframes from chunk 6
  ✅ Chunk 6: 14455486 rows appended (Total: 61034034)
  💾 Memory after chunk 6: 57.0%

📦 Processing chunk 7/10 (10 files)
💾 Memory usage: 57.0%


Chunk 7:   0%|          | 0/10 [00:00<?, ?it/s]

  ✅ reac20q3.txt: 1454044 rows, 7 cols
  ✅ reac20q3.txt: 1454044 rows, 7 cols
  ✅ reac20q4.txt: 1522657 rows, 7 cols
  ✅ reac20q4.txt: 1522657 rows, 7 cols
  ✅ reac21q1.txt: 1505167 rows, 7 cols
  ✅ reac21q1.txt: 1505167 rows, 7 cols
  ✅ reac21q2.txt: 1526544 rows, 7 cols
  ✅ reac21q2.txt: 1526544 rows, 7 cols
  ✅ reac21q3.txt: 1544374 rows, 7 cols
  ✅ reac21q3.txt: 1544374 rows, 7 cols
  🔗 Combining 10 dataframes from chunk 7
  ✅ Chunk 7: 15105572 rows appended (Total: 76139606)
  💾 Memory after chunk 7: 57.4%

📦 Processing chunk 8/10 (10 files)
💾 Memory usage: 57.4%


Chunk 8:   0%|          | 0/10 [00:00<?, ?it/s]

  ✅ reac21q4.txt: 1355734 rows, 7 cols
  ✅ reac21q4.txt: 1355734 rows, 7 cols
  ✅ reac22q1.txt: 1543059 rows, 7 cols
  ✅ reac22q1.txt: 1543059 rows, 7 cols
  ✅ reac22q2.txt: 1464627 rows, 7 cols
  ✅ reac22q2.txt: 1464627 rows, 7 cols
  ✅ reac22q3.txt: 1449509 rows, 7 cols
  ✅ reac22q3.txt: 1449509 rows, 7 cols
  ✅ reac22q4.txt: 1617584 rows, 7 cols
  ✅ reac22q4.txt: 1617584 rows, 7 cols
  🔗 Combining 10 dataframes from chunk 8
  ✅ Chunk 8: 14861026 rows appended (Total: 91000632)
  💾 Memory after chunk 8: 55.6%

📦 Processing chunk 9/10 (10 files)
💾 Memory usage: 55.5%


Chunk 9:   0%|          | 0/10 [00:00<?, ?it/s]

  ✅ reac23q1.txt: 1491473 rows, 7 cols
  ✅ reac23q1.txt: 1491473 rows, 7 cols
  ✅ reac23q2.txt: 1478973 rows, 7 cols
  ✅ reac23q2.txt: 1478973 rows, 7 cols
  ✅ reac23q3.txt: 1373338 rows, 7 cols
  ✅ reac23q3.txt: 1373338 rows, 7 cols
  ✅ reac23q4.txt: 1500033 rows, 7 cols
  ✅ reac23q4.txt: 1500033 rows, 7 cols
  ✅ reac24q1.txt: 1445416 rows, 7 cols
  ✅ reac24q1.txt: 1445416 rows, 7 cols
  🔗 Combining 10 dataframes from chunk 9
  ✅ Chunk 9: 14578466 rows appended (Total: 105579098)
  💾 Memory after chunk 9: 58.0%

📦 Processing chunk 10/10 (6 files)
💾 Memory usage: 58.0%


Chunk 10:   0%|          | 0/6 [00:00<?, ?it/s]

  ✅ reac24q2.txt: 1445044 rows, 7 cols
  ✅ reac24q2.txt: 1445044 rows, 7 cols
  ✅ reac24q3.txt: 1431718 rows, 7 cols
  ✅ reac24q3.txt: 1431718 rows, 7 cols
  ✅ reac24q4.txt: 1472750 rows, 7 cols
  ✅ reac24q4.txt: 1472750 rows, 7 cols
  🔗 Combining 6 dataframes from chunk 10
  ✅ Chunk 10: 8699024 rows appended (Total: 114278122)
  💾 Memory after chunk 10: 57.0%

🎉 Successfully created faers_reactions_combined.csv with 114,278,122 total rows

🚀 PROCESSING: INDI_DF
Found 0 unique files for pattern indi*.txt
Found 96 unique files for pattern INDI*.txt
📊 Found 96 files to process

🔄 Processing 96 files in chunks of 10
📁 Output: faers_indications_combined.csv

📦 Processing chunk 1/10 (10 files)
💾 Memory usage: 56.4%


Chunk 1:   0%|          | 0/10 [00:00<?, ?it/s]

  ✅ INDI13Q1.txt: 453964 rows, 7 cols
  ✅ INDI13Q1.txt: 453964 rows, 7 cols
  ✅ INDI13Q2.txt: 330405 rows, 7 cols
  ✅ INDI13Q2.txt: 330405 rows, 7 cols
  ✅ INDI13Q3.txt: 369227 rows, 7 cols
  ✅ INDI13Q3.txt: 369227 rows, 7 cols
  ✅ INDI13Q4.txt: 432190 rows, 7 cols
  ✅ INDI13Q4.txt: 432190 rows, 7 cols
  ✅ INDI14Q1.txt: 581696 rows, 7 cols
  ✅ INDI14Q1.txt: 581696 rows, 7 cols
  🔗 Combining 10 dataframes from chunk 1
  ✅ Chunk 1: 4334964 rows appended (Total: 4334964)
  💾 Memory after chunk 1: 63.4%

📦 Processing chunk 2/10 (10 files)
💾 Memory usage: 63.4%


Chunk 2:   0%|          | 0/10 [00:00<?, ?it/s]

  ✅ INDI14Q2.txt: 420506 rows, 7 cols
  ✅ INDI14Q2.txt: 420506 rows, 7 cols
  ✅ INDI14Q3.txt: 423391 rows, 7 cols
  ✅ INDI14Q3.txt: 423391 rows, 7 cols
  ✅ INDI14Q4.txt: 436621 rows, 7 cols
  ✅ INDI14Q4.txt: 436621 rows, 7 cols
  ✅ INDI15Q1.txt: 748742 rows, 7 cols
  ✅ INDI15Q1.txt: 748742 rows, 7 cols
  ✅ INDI15Q2.txt: 659529 rows, 7 cols
  ✅ INDI15Q2.txt: 659529 rows, 7 cols
  🔗 Combining 10 dataframes from chunk 2
  ✅ Chunk 2: 5377578 rows appended (Total: 9712542)
  💾 Memory after chunk 2: 64.1%

📦 Processing chunk 3/10 (10 files)
💾 Memory usage: 64.1%


Chunk 3:   0%|          | 0/10 [00:00<?, ?it/s]

  ✅ INDI15Q3.txt: 811279 rows, 7 cols
  ✅ INDI15Q3.txt: 811279 rows, 7 cols
  ✅ INDI15Q4.txt: 718340 rows, 7 cols
  ✅ INDI15Q4.txt: 718340 rows, 7 cols
  ✅ INDI16Q1.txt: 878267 rows, 7 cols
  ✅ INDI16Q1.txt: 878267 rows, 7 cols
  ✅ INDI16Q2.txt: 798489 rows, 7 cols
  ✅ INDI16Q2.txt: 798489 rows, 7 cols
  ✅ INDI16Q3.txt: 816898 rows, 7 cols
  ✅ INDI16Q3.txt: 816898 rows, 7 cols
  🔗 Combining 10 dataframes from chunk 3
  ✅ Chunk 3: 8046546 rows appended (Total: 17759088)
  💾 Memory after chunk 3: 65.6%

📦 Processing chunk 4/10 (10 files)
💾 Memory usage: 65.6%


Chunk 4:   0%|          | 0/10 [00:00<?, ?it/s]

  ✅ INDI16Q4.txt: 755832 rows, 7 cols
  ✅ INDI16Q4.txt: 755832 rows, 7 cols
  ✅ INDI17Q1.txt: 859833 rows, 7 cols
  ✅ INDI17Q1.txt: 859833 rows, 7 cols
  ✅ INDI17Q2.txt: 816254 rows, 7 cols
  ✅ INDI17Q2.txt: 816254 rows, 7 cols
  ✅ INDI17Q3.txt: 844974 rows, 7 cols
  ✅ INDI17Q3.txt: 844974 rows, 7 cols
  ✅ INDI17Q4.txt: 863690 rows, 7 cols
  ✅ INDI17Q4.txt: 863690 rows, 7 cols
  🔗 Combining 10 dataframes from chunk 4
  ✅ Chunk 4: 8281166 rows appended (Total: 26040254)
  💾 Memory after chunk 4: 66.6%

📦 Processing chunk 5/10 (10 files)
💾 Memory usage: 66.5%


Chunk 5:   0%|          | 0/10 [00:00<?, ?it/s]

  ✅ INDI18Q1.txt: 1253118 rows, 7 cols
  ✅ INDI18Q1.txt: 1253118 rows, 7 cols
  ✅ INDI18Q2.txt: 1428287 rows, 7 cols
  ✅ INDI18Q2.txt: 1428287 rows, 7 cols
  ✅ INDI18Q3.txt: 1134135 rows, 7 cols
  ✅ INDI18Q3.txt: 1134135 rows, 7 cols
  ✅ INDI18Q4.txt: 1064664 rows, 7 cols
  ✅ INDI18Q4.txt: 1064664 rows, 7 cols
  ✅ INDI19Q1.txt: 1106754 rows, 7 cols
  ✅ INDI19Q1.txt: 1106754 rows, 7 cols
  🔗 Combining 10 dataframes from chunk 5
  ✅ Chunk 5: 11973916 rows appended (Total: 38014170)
  💾 Memory after chunk 5: 67.1%

📦 Processing chunk 6/10 (10 files)
💾 Memory usage: 67.1%


Chunk 6:   0%|          | 0/10 [00:00<?, ?it/s]

  ✅ INDI19Q2.txt: 1221162 rows, 7 cols
  ✅ INDI19Q2.txt: 1221162 rows, 7 cols
  ✅ INDI19Q3.txt: 1353011 rows, 7 cols
  ✅ INDI19Q3.txt: 1353011 rows, 7 cols
  ✅ INDI19Q4.txt: 1146938 rows, 7 cols
  ✅ INDI19Q4.txt: 1146938 rows, 7 cols
  ✅ INDI20Q1.txt: 1348658 rows, 7 cols
  ✅ INDI20Q1.txt: 1348658 rows, 7 cols
  ✅ INDI20Q2.txt: 1294010 rows, 7 cols
  ✅ INDI20Q2.txt: 1294010 rows, 7 cols
  🔗 Combining 10 dataframes from chunk 6
  ✅ Chunk 6: 12727558 rows appended (Total: 50741728)
  💾 Memory after chunk 6: 47.4%

📦 Processing chunk 7/10 (10 files)
💾 Memory usage: 47.3%


Chunk 7:   0%|          | 0/10 [00:00<?, ?it/s]

  ✅ INDI20Q3.txt: 1276348 rows, 7 cols
  ✅ INDI20Q3.txt: 1276348 rows, 7 cols
  ✅ INDI20Q4.txt: 1297446 rows, 7 cols
  ✅ INDI20Q4.txt: 1297446 rows, 7 cols
  ✅ INDI21Q1.txt: 1603039 rows, 7 cols
  ✅ INDI21Q1.txt: 1603039 rows, 7 cols
  ✅ INDI21Q2.txt: 1659378 rows, 7 cols
  ✅ INDI21Q2.txt: 1659378 rows, 7 cols
  ✅ INDI21Q3.txt: 1588679 rows, 7 cols
  ✅ INDI21Q3.txt: 1588679 rows, 7 cols
  🔗 Combining 10 dataframes from chunk 7
  ✅ Chunk 7: 14849780 rows appended (Total: 65591508)
  💾 Memory after chunk 7: 57.0%

📦 Processing chunk 8/10 (10 files)
💾 Memory usage: 57.0%


Chunk 8:   0%|          | 0/10 [00:00<?, ?it/s]

  ✅ INDI21Q4.txt: 1234766 rows, 7 cols
  ✅ INDI21Q4.txt: 1234766 rows, 7 cols
  ✅ INDI22Q1.txt: 1347146 rows, 7 cols
  ✅ INDI22Q1.txt: 1347146 rows, 7 cols
  ✅ INDI22Q2.txt: 1150299 rows, 7 cols
  ✅ INDI22Q2.txt: 1150299 rows, 7 cols
  ✅ INDI22Q3.txt: 1159203 rows, 7 cols
  ✅ INDI22Q3.txt: 1159203 rows, 7 cols
  ✅ INDI22Q4.txt: 1321489 rows, 7 cols
  ✅ INDI22Q4.txt: 1321489 rows, 7 cols
  🔗 Combining 10 dataframes from chunk 8
  ✅ Chunk 8: 12425806 rows appended (Total: 78017314)
  💾 Memory after chunk 8: 61.0%

📦 Processing chunk 9/10 (10 files)
💾 Memory usage: 61.0%


Chunk 9:   0%|          | 0/10 [00:00<?, ?it/s]

  ✅ INDI23Q1.txt: 1176237 rows, 7 cols
  ✅ INDI23Q1.txt: 1176237 rows, 7 cols
  ❌ Failed to read: INDI23Q2.txt
  ✅ INDI23Q2.txt: 1165782 rows, 7 cols
  ✅ INDI23Q3.txt: 1063761 rows, 7 cols
  ✅ INDI23Q3.txt: 1063761 rows, 7 cols
  ✅ INDI23Q4.txt: 1115961 rows, 7 cols
  ✅ INDI23Q4.txt: 1115961 rows, 7 cols
  ✅ INDI24Q1.txt: 1186115 rows, 7 cols
  ✅ INDI24Q1.txt: 1186115 rows, 7 cols
  🔗 Combining 9 dataframes from chunk 9
  ✅ Chunk 9: 10249930 rows appended (Total: 88267244)
  💾 Memory after chunk 9: 63.3%

📦 Processing chunk 10/10 (6 files)
💾 Memory usage: 63.3%


Chunk 10:   0%|          | 0/6 [00:00<?, ?it/s]

  ✅ INDI24Q2.txt: 1187626 rows, 7 cols
  ✅ INDI24Q2.txt: 1187626 rows, 7 cols
  ✅ INDI24Q3.txt: 1177133 rows, 7 cols
  ✅ INDI24Q3.txt: 1177133 rows, 7 cols
  ✅ INDI24Q4.txt: 1219759 rows, 7 cols
  ✅ INDI24Q4.txt: 1219759 rows, 7 cols
  🔗 Combining 6 dataframes from chunk 10
  ✅ Chunk 10: 7169036 rows appended (Total: 95436280)
  💾 Memory after chunk 10: 64.4%

🎉 Successfully created faers_indications_combined.csv with 95,436,280 total rows

🚀 PROCESSING: OUTC_DF
Found 0 unique files for pattern outc*.txt
Found 96 unique files for pattern OUTC*.txt
📊 Found 96 files to process

🔄 Processing 96 files in chunks of 10
📁 Output: faers_outcomes_combined.csv

📦 Processing chunk 1/10 (10 files)
💾 Memory usage: 64.1%


Chunk 1:   0%|          | 0/10 [00:00<?, ?it/s]

  ✅ OUTC13Q1.txt: 153733 rows, 6 cols
  ✅ OUTC13Q1.txt: 153733 rows, 6 cols
  ✅ OUTC13Q2.txt: 138634 rows, 6 cols
  ✅ OUTC13Q2.txt: 138634 rows, 6 cols
  ✅ OUTC13Q3.txt: 147472 rows, 6 cols
  ✅ OUTC13Q3.txt: 147472 rows, 6 cols
  ✅ OUTC13Q4.txt: 154451 rows, 6 cols
  ✅ OUTC13Q4.txt: 154451 rows, 6 cols
  ✅ OUTC14Q1.txt: 170437 rows, 6 cols
  ✅ OUTC14Q1.txt: 170437 rows, 6 cols
  🔗 Combining 10 dataframes from chunk 1
  ✅ Chunk 1: 1529454 rows appended (Total: 1529454)
  💾 Memory after chunk 1: 64.3%

📦 Processing chunk 2/10 (10 files)
💾 Memory usage: 64.3%


Chunk 2:   0%|          | 0/10 [00:00<?, ?it/s]

  ✅ OUTC14Q2.txt: 178144 rows, 6 cols
  ✅ OUTC14Q2.txt: 178144 rows, 6 cols
  ✅ OUTC14Q3.txt: 169754 rows, 6 cols
  ✅ OUTC14Q3.txt: 169754 rows, 6 cols
  ✅ OUTC14Q4.txt: 167401 rows, 6 cols
  ✅ OUTC14Q4.txt: 167401 rows, 6 cols
  ✅ OUTC15Q1.txt: 238008 rows, 6 cols
  ✅ OUTC15Q1.txt: 238008 rows, 6 cols
  ✅ OUTC15Q2.txt: 200423 rows, 6 cols
  ✅ OUTC15Q2.txt: 200423 rows, 6 cols
  🔗 Combining 10 dataframes from chunk 2
  ✅ Chunk 2: 1907460 rows appended (Total: 3436914)
  💾 Memory after chunk 2: 65.0%

📦 Processing chunk 3/10 (10 files)
💾 Memory usage: 65.0%


Chunk 3:   0%|          | 0/10 [00:00<?, ?it/s]

  ✅ OUTC15Q3.txt: 213108 rows, 6 cols
  ✅ OUTC15Q3.txt: 213108 rows, 6 cols
  ✅ OUTC15Q4.txt: 205080 rows, 6 cols
  ✅ OUTC15Q4.txt: 205080 rows, 6 cols
  ✅ OUTC16Q1.txt: 210280 rows, 6 cols
  ✅ OUTC16Q1.txt: 210280 rows, 6 cols
  ✅ OUTC16Q2.txt: 218745 rows, 6 cols
  ✅ OUTC16Q2.txt: 218745 rows, 6 cols
  ✅ OUTC16Q3.txt: 221346 rows, 6 cols
  ✅ OUTC16Q3.txt: 221346 rows, 6 cols
  🔗 Combining 10 dataframes from chunk 3
  ✅ Chunk 3: 2137118 rows appended (Total: 5574032)
  💾 Memory after chunk 3: 65.3%

📦 Processing chunk 4/10 (10 files)
💾 Memory usage: 65.3%


Chunk 4:   0%|          | 0/10 [00:00<?, ?it/s]

  ✅ OUTC16Q4.txt: 220468 rows, 6 cols
  ✅ OUTC16Q4.txt: 220468 rows, 6 cols
  ✅ OUTC17Q1.txt: 242274 rows, 6 cols
  ✅ OUTC17Q1.txt: 242274 rows, 6 cols
  ✅ OUTC17Q2.txt: 232053 rows, 6 cols
  ✅ OUTC17Q2.txt: 232053 rows, 6 cols
  ✅ OUTC17Q3.txt: 233715 rows, 6 cols
  ✅ OUTC17Q3.txt: 233715 rows, 6 cols
  ✅ OUTC17Q4.txt: 243176 rows, 6 cols
  ✅ OUTC17Q4.txt: 243176 rows, 6 cols
  🔗 Combining 10 dataframes from chunk 4
  ✅ Chunk 4: 2343372 rows appended (Total: 7917404)
  💾 Memory after chunk 4: 65.5%

📦 Processing chunk 5/10 (10 files)
💾 Memory usage: 65.4%


Chunk 5:   0%|          | 0/10 [00:00<?, ?it/s]

  ✅ OUTC18Q1.txt: 298710 rows, 6 cols
  ✅ OUTC18Q1.txt: 298710 rows, 6 cols
  ✅ OUTC18Q2.txt: 331947 rows, 6 cols
  ✅ OUTC18Q2.txt: 331947 rows, 6 cols
  ✅ OUTC18Q3.txt: 295817 rows, 6 cols
  ✅ OUTC18Q3.txt: 295817 rows, 6 cols
  ✅ OUTC18Q4.txt: 299135 rows, 6 cols
  ✅ OUTC18Q4.txt: 299135 rows, 6 cols
  ✅ OUTC19Q1.txt: 310662 rows, 6 cols
  ✅ OUTC19Q1.txt: 310662 rows, 6 cols
  🔗 Combining 10 dataframes from chunk 5
  ✅ Chunk 5: 3072542 rows appended (Total: 10989946)
  💾 Memory after chunk 5: 58.9%

📦 Processing chunk 6/10 (10 files)
💾 Memory usage: 58.9%


Chunk 6:   0%|          | 0/10 [00:00<?, ?it/s]

  ✅ OUTC19Q2.txt: 324738 rows, 6 cols
  ✅ OUTC19Q2.txt: 324738 rows, 6 cols
  ✅ OUTC19Q3.txt: 323787 rows, 6 cols
  ✅ OUTC19Q3.txt: 323787 rows, 6 cols
  ✅ OUTC19Q4.txt: 321603 rows, 6 cols
  ✅ OUTC19Q4.txt: 321603 rows, 6 cols
  ✅ OUTC20Q1.txt: 335470 rows, 6 cols
  ✅ OUTC20Q1.txt: 335470 rows, 6 cols
  ✅ OUTC20Q2.txt: 307509 rows, 6 cols
  ✅ OUTC20Q2.txt: 307509 rows, 6 cols
  🔗 Combining 10 dataframes from chunk 6
  ✅ Chunk 6: 3226214 rows appended (Total: 14216160)
  💾 Memory after chunk 6: 61.4%

📦 Processing chunk 7/10 (10 files)
💾 Memory usage: 61.4%


Chunk 7:   0%|          | 0/10 [00:00<?, ?it/s]

  ✅ OUTC20Q3.txt: 358815 rows, 6 cols
  ✅ OUTC20Q3.txt: 358815 rows, 6 cols
  ✅ OUTC20Q4.txt: 365575 rows, 6 cols
  ✅ OUTC20Q4.txt: 365575 rows, 6 cols
  ✅ OUTC21Q1.txt: 371698 rows, 6 cols
  ✅ OUTC21Q1.txt: 371698 rows, 6 cols
  ✅ OUTC21Q2.txt: 383928 rows, 6 cols
  ✅ OUTC21Q2.txt: 383928 rows, 6 cols
  ✅ OUTC21Q3.txt: 420729 rows, 6 cols
  ✅ OUTC21Q3.txt: 420729 rows, 6 cols
  🔗 Combining 10 dataframes from chunk 7
  ✅ Chunk 7: 3801490 rows appended (Total: 18017650)
  💾 Memory after chunk 7: 62.0%

📦 Processing chunk 8/10 (10 files)
💾 Memory usage: 62.0%


Chunk 8:   0%|          | 0/10 [00:00<?, ?it/s]

  ✅ OUTC21Q4.txt: 337168 rows, 6 cols
  ✅ OUTC21Q4.txt: 337168 rows, 6 cols
  ✅ OUTC22Q1.txt: 375497 rows, 6 cols
  ✅ OUTC22Q1.txt: 375497 rows, 6 cols
  ✅ OUTC22Q2.txt: 325309 rows, 6 cols
  ✅ OUTC22Q2.txt: 325309 rows, 6 cols
  ✅ OUTC22Q3.txt: 345763 rows, 6 cols
  ✅ OUTC22Q3.txt: 345763 rows, 6 cols
  ✅ OUTC22Q4.txt: 334611 rows, 6 cols
  ✅ OUTC22Q4.txt: 334611 rows, 6 cols
  🔗 Combining 10 dataframes from chunk 8
  ✅ Chunk 8: 3436696 rows appended (Total: 21454346)
  💾 Memory after chunk 8: 62.3%

📦 Processing chunk 9/10 (10 files)
💾 Memory usage: 62.3%


Chunk 9:   0%|          | 0/10 [00:00<?, ?it/s]

  ✅ OUTC23Q1.txt: 309217 rows, 6 cols
  ✅ OUTC23Q1.txt: 309217 rows, 6 cols
  ✅ OUTC23Q2.txt: 303513 rows, 6 cols
  ✅ OUTC23Q2.txt: 303513 rows, 6 cols
  ✅ OUTC23Q3.txt: 307396 rows, 6 cols
  ✅ OUTC23Q3.txt: 307396 rows, 6 cols
  ✅ OUTC23Q4.txt: 327797 rows, 6 cols
  ✅ OUTC23Q4.txt: 327797 rows, 6 cols
  ✅ OUTC24Q1.txt: 295044 rows, 6 cols
  ✅ OUTC24Q1.txt: 295044 rows, 6 cols
  🔗 Combining 10 dataframes from chunk 9
  ✅ Chunk 9: 3085934 rows appended (Total: 24540280)
  💾 Memory after chunk 9: 62.8%

📦 Processing chunk 10/10 (6 files)
💾 Memory usage: 62.8%


Chunk 10:   0%|          | 0/6 [00:00<?, ?it/s]

  ✅ OUTC24Q2.txt: 291572 rows, 6 cols
  ✅ OUTC24Q2.txt: 291572 rows, 6 cols
  ✅ OUTC24Q3.txt: 288275 rows, 6 cols
  ✅ OUTC24Q3.txt: 288275 rows, 6 cols
  ✅ OUTC24Q4.txt: 308960 rows, 6 cols
  ✅ OUTC24Q4.txt: 308960 rows, 6 cols
  🔗 Combining 6 dataframes from chunk 10
  ✅ Chunk 10: 1777614 rows appended (Total: 26317894)
  💾 Memory after chunk 10: 62.9%

🎉 Successfully created faers_outcomes_combined.csv with 26,317,894 total rows

🚀 PROCESSING: RPSR_DF
Found 0 unique files for pattern rpsr*.txt
Found 96 unique files for pattern RPSR*.txt
📊 Found 96 files to process

🔄 Processing 96 files in chunks of 10
📁 Output: faers_reports_combined.csv

📦 Processing chunk 1/10 (10 files)
💾 Memory usage: 62.8%


Chunk 1:   0%|          | 0/10 [00:00<?, ?it/s]

  ✅ RPSR13Q1.txt: 54000 rows, 6 cols
  ✅ RPSR13Q1.txt: 54000 rows, 6 cols
  ✅ RPSR13Q2.txt: 27142 rows, 6 cols
  ✅ RPSR13Q2.txt: 27142 rows, 6 cols
  ✅ RPSR13Q3.txt: 24861 rows, 6 cols
  ✅ RPSR13Q3.txt: 24861 rows, 6 cols
  ✅ RPSR13Q4.txt: 26575 rows, 6 cols
  ✅ RPSR13Q4.txt: 26575 rows, 6 cols
  ✅ RPSR14Q1.txt: 37839 rows, 6 cols
  ✅ RPSR14Q1.txt: 37839 rows, 6 cols
  🔗 Combining 10 dataframes from chunk 1
  ✅ Chunk 1: 340834 rows appended (Total: 340834)
  💾 Memory after chunk 1: 62.9%

📦 Processing chunk 2/10 (10 files)
💾 Memory usage: 62.9%


Chunk 2:   0%|          | 0/10 [00:00<?, ?it/s]

  ✅ RPSR14Q2.txt: 22556 rows, 6 cols
  ✅ RPSR14Q2.txt: 22556 rows, 6 cols
  ✅ RPSR14Q3.txt: 24337 rows, 6 cols
  ✅ RPSR14Q3.txt: 24337 rows, 6 cols
  ✅ RPSR14Q4.txt: 23333 rows, 6 cols
  ✅ RPSR14Q4.txt: 23333 rows, 6 cols
  ✅ RPSR15Q1.txt: 28234 rows, 6 cols
  ✅ RPSR15Q1.txt: 28234 rows, 6 cols
  ✅ RPSR15Q2.txt: 20463 rows, 6 cols
  ✅ RPSR15Q2.txt: 20463 rows, 6 cols
  🔗 Combining 10 dataframes from chunk 2
  ✅ Chunk 2: 237846 rows appended (Total: 578680)
  💾 Memory after chunk 2: 62.9%

📦 Processing chunk 3/10 (10 files)
💾 Memory usage: 62.9%


Chunk 3:   0%|          | 0/10 [00:00<?, ?it/s]

  ✅ RPSR15Q3.txt: 12075 rows, 6 cols
  ✅ RPSR15Q3.txt: 12075 rows, 6 cols
  ✅ RPSR15Q4.txt: 5688 rows, 6 cols
  ✅ RPSR15Q4.txt: 5688 rows, 6 cols
  ✅ RPSR16Q1.txt: 6480 rows, 6 cols
  ✅ RPSR16Q1.txt: 6480 rows, 6 cols
  ✅ RPSR16Q2.txt: 9437 rows, 6 cols
  ✅ RPSR16Q2.txt: 9437 rows, 6 cols
  ✅ RPSR16Q3.txt: 7356 rows, 6 cols
  ✅ RPSR16Q3.txt: 7356 rows, 6 cols
  🔗 Combining 10 dataframes from chunk 3
  ✅ Chunk 3: 82072 rows appended (Total: 660752)
  💾 Memory after chunk 3: 63.2%

📦 Processing chunk 4/10 (10 files)
💾 Memory usage: 63.2%


Chunk 4:   0%|          | 0/10 [00:00<?, ?it/s]

  ✅ RPSR16Q4.txt: 10886 rows, 6 cols
  ✅ RPSR16Q4.txt: 10886 rows, 6 cols
  ✅ RPSR17Q1.txt: 13426 rows, 6 cols
  ✅ RPSR17Q1.txt: 13426 rows, 6 cols
  ✅ RPSR17Q2.txt: 15768 rows, 6 cols
  ✅ RPSR17Q2.txt: 15768 rows, 6 cols
  ✅ RPSR17Q3.txt: 15478 rows, 6 cols
  ✅ RPSR17Q3.txt: 15478 rows, 6 cols
  ✅ RPSR17Q4.txt: 15363 rows, 6 cols
  ✅ RPSR17Q4.txt: 15363 rows, 6 cols
  🔗 Combining 10 dataframes from chunk 4
  ✅ Chunk 4: 141842 rows appended (Total: 802594)
  💾 Memory after chunk 4: 63.9%

📦 Processing chunk 5/10 (10 files)
💾 Memory usage: 63.9%


Chunk 5:   0%|          | 0/10 [00:00<?, ?it/s]

  ✅ RPSR18Q1.txt: 19433 rows, 6 cols
  ✅ RPSR18Q1.txt: 19433 rows, 6 cols
  ✅ RPSR18Q2.txt: 15860 rows, 6 cols
  ✅ RPSR18Q2.txt: 15860 rows, 6 cols
  ✅ RPSR18Q3.txt: 23783 rows, 6 cols
  ✅ RPSR18Q3.txt: 23783 rows, 6 cols
  ✅ RPSR18Q4.txt: 21075 rows, 6 cols
  ✅ RPSR18Q4.txt: 21075 rows, 6 cols
  ✅ RPSR19Q1.txt: 16018 rows, 6 cols
  ✅ RPSR19Q1.txt: 16018 rows, 6 cols
  🔗 Combining 10 dataframes from chunk 5
  ✅ Chunk 5: 192338 rows appended (Total: 994932)
  💾 Memory after chunk 5: 64.1%

📦 Processing chunk 6/10 (10 files)
💾 Memory usage: 64.1%


Chunk 6:   0%|          | 0/10 [00:00<?, ?it/s]

  ✅ RPSR19Q2.txt: 23643 rows, 6 cols
  ✅ RPSR19Q2.txt: 23643 rows, 6 cols
  ✅ RPSR19Q3.txt: 20652 rows, 6 cols
  ✅ RPSR19Q3.txt: 20652 rows, 6 cols
  ✅ RPSR19Q4.txt: 23774 rows, 6 cols
  ✅ RPSR19Q4.txt: 23774 rows, 6 cols
  ✅ RPSR20Q1.txt: 15492 rows, 6 cols
  ✅ RPSR20Q1.txt: 15492 rows, 6 cols
  ✅ RPSR20Q2.txt: 13094 rows, 6 cols
  ✅ RPSR20Q2.txt: 13094 rows, 6 cols
  🔗 Combining 10 dataframes from chunk 6
  ✅ Chunk 6: 193310 rows appended (Total: 1188242)
  💾 Memory after chunk 6: 63.8%

📦 Processing chunk 7/10 (10 files)
💾 Memory usage: 63.8%


Chunk 7:   0%|          | 0/10 [00:00<?, ?it/s]

  ✅ RPSR20Q3.txt: 17281 rows, 6 cols
  ✅ RPSR20Q3.txt: 17281 rows, 6 cols
  ✅ RPSR20Q4.txt: 14477 rows, 6 cols
  ✅ RPSR20Q4.txt: 14477 rows, 6 cols
  ✅ RPSR21Q1.txt: 14046 rows, 6 cols
  ✅ RPSR21Q1.txt: 14046 rows, 6 cols
  ✅ RPSR21Q2.txt: 13123 rows, 6 cols
  ✅ RPSR21Q2.txt: 13123 rows, 6 cols
  ✅ RPSR21Q3.txt: 16855 rows, 6 cols
  ✅ RPSR21Q3.txt: 16855 rows, 6 cols
  🔗 Combining 10 dataframes from chunk 7
  ✅ Chunk 7: 151564 rows appended (Total: 1339806)
  💾 Memory after chunk 7: 64.2%

📦 Processing chunk 8/10 (10 files)
💾 Memory usage: 64.2%


Chunk 8:   0%|          | 0/10 [00:00<?, ?it/s]

  ✅ RPSR21Q4.txt: 4936 rows, 6 cols
  ✅ RPSR21Q4.txt: 4936 rows, 6 cols
  ✅ RPSR22Q1.txt: 13091 rows, 6 cols
  ✅ RPSR22Q1.txt: 13091 rows, 6 cols
  ✅ RPSR22Q2.txt: 13867 rows, 6 cols
  ✅ RPSR22Q2.txt: 13867 rows, 6 cols
  ✅ RPSR22Q3.txt: 14727 rows, 6 cols
  ✅ RPSR22Q3.txt: 14727 rows, 6 cols
  ✅ RPSR22Q4.txt: 14398 rows, 6 cols
  ✅ RPSR22Q4.txt: 14398 rows, 6 cols
  🔗 Combining 10 dataframes from chunk 8
  ✅ Chunk 8: 122038 rows appended (Total: 1461844)
  💾 Memory after chunk 8: 64.2%

📦 Processing chunk 9/10 (10 files)
💾 Memory usage: 64.2%


Chunk 9:   0%|          | 0/10 [00:00<?, ?it/s]

  ✅ RPSR23Q1.txt: 13851 rows, 6 cols
  ✅ RPSR23Q1.txt: 13851 rows, 6 cols
  ✅ RPSR23Q2.txt: 13884 rows, 6 cols
  ✅ RPSR23Q2.txt: 13884 rows, 6 cols
  ✅ RPSR23Q3.txt: 11524 rows, 6 cols
  ✅ RPSR23Q3.txt: 11524 rows, 6 cols
  ✅ RPSR23Q4.txt: 13238 rows, 6 cols
  ✅ RPSR23Q4.txt: 13238 rows, 6 cols
  ✅ RPSR24Q1.txt: 12381 rows, 6 cols
  ✅ RPSR24Q1.txt: 12381 rows, 6 cols
  🔗 Combining 10 dataframes from chunk 9
  ✅ Chunk 9: 129756 rows appended (Total: 1591600)
  💾 Memory after chunk 9: 64.3%

📦 Processing chunk 10/10 (6 files)
💾 Memory usage: 64.3%


Chunk 10:   0%|          | 0/6 [00:00<?, ?it/s]

  ✅ RPSR24Q2.txt: 11517 rows, 6 cols
  ✅ RPSR24Q2.txt: 11517 rows, 6 cols
  ✅ RPSR24Q3.txt: 10087 rows, 6 cols
  ✅ RPSR24Q3.txt: 10087 rows, 6 cols
  ✅ RPSR24Q4.txt: 11627 rows, 6 cols
  ✅ RPSR24Q4.txt: 11627 rows, 6 cols
  🔗 Combining 6 dataframes from chunk 10
  ✅ Chunk 10: 66462 rows appended (Total: 1658062)
  💾 Memory after chunk 10: 64.3%

🎉 Successfully created faers_reports_combined.csv with 1,658,062 total rows

🚀 PROCESSING: THER_DF
Found 0 unique files for pattern ther*.txt
Found 96 unique files for pattern THER*.txt
📊 Found 96 files to process

🔄 Processing 96 files in chunks of 10
📁 Output: faers_therapy_combined.csv

📦 Processing chunk 1/10 (10 files)
💾 Memory usage: 64.3%


Chunk 1:   0%|          | 0/10 [00:00<?, ?it/s]

  ✅ THER13Q1.txt: 281940 rows, 10 cols
  ✅ THER13Q1.txt: 281940 rows, 10 cols
  ✅ THER13Q2.txt: 237513 rows, 10 cols
  ✅ THER13Q2.txt: 237513 rows, 10 cols
  ✅ THER13Q3.txt: 251263 rows, 10 cols
  ✅ THER13Q3.txt: 251263 rows, 10 cols
  ✅ THER13Q4.txt: 291315 rows, 10 cols
  ✅ THER13Q4.txt: 291315 rows, 10 cols
  ✅ THER14Q1.txt: 329817 rows, 10 cols
  ✅ THER14Q1.txt: 329817 rows, 10 cols
  🔗 Combining 10 dataframes from chunk 1
  ✅ Chunk 1: 2783696 rows appended (Total: 2783696)
  💾 Memory after chunk 1: 63.4%

📦 Processing chunk 2/10 (10 files)
💾 Memory usage: 63.4%


Chunk 2:   0%|          | 0/10 [00:00<?, ?it/s]

  ✅ THER14Q2.txt: 266586 rows, 10 cols
  ✅ THER14Q2.txt: 266586 rows, 10 cols
  ✅ THER14Q3.txt: 270268 rows, 10 cols
  ✅ THER14Q3.txt: 270268 rows, 10 cols
  ✅ THER14Q4.txt: 270468 rows, 10 cols
  ✅ THER14Q4.txt: 270468 rows, 10 cols
  ✅ THER15Q1.txt: 374935 rows, 10 cols
  ✅ THER15Q1.txt: 374935 rows, 10 cols
  ✅ THER15Q2.txt: 373850 rows, 10 cols
  ✅ THER15Q2.txt: 373850 rows, 10 cols
  🔗 Combining 10 dataframes from chunk 2
  ✅ Chunk 2: 3112214 rows appended (Total: 5895910)
  💾 Memory after chunk 2: 63.6%

📦 Processing chunk 3/10 (10 files)
💾 Memory usage: 63.6%


Chunk 3:   0%|          | 0/10 [00:00<?, ?it/s]

  ✅ THER15Q3.txt: 494584 rows, 10 cols
  ✅ THER15Q3.txt: 494584 rows, 10 cols
  ✅ THER15Q4.txt: 401725 rows, 10 cols
  ✅ THER15Q4.txt: 401725 rows, 10 cols
  ✅ THER16Q1.txt: 458946 rows, 10 cols
  ✅ THER16Q1.txt: 458946 rows, 10 cols
  ✅ THER16Q2.txt: 464701 rows, 10 cols
  ✅ THER16Q2.txt: 464701 rows, 10 cols
  ✅ THER16Q3.txt: 483082 rows, 10 cols
  ✅ THER16Q3.txt: 483082 rows, 10 cols
  🔗 Combining 10 dataframes from chunk 3
  ✅ Chunk 3: 4606076 rows appended (Total: 10501986)
  💾 Memory after chunk 3: 63.4%

📦 Processing chunk 4/10 (10 files)
💾 Memory usage: 63.4%


Chunk 4:   0%|          | 0/10 [00:00<?, ?it/s]

  ✅ THER16Q4.txt: 433545 rows, 10 cols
  ✅ THER16Q4.txt: 433545 rows, 10 cols
  ✅ THER17Q1.txt: 455411 rows, 10 cols
  ✅ THER17Q1.txt: 455411 rows, 10 cols
  ✅ THER17Q2.txt: 454078 rows, 10 cols
  ✅ THER17Q2.txt: 454078 rows, 10 cols
  ✅ THER17Q3.txt: 443213 rows, 10 cols
  ✅ THER17Q3.txt: 443213 rows, 10 cols
  ✅ THER17Q4.txt: 442433 rows, 10 cols
  ✅ THER17Q4.txt: 442433 rows, 10 cols
  🔗 Combining 10 dataframes from chunk 4
  ✅ Chunk 4: 4457360 rows appended (Total: 14959346)
  💾 Memory after chunk 4: 63.8%

📦 Processing chunk 5/10 (10 files)
💾 Memory usage: 63.8%


Chunk 5:   0%|          | 0/10 [00:00<?, ?it/s]

  ✅ THER18Q1.txt: 551394 rows, 10 cols
  ✅ THER18Q1.txt: 551394 rows, 10 cols
  ✅ THER18Q2.txt: 688053 rows, 10 cols
  ✅ THER18Q2.txt: 688053 rows, 10 cols
  ✅ THER18Q3.txt: 663319 rows, 10 cols
  ✅ THER18Q3.txt: 663319 rows, 10 cols
  ✅ THER18Q4.txt: 620308 rows, 10 cols
  ✅ THER18Q4.txt: 620308 rows, 10 cols
  ✅ THER19Q1.txt: 627355 rows, 10 cols
  ✅ THER19Q1.txt: 627355 rows, 10 cols
  🔗 Combining 10 dataframes from chunk 5
  ✅ Chunk 5: 6300858 rows appended (Total: 21260204)
  💾 Memory after chunk 5: 63.3%

📦 Processing chunk 6/10 (10 files)
💾 Memory usage: 63.3%


Chunk 6:   0%|          | 0/10 [00:00<?, ?it/s]

  ✅ THER19Q2.txt: 747535 rows, 10 cols
  ✅ THER19Q2.txt: 747535 rows, 10 cols
  ✅ THER19Q3.txt: 776039 rows, 10 cols
  ✅ THER19Q3.txt: 776039 rows, 10 cols
  ✅ THER19Q4.txt: 643525 rows, 10 cols
  ✅ THER19Q4.txt: 643525 rows, 10 cols
  ✅ THER20Q1.txt: 728199 rows, 10 cols
  ✅ THER20Q1.txt: 728199 rows, 10 cols
  ✅ THER20Q2.txt: 636959 rows, 10 cols
  ✅ THER20Q2.txt: 636959 rows, 10 cols
  🔗 Combining 10 dataframes from chunk 6
  ✅ Chunk 6: 7064514 rows appended (Total: 28324718)
  💾 Memory after chunk 6: 63.3%

📦 Processing chunk 7/10 (10 files)
💾 Memory usage: 63.3%


Chunk 7:   0%|          | 0/10 [00:00<?, ?it/s]

  ✅ THER20Q3.txt: 646462 rows, 10 cols
  ✅ THER20Q3.txt: 646462 rows, 10 cols
  ✅ THER20Q4.txt: 681411 rows, 10 cols
  ✅ THER20Q4.txt: 681411 rows, 10 cols
  ✅ THER21Q1.txt: 786472 rows, 10 cols
  ✅ THER21Q1.txt: 786472 rows, 10 cols
  ✅ THER21Q2.txt: 789968 rows, 10 cols
  ✅ THER21Q2.txt: 789968 rows, 10 cols
  ✅ THER21Q3.txt: 848661 rows, 10 cols
  ✅ THER21Q3.txt: 848661 rows, 10 cols
  🔗 Combining 10 dataframes from chunk 7
  ✅ Chunk 7: 7505948 rows appended (Total: 35830666)
  💾 Memory after chunk 7: 63.3%

📦 Processing chunk 8/10 (10 files)
💾 Memory usage: 63.3%


Chunk 8:   0%|          | 0/10 [00:00<?, ?it/s]

  ✅ THER21Q4.txt: 671045 rows, 10 cols
  ✅ THER21Q4.txt: 671045 rows, 10 cols
  ✅ THER22Q1.txt: 748899 rows, 10 cols
  ✅ THER22Q1.txt: 748899 rows, 10 cols
  ✅ THER22Q2.txt: 690828 rows, 10 cols
  ✅ THER22Q2.txt: 690828 rows, 10 cols
  ✅ THER22Q3.txt: 717902 rows, 10 cols
  ✅ THER22Q3.txt: 717902 rows, 10 cols
  ✅ THER22Q4.txt: 726767 rows, 10 cols
  ✅ THER22Q4.txt: 726767 rows, 10 cols
  🔗 Combining 10 dataframes from chunk 8
  ✅ Chunk 8: 7110882 rows appended (Total: 42941548)
  💾 Memory after chunk 8: 53.8%

📦 Processing chunk 9/10 (10 files)
💾 Memory usage: 53.8%


Chunk 9:   0%|          | 0/10 [00:00<?, ?it/s]

  ✅ THER23Q1.txt: 683408 rows, 10 cols
  ✅ THER23Q1.txt: 683408 rows, 10 cols
  ✅ THER23Q2.txt: 678128 rows, 10 cols
  ✅ THER23Q2.txt: 678128 rows, 10 cols
  ✅ THER23Q3.txt: 593027 rows, 10 cols
  ✅ THER23Q3.txt: 593027 rows, 10 cols
  ✅ THER23Q4.txt: 633087 rows, 10 cols
  ✅ THER23Q4.txt: 633087 rows, 10 cols
  ✅ THER24Q1.txt: 594449 rows, 10 cols
  ✅ THER24Q1.txt: 594449 rows, 10 cols
  🔗 Combining 10 dataframes from chunk 9
  ✅ Chunk 9: 6364198 rows appended (Total: 49305746)
  💾 Memory after chunk 9: 56.3%

📦 Processing chunk 10/10 (6 files)
💾 Memory usage: 56.3%


Chunk 10:   0%|          | 0/6 [00:00<?, ?it/s]

  ✅ THER24Q2.txt: 539334 rows, 10 cols
  ✅ THER24Q2.txt: 539334 rows, 10 cols
  ✅ THER24Q3.txt: 532854 rows, 10 cols
  ✅ THER24Q3.txt: 532854 rows, 10 cols
  ✅ THER24Q4.txt: 561889 rows, 10 cols
  ✅ THER24Q4.txt: 561889 rows, 10 cols
  🔗 Combining 6 dataframes from chunk 10
  ✅ Chunk 10: 3268154 rows appended (Total: 52573900)
  💾 Memory after chunk 10: 57.8%

🎉 Successfully created faers_therapy_combined.csv with 52,573,900 total rows

🎉 ALL PROCESSING COMPLETE!

📊 FINAL SUMMARY:
✅ demo_df: faers_demographics_combined.csv (5507.9 MB)
✅ drug_df: faers_drugs_combined.csv (16206.8 MB)
✅ reac_df: faers_reactions_combined.csv (6326.7 MB)
✅ indi_df: faers_indications_combined.csv (6124.1 MB)
✅ outc_df: faers_outcomes_combined.csv (1076.3 MB)
✅ rpsr_df: faers_reports_combined.csv (67.9 MB)
✅ ther_df: faers_therapy_combined.csv (2837.6 MB)


In [7]:
# ===================================================================
# MEMORY-EFFICIENT FAERS DATA PROCESSOR (2020-2024 ONLY)
# ===================================================================
import pandas as pd
import glob
import os
import re
from tqdm.notebook import tqdm
import psutil
import gc
import time

# --- Configuration ---
base_directory = "/Users/deliciamagdaline/Desktop/Project/faers_menstrual_rag_project/data/raw"
CHUNK_SIZE = 10  # Process files in chunks of 10
MEMORY_THRESHOLD = 85  # Stop if memory usage exceeds 85%
TARGET_YEARS = [2020, 2021, 2022, 2023, 2024]  # 🆕 ONLY THESE YEARS

def check_memory():
    """Check current memory usage"""
    memory_percent = psutil.virtual_memory().percent
    print(f"💾 Memory usage: {memory_percent:.1f}%")
    return memory_percent

def filter_files_by_year(file_list, target_years=TARGET_YEARS):
    """Filter files to only include target years"""
    filtered_files = []
    
    for file_path in file_list:
        dir_name = os.path.basename(os.path.dirname(os.path.dirname(file_path)))
        match = re.match(r'(\d{4})(q\d)', dir_name)
        
        if match:
            year = int(match.group(1))
            if year in target_years:
                filtered_files.append(file_path)
                print(f"  ✅ Including: {os.path.basename(file_path)} ({year})")
            else:
                print(f"  ⏭️  Skipping: {os.path.basename(file_path)} ({year}) - outside target range")
        else:
            print(f"  ❓ Could not extract year from: {file_path}")
    
    print(f"\n📊 Filtered {len(file_list)} files → {len(filtered_files)} files ({min(target_years)}-{max(target_years)})")
    return filtered_files

def find_files(pattern):
    """Find files with case-insensitive search and year filtering"""
    search_patterns = [
        f"{base_directory}/*/ascii/{pattern}",
        f"{base_directory}/*/ASCII/{pattern}",
    ]
    
    all_files = []
    for search_path in search_patterns:
        files = glob.glob(search_path)
        all_files.extend(files)
    
    # Remove duplicates
    unique_files = sorted(list(set(all_files)))
    print(f"Found {len(unique_files)} total files for pattern {pattern}")
    
    # 🆕 FILTER BY YEAR BEFORE PROCESSING
    filtered_files = filter_files_by_year(unique_files, TARGET_YEARS)
    
    return filtered_files

def process_single_file(file_path):
    """Process a single file with memory management"""
    try:
        # Extract metadata
        dir_name = os.path.basename(os.path.dirname(os.path.dirname(file_path)))
        match = re.match(r'(\d{4})(q\d)', dir_name)
        year, quarter = (match.groups() if match else (None, None))
        
        # 🆕 DOUBLE-CHECK YEAR (safety net)
        if year and int(year) not in TARGET_YEARS:
            print(f"  ⏭️  Skipping {os.path.basename(file_path)} - year {year} not in target range")
            return None
        
        # Try multiple delimiters and encodings
        for delimiter in ['$', '\t', '|']:
            for encoding in ['ISO-8859-1', 'utf-8', 'cp1252']:
                try:
                    df = pd.read_csv(
                        file_path, 
                        delimiter=delimiter, 
                        encoding=encoding,
                        on_bad_lines="skip", 
                        low_memory=False,
                        dtype=str  # Read everything as string to save memory
                    )
                    
                    # Check if we got reasonable data
                    if len(df) > 0 and len(df.columns) > 1:
                        df['year'] = year
                        df['quarter'] = quarter
                        df['source_file'] = os.path.basename(file_path)
                        
                        print(f"  ✅ {os.path.basename(file_path)}: {len(df)} rows, {len(df.columns)} cols ({year})")
                        return df
                        
                except Exception as e:
                    continue
        
        print(f"  ❌ Failed to read: {os.path.basename(file_path)}")
        return None
        
    except Exception as e:
        print(f"  ❌ Error processing {file_path}: {e}")
        return None

def combine_files_chunked(file_list, output_filename, chunk_size=CHUNK_SIZE):
    """Combine files in chunks to avoid memory crashes"""
    if not file_list:
        print(f"ERROR: No files found for {output_filename}")
        return None
    
    print(f"\n🔄 Processing {len(file_list)} files in chunks of {chunk_size}")
    print(f"📁 Output: {output_filename}")
    
    # 🆕 ADD YEAR RANGE TO FILENAME
    year_range = f"_{min(TARGET_YEARS)}_{max(TARGET_YEARS)}"
    output_filename = output_filename.replace('.csv', f'{year_range}.csv')
    
    # Remove existing output file if it exists
    if os.path.exists(output_filename):
        os.remove(output_filename)
        print(f"🗑️  Removed existing {output_filename}")
    
    total_rows = 0
    header_written = False
    
    # Process files in chunks
    for i in range(0, len(file_list), chunk_size):
        chunk_files = file_list[i:i+chunk_size]
        chunk_num = (i // chunk_size) + 1
        total_chunks = (len(file_list) + chunk_size - 1) // chunk_size
        
        print(f"\n📦 Processing chunk {chunk_num}/{total_chunks} ({len(chunk_files)} files)")
        
        # Check memory before processing chunk
        memory_percent = check_memory()
        if memory_percent > MEMORY_THRESHOLD:
            print(f"⚠️  Memory usage too high ({memory_percent:.1f}%). Running garbage collection...")
            gc.collect()
            time.sleep(2)
        
        chunk_dfs = []
        
        # Process each file in the chunk
        for file_path in tqdm(chunk_files, desc=f"Chunk {chunk_num}"):
            df = process_single_file(file_path)
            if df is not None:
                chunk_dfs.append(df)
        
        # Combine chunk dataframes
        if chunk_dfs:
            print(f"  🔗 Combining {len(chunk_dfs)} dataframes from chunk {chunk_num}")
            chunk_combined = pd.concat(chunk_dfs, ignore_index=True, sort=False)
            
            # Append to output file
            mode = 'w' if not header_written else 'a'
            header = not header_written
            
            chunk_combined.to_csv(output_filename, mode=mode, header=header, index=False)
            
            chunk_rows = len(chunk_combined)
            total_rows += chunk_rows
            header_written = True
            
            print(f"  ✅ Chunk {chunk_num}: {chunk_rows} rows appended (Total: {total_rows})")
            
            # Clear memory
            del chunk_combined
            del chunk_dfs
            gc.collect()
        
        print(f"  💾 Memory after chunk {chunk_num}: {psutil.virtual_memory().percent:.1f}%")
    
    if total_rows > 0:
        print(f"\n🎉 Successfully created {output_filename} with {total_rows:,} total rows")
        return pd.read_csv(output_filename, nrows=0)  # Return just headers for validation
    else:
        print(f"\n❌ No data processed for {output_filename}")
        return None

def process_data_type(data_type, patterns, resume_from=None):
    """Process a single data type with resume capability"""
    print(f"\n{'='*80}")
    print(f"🚀 PROCESSING: {data_type.upper()} ({min(TARGET_YEARS)}-{max(TARGET_YEARS)})")
    print(f"{'='*80}")
    
    # 🆕 UPDATE OUTPUT FILENAMES WITH YEAR RANGE
    year_suffix = f"_{min(TARGET_YEARS)}_{max(TARGET_YEARS)}"
    output_files = {
        'demo_df': f'faers_demographics_combined{year_suffix}.csv',
        'drug_df': f'faers_drugs_combined{year_suffix}.csv',
        'reac_df': f'faers_reactions_combined{year_suffix}.csv',
        'indi_df': f'faers_indications_combined{year_suffix}.csv',
        'outc_df': f'faers_outcomes_combined{year_suffix}.csv',
        'rpsr_df': f'faers_reports_combined{year_suffix}.csv',
        'ther_df': f'faers_therapy_combined{year_suffix}.csv'
    }
    
    output_filename = output_files.get(data_type, f'faers_{data_type}_combined{year_suffix}.csv')
    
    if os.path.exists(output_filename) and resume_from != data_type:
        print(f"✅ {output_filename} already exists. Skipping...")
        try:
            df = pd.read_csv(output_filename, nrows=100)  # Just check first 100 rows
            print(f"   File appears valid with {len(df.columns)} columns")
            return df
        except:
            print(f"   File appears corrupted. Will reprocess...")
    
    # Find files (already filtered by year in find_files function)
    all_files = []
    for pattern in patterns:
        files = find_files(pattern)
        all_files.extend(files)
    
    unique_files = sorted(list(set(all_files)))
    
    if not unique_files:
        print(f"❌ No files found for {data_type} in years {TARGET_YEARS}")
        return None
    
    print(f"📊 Found {len(unique_files)} files to process for {TARGET_YEARS}")
    
    # Process files
    return combine_files_chunked(unique_files, output_filename.replace(year_suffix, ''))

# --- MAIN EXECUTION WITH YEAR FILTERING ---
def main(resume_from=None, target_years=None):
    """
    Main function with resume capability and year filtering
    resume_from: Skip to this data type (e.g., 'drug_df' to resume from drugs)
    target_years: List of years to process (default: 2020-2024)
    """
    global TARGET_YEARS
    if target_years:
        TARGET_YEARS = target_years
    
    print("🚀 MEMORY-EFFICIENT FAERS PROCESSOR")
    print(f"📅 TARGET YEARS: {TARGET_YEARS}")
    print("="*80)
    check_memory()
    
    file_types = {
        'demo_df': ['demo*.txt', 'DEMO*.txt'],
        'drug_df': ['drug*.txt', 'DRUG*.txt'],
        'reac_df': ['reac*.txt', 'REAC*.txt'],
        'indi_df': ['indi*.txt', 'INDI*.txt'],
        'outc_df': ['outc*.txt', 'OUTC*.txt'],
        'rpsr_df': ['rpsr*.txt', 'RPSR*.txt'],
        'ther_df': ['ther*.txt', 'THER*.txt']
    }
    
    data_frames = {}
    processing_order = list(file_types.keys())
    
    # Find start point if resuming
    start_idx = 0
    if resume_from and resume_from in processing_order:
        start_idx = processing_order.index(resume_from)
        print(f"🔄 RESUMING from {resume_from}")
    
    # Process each data type
    for data_type in processing_order[start_idx:]:
        patterns = file_types[data_type]
        
        try:
            result = process_data_type(data_type, patterns, resume_from)
            if result is not None:
                data_frames[data_type] = result
            
        except KeyboardInterrupt:
            print(f"\n⚠️  Processing interrupted at {data_type}")
            print(f"To resume, run: main(resume_from='{data_type}')")
            return data_frames
            
        except Exception as e:
            print(f"\n❌ Error processing {data_type}: {e}")
            print(f"To resume, run: main(resume_from='{data_type}')")
            continue
    
    print(f"\n🎉 ALL PROCESSING COMPLETE FOR {TARGET_YEARS}!")
    return data_frames

# Run the main function
if __name__ == "__main__":
    # 🆕 EXAMPLES OF HOW TO RUN:
    
    # Default: 2020-2024 only
    data_frames = main()
    
    # Custom year range:
    # data_frames = main(target_years=[2022, 2023, 2024])
    
    # Resume from specific point:
    # data_frames = main(resume_from='drug_df')
    
    # Both custom years and resume:
    # data_frames = main(resume_from='drug_df', target_years=[2020, 2021])
    
    print("\n📊 FINAL SUMMARY:")
    print("="*80)
    for name, df in data_frames.items():
        if df is not None:
            year_suffix = f"_{min(TARGET_YEARS)}_{max(TARGET_YEARS)}"
            output_files = {
                'demo_df': f'faers_demographics_combined{year_suffix}.csv',
                'drug_df': f'faers_drugs_combined{year_suffix}.csv',
                'reac_df': f'faers_reactions_combined{year_suffix}.csv',
                'indi_df': f'faers_indications_combined{year_suffix}.csv',
                'outc_df': f'faers_outcomes_combined{year_suffix}.csv',
                'rpsr_df': f'faers_reports_combined{year_suffix}.csv',
                'ther_df': f'faers_therapy_combined{year_suffix}.csv'
            }
            filename = output_files[name]
            if os.path.exists(filename):
                size_mb = os.path.getsize(filename) / 1024 / 1024
                print(f"✅ {name}: {filename} ({size_mb:.1f} MB)")
        else:
            print(f"❌ {name}: Not processed")

🚀 MEMORY-EFFICIENT FAERS PROCESSOR
📅 TARGET YEARS: [2020, 2021, 2022, 2023, 2024]
💾 Memory usage: 85.9%

🚀 PROCESSING: DEMO_DF (2020-2024)
Found 96 total files for pattern demo*.txt
  ⏭️  Skipping: demo13q1.txt (2013) - outside target range
  ⏭️  Skipping: demo13q1.txt (2013) - outside target range
  ⏭️  Skipping: demo13q2.txt (2013) - outside target range
  ⏭️  Skipping: demo13q2.txt (2013) - outside target range
  ⏭️  Skipping: demo13q3.txt (2013) - outside target range
  ⏭️  Skipping: demo13q3.txt (2013) - outside target range
  ⏭️  Skipping: demo13q4.txt (2013) - outside target range
  ⏭️  Skipping: demo13q4.txt (2013) - outside target range
  ⏭️  Skipping: demo14q1.txt (2014) - outside target range
  ⏭️  Skipping: demo14q1.txt (2014) - outside target range
  ⏭️  Skipping: demo14q2.txt (2014) - outside target range
  ⏭️  Skipping: demo14q2.txt (2014) - outside target range
  ⏭️  Skipping: demo14q3.txt (2014) - outside target range
  ⏭️  Skipping: demo14q3.txt (2014) - outside targe

Chunk 1:   0%|          | 0/10 [00:00<?, ?it/s]

  ✅ demo20q1.txt: 460327 rows, 28 cols (2020)
  ✅ demo20q1.txt: 460327 rows, 28 cols (2020)
  ✅ demo20q2.txt: 429227 rows, 28 cols (2020)
  ✅ demo20q2.txt: 429227 rows, 28 cols (2020)
  ✅ demo20q3.txt: 431667 rows, 28 cols (2020)
  ✅ demo20q3.txt: 431667 rows, 28 cols (2020)
  ✅ demo20q4.txt: 436148 rows, 28 cols (2020)
  ✅ demo20q4.txt: 436148 rows, 28 cols (2020)
  ✅ demo21q1.txt: 463741 rows, 28 cols (2021)
  ✅ demo21q1.txt: 463741 rows, 28 cols (2021)
  🔗 Combining 10 dataframes from chunk 1
  ✅ Chunk 1: 4442220 rows appended (Total: 4442220)
  💾 Memory after chunk 1: 61.4%

📦 Processing chunk 2/4 (10 files)
💾 Memory usage: 61.4%


Chunk 2:   0%|          | 0/10 [00:00<?, ?it/s]

  ✅ demo21q2.txt: 479945 rows, 28 cols (2021)
  ✅ demo21q2.txt: 479945 rows, 28 cols (2021)
  ✅ demo21q3.txt: 504160 rows, 28 cols (2021)
  ✅ demo21q3.txt: 504160 rows, 28 cols (2021)
  ✅ demo21q4.txt: 412542 rows, 28 cols (2021)
  ✅ demo21q4.txt: 412542 rows, 28 cols (2021)
  ✅ demo22q1.txt: 461623 rows, 28 cols (2022)
  ✅ demo22q1.txt: 461623 rows, 28 cols (2022)
  ✅ demo22q2.txt: 435618 rows, 28 cols (2022)
  ✅ demo22q2.txt: 435618 rows, 28 cols (2022)
  🔗 Combining 10 dataframes from chunk 2
  ✅ Chunk 2: 4587776 rows appended (Total: 9029996)
  💾 Memory after chunk 2: 55.6%

📦 Processing chunk 3/4 (10 files)
💾 Memory usage: 55.6%


Chunk 3:   0%|          | 0/10 [00:00<?, ?it/s]

  ✅ demo22q3.txt: 446511 rows, 28 cols (2022)
  ✅ demo22q3.txt: 446511 rows, 28 cols (2022)
  ✅ demo22q4.txt: 483643 rows, 28 cols (2022)
  ✅ demo22q4.txt: 483643 rows, 28 cols (2022)
  ✅ demo23q1.txt: 432144 rows, 28 cols (2023)
  ✅ demo23q1.txt: 432144 rows, 28 cols (2023)
  ✅ demo23q2.txt: 418592 rows, 28 cols (2023)
  ✅ demo23q2.txt: 418592 rows, 28 cols (2023)
  ✅ demo23q3.txt: 407522 rows, 28 cols (2023)
  ✅ demo23q3.txt: 407522 rows, 28 cols (2023)
  🔗 Combining 10 dataframes from chunk 3
  ✅ Chunk 3: 4376824 rows appended (Total: 13406820)
  💾 Memory after chunk 3: 55.6%

📦 Processing chunk 4/4 (10 files)
💾 Memory usage: 55.6%


Chunk 4:   0%|          | 0/10 [00:00<?, ?it/s]

  ✅ demo23q4.txt: 415379 rows, 28 cols (2023)
  ✅ demo23q4.txt: 415379 rows, 28 cols (2023)
  ✅ demo24q1.txt: 406184 rows, 28 cols (2024)
  ✅ demo24q1.txt: 406184 rows, 28 cols (2024)
  ✅ demo24q2.txt: 397119 rows, 28 cols (2024)
  ✅ demo24q2.txt: 397119 rows, 28 cols (2024)
  ✅ demo24q3.txt: 405513 rows, 28 cols (2024)
  ✅ demo24q3.txt: 405513 rows, 28 cols (2024)
  ✅ demo24q4.txt: 410849 rows, 28 cols (2024)
  ✅ demo24q4.txt: 410849 rows, 28 cols (2024)
  🔗 Combining 10 dataframes from chunk 4
  ✅ Chunk 4: 4070088 rows appended (Total: 17476908)
  💾 Memory after chunk 4: 66.0%

🎉 Successfully created faers_demographics_combined_2020_2024.csv with 17,476,908 total rows

🚀 PROCESSING: DRUG_DF (2020-2024)
Found 96 total files for pattern drug*.txt
  ⏭️  Skipping: drug13q1.txt (2013) - outside target range
  ⏭️  Skipping: drug13q1.txt (2013) - outside target range
  ⏭️  Skipping: drug13q2.txt (2013) - outside target range
  ⏭️  Skipping: drug13q2.txt (2013) - outside target range
  ⏭️  S

Chunk 1:   0%|          | 0/10 [00:00<?, ?it/s]

  ✅ drug20q1.txt: 1943532 rows, 23 cols (2020)
  ✅ drug20q1.txt: 1943532 rows, 23 cols (2020)
  ✅ drug20q2.txt: 1825414 rows, 23 cols (2020)
  ✅ drug20q2.txt: 1825414 rows, 23 cols (2020)
  ✅ drug20q3.txt: 1895153 rows, 23 cols (2020)
  ✅ drug20q3.txt: 1895153 rows, 23 cols (2020)
  ✅ drug20q4.txt: 1918927 rows, 23 cols (2020)
  ✅ drug20q4.txt: 1918927 rows, 23 cols (2020)
  ✅ drug21q1.txt: 2208416 rows, 23 cols (2021)
  ✅ drug21q1.txt: 2208416 rows, 23 cols (2021)
  🔗 Combining 10 dataframes from chunk 1
  ✅ Chunk 1: 19582884 rows appended (Total: 19582884)
  💾 Memory after chunk 1: 48.0%

📦 Processing chunk 2/4 (10 files)
💾 Memory usage: 48.0%


Chunk 2:   0%|          | 0/10 [00:00<?, ?it/s]

  ✅ drug21q2.txt: 2291903 rows, 23 cols (2021)
  ✅ drug21q2.txt: 2291903 rows, 23 cols (2021)
  ✅ drug21q3.txt: 2260570 rows, 23 cols (2021)
  ✅ drug21q3.txt: 2260570 rows, 23 cols (2021)
  ✅ drug21q4.txt: 1778675 rows, 23 cols (2021)
  ✅ drug21q4.txt: 1778675 rows, 23 cols (2021)
  ✅ drug22q1.txt: 1994171 rows, 23 cols (2022)
  ✅ drug22q1.txt: 1994171 rows, 23 cols (2022)
  ✅ drug22q2.txt: 1828103 rows, 23 cols (2022)
  ✅ drug22q2.txt: 1828103 rows, 23 cols (2022)
  🔗 Combining 10 dataframes from chunk 2
  ✅ Chunk 2: 20306844 rows appended (Total: 39889728)
  💾 Memory after chunk 2: 44.5%

📦 Processing chunk 3/4 (10 files)
💾 Memory usage: 44.5%


Chunk 3:   0%|          | 0/10 [00:00<?, ?it/s]

  ✅ drug22q3.txt: 1835461 rows, 23 cols (2022)
  ✅ drug22q3.txt: 1835461 rows, 23 cols (2022)
  ✅ drug22q4.txt: 2006967 rows, 23 cols (2022)
  ✅ drug22q4.txt: 2006967 rows, 23 cols (2022)
  ✅ drug23q1.txt: 1899503 rows, 23 cols (2023)
  ✅ drug23q1.txt: 1899503 rows, 23 cols (2023)
  ✅ drug23q2.txt: 1885096 rows, 23 cols (2023)
  ✅ drug23q2.txt: 1885096 rows, 23 cols (2023)
  ✅ drug23q3.txt: 1768391 rows, 23 cols (2023)
  ✅ drug23q3.txt: 1768391 rows, 23 cols (2023)
  🔗 Combining 10 dataframes from chunk 3
  ✅ Chunk 3: 18790836 rows appended (Total: 58680564)
  💾 Memory after chunk 3: 45.3%

📦 Processing chunk 4/4 (10 files)
💾 Memory usage: 45.3%


Chunk 4:   0%|          | 0/10 [00:00<?, ?it/s]

  ✅ drug23q4.txt: 1920732 rows, 23 cols (2023)
  ✅ drug23q4.txt: 1920732 rows, 23 cols (2023)
  ✅ drug24q1.txt: 1909327 rows, 23 cols (2024)
  ✅ drug24q1.txt: 1909327 rows, 23 cols (2024)
  ✅ drug24q2.txt: 1888937 rows, 23 cols (2024)
  ✅ drug24q2.txt: 1888937 rows, 23 cols (2024)
  ✅ drug24q3.txt: 1907293 rows, 23 cols (2024)
  ✅ drug24q3.txt: 1907293 rows, 23 cols (2024)
  ✅ drug24q4.txt: 2030938 rows, 23 cols (2024)
  ✅ drug24q4.txt: 2030938 rows, 23 cols (2024)
  🔗 Combining 10 dataframes from chunk 4
  ✅ Chunk 4: 19314454 rows appended (Total: 77995018)
  💾 Memory after chunk 4: 43.7%

🎉 Successfully created faers_drugs_combined_2020_2024.csv with 77,995,018 total rows

🚀 PROCESSING: REAC_DF (2020-2024)
Found 96 total files for pattern reac*.txt
  ⏭️  Skipping: reac13q1.txt (2013) - outside target range
  ⏭️  Skipping: reac13q1.txt (2013) - outside target range
  ⏭️  Skipping: reac13q2.txt (2013) - outside target range
  ⏭️  Skipping: reac13q2.txt (2013) - outside target range
  ⏭

Chunk 1:   0%|          | 0/10 [00:00<?, ?it/s]

  ✅ reac20q1.txt: 1517264 rows, 7 cols (2020)
  ✅ reac20q1.txt: 1517264 rows, 7 cols (2020)
  ✅ reac20q2.txt: 1437285 rows, 7 cols (2020)
  ✅ reac20q2.txt: 1437285 rows, 7 cols (2020)
  ✅ reac20q3.txt: 1454044 rows, 7 cols (2020)
  ✅ reac20q3.txt: 1454044 rows, 7 cols (2020)
  ✅ reac20q4.txt: 1522657 rows, 7 cols (2020)
  ✅ reac20q4.txt: 1522657 rows, 7 cols (2020)
  ✅ reac21q1.txt: 1505167 rows, 7 cols (2021)
  ✅ reac21q1.txt: 1505167 rows, 7 cols (2021)
  🔗 Combining 10 dataframes from chunk 1
  ✅ Chunk 1: 14872834 rows appended (Total: 14872834)
  💾 Memory after chunk 1: 57.8%

📦 Processing chunk 2/4 (10 files)
💾 Memory usage: 57.8%


Chunk 2:   0%|          | 0/10 [00:00<?, ?it/s]

  ✅ reac21q2.txt: 1526544 rows, 7 cols (2021)
  ✅ reac21q2.txt: 1526544 rows, 7 cols (2021)
  ✅ reac21q3.txt: 1544374 rows, 7 cols (2021)
  ✅ reac21q3.txt: 1544374 rows, 7 cols (2021)
  ✅ reac21q4.txt: 1355734 rows, 7 cols (2021)
  ✅ reac21q4.txt: 1355734 rows, 7 cols (2021)
  ✅ reac22q1.txt: 1543059 rows, 7 cols (2022)
  ✅ reac22q1.txt: 1543059 rows, 7 cols (2022)
  ✅ reac22q2.txt: 1464627 rows, 7 cols (2022)
  ✅ reac22q2.txt: 1464627 rows, 7 cols (2022)
  🔗 Combining 10 dataframes from chunk 2
  ✅ Chunk 2: 14868676 rows appended (Total: 29741510)
  💾 Memory after chunk 2: 55.8%

📦 Processing chunk 3/4 (10 files)
💾 Memory usage: 55.8%


Chunk 3:   0%|          | 0/10 [00:00<?, ?it/s]

  ✅ reac22q3.txt: 1449509 rows, 7 cols (2022)
  ✅ reac22q3.txt: 1449509 rows, 7 cols (2022)
  ✅ reac22q4.txt: 1617584 rows, 7 cols (2022)
  ✅ reac22q4.txt: 1617584 rows, 7 cols (2022)
  ✅ reac23q1.txt: 1491473 rows, 7 cols (2023)
  ✅ reac23q1.txt: 1491473 rows, 7 cols (2023)
  ✅ reac23q2.txt: 1478973 rows, 7 cols (2023)
  ✅ reac23q2.txt: 1478973 rows, 7 cols (2023)
  ✅ reac23q3.txt: 1373338 rows, 7 cols (2023)
  ✅ reac23q3.txt: 1373338 rows, 7 cols (2023)
  🔗 Combining 10 dataframes from chunk 3
  ✅ Chunk 3: 14821754 rows appended (Total: 44563264)
  💾 Memory after chunk 3: 58.3%

📦 Processing chunk 4/4 (10 files)
💾 Memory usage: 58.3%


Chunk 4:   0%|          | 0/10 [00:00<?, ?it/s]

  ✅ reac23q4.txt: 1500033 rows, 7 cols (2023)
  ✅ reac23q4.txt: 1500033 rows, 7 cols (2023)
  ✅ reac24q1.txt: 1445416 rows, 7 cols (2024)
  ✅ reac24q1.txt: 1445416 rows, 7 cols (2024)
  ✅ reac24q2.txt: 1445044 rows, 7 cols (2024)
  ✅ reac24q2.txt: 1445044 rows, 7 cols (2024)
  ✅ reac24q3.txt: 1431718 rows, 7 cols (2024)
  ✅ reac24q3.txt: 1431718 rows, 7 cols (2024)
  ✅ reac24q4.txt: 1472750 rows, 7 cols (2024)
  ✅ reac24q4.txt: 1472750 rows, 7 cols (2024)
  🔗 Combining 10 dataframes from chunk 4
  ✅ Chunk 4: 14589922 rows appended (Total: 59153186)
  💾 Memory after chunk 4: 60.5%

🎉 Successfully created faers_reactions_combined_2020_2024.csv with 59,153,186 total rows

🚀 PROCESSING: INDI_DF (2020-2024)
Found 0 total files for pattern indi*.txt

📊 Filtered 0 files → 0 files (2020-2024)
Found 96 total files for pattern INDI*.txt
  ⏭️  Skipping: INDI13Q1.txt (2013) - outside target range
  ⏭️  Skipping: INDI13Q1.txt (2013) - outside target range
  ⏭️  Skipping: INDI13Q2.txt (2013) - outsi

Chunk 1:   0%|          | 0/10 [00:00<?, ?it/s]

  ✅ INDI20Q1.txt: 1348658 rows, 7 cols (2020)
  ✅ INDI20Q1.txt: 1348658 rows, 7 cols (2020)
  ✅ INDI20Q2.txt: 1294010 rows, 7 cols (2020)
  ✅ INDI20Q2.txt: 1294010 rows, 7 cols (2020)
  ✅ INDI20Q3.txt: 1276348 rows, 7 cols (2020)
  ✅ INDI20Q3.txt: 1276348 rows, 7 cols (2020)
  ✅ INDI20Q4.txt: 1297446 rows, 7 cols (2020)
  ✅ INDI20Q4.txt: 1297446 rows, 7 cols (2020)
  ✅ INDI21Q1.txt: 1603039 rows, 7 cols (2021)
  ✅ INDI21Q1.txt: 1603039 rows, 7 cols (2021)
  🔗 Combining 10 dataframes from chunk 1
  ✅ Chunk 1: 13639002 rows appended (Total: 13639002)
  💾 Memory after chunk 1: 63.5%

📦 Processing chunk 2/4 (10 files)
💾 Memory usage: 63.5%


Chunk 2:   0%|          | 0/10 [00:00<?, ?it/s]

  ✅ INDI21Q2.txt: 1659378 rows, 7 cols (2021)
  ✅ INDI21Q2.txt: 1659378 rows, 7 cols (2021)
  ✅ INDI21Q3.txt: 1588679 rows, 7 cols (2021)
  ✅ INDI21Q3.txt: 1588679 rows, 7 cols (2021)
  ✅ INDI21Q4.txt: 1234766 rows, 7 cols (2021)
  ✅ INDI21Q4.txt: 1234766 rows, 7 cols (2021)
  ✅ INDI22Q1.txt: 1347146 rows, 7 cols (2022)
  ✅ INDI22Q1.txt: 1347146 rows, 7 cols (2022)
  ✅ INDI22Q2.txt: 1150299 rows, 7 cols (2022)
  ✅ INDI22Q2.txt: 1150299 rows, 7 cols (2022)
  🔗 Combining 10 dataframes from chunk 2
  ✅ Chunk 2: 13960536 rows appended (Total: 27599538)
  💾 Memory after chunk 2: 65.0%

📦 Processing chunk 3/4 (10 files)
💾 Memory usage: 65.0%


Chunk 3:   0%|          | 0/10 [00:00<?, ?it/s]

  ✅ INDI22Q3.txt: 1159203 rows, 7 cols (2022)
  ✅ INDI22Q3.txt: 1159203 rows, 7 cols (2022)
  ✅ INDI22Q4.txt: 1321489 rows, 7 cols (2022)
  ✅ INDI22Q4.txt: 1321489 rows, 7 cols (2022)
  ✅ INDI23Q1.txt: 1176237 rows, 7 cols (2023)
  ✅ INDI23Q1.txt: 1176237 rows, 7 cols (2023)
  ✅ INDI23Q2.txt: 1165782 rows, 7 cols (2023)
  ✅ INDI23Q2.txt: 1165782 rows, 7 cols (2023)
  ✅ INDI23Q3.txt: 1063761 rows, 7 cols (2023)
  ✅ INDI23Q3.txt: 1063761 rows, 7 cols (2023)
  🔗 Combining 10 dataframes from chunk 3
  ✅ Chunk 3: 11772944 rows appended (Total: 39372482)
  💾 Memory after chunk 3: 62.7%

📦 Processing chunk 4/4 (10 files)
💾 Memory usage: 62.6%


Chunk 4:   0%|          | 0/10 [00:00<?, ?it/s]

  ✅ INDI23Q4.txt: 1115961 rows, 7 cols (2023)
  ✅ INDI23Q4.txt: 1115961 rows, 7 cols (2023)
  ✅ INDI24Q1.txt: 1186115 rows, 7 cols (2024)
  ✅ INDI24Q1.txt: 1186115 rows, 7 cols (2024)
  ✅ INDI24Q2.txt: 1187626 rows, 7 cols (2024)
  ✅ INDI24Q2.txt: 1187626 rows, 7 cols (2024)
  ✅ INDI24Q3.txt: 1177133 rows, 7 cols (2024)
  ✅ INDI24Q3.txt: 1177133 rows, 7 cols (2024)
  ✅ INDI24Q4.txt: 1219759 rows, 7 cols (2024)
  ✅ INDI24Q4.txt: 1219759 rows, 7 cols (2024)
  🔗 Combining 10 dataframes from chunk 4
  ✅ Chunk 4: 11773188 rows appended (Total: 51145670)
  💾 Memory after chunk 4: 63.0%

🎉 Successfully created faers_indications_combined_2020_2024.csv with 51,145,670 total rows

🚀 PROCESSING: OUTC_DF (2020-2024)
Found 0 total files for pattern outc*.txt

📊 Filtered 0 files → 0 files (2020-2024)
Found 96 total files for pattern OUTC*.txt
  ⏭️  Skipping: OUTC13Q1.txt (2013) - outside target range
  ⏭️  Skipping: OUTC13Q1.txt (2013) - outside target range
  ⏭️  Skipping: OUTC13Q2.txt (2013) - out

Chunk 1:   0%|          | 0/10 [00:00<?, ?it/s]

  ✅ OUTC20Q1.txt: 335470 rows, 6 cols (2020)
  ✅ OUTC20Q1.txt: 335470 rows, 6 cols (2020)
  ✅ OUTC20Q2.txt: 307509 rows, 6 cols (2020)
  ✅ OUTC20Q2.txt: 307509 rows, 6 cols (2020)
  ✅ OUTC20Q3.txt: 358815 rows, 6 cols (2020)
  ✅ OUTC20Q3.txt: 358815 rows, 6 cols (2020)
  ✅ OUTC20Q4.txt: 365575 rows, 6 cols (2020)
  ✅ OUTC20Q4.txt: 365575 rows, 6 cols (2020)
  ✅ OUTC21Q1.txt: 371698 rows, 6 cols (2021)
  ✅ OUTC21Q1.txt: 371698 rows, 6 cols (2021)
  🔗 Combining 10 dataframes from chunk 1
  ✅ Chunk 1: 3478134 rows appended (Total: 3478134)
  💾 Memory after chunk 1: 64.0%

📦 Processing chunk 2/4 (10 files)
💾 Memory usage: 64.0%


Chunk 2:   0%|          | 0/10 [00:00<?, ?it/s]

  ✅ OUTC21Q2.txt: 383928 rows, 6 cols (2021)
  ✅ OUTC21Q2.txt: 383928 rows, 6 cols (2021)
  ✅ OUTC21Q3.txt: 420729 rows, 6 cols (2021)
  ✅ OUTC21Q3.txt: 420729 rows, 6 cols (2021)
  ✅ OUTC21Q4.txt: 337168 rows, 6 cols (2021)
  ✅ OUTC21Q4.txt: 337168 rows, 6 cols (2021)
  ✅ OUTC22Q1.txt: 375497 rows, 6 cols (2022)
  ✅ OUTC22Q1.txt: 375497 rows, 6 cols (2022)
  ✅ OUTC22Q2.txt: 325309 rows, 6 cols (2022)
  ✅ OUTC22Q2.txt: 325309 rows, 6 cols (2022)
  🔗 Combining 10 dataframes from chunk 2
  ✅ Chunk 2: 3685262 rows appended (Total: 7163396)
  💾 Memory after chunk 2: 64.9%

📦 Processing chunk 3/4 (10 files)
💾 Memory usage: 64.9%


Chunk 3:   0%|          | 0/10 [00:00<?, ?it/s]

  ✅ OUTC22Q3.txt: 345763 rows, 6 cols (2022)
  ✅ OUTC22Q3.txt: 345763 rows, 6 cols (2022)
  ✅ OUTC22Q4.txt: 334611 rows, 6 cols (2022)
  ✅ OUTC22Q4.txt: 334611 rows, 6 cols (2022)
  ✅ OUTC23Q1.txt: 309217 rows, 6 cols (2023)
  ✅ OUTC23Q1.txt: 309217 rows, 6 cols (2023)
  ✅ OUTC23Q2.txt: 303513 rows, 6 cols (2023)
  ✅ OUTC23Q2.txt: 303513 rows, 6 cols (2023)
  ✅ OUTC23Q3.txt: 307396 rows, 6 cols (2023)
  ✅ OUTC23Q3.txt: 307396 rows, 6 cols (2023)
  🔗 Combining 10 dataframes from chunk 3
  ✅ Chunk 3: 3201000 rows appended (Total: 10364396)
  💾 Memory after chunk 3: 65.5%

📦 Processing chunk 4/4 (10 files)
💾 Memory usage: 65.5%


Chunk 4:   0%|          | 0/10 [00:00<?, ?it/s]

  ✅ OUTC23Q4.txt: 327797 rows, 6 cols (2023)
  ✅ OUTC23Q4.txt: 327797 rows, 6 cols (2023)
  ✅ OUTC24Q1.txt: 295044 rows, 6 cols (2024)
  ✅ OUTC24Q1.txt: 295044 rows, 6 cols (2024)
  ✅ OUTC24Q2.txt: 291572 rows, 6 cols (2024)
  ✅ OUTC24Q2.txt: 291572 rows, 6 cols (2024)
  ✅ OUTC24Q3.txt: 288275 rows, 6 cols (2024)
  ✅ OUTC24Q3.txt: 288275 rows, 6 cols (2024)
  ✅ OUTC24Q4.txt: 308960 rows, 6 cols (2024)
  ✅ OUTC24Q4.txt: 308960 rows, 6 cols (2024)
  🔗 Combining 10 dataframes from chunk 4
  ✅ Chunk 4: 3023296 rows appended (Total: 13387692)
  💾 Memory after chunk 4: 65.7%

🎉 Successfully created faers_outcomes_combined_2020_2024.csv with 13,387,692 total rows

🚀 PROCESSING: RPSR_DF (2020-2024)
Found 0 total files for pattern rpsr*.txt

📊 Filtered 0 files → 0 files (2020-2024)
Found 96 total files for pattern RPSR*.txt
  ⏭️  Skipping: RPSR13Q1.txt (2013) - outside target range
  ⏭️  Skipping: RPSR13Q1.txt (2013) - outside target range
  ⏭️  Skipping: RPSR13Q2.txt (2013) - outside target ra

Chunk 1:   0%|          | 0/10 [00:00<?, ?it/s]

  ✅ RPSR20Q1.txt: 15492 rows, 6 cols (2020)
  ✅ RPSR20Q1.txt: 15492 rows, 6 cols (2020)
  ✅ RPSR20Q2.txt: 13094 rows, 6 cols (2020)
  ✅ RPSR20Q2.txt: 13094 rows, 6 cols (2020)
  ✅ RPSR20Q3.txt: 17281 rows, 6 cols (2020)
  ✅ RPSR20Q3.txt: 17281 rows, 6 cols (2020)
  ✅ RPSR20Q4.txt: 14477 rows, 6 cols (2020)
  ✅ RPSR20Q4.txt: 14477 rows, 6 cols (2020)
  ✅ RPSR21Q1.txt: 14046 rows, 6 cols (2021)
  ✅ RPSR21Q1.txt: 14046 rows, 6 cols (2021)
  🔗 Combining 10 dataframes from chunk 1
  ✅ Chunk 1: 148780 rows appended (Total: 148780)
  💾 Memory after chunk 1: 65.7%

📦 Processing chunk 2/4 (10 files)
💾 Memory usage: 65.7%


Chunk 2:   0%|          | 0/10 [00:00<?, ?it/s]

  ✅ RPSR21Q2.txt: 13123 rows, 6 cols (2021)
  ✅ RPSR21Q2.txt: 13123 rows, 6 cols (2021)
  ✅ RPSR21Q3.txt: 16855 rows, 6 cols (2021)
  ✅ RPSR21Q3.txt: 16855 rows, 6 cols (2021)
  ✅ RPSR21Q4.txt: 4936 rows, 6 cols (2021)
  ✅ RPSR21Q4.txt: 4936 rows, 6 cols (2021)
  ✅ RPSR22Q1.txt: 13091 rows, 6 cols (2022)
  ✅ RPSR22Q1.txt: 13091 rows, 6 cols (2022)
  ✅ RPSR22Q2.txt: 13867 rows, 6 cols (2022)
  ✅ RPSR22Q2.txt: 13867 rows, 6 cols (2022)
  🔗 Combining 10 dataframes from chunk 2
  ✅ Chunk 2: 123744 rows appended (Total: 272524)
  💾 Memory after chunk 2: 66.0%

📦 Processing chunk 3/4 (10 files)
💾 Memory usage: 66.0%


Chunk 3:   0%|          | 0/10 [00:00<?, ?it/s]

  ✅ RPSR22Q3.txt: 14727 rows, 6 cols (2022)
  ✅ RPSR22Q3.txt: 14727 rows, 6 cols (2022)
  ✅ RPSR22Q4.txt: 14398 rows, 6 cols (2022)
  ✅ RPSR22Q4.txt: 14398 rows, 6 cols (2022)
  ✅ RPSR23Q1.txt: 13851 rows, 6 cols (2023)
  ✅ RPSR23Q1.txt: 13851 rows, 6 cols (2023)
  ✅ RPSR23Q2.txt: 13884 rows, 6 cols (2023)
  ✅ RPSR23Q2.txt: 13884 rows, 6 cols (2023)
  ✅ RPSR23Q3.txt: 11524 rows, 6 cols (2023)
  ✅ RPSR23Q3.txt: 11524 rows, 6 cols (2023)
  🔗 Combining 10 dataframes from chunk 3
  ✅ Chunk 3: 136768 rows appended (Total: 409292)
  💾 Memory after chunk 3: 67.2%

📦 Processing chunk 4/4 (10 files)
💾 Memory usage: 67.2%


Chunk 4:   0%|          | 0/10 [00:00<?, ?it/s]

  ✅ RPSR23Q4.txt: 13238 rows, 6 cols (2023)
  ✅ RPSR23Q4.txt: 13238 rows, 6 cols (2023)
  ✅ RPSR24Q1.txt: 12381 rows, 6 cols (2024)
  ✅ RPSR24Q1.txt: 12381 rows, 6 cols (2024)
  ✅ RPSR24Q2.txt: 11517 rows, 6 cols (2024)
  ✅ RPSR24Q2.txt: 11517 rows, 6 cols (2024)
  ✅ RPSR24Q3.txt: 10087 rows, 6 cols (2024)
  ✅ RPSR24Q3.txt: 10087 rows, 6 cols (2024)
  ✅ RPSR24Q4.txt: 11627 rows, 6 cols (2024)
  ✅ RPSR24Q4.txt: 11627 rows, 6 cols (2024)
  🔗 Combining 10 dataframes from chunk 4
  ✅ Chunk 4: 117700 rows appended (Total: 526992)
  💾 Memory after chunk 4: 67.1%

🎉 Successfully created faers_reports_combined_2020_2024.csv with 526,992 total rows

🚀 PROCESSING: THER_DF (2020-2024)
Found 0 total files for pattern ther*.txt

📊 Filtered 0 files → 0 files (2020-2024)
Found 96 total files for pattern THER*.txt
  ⏭️  Skipping: THER13Q1.txt (2013) - outside target range
  ⏭️  Skipping: THER13Q1.txt (2013) - outside target range
  ⏭️  Skipping: THER13Q2.txt (2013) - outside target range
  ⏭️  Skippin

Chunk 1:   0%|          | 0/10 [00:00<?, ?it/s]

  ✅ THER20Q1.txt: 728199 rows, 10 cols (2020)
  ✅ THER20Q1.txt: 728199 rows, 10 cols (2020)
  ✅ THER20Q2.txt: 636959 rows, 10 cols (2020)
  ✅ THER20Q2.txt: 636959 rows, 10 cols (2020)
  ✅ THER20Q3.txt: 646462 rows, 10 cols (2020)
  ✅ THER20Q3.txt: 646462 rows, 10 cols (2020)
  ✅ THER20Q4.txt: 681411 rows, 10 cols (2020)
  ✅ THER20Q4.txt: 681411 rows, 10 cols (2020)
  ✅ THER21Q1.txt: 786472 rows, 10 cols (2021)
  ✅ THER21Q1.txt: 786472 rows, 10 cols (2021)
  🔗 Combining 10 dataframes from chunk 1
  ✅ Chunk 1: 6959006 rows appended (Total: 6959006)
  💾 Memory after chunk 1: 55.4%

📦 Processing chunk 2/4 (10 files)
💾 Memory usage: 55.3%


Chunk 2:   0%|          | 0/10 [00:00<?, ?it/s]

  ✅ THER21Q2.txt: 789968 rows, 10 cols (2021)
  ✅ THER21Q2.txt: 789968 rows, 10 cols (2021)
  ✅ THER21Q3.txt: 848661 rows, 10 cols (2021)
  ✅ THER21Q3.txt: 848661 rows, 10 cols (2021)
  ✅ THER21Q4.txt: 671045 rows, 10 cols (2021)
  ✅ THER21Q4.txt: 671045 rows, 10 cols (2021)
  ✅ THER22Q1.txt: 748899 rows, 10 cols (2022)
  ✅ THER22Q1.txt: 748899 rows, 10 cols (2022)
  ✅ THER22Q2.txt: 690828 rows, 10 cols (2022)
  ✅ THER22Q2.txt: 690828 rows, 10 cols (2022)
  🔗 Combining 10 dataframes from chunk 2
  ✅ Chunk 2: 7498802 rows appended (Total: 14457808)
  💾 Memory after chunk 2: 58.7%

📦 Processing chunk 3/4 (10 files)
💾 Memory usage: 58.7%


Chunk 3:   0%|          | 0/10 [00:00<?, ?it/s]

  ✅ THER22Q3.txt: 717902 rows, 10 cols (2022)
  ✅ THER22Q3.txt: 717902 rows, 10 cols (2022)
  ✅ THER22Q4.txt: 726767 rows, 10 cols (2022)
  ✅ THER22Q4.txt: 726767 rows, 10 cols (2022)
  ✅ THER23Q1.txt: 683408 rows, 10 cols (2023)
  ✅ THER23Q1.txt: 683408 rows, 10 cols (2023)
  ✅ THER23Q2.txt: 678128 rows, 10 cols (2023)
  ✅ THER23Q2.txt: 678128 rows, 10 cols (2023)
  ✅ THER23Q3.txt: 593027 rows, 10 cols (2023)
  ✅ THER23Q3.txt: 593027 rows, 10 cols (2023)
  🔗 Combining 10 dataframes from chunk 3
  ✅ Chunk 3: 6798464 rows appended (Total: 21256272)
  💾 Memory after chunk 3: 66.9%

📦 Processing chunk 4/4 (10 files)
💾 Memory usage: 66.9%


Chunk 4:   0%|          | 0/10 [00:00<?, ?it/s]

  ✅ THER23Q4.txt: 633087 rows, 10 cols (2023)
  ✅ THER23Q4.txt: 633087 rows, 10 cols (2023)
  ✅ THER24Q1.txt: 594449 rows, 10 cols (2024)
  ✅ THER24Q1.txt: 594449 rows, 10 cols (2024)
  ✅ THER24Q2.txt: 539334 rows, 10 cols (2024)
  ✅ THER24Q2.txt: 539334 rows, 10 cols (2024)
  ✅ THER24Q3.txt: 532854 rows, 10 cols (2024)
  ✅ THER24Q3.txt: 532854 rows, 10 cols (2024)
  ✅ THER24Q4.txt: 561889 rows, 10 cols (2024)
  ✅ THER24Q4.txt: 561889 rows, 10 cols (2024)
  🔗 Combining 10 dataframes from chunk 4
  ✅ Chunk 4: 5723226 rows appended (Total: 26979498)
  💾 Memory after chunk 4: 58.8%

🎉 Successfully created faers_therapy_combined_2020_2024.csv with 26,979,498 total rows

🎉 ALL PROCESSING COMPLETE FOR [2020, 2021, 2022, 2023, 2024]!

📊 FINAL SUMMARY:
✅ demo_df: faers_demographics_combined_2020_2024.csv (2752.1 MB)
✅ drug_df: faers_drugs_combined_2020_2024.csv (8985.0 MB)
✅ reac_df: faers_reactions_combined_2020_2024.csv (3301.0 MB)
✅ indi_df: faers_indications_combined_2020_2024.csv (3314.1 