In [2]:
import pandas as pd
import os

def _rows_containing(df, marker):
    """Return a boolean Series that is True for rows where any cell contains ``marker``.

    Every column is converted to text first, so numeric columns are searched
    as well. The match is a literal (non-regex), case-insensitive substring test.
    """
    as_text = df.astype(str)
    hits = as_text.apply(
        # regex=False: the marker is plain text, so characters such as '(' or '.'
        # must not be interpreted as regular-expression syntax.
        lambda col: col.str.contains(marker, case=False, na=False, regex=False)
    )
    return hits.any(axis=1)


def clean_csv_remove_dnf(input_file, output_file=None, marker='DNF'):
    """
    Read a CSV file, drop every row that contains ``marker`` in any column,
    write the result back to disk and return it.

    Matching is a case-insensitive substring search over the text form of
    every cell, so with the default marker a free-text cell that merely
    mentions "dnf" also causes its row to be dropped.

    Parameters:
    input_file (str): Path to the input CSV file
    output_file (str, optional): Path to save the cleaned file. If None, overwrites the original.
    marker (str, optional): Literal text that marks a row for removal. Defaults to 'DNF'.

    Returns:
    pandas.DataFrame: The rows that were kept, as an independent copy that keeps
        the row labels of the frame read from ``input_file``. The file on disk is
        written without the index.

    Raises:
    ValueError: If ``marker`` is an empty string (it would match every row).
    """
    if marker == '':
        raise ValueError("marker must be a non-empty string")

    # Read the CSV file
    print(f"Reading {input_file}...")
    df = pd.read_csv(input_file)

    print(f"Original shape: {df.shape}")

    # Keep only the rows in which no cell contains the marker.
    # .copy() detaches the result from `df`, so callers can add or modify
    # columns on the returned frame without a SettingWithCopyWarning.
    mask = _rows_containing(df, marker)
    df_cleaned = df.loc[~mask].copy()

    print(f"Shape after removing {marker} rows: {df_cleaned.shape}")
    print(f"Removed {df.shape[0] - df_cleaned.shape[0]} rows containing '{marker}'")

    # Determine output file path (default: overwrite the input in place)
    if output_file is None:
        output_file = input_file

    # Save the cleaned dataframe
    df_cleaned.to_csv(output_file, index=False)
    print(f"Cleaned data saved to {output_file}")

    return df_cleaned

# Example usage: uncomment one of the calls below and adjust the file paths.
# If the second argument is omitted, the input file itself is overwritten.

# For newest_test.csv
# df_clean = clean_csv_remove_dnf('newest_test.csv', 'newest_test_cleaned.csv')

# For newest_test_negatives_1k.csv
# df_clean = clean_csv_remove_dnf('newest_test_negatives_1k.csv', 'newest_test_negatives_1k_cleaned.csv')

# Confirmation message shown once the definitions above have been executed.
print("Script ready! Use clean_csv_remove_dnf() function to clean your CSV files.")

Script ready! Use clean_csv_remove_dnf() function to clean your CSV files.


In [3]:
# Folder that holds the raw CSV exports; change this one line to run elsewhere.
DATA_DIR = r'C:\Users\adamk\Desktop\gerzhoy research\new paper'

# Each call writes the cleaned rows to a new file, so the raw inputs stay intact.
negatives_clean = clean_csv_remove_dnf(
    os.path.join(DATA_DIR, 'newest_test_negatives_1k.csv'),
    os.path.join(DATA_DIR, 'newest_test_negatives_1k_clean.csv'),
)
test_clean = clean_csv_remove_dnf(
    os.path.join(DATA_DIR, 'newest_test.csv'),
    os.path.join(DATA_DIR, 'newest_test_new.csv'),
)

# Bare last expression so the notebook renders the cleaned test frame.
test_clean

Reading C:\Users\adamk\Desktop\gerzhoy research\new paper\newest_test_negatives_1k.csv...
Original shape: (1000, 7)
Shape after removing DNF rows: (921, 7)
Removed 79 rows containing 'DNF'
Cleaned data saved to C:\Users\adamk\Desktop\gerzhoy research\new paper\newest_test_negatives_1k_clean.csv
Reading C:\Users\adamk\Desktop\gerzhoy research\new paper\newest_test.csv...
Original shape: (82, 6)
Shape after removing DNF rows: (73, 6)
Removed 9 rows containing 'DNF'
Cleaned data saved to C:\Users\adamk\Desktop\gerzhoy research\new paper\newest_test_new.csv


Unnamed: 0,comment,creativity_score,num_guesses,wordle_guesses,wordle_answer,entry_id
0,In a strategic way. CLK are all very common co...,9,2,"['noise', 'caulk']",caulk,1
1,Best result so far! My choice of the first wor...,9,2,"['soare', 'shire']",shire,2
2,I'd suggest swapping a few consonants for more...,9,4,"['dogma', 'erupt', 'shiny', 'story']",story,3
3,that is really really really fricking smart,9,5,"['deary', 'oinks', 'cough', 'jumbo', 'flout']",flout,4
4,"Scoredle 293 3/6 \n\n12,972 \n* 🟩⬜🟩🟩🟩 &gt;!S...",9,3,"['stare', 'pinch', 'scare']",scare,5
...,...,...,...,...,...,...
76,"I’m also a &gt;!SLATE!&lt; opener, considered ...",8,3,"['slate', 'miked', 'stead']",stead,77
77,"Scoredle 259 3/6 \n\n12,947 \n\n⬛⬛🟨⬛🟩 &gt;!D...",8,3,"['dance', 'biome', 'brine']",brine,78
78,Hard mode combines very well with challenge mo...,8,5,"['deary', 'oinks', 'cough', 'jumbo', 'flout']",flout,79
79,Wordle 219 5/6\n\n⬛🟨⬛⬛⬛ &gt;!SLATE!&lt;\n\n⬛⬛🟩...,8,5,"['slate', 'group', 'minks', 'clonk', 'knoll']",knoll,80
