In [1]:
import pandas as pd
import re

def clean_text(text):
    """Normalize one tweet to lowercase letters and whitespace only.

    Args:
        text: Raw cell value from the 'text' column. Non-string values
            (for example the NaN pandas produces for an empty field) are
            treated as empty text instead of raising ``TypeError``.

    Returns:
        The text lowercased, with URLs and every character other than
        ``a-z`` and whitespace removed, and with leading/trailing
        whitespace stripped. Returns ``''`` for non-string input.
    """
    if not isinstance(text, str):
        return ''
    # Lowercase first so that URLs written as 'HTTP://...' or 'WWW....' are
    # matched by the URL pattern as well.
    text = text.lower()
    # Remove URLs ('http\S+' already covers 'https...').
    text = re.sub(r'http\S+|www\S+', '', text)
    # Keep only letters and whitespace; this drops digits, punctuation and
    # any non-ASCII characters in a single pass.
    text = re.sub(r'[^a-z\s]', '', text)
    return text.strip()


# Stays None when loading fails, so the cleaning step below is skipped
# instead of failing with NameError on an unbound 'df'.
df = None
try:
    df = pd.read_csv('tweets.csv')
except FileNotFoundError:
    print("The file path is incorrect or the file does not exist.")
except pd.errors.EmptyDataError:
    print("The file is empty.")
except Exception as e:
    print(f"An error occurred: {e}")
else:
    print("CSV file loaded successfully.")

if df is None:
    # The reason has already been reported by the handlers above.
    pass
elif 'text' in df.columns:
    print("'text' column found, proceeding with cleaning.")

    df['cleaned_text'] = df['text'].apply(clean_text)

    # Filter out rows where 'cleaned_text' is empty or has only a single
    # letter. clean_text always returns a stripped string made of a-z and
    # whitespace, so a length check is sufficient.
    df = df[df['cleaned_text'].str.len() > 1]

    # Save the updated DataFrame back to a CSV file
    df.to_csv('tweets_cleaned.csv', index=False)
    print("Filtered file saved successfully as 'tweets_cleaned.csv'.")
else:
    print("'text' column not found in the CSV file. Please check the column name.")


CSV file loaded successfully.
'text' column found, proceeding with cleaning.
Filtered file saved successfully as 'tweets_cleaned.csv'.
