# In [None]:
import re
import string
import pandas as pd
from nltk.corpus import stopwords
from nltk.stem import PorterStemmer
from nltk.tokenize import word_tokenize

# Deliberately noisy example sentences: stretched letters, repeated
# punctuation, digit/symbol substitutions and chat abbreviations.
noisy_samples = [
    "Helloooo!!! This is sooo cooolllll!!!",
    "Thissss isss a teestt of theee cleeanerrr.",
    "U r gr8!!! Plz call ASAP!!!",
    "H@ppy B!rthd@y to uuuu",
    "L0L!!! I can't stoppp laughingggg!!!",
]

# Single-column frame; each sample becomes one row under 'Text'.
data = {'Text': noisy_samples}
df = pd.DataFrame(data)

# Download NLTK resources if not already available
import nltk
nltk.download('punkt')
# Newer NLTK releases (3.8.2+) make word_tokenize load its models from the
# 'punkt_tab' resource instead of 'punkt'; without it tokenizing raises
# LookupError. 'punkt' is still fetched above for older installations.
nltk.download('punkt_tab')
nltk.download('stopwords')

# Initialize
# A set gives O(1) membership tests when filtering tokens.
stop_words = set(stopwords.words('english'))
stemmer = PorterStemmer()

# Cleaning function
# Patterns used by clean_text, compiled once instead of on every call.
_APOSTROPHES = re.compile("['\u2019]")      # straight and curly apostrophes
_NON_LETTERS = re.compile(r'[^a-z\s]')      # anything but a-z / whitespace
_CHAR_RUNS = re.compile(r'(.)\1{2,}')       # 3+ repeats of the same character


def clean_text(text):
    """Normalize one noisy text value into a string of stemmed tokens.

    Pipeline: lowercase, split contractions at the apostrophe, delete every
    character that is not a-z or whitespace, shorten runs of three or more
    identical characters to two, tokenize, drop stopwords, stem, and join
    the surviving tokens with single spaces.

    Args:
        text: Raw text. Values that are not ``str`` (e.g. ``None`` or the
            float NaN pandas uses for missing cells) are treated as empty.

    Returns:
        The cleaned text as a single space-separated string; ``''`` when
        nothing survives or the input is not a string.
    """
    # Missing cells reach this function as None/NaN when applied to a
    # DataFrame column; they have no .lower(), so bail out early.
    if not isinstance(text, str):
        return ''

    # Lowercase
    text = text.lower()
    # Split contractions rather than fusing them: deleting the apostrophe
    # would turn "don't" into "dont", which no longer matches the stopword
    # entries. Splitting leaves pieces such as "don" and "t", which NLTK's
    # English stopword list is expected to contain.
    text = _APOSTROPHES.sub(' ', text)
    # Remove digits and special characters
    text = _NON_LETTERS.sub('', text)
    # Remove repeated characters (e.g., "coool" -> "cool")
    text = _CHAR_RUNS.sub(r'\1\1', text)
    # Tokenize
    tokens = word_tokenize(text)
    # Remove stopwords and stem
    tokens = [stemmer.stem(word) for word in tokens if word not in stop_words]
    # Join back
    return ' '.join(tokens)

# Run the cleaner over every row of 'Text' and store the result alongside it.
cleaned_column = df['Text'].apply(clean_text)
df['Cleaned_Text'] = cleaned_column

# Show the heading followed by the frame, each on its own line.
print("Original and Cleaned Text:", df, sep="\n")