### These preprocessing steps refine the text data, reduce noise, and produce a more structured dataset that supports accurate analysis.

In [None]:
# Download the NLTK 'stopwords' corpus if not already downloaded.
# stopwords.words('english') in the functions below reads from this corpus.
nltk.download('stopwords')

# Translation table that deletes every character in string.punctuation.
_PUNCTUATION_TABLE = str.maketrans('', '', string.punctuation)

# Lazily-built set of English stop words with punctuation stripped. It is
# filled on first use so the corpus is read once instead of once per row.
_STRIPPED_STOP_WORDS = None


def _stripped_stop_words():
    """Return the English stop words with punctuation removed, built once."""
    global _STRIPPED_STOP_WORDS
    if _STRIPPED_STOP_WORDS is None:
        _STRIPPED_STOP_WORDS = frozenset(
            word.translate(_PUNCTUATION_TABLE)
            for word in stopwords.words('english')
        )
    return _STRIPPED_STOP_WORDS


# Function to remove punctuation, stopwords, and convert text to lowercase
def remove_punctuation_and_stopwords(text):
    """Strip punctuation, lowercase, and drop English stop words from text.

    Args:
        text: The string to clean.

    Returns:
        The remaining words joined by single spaces.
    """
    # Remove punctuation, then convert to lowercase
    text = text.translate(_PUNCTUATION_TABLE).lower()
    # Tokens no longer contain punctuation, so they are compared against stop
    # words that had the same characters removed. Otherwise contractions in
    # the stop list (e.g. "don't") could never match their stripped form
    # ("dont") and would survive the filter.
    stop_words = _stripped_stop_words()
    return " ".join(word for word in text.split() if word not in stop_words)

# Apply the remove_punctuation_and_stopwords function to a DataFrame's 'text' column
def clean_and_print(df):
    """Clean the 'text' column of df in place and show the first rows.

    Every value of df["text"] is passed through
    remove_punctuation_and_stopwords and the column is overwritten with the
    results. The cleaned text of the leading rows is printed.

    Args:
        df: DataFrame with a 'text' column.

    Returns:
        df.head() taken after the column has been overwritten.
    """
    cleaned_column = df["text"].apply(remove_punctuation_and_stopwords)
    df["text"] = cleaned_column

    preview = df.head()
    print("After cleaning:")
    print(preview.text.values)
    return preview

# Pass dfT and dfF through the DataFrame constructor so both names are bound
# to pandas DataFrames before cleaning.
# NOTE(review): dfT and dfF are defined outside this cell; their original
# types are not visible here -- confirm this wrapping is still needed.
dfT = pd.DataFrame(dfT)
dfF = pd.DataFrame(dfF)

# Apply the cleaning function to the DataFrame.
# clean_and_print overwrites each DataFrame's 'text' column in place; the
# returned df.head() previews are not assigned to anything here.
clean_and_print(dfT)
clean_and_print(dfF)

In [None]:
# Initialize the Porter Stemmer once as a global; preprocess_text below
# reads this `stemmer` name when perform_stemming is true.
stemmer = PorterStemmer()

# Cache of English stop-word sets keyed by whether punctuation was stripped,
# so the NLTK corpus is read once per variant rather than once per row.
_STOP_WORD_SETS = {}


def _stop_words_for(punctuation_stripped):
    """Return the cached English stop-word set matching the token form."""
    if punctuation_stripped not in _STOP_WORD_SETS:
        words = stopwords.words('english')
        if punctuation_stripped:
            # Tokens have lost their punctuation, so strip it from the stop
            # words as well; otherwise contractions such as "don't" would
            # never match their stripped form "dont".
            table = str.maketrans('', '', string.punctuation)
            words = [word.translate(table) for word in words]
        _STOP_WORD_SETS[punctuation_stripped] = frozenset(words)
    return _STOP_WORD_SETS[punctuation_stripped]


# Function to remove punctuation, stopwords, and perform stemming
def preprocess_text(text, remove_punctuation=True, remove_stopwords=True, perform_stemming=True):
    """Lowercase text and optionally strip punctuation, stop words, and stem.

    Args:
        text: The string to preprocess.
        remove_punctuation: If true, delete every character found in
            string.punctuation before any other step.
        remove_stopwords: If true, drop English stop words.
        perform_stemming: If true, stem each remaining word with the global
            `stemmer`.

    Returns:
        The processed string. Lowercasing is always applied. When stop-word
        removal or stemming runs, words are re-joined with single spaces;
        otherwise the original whitespace is kept.
    """
    if remove_punctuation:
        text = text.translate(str.maketrans('', '', string.punctuation))
    text = text.lower()

    # No token-level step requested: return without touching whitespace.
    if not (remove_stopwords or perform_stemming):
        return text

    words = text.split()

    if remove_stopwords:
        stop_words = _stop_words_for(bool(remove_punctuation))
        words = [word for word in words if word not in stop_words]

    if perform_stemming:
        words = [stemmer.stem(word) for word in words]

    return " ".join(words)

# Apply the preprocess_text function to a DataFrame's 'text' column
def clean_and_print(df, remove_punctuation=True, remove_stopwords=True, perform_stemming=True):
    """Preprocess the 'text' column of df in place and show the first rows.

    Each value of df["text"] is passed to preprocess_text together with the
    three flags, and the column is overwritten with the results. The
    processed text of the leading rows is printed.

    Args:
        df: DataFrame with a 'text' column.
        remove_punctuation: Forwarded to preprocess_text.
        remove_stopwords: Forwarded to preprocess_text.
        perform_stemming: Forwarded to preprocess_text.

    Returns:
        df.head() taken after the column has been overwritten.
    """
    def _run_steps(raw_text):
        # Bind the caller's flags so apply() only has to supply the value.
        return preprocess_text(raw_text, remove_punctuation, remove_stopwords, perform_stemming)

    processed_column = df["text"].apply(_run_steps)
    df["text"] = processed_column

    preview = df.head()
    print("After preprocessing:")
    print(preview.text.values)
    return preview

# Preprocess text in the DataFrames with every step enabled (the flags are
# spelled out even though they match the defaults).
# NOTE(review): if the earlier cleaning cell has already run, dfT/dfF hold
# text that was punctuation- and stop-word-stripped there; this pass repeats
# those steps and adds stemming. Each run overwrites 'text' in place, so
# re-running this cell processes the already-processed column again.
clean_and_print(dfT, remove_punctuation=True, remove_stopwords=True, perform_stemming=True)
clean_and_print(dfF, remove_punctuation=True, remove_stopwords=True, perform_stemming=True)