In [1]:
import langid
import pandas as pd
import re
import emoji
from emot.emo_unicode import EMOTICONS_EMO

In [23]:
def process_text_column(df, input_column):
    """
    Process a text column in a pandas DataFrame.

    Args:
    - df: pandas DataFrame
    - input_column: str, the name of the column to process

    Returns:
    - pandas Series, a new column with the required modifications
    """

    def replace_emoji(text):
        """
        Extracts emojis from text using the emot library and replaces them with their descriptions.
        """
        text = emoji.demojize(text)
       
        return text
    
    def convert_emoticons(text):
        """
        Extracts emoticons from text using the emot library and replaces them with their descriptions.
        """
        for emoticon in EMOTICONS_EMO:
            pattern = re.escape(emoticon)
            replacement = "_".join(EMOTICONS_EMO[emoticon].replace(",", "").split())
            text = re.sub(f'({pattern})', replacement, text)
        return text

    def detect_non_english_words(text):
        """
        Detects non-English words in the text using the langid library.
        """
        words = text.split()
        for i in range(len(words)):
            if langid.classify(words[i])[0] != 'en':
                words[i] = '{Non-English}'
        return ' '.join(words)

    def process_text(text):
        """
        Processes the text by replacing emojis and detecting non-English words.
        """
        text = replace_emoji(text)
        text = detect_non_english_words(text)
        text = convert_emoticons(text)
                
        text = text.replace(':',' ')
        
        return text

    # Apply the processing function to the specified column
    new_column = df[input_column].apply(process_text)

    return new_column

In [26]:
some_text = "Hi there Gracias 😡🫥😂 :-( :[ :-) :-)) :( :'( "
df = pd.DataFrame({'text_column': [some_text]})
df['processed_text'] = process_text_column(df, 'text_column')
df['processed_text'][0]

'Hi there {Non-English}  enraged_face  dotted_line_face  face_with_tears_of_joy  Frown_sad_andry_or_pouting Frown_sad_andry_or_pouting Happy_face_smiley Very_happy >Frown_sad_andry_or_pouting Crying'

In [7]:
df

Unnamed: 0,text_column,processed_text
0,😡🫥 Hi there Gracias :-( :[ :-) :-)),enraged_face dotted_line_face Hi there {Non-En...
