In [4]:
import pandas as pd
from glob import glob
import random
import Stemmer
import emoji
import re
from abc import ABC, abstractmethod
from num2words import num2words

# Create Preprocessing Classes

In [5]:
class Preprocessing(ABC):
    """Common interface for the text-preprocessing steps in this notebook.

    A concrete step subclasses this class and overrides ``fit``. The class
    itself cannot be instantiated because ``fit`` is abstract.
    """

    @abstractmethod
    def fit(self,text:str)->str:
        """Return the processed form of ``text``.

        Args:
            text: The string to transform.

        Returns:
            The transformed string. This base implementation has no body and
            therefore returns ``None``; subclasses supply the real result.
        """

In [20]:
class RemoveSpecialCharacters(Preprocessing):
    """Lowercase text and strip everything except basic word characters.

    Kept characters are ASCII letters, digits, apostrophes and whitespace;
    anything else (punctuation, underscores, accented letters, emoji, ...)
    is deleted outright, not replaced by a space.
    """

    # Compiled once at class creation; the pattern text is what matters.
    _DISALLOWED = re.compile(r"[^a-zA-Z0-9'\s]")
    _WHITESPACE_RUN = re.compile(r"\s+")

    def fit(self,text:str)->str:
        """Return ``text`` lowercased, cleaned, and with whitespace collapsed.

        Args:
            text: The string to clean.

        Returns:
            The cleaned string. Every run of whitespace becomes one space;
            leading/trailing whitespace is collapsed to a single space
            rather than removed.
        """
        cleaned = self._DISALLOWED.sub('', text.lower())
        return self._WHITESPACE_RUN.sub(" ", cleaned)

class ConvertEmoji(Preprocessing):
    """Replace emoji characters with their textual names."""

    def fit(self,text:str)->str:
        """Lowercase ``text`` and pass it through ``emoji.demojize``.

        Args:
            text: The string that may contain emoji.

        Returns:
            The lowercased string with emoji replaced by whatever
            ``emoji.demojize`` emits with its default settings (the outputs
            recorded in this notebook show colon-delimited names such as
            ``:face_with_tears_of_joy:``).
        """
        return emoji.demojize(text.lower())

class TextStemmer(Preprocessing):
    """Lowercase text and stem each whitespace-separated token.

    The Snowball stemmer is built once per instance instead of on every
    ``fit`` call, so reusing one ``TextStemmer`` across many documents does
    not repeat the construction work.

    NOTE(review): the PyStemmer docs advise against sharing one Stemmer
    object between threads -- use one ``TextStemmer`` per thread; confirm
    against the installed version.
    """

    def __init__(self, language: str = "english") -> None:
        """Create the underlying stemmer.

        Args:
            language: Algorithm name passed to ``Stemmer.Stemmer``. Defaults
                to ``"english"``, the value this class always used.
        """
        self._stemmer = Stemmer.Stemmer(language)

    def fit(self,text:str)->str:
        """Return ``text`` lowercased with every token stemmed.

        Tokens are produced by ``str.split()``, so punctuation attached to a
        word is passed to the stemmer as part of that token.

        Args:
            text: The string to stem.

        Returns:
            The stemmed tokens joined by single spaces (empty string when
            ``text`` contains no tokens).
        """
        tokens = text.lower().split()
        return ' '.join(self._stemmer.stemWords(tokens))
    
class ConvertNumberToWords(Preprocessing):
    """Spell out integer literals found in text as English ordinal words.

    NOTE(review): ``to='ordinal'`` turns "100" into "one hundredth"; if plain
    number words ("one hundred") were intended, the mode should be cardinal.
    Left as-is because the recorded notebook output relies on it.
    """

    # A '-' is treated as a minus sign only when it does not directly follow
    # a word character, so the hyphens in "2020-05-01" stay separators
    # instead of producing negative numbers.
    _NUMBER = re.compile(r'(?<!\w)-\d+|\d+')

    def fit(self,text:str)->str:
        """Return ``text`` with each integer replaced by its ordinal words.

        Negative numbers are rendered as ``"minus "`` followed by the ordinal
        of the absolute value, because ``num2words`` raises ``TypeError``
        when asked for the ordinal of a negative number.

        Args:
            text: The string that may contain integer literals.

        Returns:
            The string with every matched integer spelled out; all other
            characters are left untouched.
        """
        def replace_match(match):
            value = int(match.group(0))
            words = num2words(abs(value), to='ordinal')
            return f"minus {words}" if value < 0 else words
        return self._NUMBER.sub(replace_match, text)
        


In [23]:
# Smoke-test the preprocessing steps on one sample review.
# The steps run strictly in the order listed; each receives the previous
# step's output, so reordering them changes the final text.
string="100 I love this product! 😍 It's absolutely amazing. But the delivery was slow... 😠"
for step in (
    TextStemmer(),
    ConvertEmoji(),
    RemoveSpecialCharacters(),
    ConvertNumberToWords(),
):
    string = step.fit(string)

string

'one hundredth i love this product smilingfacewithhearteyes it absolut amazing but the deliveri was slow angryface'

In [4]:
# Load the labelled reviews: one UTF-8 .txt file per review, stored under
# data/training_data/<label>/.
SEED = 42  # fixed so the shuffled row order is identical on every run
labels_file_name=["neg","pos"]
data=[]
test=[]  # raw texts only, in load order (not shuffled)
for label in labels_file_name:
    # glob() returns paths in arbitrary, filesystem-dependent order; sorting
    # makes the load order (and therefore the seeded shuffle) reproducible.
    for filename in sorted(glob(f"data/training_data/{label}/*.txt")):
        with open(filename, "r", encoding="utf-8") as file:
            text=file.read()
        test.append(text)
        data.append({"text":text,"label":label})

# Dedicated RNG: reproducible shuffle that leaves the global random state alone.
random.Random(SEED).shuffle(data)
# Explicit columns keep the frame's schema stable even when no files are found.
df = pd.DataFrame(data, columns=["text","label"])

# Scratch check of emoji.demojize on its own: the recorded output below shows
# each emoji replaced by a colon-delimited name, adjacent emoji emitted back
# to back with no separator, and the punctuation left untouched.
# Note: this rebinds the notebook-level name `string`, discarding any value
# it held from an earlier cell.
string="hello every body today i want playing 😂😂 a video game,,,()"
emoji.demojize(string)

'hello every body today i want playing :face_with_tears_of_joy::face_with_tears_of_joy: a video game,,,()'