In [6]:
from nltk.tokenize import word_tokenize
from nltk.classify import NaiveBayesClassifier, accuracy
from nltk.corpus import stopwords
from nltk.stem import WordNetLemmatizer
from nltk.tag import pos_tag
from nltk.probability import FreqDist
from IPython.display import clear_output
import pandas as pd
import random
import pickle
import os
import time

In [7]:
class Classifier:
    _classifier = None
    _MODEL_FILENAME = 'model.pickle'
    _stopword_list = stopwords.words('english')
    _lemmatizer = WordNetLemmatizer()

    def __init__(self):
        raise TypeError(f"Instances of {self.__class__.__name__} cannot be created")
    
    @classmethod
    def _loadModel(cls):
        print("Loading Model...")
        if os.path.exists(cls._MODEL_FILENAME):
            with open(cls._MODEL_FILENAME, "rb") as file:
                cls._classifier = pickle.load(file)
            clear_output(wait=True)
        else:
            cls._trainModel()
    
    @classmethod
    def _trainModel(cls):
        print("Model not found, creating model, please wait...")
        dataset = pd.read_csv('dataset.csv')
        # Preprocess dataset
        preprocessed_list = []
        words_features = []
        
        for _, row in dataset.iterrows():
            text = row['text']
            label = row['label']
            
            text_word_list = cls._preprocessed_sentence(text)

            words_features.extend(text_word_list)
            preprocessed_list.append((text_word_list, label))
            
        fd = FreqDist(words_features)
        words_features = [word for word, _ in fd.most_common(3000)]

        # Extract features from text
        feature_data_list = []
        for text_word_list, label in preprocessed_list:
            features = {}
            for word in words_features:
                features[word] = word in text_word_list # true/false
            
            feature_data_list.append((features, label))

        random.shuffle(feature_data_list)
        
        # Training data 80% - 20% Testing data
        train_data_count = int(len(feature_data_list) * 0.8)
        train_data = feature_data_list[:train_data_count]
        test_data = feature_data_list[train_data_count:]

        cls._classifier = NaiveBayesClassifier.train(train_data)
        accu = accuracy(cls._classifier, test_data)
        
        print(f"Created model with accuracy: {round(accu * 100, 2)}%")

        # Save to classifier to model.pickle file
        with open(cls._MODEL_FILENAME, "wb") as file:
            pickle.dump(cls._classifier, file)
        
        time.sleep(5)
        clear_output(wait=True)
        
    
    @staticmethod
    def _get_lemma_pos(word):
        result_pos_tag = pos_tag([word])
        tag = result_pos_tag[0][1][0].lower()
        
        if tag == 'j':
            return 'a'  # Adjective
        elif tag == 'n':
            return 'n'  # Noun
        elif tag == 'v':
            return 'v'  # Verb
        elif tag == 'r':
            return 'r'  # Adverb
        else:
            return 'n'
    
    @classmethod
    def _preprocessed_sentence(cls, sentence: str):
        words = word_tokenize(sentence.lower())
        words = [cls._lemmatizer.lemmatize(word, pos=cls._get_lemma_pos(word)) for word in words if word not in cls._stopword_list and word.isalpha()]
        
        return words
    
    @classmethod
    def classify(cls, sentence: str) -> str:
        # The first classify call
        if cls._classifier == None:
            cls._loadModel()
        
        return cls._classifier.classify(FreqDist(cls._preprocessed_sentence(sentence)))
        

In [8]:
class Place:
    def __init__(self, name:str):
        self.name = name
        self.reviews = []

    def show_place_review_status(self):
        fake_count = 0
        
        for review in self.reviews:
            # The label of fake review (Computer Generated) is 'CG'
            if Classifier.classify(review) == 'CG':
                fake_count += 1
                
        fake_percentage = round(fake_count / len(self.reviews) * 100, 2)
        print(f"This place has {fake_percentage}% suspected fake reviews")

In [9]:
dummy_hotel = Place('Dummy Hotel')
dummy_hotel.reviews = [
    "This place is absolutely amazing! The ambiance is perfect, the staff are incredibly friendly, and the food is to die for. I can't recommend it enough!",
    "A very pleasant experience overall. The service was attentive, and the dishes were flavorful and well-presented. I'll definitely be back!",
    "It's a vibrant place with reasonable prices. The food was good.",
    "I was quite disappointed. The food was mediocre at best, and the service was slow. There are better options nearby.",
    "Terrible experience. The food was cold, and the staff were rude. I will not be returning to this place.",
    "Great atmosphere and a solid menu selection. The dishes were tasty, but the wait time was a bit long. Still, I'd recommend giving it a try.",
    "An exceptional place to dine! Every dish we tried was delicious, and the staff made us feel right at home. Perfect for a special occasion.",
    "The place is okay. It's clean and the food is alright, but nothing really stood out to me. It's just an average spot.",
    "I had a fantastic time here! The decor is beautiful, and the food was just perfect. The staff were very accommodating as well.",
    "Not impressed. The portions were small, and the food lacked flavor. The service was also quite slow. I expected more for the price.",
    "Wonderful! The best place in the world. Everything was just perfect. You need to come here. It's a five-star experience!",
    "This place is the best! Everything is perfect, and I had the best time of my life. Highly recommend to everyone!",
    "Amazing experience! The staff is so friendly, and the food is unbelievably good. You must visit this place. It's the best!",
    "Absolutely fantastic! I couldn't believe how great everything was. The atmosphere, the food, the service, all top-notch. You won't regret coming here!",
    "Incredible! This place exceeded all my expectations. I've never been anywhere better. Five stars without a doubt!",
    "Phenomenal! From the moment I walked in, I knew this place was special. The food is divine, and the service is impeccable. A must-visit!",
    "Simply the best place ever! Everything was perfect, and I couldn't have asked for a better experience. I will be coming back for sure!",
    "Outstanding! The food is to die for, and the service is unbeatable. You have to try this place. It's simply the best!",
    "I'm obsessed with this place! Highly recommended!",
]

dummy_hotel.show_place_review_status()

This place has 10.53% suspected fake reviews
