In [1]:
# !pip install emoji
# !pip install googletrans==3.1.0a0

import re
from googletrans import Translator
import emoji
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.feature_extraction.text import CountVectorizer, TfidfVectorizer
from sklearn.naive_bayes import MultinomialNB
from sklearn.metrics import accuracy_score, classification_report
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
import nltk
from nltk.stem import SnowballStemmer
from nltk.tokenize import word_tokenize
from nltk.corpus import stopwords
from string import punctuation
import warnings

# Suppress every warning category for the rest of the kernel session.
# NOTE(review): this also hides deprecation/convergence warnings raised by the
# libraries imported above; narrow the filter if any of those matter.
warnings.filterwarnings('ignore')

# nltk.download('stopwords')
# nltk.download('punkt')

In [2]:
def english_sentiment_analysis(
        comment,
        dataset_path="C:/Users/Dell/OneDrive/Desktop/book/English_Dataset.csv"):
    """Predict the sentiment label of an English comment.

    A TF-IDF + multinomial Naive Bayes classifier is trained from scratch on
    every call, using the ``Comment`` (text) and ``Review`` (label) columns of
    the CSV at ``dataset_path``, and is then applied to ``comment``.

    Args:
        comment: Text to classify. Must be a ``str``.
        dataset_path: Location of the training CSV. Defaults to the path the
            notebook was originally written against, so existing one-argument
            calls keep working; pass another path to run elsewhere.

    Returns:
        The predicted ``Review`` value for ``comment``.
    """
    df = pd.read_csv(dataset_path)

    # Built once per call instead of once per row: both objects are
    # loop-invariant and the stop-word list is comparatively costly to load.
    stopwords_and_punctuation = set(
        stopwords.words('english')) | set(punctuation)
    stemmer = SnowballStemmer("english")

    def preprocess_text(text):
        """Keep letters only, lowercase, tokenize, drop stop words, stem."""
        text = re.sub('[^a-zA-Z]', ' ', text).lower()
        return [stemmer.stem(word)
                for word in word_tokenize(text)
                if word not in stopwords_and_punctuation]

    # Blank CSV cells are read as NaN (a float), which re.sub cannot handle;
    # treat any non-string cell as an empty document.
    df['Comment_Cleaned'] = df['Comment'].apply(
        lambda x: ' '.join(preprocess_text(x)) if isinstance(x, str) else '')

    # The vectorizer is fitted on the whole corpus, the classifier only on the
    # 70% training split; the held-out 30% is never used by this function.
    tfidf = TfidfVectorizer()
    X = tfidf.fit_transform(df['Comment_Cleaned'])
    y = df['Review']

    X_train, _, y_train, _ = train_test_split(
        X, y, test_size=0.3, random_state=0)

    model = MultinomialNB()
    model.fit(X_train, y_train)

    new_review_tokens = ' '.join(preprocess_text(comment))
    new_review_tokens_transformed = tfidf.transform([new_review_tokens])

    return model.predict(new_review_tokens_transformed)[0]

In [3]:
def hinglish_sentiment_analysis(
        comment,
        dataset_path="C:/Users/Dell/OneDrive/Desktop/book/Hinglish_Dataset.csv"):
    """Predict the sentiment label of a Hinglish comment.

    Every ``Comment`` in the CSV at ``dataset_path`` is translated to English
    (source language ``'hi'``) through ``googletrans``, cleaned, and used to
    train a TF-IDF + multinomial Naive Bayes classifier against the ``Review``
    column. ``comment`` goes through the same translate-and-clean steps before
    being classified. All of this is repeated on every call, so each call
    issues one translation request per dataset row plus one for ``comment``.

    Args:
        comment: Text to classify. Must be a ``str``.
        dataset_path: Location of the training CSV. Defaults to the path the
            notebook was originally written against, so existing one-argument
            calls keep working; pass another path to run elsewhere.

    Returns:
        The predicted ``Review`` value for ``comment``.
    """
    df = pd.read_csv(dataset_path)
    translator = Translator(service_urls=['translate.googleapis.com'])

    def to_english(text):
        """Translate one string to English, treating the source as Hindi."""
        return translator.translate(text, src='hi', dest='en').text

    # Blank CSV cells are read as NaN (a float); skip the translation request
    # for any non-string cell and treat it as an empty document.
    df['Comment_English'] = df['Comment'].apply(
        lambda x: to_english(x) if isinstance(x, str) else '')

    # Built once per call instead of once per row: both objects are
    # loop-invariant and the stop-word list is comparatively costly to load.
    stopwords_and_punctuation = set(
        stopwords.words('english')) | set(punctuation)
    stemmer = SnowballStemmer("english")

    def preprocess_text(text):
        """Keep letters only, lowercase, tokenize, drop stop words, stem."""
        text = re.sub('[^a-zA-Z]', ' ', text).lower()
        return [stemmer.stem(word)
                for word in word_tokenize(text)
                if word not in stopwords_and_punctuation]

    df['Comment_Cleaned'] = df['Comment_English'].apply(
        lambda x: ' '.join(preprocess_text(x)))

    # The vectorizer is fitted on the whole corpus, the classifier only on the
    # 70% training split; the held-out 30% is never used by this function.
    tfidf = TfidfVectorizer()
    X = tfidf.fit_transform(df['Comment_Cleaned'])
    y = df['Review']

    X_train, _, y_train, _ = train_test_split(
        X, y, test_size=0.3, random_state=0)
    model = MultinomialNB()
    model.fit(X_train, y_train)

    new_review_tokens = ' '.join(preprocess_text(to_english(comment)))
    new_review_tokens_transformed = tfidf.transform([new_review_tokens])

    return model.predict(new_review_tokens_transformed)[0]


In [4]:
def emoji_sentiment_analysis(
        comment,
        dataset_path='C:/Users/Dell/Downloads/Emoji_Dataset.csv'):
    """Predict the polarity of a comment from the emoji it contains.

    The ``Emoji`` column of the CSV at ``dataset_path`` is converted to
    ``:name:`` text with ``emoji.demojize`` and used to train a bag-of-words
    multinomial Naive Bayes classifier against the ``Polarity`` column.
    ``comment`` is demojized the same way and classified. The model is
    retrained on every call.

    Args:
        comment: Text to classify; expected to contain at least one emoji.
        dataset_path: Location of the training CSV. Defaults to the path the
            notebook was originally written against, so existing one-argument
            calls keep working; pass another path to run elsewhere.

    Returns:
        The predicted ``Polarity`` value for ``comment``.
    """
    data = pd.read_csv(dataset_path)

    # Rows with no polarity are assigned the label -1.
    data['Polarity'] = data['Polarity'].fillna(-1)

    # Non-string cells (e.g. NaN from blank rows) become empty documents.
    data['Emoji_Text'] = data['Emoji'].apply(
        lambda x: emoji.demojize(x) if isinstance(x, str) else '')

    # Only the 80% training split is used; the held-out 20% was previously
    # vectorized and then discarded, so that dead work has been removed.
    X_train, _, y_train, _ = train_test_split(
        data['Emoji_Text'], data['Polarity'], test_size=0.2, random_state=0)

    vectorizer = CountVectorizer()
    X_train_counts = vectorizer.fit_transform(X_train)

    classifier = MultinomialNB()
    classifier.fit(X_train_counts, y_train)

    emoji_text = emoji.demojize(comment)
    emoji_vector = vectorizer.transform([emoji_text])
    return classifier.predict(emoji_vector)[0]


In [5]:
def predict_sentiment(command):
    """Route ``command`` to one of the three analyzers and return its result.

    Routing order:
      1. any character found in ``emoji.EMOJI_DATA`` -> emoji analyzer;
      2. text made only of ASCII letters, digits and spaces (the empty string
         included) -> English analyzer;
      3. everything else -> Hinglish analyzer.
    """
    contains_emoji = False
    for char in command:
        if char in emoji.EMOJI_DATA:
            contains_emoji = True
            break

    if contains_emoji:
        analyzer = emoji_sentiment_analysis
    elif re.match(r'^[a-zA-Z0-9 ]*$', command):
        analyzer = english_sentiment_analysis
    else:
        analyzer = hinglish_sentiment_analysis

    return analyzer(command)

In [6]:
# Demo: classify one sample message and print a human-readable label.
command = "This is awesome! 🙌"
predicted_sentiment = predict_sentiment(command)

# Numeric class -> display name; an unrecognized value prints nothing.
label = next(
    (name
     for code, name in ((-1, "Neutral"), (0, "Negative"), (1, "Positive"))
     if predicted_sentiment == code),
    None,
)
if label is not None:
    print(label)

Positive
