In [1]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split, cross_val_score, StratifiedKFold
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.naive_bayes import MultinomialNB
from sklearn.metrics import accuracy_score, classification_report
from sklearn.pipeline import Pipeline
from sklearn.model_selection import GridSearchCV
import nltk
import re
from nltk.corpus import stopwords
from nltk.tokenize import word_tokenize
from nltk.stem import WordNetLemmatizer
from nltk.sentiment.vader import SentimentIntensityAnalyzer

nltk.download('punkt')
nltk.download('wordnet')
nltk.download('omw-1.4')
nltk.download('vader_lexicon')

gsheet_url = 'https://docs.google.com/spreadsheets/d/'
sheet_id = '1xHgmmhVE4IA7F60qnNkuMQ6OxVpR7AEx'

data = pd.read_excel(f'{gsheet_url}{sheet_id}/export?format=xlsx')

def column_cleaner(selected_column):
    data[selected_column] = data[selected_column].str.lower()
    data[selected_column] = data[selected_column].str.strip()
    data[selected_column].drop_duplicates(keep='first', inplace=True)
    return data

data = column_cleaner('Key Words')

lemmatizer = WordNetLemmatizer()
english_stop_words = set(stopwords.words('english'))
filipino_stop_words = set(stopwords.words('filipino'))
chinese_stop_words = set(stopwords.words('chinese'))
stop_words = english_stop_words | filipino_stop_words | chinese_stop_words

def preprocess_text(text):
    text = text.lower()
    text = re.sub(r'[^a-zA-Z]', ' ', text)
    tokens = word_tokenize(text)
    tokens = [lemmatizer.lemmatize(word) for word in tokens if word not in stop_words]
    return ' '.join(tokens)

data['Key Words'] = data['Key Words'].apply(preprocess_text)

data['Comment Length'] = data['Key Words'].apply(lambda x: len(x.split()))

X_train, X_val, y_train, y_val = train_test_split(data[['Key Words', 'Comment Length']], data['Category'], test_size=0.2, random_state=42)

text_clf = Pipeline([
    ('tfidf', TfidfVectorizer(max_features=1000)),
    ('clf', MultinomialNB())
])

param_grid = {
    'tfidf__ngram_range': [(1, 1), (1, 2)],
    'tfidf__max_df': [0.7, 0.8, 0.9],
    'clf__alpha': [0.1, 0.5, 1.0]
}

cv = StratifiedKFold(n_splits=5, shuffle=True, random_state=42)

cross_val_scores = cross_val_score(text_clf, X_train['Key Words'], y_train, cv=cv, scoring='accuracy')

grid_search = GridSearchCV(text_clf, param_grid, cv=cv, n_jobs=-1)
grid_search.fit(X_train['Key Words'], y_train)

print("Best hyperparameters: ", grid_search.best_params_)

y_pred = grid_search.predict(X_val['Key Words'])

accuracy = accuracy_score(y_val, y_pred)
report = classification_report(y_val, y_pred)

print(f'Accuracy: {accuracy}')
print(report)

sid = SentimentIntensityAnalyzer()

def analyze_sentiment(comment):
    if isinstance(comment, str):
        sentiment_scores = sid.polarity_scores(comment)
        if sentiment_scores['compound'] >= 0.05:
            return 'positive'
        elif sentiment_scores['compound'] <= -0.05:
            return 'negative'
        else:
            return 'neutral'
    elif pd.isnull(comment):  # Handle np.nan
        return 'neutral'
    else:
        return 'neutral'  # Handle other non-string values

data['Sentiment'] = data['Key Words'].apply(analyze_sentiment)

# target_path = 'TestComments.xlsx'
target_path = r'C:\Users\Jerome Pintucan\OneDrive - TRANSNATIONAL E-BUSINESS SOLUTIONS INC\EASYCALL\EASYCALL\JUSTKITCHEN\REVIEWS\Customer Feedback Aug 2023 - Feb 2024.xlsx'
target_comment = pd.read_excel(target_path)
target_comments = target_comment['Comments'].tolist()

# Handle NaN values in target_comments
target_comments = ["" if pd.isnull(comment) else comment for comment in target_comments]

sentiments = [analyze_sentiment(comment) for comment in target_comments]
X_target = pd.DataFrame({'Reviews': target_comments, 'Sentiment': sentiments})
predicted_categories = grid_search.predict(X_target['Reviews'])

df = pd.DataFrame({'Reviews': target_comments, 'Category': predicted_categories, 'Sentiment': sentiments})

df


[nltk_data] Downloading package punkt to C:\Users\Jerome
[nltk_data]     Pintucan\AppData\Roaming\nltk_data...
[nltk_data]   Package punkt is already up-to-date!
[nltk_data] Downloading package wordnet to C:\Users\Jerome
[nltk_data]     Pintucan\AppData\Roaming\nltk_data...
[nltk_data]   Package wordnet is already up-to-date!
[nltk_data] Downloading package omw-1.4 to C:\Users\Jerome
[nltk_data]     Pintucan\AppData\Roaming\nltk_data...
[nltk_data]   Package omw-1.4 is already up-to-date!
[nltk_data] Downloading package vader_lexicon to C:\Users\Jerome
[nltk_data]     Pintucan\AppData\Roaming\nltk_data...
[nltk_data]   Package vader_lexicon is already up-to-date!


Best hyperparameters:  {'clf__alpha': 0.1, 'tfidf__max_df': 0.7, 'tfidf__ngram_range': (1, 2)}
Accuracy: 0.8795986622073578
                        precision    recall  f1-score   support

Food Quality and Taste       0.87      0.99      0.92       211
        Order Accuracy       0.93      0.79      0.86        53
               Portion       0.90      0.38      0.53        24
               Pricing       0.80      0.36      0.50        11

              accuracy                           0.88       299
             macro avg       0.88      0.63      0.70       299
          weighted avg       0.88      0.88      0.87       299



Unnamed: 0,Reviews,Category,Sentiment
0,I think cheesy gyudon ramen is overpriced give...,Food Quality and Taste,neutral
1,its an ordinary taste of noodle soup,Food Quality and Taste,neutral
2,wish the pork katsu had more flavour ü•∫ but ...,Food Quality and Taste,positive
3,sadly the sauce wasnt secured enough tumapon t...,Food Quality and Taste,positive
4,pretty nice Japanese food.,Food Quality and Taste,positive
...,...,...,...
326,no chopsticks,Food Quality and Taste,negative
327,Please don‚Äôt forget the utensils when asked ...,Order Accuracy,positive
328,The katsudon tasted great and the portion size...,Food Quality and Taste,positive
329,There were spillage all over as the packaging ...,Food Quality and Taste,positive


In [4]:
output = pd.read_excel(r'C:\Users\Jerome Pintucan\OneDrive - TRANSNATIONAL E-BUSINESS SOLUTIONS INC\EASYCALL\EASYCALL\JUSTKITCHEN\REVIEWS\Feb - July 2023 Feedback Analysis.xlsx')

output

Unnamed: 0,Reviews,Category,Sentiment
0,best,Food Quality and Taste,positive
1,The Best Stewed Pork :) Thanks!,Food Quality and Taste,positive
2,,Food Quality and Taste,neutral
3,new jap comfort food,Food Quality and Taste,positive
4,super yummy!!!! My family loved it 😍,Food Quality and Taste,positive
...,...,...,...
822,,Food Quality and Taste,neutral
823,没有餐具，一份饭一个姑娘都吃不饱,Order Accuracy,negative
824,，齁咸齁咸 一锤子买卖 直接拉黑,Food Quality and Taste,negative
825,sorry hindi masarap ramen nila for its price. ...,Food Quality and Taste,negative


In [4]:
output_path = r'C:\Users\Jerome Pintucan\OneDrive - TRANSNATIONAL E-BUSINESS SOLUTIONS INC\EASYCALL\EASYCALL\JUSTKITCHEN\REVIEWS\Aug 2023 - Feb 2024 Feedback Analysis.xlsx'
df.to_excel(output_path, index=False)