In [88]:
# Utility: Preparation text

from contractions import CONTRACTION_MAP

##========== PREPARATION TEXT ===========##

# Contraction
def expand_contractions(sentence, contraction_mapping=CONTRACTION_MAP):
    """
    Expand the contractions in a sentence. For example don't => do not.

    Matching is case-insensitive. The first character of every match is kept
    exactly as typed, so with a mapping of don't -> do not the input "Don't"
    becomes "Do not".

    Parameters:
    sentence (str): The input sentence to clean.
    contraction_mapping (dict): A dictionary mapping each contraction to its
        expanded form.

    Returns:
    str: The sentence with every known contraction expanded.
    """
    # An empty mapping would compile to '()', which matches the empty string at
    # every position and makes the replacement callback fail on match[0].
    if not contraction_mapping:
        return sentence

    # Longest keys first: regex alternation takes the first alternative that
    # matches, so a short key must not pre-empt a longer key it is a prefix of.
    # Keys are escaped so that they are always matched literally.
    ordered_keys = sorted(contraction_mapping, key=len, reverse=True)
    contractions_pattern = re.compile(
        '({})'.format('|'.join(re.escape(key) for key in ordered_keys)),
        flags=re.IGNORECASE | re.DOTALL)

    # Lower-cased view of the mapping. Because the pattern ignores case, a key
    # stored with capital letters can match text typed in another casing; the
    # two direct lookups below would both miss it.
    lowered_mapping = {key.lower(): value
                       for key, value in contraction_mapping.items()}

    def expanded_match(contraction):
        """
        Build the replacement text for one matched contraction.

        Parameters:
        contraction (re.Match): The match object handed over by re.sub.

        Returns:
        str: The expanded contraction, or the matched text unchanged when no
            expansion can be found.
        """
        match = contraction.group(0)

        expanded_contraction = contraction_mapping.get(match)
        if not expanded_contraction:
            expanded_contraction = contraction_mapping.get(match.lower())
        if not expanded_contraction:
            expanded_contraction = lowered_mapping.get(match.lower())
        if expanded_contraction is None:
            # Nothing to expand with: leave the text as it was.
            return match

        # Keep the casing of the first typed character.
        return match[0] + expanded_contraction[1:]

    return contractions_pattern.sub(expanded_match, sentence)


def remove_extra_spaces(sentence):
    """
    Collapse every run of whitespace into one space and trim both ends.

    Parameters:
    sentence (str): The input sentence to clean.

    Returns:
    str: The sentence with single spaces between tokens and no leading or
        trailing whitespace.
    """
    whitespace_run = re.compile(r'\s+')
    collapsed = whitespace_run.sub(' ', sentence)
    return collapsed.strip()


def remove_non_ascii(text):
    """
    Drop every character whose code point lies outside the ASCII range.

    Parameters:
    text (str): The input text to clean.

    Returns:
    str: The text restricted to characters with a code point below 128.
    """
    def is_ascii(char):
        # ASCII covers the code points 0..127.
        return ord(char) < 128

    return ''.join(filter(is_ascii, text))

In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

import spacy

import re
import os
import json

import ast

from tqdm import tqdm

from nltk.corpus import stopwords
import gensim

In [30]:
from pattern.en import sentiment

In [22]:
# Load the 'en_core_web_lg' spaCy pipeline. `nlp` is used further down in
# sentiment_analysis() to split a review into sentences.
nlp = spacy.load('en_core_web_lg')

In [24]:
df = pd.read_csv('example.csv')

# Convert the string columns to dictionaries.
# ast.literal_eval only accepts Python literals, so parsing the cells does not
# execute arbitrary code.
df['ability'] = df['ability'].apply(ast.literal_eval)
df['aspects'] = df['aspects'].apply(ast.literal_eval)


# DataFrame.info() prints its summary itself and returns None, so wrapping it
# in print() only appended a stray "None" to the cell output.
df.info()
df.head()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 91 entries, 0 to 90
Data columns (total 4 columns):
 #   Column   Non-Null Count  Dtype 
---  ------   --------------  ----- 
 0   index    91 non-null     int64 
 1   review   91 non-null     object
 2   ability  91 non-null     object
 3   aspects  91 non-null     object
dtypes: int64(1), object(3)
memory usage: 3.0+ KB
None


Unnamed: 0,index,review,ability,aspects
0,0,"The staff were incredibly helpful and patient,...","{0: [('staff', 'were patient'), ('staff', 'wer...","{'staff': ['staff were patient.', 'staff were ..."
1,1,I had a great experience purchasing my phone h...,"{0: [('process', 'was smooth'), ('process', 'w...","{'process': ['process was smooth.', 'process w..."
2,2,"Their selection of phones is amazing, and the ...","{0: [('price', 'are competitive'), ('selection...","{'price': ['price are competitive.'], 'selecti..."
3,3,I appreciate how the staff walked me through s...,"{0: [('I', 'appreciate walked me'), ('I', 'app...",{'new device': ['I appreciate walked through s...
4,4,"Great customer service, I left with the phone ...","{0: [('I', 'left with phone'), ('question', 'a...",{'question': ['question answered.']}


In [52]:
# Load JSON data from a file.
# Judging by the displayed output, `data` maps an id string to a dict of
# aspect -> {'ADJ': [...], 'VERB': [...], 'OTHER': [...]}, where every list
# entry is [keywords, phrase].
with open('data-1.json', 'r') as json_file:
    data = json.load(json_file)

data

{'0': {'staff': {'ADJ': [[['helpful'], 'staff were helpful.'],
    [['patient'], 'staff were patient.']],
   'VERB': [],
   'OTHER': []}},
 '1': {'process': {'ADJ': [[['smooth'], 'process was smooth.'],
    [['quick'], 'process was quick.']],
   'VERB': [],
   'OTHER': []}},
 '2': {'selection': {'ADJ': [[['amazing'], 'selection is amazing.']],
   'VERB': [],
   'OTHER': []},
  'price': {'ADJ': [[['competitive'], 'price are competitive.']],
   'VERB': [],
   'OTHER': []}},
 '3': {'staff': {'ADJ': [],
   'VERB': [[['appreciate', 'walk'], 'I appreciate walked me.'],
    [['appreciate', 'walk'],
     'I appreciate walked through setting new device.']],
   'OTHER': []}},
 '4': {'question': {'ADJ': [],
   'VERB': [[['answer'], 'question answered.']],
   'OTHER': []}},
 '5': {'staff': {'ADJ': [],
   'VERB': [[['resist'], 'I could not resist upgrading.'],
    [['resist', 'offer'],
     'I could not resist offer amazing deals on phones.']],
   'OTHER': []}},
 '6': {'phones issue': {'ADJ': [],
 

In [53]:
def pattern_sentiment_analysis(text):
    """
    Score a text with Pattern and turn the polarity into a label.

    Parameters:
    text (str): The text to analyse.

    Returns:
    dict: Three keys - "sentiment" ("Positive" for a polarity above zero,
        "Negative" for a polarity below zero, "Neutral" otherwise),
        "polarity" and "subjectivity" (both as returned by Pattern).
    """
    polarity, subjectivity = sentiment(text)

    # Only the sign of the polarity decides the label.
    sentiment_label = (
        "Positive" if polarity > 0
        else "Negative" if polarity < 0
        else "Neutral"
    )

    analysis = {
        "sentiment": sentiment_label,
        "polarity": polarity,
        "subjectivity": subjectivity,
    }
    return analysis

In [54]:
# Quick sanity checks on two short phrases. Only the value of the last
# expression is shown as the cell output, so the first result is not displayed.
pattern_sentiment_analysis("representative were rude on phone.")
pattern_sentiment_analysis('store was chaotic.')

{'sentiment': 'Neutral', 'polarity': 0.0, 'subjectivity': 0.0}

In [55]:
# Hand-written sample with the same layout as one value of `data`:
# aspect -> {'ADJ' | 'VERB' | 'OTHER': [[keywords, phrase], ...]}.
# Note that the same phrase appears under more than one aspect.
example = {'store': {'ADJ': [[['chaotic'], 'store was chaotic.']],
                     'VERB': [],
                     'OTHER': [[['staff'], 'store was with unhelpful staff.'],
                               [['lines'], 'store was with long lines.']]},
           'staff': {'ADJ': [],
                     'VERB': [],
                     'OTHER': [[['staff'], 'store was with unhelpful staff.']]},
           'long lines': {'ADJ': [],
                          'VERB': [],
                          'OTHER': [[['lines'], 'store was with long lines.']]}}

example

{'store': {'ADJ': [[['chaotic'], 'store was chaotic.']],
  'VERB': [],
  'OTHER': [[['staff'], 'store was with unhelpful staff.'],
   [['lines'], 'store was with long lines.']]},
 'staff': {'ADJ': [],
  'VERB': [],
  'OTHER': [[['staff'], 'store was with unhelpful staff.']]},
 'long lines': {'ADJ': [],
  'VERB': [],
  'OTHER': [[['lines'], 'store was with long lines.']]}}

In [None]:
# TODO: unfinished scratch cell - the loop body was never written. The `pass`
# keeps the cell syntactically valid so that "Run All" does not stop here.
for aspect, item in example.items():
    pass

In [67]:
def fine_sentiment(input_):
    """
    Attach a sentiment label to every extracted phrase and to every aspect.

    Parameters:
    input_ (dict): Maps an aspect name to a dict of label -> list of entries.
        Each entry is a sequence whose first item is the keyword list and
        whose second item is the phrase text.

    Returns:
    dict: Maps each aspect to {'data': ..., 'sentiment': ...}. 'data' mirrors
        the input with the phrase sentiment appended to every entry, and
        'sentiment' is the label of all phrases of the aspect joined with
        spaces. An empty input gives an empty dict.
    """
    new_data = {}
    if not input_:
        return new_data

    for aspect, item in input_.items():
        dict_label = {}
        # Collected once per aspect, across all labels. Resetting this list
        # inside the label loop made the aspect sentiment depend only on the
        # last non-empty label, and left it unbound (or stale from the
        # previous aspect) when every label was empty.
        texts = []
        for label, entries in item.items():
            scored_entries = []
            for entry in entries:
                text = entry[-1]
                texts.append(text)
                result = pattern_sentiment_analysis(text)['sentiment']
                scored_entries.append([entry[0], entry[1], result])
            # Update data label
            dict_label[label] = scored_entries

        if texts:
            aspect_sentiment = pattern_sentiment_analysis(' '.join(texts))['sentiment']
        else:
            # No phrase at all for this aspect: nothing to score.
            aspect_sentiment = 'Neutral'

        new_data[aspect] = {'data': dict_label,
                            'sentiment': aspect_sentiment}
    return new_data

# Run fine_sentiment over every value of the loaded JSON `data`.
test = [fine_sentiment(x) for x in data.values()]

test

[{'staff': {'data': {'ADJ': [[['helpful'], 'staff were helpful.', 'Neutral'],
     [['patient'], 'staff were patient.', 'Neutral']],
    'VERB': [],
    'OTHER': []},
   'sentiment': 'Neutral'}},
 {'process': {'data': {'ADJ': [[['smooth'], 'process was smooth.', 'Positive'],
     [['quick'], 'process was quick.', 'Positive']],
    'VERB': [],
    'OTHER': []},
   'sentiment': 'Positive'}},
 {'selection': {'data': {'ADJ': [[['amazing'],
      'selection is amazing.',
      'Positive']],
    'VERB': [],
    'OTHER': []},
   'sentiment': 'Positive'},
  'price': {'data': {'ADJ': [[['competitive'],
      'price are competitive.',
      'Neutral']],
    'VERB': [],
    'OTHER': []},
   'sentiment': 'Neutral'}},
 {'staff': {'data': {'ADJ': [],
    'VERB': [[['appreciate', 'walk'], 'I appreciate walked me.', 'Neutral'],
     [['appreciate', 'walk'],
      'I appreciate walked through setting new device.',
      'Positive']],
    'OTHER': []},
   'sentiment': 'Positive'}},
 {'question': {'data'

In [68]:
# Put the loaded values and their sentiment-annotated versions side by side
# and write them to an Excel file.
testing = pd.DataFrame({'before': data.values(),
                        'after': test})

testing.to_excel('fail.xlsx', index=False)

In [96]:
import re

def word_exists(word, text):
    """
    Tell whether `word` occurs in `text` as a whole word, ignoring case.

    Parameters:
    word (str): The word to look for; it is matched literally.
    text (str): The text to search in.

    Returns:
    bool: True when the word is found between word boundaries, else False.
    """
    # Escape the word so it is matched literally, then anchor it on both
    # sides with word boundaries.
    matcher = re.compile(r'\b' + re.escape(word) + r'\b', re.IGNORECASE)
    return matcher.search(text) is not None

In [97]:
# Sanity check: the word is present as a whole word in the sentence.
word_exists("representative", "representative were rude on phone.")

True

In [98]:
# Keep only the candidate words that occur in the sentence as whole words.
store = ["phone", "apple", "rude"]

store = [s for s in store if word_exists(s, "representative were rude on phone.")]


store

['phone', 'rude']

In [112]:
def sentiment_analysis(data):
    """
    Give each aspect of a review the sentiment of the sentences that mention it.

    Parameters:
    data (tuple): A (text, aspects) pair. `text` is the review text and
        `aspects` is an iterable of aspect names.

    Returns:
    dict: Maps an aspect to a sentiment label. Once an aspect has been
        labelled 'Negative' it keeps that label; otherwise the label of the
        latest sentence tied to the aspect wins. Aspects that are never tied
        to a sentence are absent.
    """
    text, aspects = data
    doc = nlp(text)

    storage = {}
    for sentence in doc.sents:
        sentence_label = pattern_sentiment_analysis(sentence.text)['sentiment']

        # Get aspect of sentiment
        temp_aspects = [a for a in aspects if word_exists(a, sentence.text)]
        # If there is no aspect extracted, custom aspect might appear.
        if not temp_aspects:
            # Condition from custom aspect
            temp_aspects = [a for a in aspects if a in ['staff', 'store']]

        for a in temp_aspects:
            # The labels are capitalised, so the stored value is compared
            # as-is. Lower-casing it first could never equal 'Negative', which
            # let later sentences overwrite a negative label.
            if storage.get(a) != 'Negative':
                storage[a] = sentence_label
    return storage



In [106]:
# Small (text, aspects) pair to try sentiment_analysis on. This rebinds
# `example`, which held a dict in an earlier cell.
example = ('price are competitive. selection is amazing.', ['selection', 'price'])

sentiment_analysis(example)

{'price': 'Neutral', 'selection': 'Positive'}

In [107]:
# Label every text in `corpus`.
# NOTE(review): `corpus` is not defined in any cell shown here - presumably
# the list of review texts, one per row of `df`; confirm and define it above.
sentiments = [pattern_sentiment_analysis(text)['sentiment'] for text in corpus]
sentiments

['Neutral',
 'Positive',
 'Positive',
 'Positive',
 'Neutral',
 'Positive',
 'Positive',
 'Positive',
 'Positive',
 'Neutral',
 'Positive',
 'Positive',
 'Positive',
 'Positive',
 'Positive',
 'Positive',
 'Positive',
 'Neutral',
 'Positive',
 'Neutral',
 'Positive',
 'Neutral',
 'Positive',
 'Positive',
 'Neutral',
 'Positive',
 'Positive',
 'Positive',
 'Neutral',
 'Positive',
 'Positive',
 'Positive',
 'Neutral',
 'Positive',
 'Positive',
 'Positive',
 'Neutral',
 'Positive',
 'Positive',
 'Positive',
 'Neutral',
 'Positive',
 'Positive',
 'Neutral',
 'Positive',
 'Positive',
 'Neutral',
 'Positive',
 'Positive',
 'Positive',
 'Positive',
 'Positive',
 'Positive',
 'Positive',
 'Positive',
 'Neutral',
 'Positive',
 'Negative',
 'Neutral',
 'Negative',
 'Neutral',
 'Negative',
 'Negative',
 'Neutral',
 'Negative',
 'Neutral',
 'Positive',
 'Neutral',
 'Neutral',
 'Negative',
 'Negative',
 'Neutral',
 'Neutral',
 'Neutral',
 'Neutral',
 'Negative',
 'Neutral',
 'Negative',
 'Neutral',

In [108]:
# Pair each text in `corpus` with the 'aspects' value of the same position in
# `df` and compute the per-aspect sentiment for every pair.
aspect_based_sentiment = [sentiment_analysis(data) for data in zip(corpus, df['aspects'].values)]
aspect_based_sentiment

[{'staff': 'Neutral'},
 {'process': 'Positive'},
 {'selection': 'Positive', 'price': 'Neutral'},
 {'new device': 'Neutral'},
 {'question': 'Neutral'},
 {'upgrading': 'Neutral', 'deals': 'Positive'},
 {'technician': 'Positive'},
 {'experience': 'Positive', 'stuff': 'Positive'},
 {'variety': 'Positive', 'case': 'Positive'},
 {'staff': 'Neutral'},
 {'price': 'Positive'},
 {'staff': 'Positive'},
 {'service': 'Neutral'},
 {'deal': 'Positive', 'staff': 'Positive'},
 {'store': 'Positive', 'buying': 'Neutral'},
 {'old phone': 'Positive', 'deal': 'Positive'},
 {'service': 'Neutral'},
 {'staff': 'Neutral', 'phone': 'Neutral'},
 {'store': 'Positive'},
 {'staff': 'Neutral', 'variety': 'Neutral'},
 {'process': 'Neutral'},
 {'phone': 'Neutral'},
 {'phone': 'Positive'},
 {'service': 'Neutral'},
 {'store hands': 'Neutral'},
 {'staff': 'Positive'},
 {'deal': 'Neutral'},
 {'store': 'Positive', 'service': 'Positive'},
 {'staff': 'Neutral', 'lot': 'Neutral'},
 {'store': 'Positive'},
 {'phone': 'Positive'}

In [109]:
# Store both result lists as new columns of `df`.
df['aspect_sentiment'] = aspect_based_sentiment
df['sentiment'] = sentiments

In [110]:
# Write the table, including the new sentiment columns, to an Excel file
# without the index column.
df.to_excel('demo-sentiment.xlsx', index=False)