In [1]:
import spacy
from spacy import displacy

import networkx as nx

import re

import pandas as pd

import xlwt
from xlwt import Workbook

from textblob import TextBlob
import pandas as pd

import matplotlib.pyplot as plt
import numpy as np
import seaborn as sns 

from scipy.stats import pearsonr
from statistics import mean

from tasks_python_files.Task1_Negation_cue_detection import *
from tasks_python_files.Task2_Rules_based_neg_scope_spacy import *

from nltk import pos_tag, word_tokenize, RegexpParser

from negspacy.negation import Negex
from negspacy.termsets import termset



###### calculates sentiment of input text

In [2]:
def get_sentiment(input_text):
    """Return the TextBlob polarity score of ``input_text``.

    Args:
        input_text: Text to score; handed to ``TextBlob`` unchanged.

    Returns:
        The ``sentiment.polarity`` value TextBlob reports for the text.
    """
    return TextBlob(input_text).sentiment.polarity

###### returns list of sentiment scores of reviews

In [3]:
def get_senti_list(df_review):
    """Return the sentiment score of every review in ``df_review``.

    Args:
        df_review: DataFrame (or mapping) with a ``'reviews.text'`` column.

    Returns:
        List with one ``get_sentiment`` score per row, in row order. Each value
        is converted with ``str`` first, so non-string cells are scored too.
    """
    # Iterate over the values rather than ``column[i]``: integer ``[]`` on a
    # pandas Series is label-based, so a filtered/re-indexed frame would raise
    # KeyError or pick the wrong rows.
    return [get_sentiment(str(text)) for text in df_review['reviews.text']]

In [4]:
def decontract(phrase):
    """Expand ``n't`` contractions so the negation word ``not`` is explicit.

    ``can't``/``Can't`` become ``can not``, ``won't``/``Won't`` become
    ``will not`` and every remaining ``n't`` becomes `` not`` (e.g. ``don't`` ->
    ``do not``).

    Args:
        phrase: Text to rewrite.

    Returns:
        The text with the contractions expanded.
    """
    # The irregular forms must be handled before the generic "n't" rule.
    # ``[cC]`` (not ``[c|C]``): inside a character class ``|`` is a literal pipe,
    # so the old patterns also rewrote "|an't" / "|on't".
    phrase = re.sub(r"[cC]an't", "can not", phrase)
    phrase = re.sub(r"[wW]on't", "will not", phrase)

    phrase = re.sub(r"n't", " not", phrase)
    return phrase

###### gets sentences of a review

In [5]:
# Shared spaCy pipeline (model "en_core_web_sm"), loaded once at module level and
# reused by every helper in this notebook that calls ``nlp(...)``.
nlp = spacy.load("en_core_web_sm")
def get_sents(input_text_col):
    """Split every review into de-contracted sentences.

    Args:
        input_text_col: Iterable of review texts, e.g. the ``reviews.text``
            Series of a DataFrame.

    Returns:
        ``(sentences, reviews)``: ``sentences[k]`` is the list of sentence
        strings that the module-level ``nlp`` pipeline finds in the k-th review,
        each passed through ``decontract``; ``reviews`` is the list of the
        original review values in the same order.
    """
    # Materialise by position instead of ``input_text_col[i]``: integer ``[]`` on
    # a pandas Series is label-based and breaks on a non-0..n-1 index.
    reviews = list(input_text_col)
    sentences = [
        [decontract(sent.text) for sent in nlp(review).sents]
        for review in reviews
    ]
    return sentences, reviews

###### for a review returns negation cues and scope in txt_neg_cue and txt_neg_scope list:

In [6]:
def get_neg_scope_review(review):
    """Find the negation cues and negation scope of each sentence of a review.

    Args:
        review: List of sentence strings belonging to one review.

    Returns:
        ``(cues, scopes)``: two parallel lists with one entry per sentence. The
        cue entry is the combined cue list (4th value) of ``identify_all_negs``;
        the scope entry is whatever ``get_neg_scope_rule3_new`` returns for the
        parsed sentence and those cues.
    """
    cues_per_sentence = []
    scopes_per_sentence = []
    for sentence in review:
        parsed = nlp(sentence)
        # Only the combined cue list is used; the per-category lists are dropped.
        _, _, _, combined_cues = identify_all_negs(sentence)
        cues_per_sentence.append(combined_cues)
        scopes_per_sentence.append(get_neg_scope_rule3_new(parsed, combined_cues))
    return cues_per_sentence, scopes_per_sentence

###### for a review returns negation cues, negation scope and negated named entities  in txt_neg_cue and txt_neg_scope and txt_named_ents list:

In [7]:
def get_neg_cue_scope_named_ent(review_sents_list):
    """Collect negation cues, scopes, POS tags and named entities per sentence.

    Args:
        review_sents_list: List of sentence strings belonging to one review.

    Returns:
        Four parallel lists ``(cues, scopes, pos, named_ents)`` with one entry
        per sentence: the combined cue list (4th value) of ``identify_all_negs``
        followed by the first three values returned by
        ``get_neg_scope_pos_ne_rule3_new`` for the parsed sentence.
    """
    cues_by_sentence, scopes_by_sentence = [], []
    pos_by_sentence, ents_by_sentence = [], []
    for sentence in review_sents_list:
        parsed = nlp(sentence)
        _, _, _, combined_cues = identify_all_negs(sentence)
        cues_by_sentence.append(combined_cues)
        # The two trailing index lists returned by the helper are not needed here.
        scope, pos_tags, entities, _, _ = get_neg_scope_pos_ne_rule3_new(parsed, combined_cues)
        scopes_by_sentence.append(scope)
        pos_by_sentence.append(pos_tags)
        ents_by_sentence.append(entities)
    return cues_by_sentence, scopes_by_sentence, pos_by_sentence, ents_by_sentence

###### Negex output for a sentence:

In [8]:
# Lazily-built spaCy + negex pipeline shared by all calls to get_negex_output_sent.
_NEGEX_PIPELINE = None


def _get_negex_pipeline():
    """Build the spaCy pipeline with the "negex" component once and reuse it."""
    global _NEGEX_PIPELINE
    if _NEGEX_PIPELINE is None:
        pipeline = spacy.load("en_core_web_sm")
        pipeline.add_pipe(
            "negex",
            config={"neg_termset": termset("en").get_patterns()},
        )
        _NEGEX_PIPELINE = pipeline
    return _NEGEX_PIPELINE


def get_negex_output_sent(sentence):
    """Run negex on one sentence.

    The model used to be loaded and the pipeline rebuilt on every call, i.e.
    once per sentence of the dataset; the pipeline is now created on first use
    and cached. It is a separate pipeline object from the module-level ``nlp``.

    Args:
        sentence: Sentence text.

    Returns:
        ``(ne, ne_out)``: the text of every entity in ``doc.ents`` and, in the
        same order, that entity's ``_.negex`` value.
    """
    doc = _get_negex_pipeline()(sentence)
    ne = [ent.text for ent in doc.ents]
    ne_out = [ent._.negex for ent in doc.ents]
    return ne, ne_out

###### ne and negex out for single review which may/may not have multiple sentences:

In [9]:
def get_negex_output_review(review_sents_list):
    """Run negex on every sentence of one review.

    Args:
        review_sents_list: List of sentence strings belonging to one review.

    Returns:
        ``(ne_lt, ne_out_lt)``: per sentence, the entity list and the matching
        negex flag list produced by ``get_negex_output_sent``.
    """
    per_sentence = [get_negex_output_sent(sentence) for sentence in review_sents_list]
    ne_lt = [entities for entities, _ in per_sentence]
    ne_out_lt = [flags for _, flags in per_sentence]
    return ne_lt, ne_out_lt

###### ne and negex output for all reviews of dataset:

In [10]:
def get_negex_out_all_reviews(reviews_sents_list):
    """Run negex on every review of a dataset.

    Args:
        reviews_sents_list: List of reviews, each a list of sentence strings.

    Returns:
        ``(ne_reviews_lt, negex_out_reviews_lt)``: per review, the two values
        returned by ``get_negex_output_review``.
    """
    ne_reviews_lt, negex_out_reviews_lt = [], []
    for review_sents in reviews_sents_list:
        entities, flags = get_negex_output_review(review_sents)
        ne_reviews_lt.append(entities)
        negex_out_reviews_lt.append(flags)
    return ne_reviews_lt, negex_out_reviews_lt

###### file_path: path of the .xls file the results are written to; pass a different path for each subset of the original df, e.g. file_path = 'neg_df_20.xls' for the df_20 subset (first 20 reviews)
###### reviews_col: the reviews.text column, e.g. df_small['reviews.text']

In [11]:
def get_neg_scope_reviews_named_ent(ratings_col, reviews_col, file_path):
    """Write a per-sentence negation report for all reviews to an ``.xls`` file.

    Each review occupies one row per sentence (at least one row). The review
    text, rating, review sentiment and negation count go on the review's first
    row; the sentence, its named entities, negation cues, scope, POS and negex
    output go on the sentence's row.

    Args:
        ratings_col: Ratings, one per review (e.g. ``df['reviews.rating']``).
        reviews_col: Review texts (e.g. ``df['reviews.text']``).
        file_path: Path of the workbook to write.

    Returns:
        ``(neg_count_list, negex_review_count)``: the number of negation cues
        found in each review, and the number of reviews in which negex flagged
        at least one entity as negated.
    """
    reviews_sents_list, reviews_lt = get_sents(reviews_col)
    # Positional copy: ``ratings_col[i]`` / ``reviews_col[i]`` are label-based on
    # a pandas Series and break when the index is not 0..n-1.
    ratings_lt = list(ratings_col)

    wb = Workbook()
    sheet1 = wb.add_sheet('Sheet 1')
    headers = (
        "Reviews",
        "Ratings",
        "Sentiment of reviews",
        "Negation count",
        "Sentences in review",
        "Named Entities",
        "Negation cues",
        "Negation scope",
        "POS of negation scope",
        "Negex named entities",
        "Negex ne entities output",
    )
    for col, header in enumerate(headers):
        sheet1.write(0, col, header)

    # number of negation cues of each review of the dataset
    neg_count_list = []
    negex_review_count = 0
    first_row = 1  # sheet row on which the current review starts
    try:
        for i, sents in enumerate(reviews_sents_list):
            review = reviews_lt[i]
            neg_cues, neg_scopes, part_of_speech, named_ents = get_neg_cue_scope_named_ent(sents)

            negex_ne_l, negex_out_l = get_negex_output_review(sents)
            # negex_out_l holds one list of flags per sentence, so the flags have
            # to be flattened; ``negex_out_l.count(True)`` was always 0.
            if any(flag for sent_flags in negex_out_l for flag in sent_flags):
                negex_review_count += 1

            sheet1.write(first_row, 0, review)
            sheet1.write(first_row, 1, str(ratings_lt[i]))
            sheet1.write(first_row, 2, get_sentiment(review))

            count_negs = 0
            for j, sentence in enumerate(sents):
                row = first_row + j
                sheet1.write(row, 4, sentence)
                sheet1.write(row, 5, repr(named_ents[j]))
                sheet1.write(row, 6, repr(neg_cues[j]))
                sheet1.write(row, 7, repr(neg_scopes[j]))
                sheet1.write(row, 8, repr(part_of_speech[j]))
                sheet1.write(row, 9, repr(negex_ne_l[j]))
                sheet1.write(row, 10, repr(negex_out_l[j]))
                count_negs += len(neg_cues[j])
            neg_count_list.append(count_negs)
            sheet1.write(first_row, 3, count_negs)

            # A review without sentences still used its first row; advance by at
            # least one so the next review does not write to the same cells.
            first_row += max(len(sents), 1)
    finally:
        # Save once instead of after every review; ``finally`` keeps the rows
        # written so far on disk if processing fails part-way.
        wb.save(file_path)
    return neg_count_list, negex_review_count

###### Code to test above methods

In [12]:
# df_senti_negs = create_df_senti_neg_count(df, neg_count_lt)

# print("Correlation and p-value of sentiment and ratings of reviews: \n")
# print(correlation_between_variables(df_senti_negs['sentiment_score'], df_senti_negs['reviews.rating']))

# plot_two_columns(df_senti_negs['reviews.rating'], df_senti_negs['sentiment_score'], "Ratings per review", "Sentiment per review", "Sentiment vs Ratings", "corr_senti_vs_rating_df.png")

# print("Correlation and p-value of sentiment and negation count of review: \n")
# print(correlation_between_variables(df_senti_negs['sentiment_score'], df_senti_negs['negations_count']))

# plot_two_columns(df_senti_negs['negations_count'], df_senti_negs['sentiment_score'], "Negation count per review", "Sentiment per review", "Sentiment vs Negation count", "corr_senti_neg_count_df.png")

`Sentiment is calculated per review, whereas the negation count, negation cues and negation scopes are calculated per sentence of the review. Two dataframes are therefore created. 1st: the negation cues and scopes of each sentence of each review. 2nd: the original dataset with an added sentiment column (computed by TextBlob) for each review.`

###### 1st dataframe
example:  #df_new, avg_senti_review_list = create_dataframe_neg_cues_scope(df_10)
###### columns:
reviews, sentence_in_review, sentiment_score, negation_cues, negation_scopes, neg_named_entities

In [13]:
def create_dataframe_neg_cues_scope(df_review):
    """Build the per-sentence negation dataframe for a set of reviews.

    The frame has one row per sentence; the row label is the position of the
    review the sentence belongs to. It is assembled in one ``pd.DataFrame`` call
    because ``DataFrame.append`` (used previously, once per row) was removed in
    pandas 2.0 and copied the whole frame on every call.

    Args:
        df_review: DataFrame with a ``'reviews.text'`` column.

    Returns:
        ``(df_new, avg_senti_review)``. ``df_new`` has the columns ``reviews``,
        ``sentences_in_review``, ``sentiment_score``, ``named_entities``,
        ``negation_cues``, ``negation_scopes``, ``POS_negation_scope``,
        ``negex_named_entities`` and ``negex_ne_output``. ``avg_senti_review``
        holds the mean sentence sentiment of each review; a review without any
        sentence is treated as neutral (0.0) instead of raising
        ``StatisticsError``.
    """
    reviews_sents_lt, reviews_lt = get_sents(df_review['reviews.text'])

    column_names = (
        'reviews',
        'sentences_in_review',
        'sentiment_score',
        'named_entities',
        'negation_cues',
        'negation_scopes',
        'POS_negation_scope',
        'negex_named_entities',
        'negex_ne_output',
    )
    columns = {name: [] for name in column_names}
    row_labels = []
    avg_senti_review = []

    for i, sents in enumerate(reviews_sents_lt):
        # sentiment of each sentence and the review's average sentence sentiment
        sent_scores = [get_sentiment(sentence) for sentence in sents]
        avg_senti_review.append(mean(sent_scores) if sent_scores else 0.0)

        neg_cues, neg_scope, part_of_speech, ne = get_neg_cue_scope_named_ent(sents)
        negex_ne_l, negex_out_l = get_negex_output_review(sents)

        for p, sentence in enumerate(sents):
            row_labels.append(i)
            columns['reviews'].append(reviews_lt[i])
            columns['sentences_in_review'].append(sentence)
            columns['sentiment_score'].append(sent_scores[p])
            columns['named_entities'].append(ne[p])
            columns['negation_cues'].append(neg_cues[p])
            columns['negation_scopes'].append(neg_scope[p])
            columns['POS_negation_scope'].append(part_of_speech[p])
            columns['negex_named_entities'].append(negex_ne_l[p])
            columns['negex_ne_output'].append(negex_out_l[p])

    df_new = pd.DataFrame(columns, index=row_labels)
    return df_new, avg_senti_review

###### code to test above methods

In [14]:
# df = pd.read_csv("./datasets/Restaurant_Reviews.csv")
# df = df.drop('Unnamed: 0', axis='columns')
# df = df.rename(columns = {'Review':'reviews.text', 'Liked': 'reviews.rating'})

In [15]:
#df = pd.read_csv('./datasets/Datafiniti_Hotel_Reviews.csv')

In [16]:
#df_5 = df.head(5)

In [17]:
# output_path = './results_review/df_5_negs.xls'
# neg_count_lt, negex_out_cnt_list = get_neg_scope_reviews_named_ent(df_5['reviews.rating'], df_5['reviews.text'], output_path)

In [18]:
#df_neg_ne_pos, avg_senti_scores_lt = create_dataframe_neg_cues_scope(df_50)

###### 2nd dataframe
adding columns (sentiment_score, average sentiment score and negations_count ) to original dataset

In [19]:
def create_df_senti_neg_count(df_review, neg_count_list, avg_senti_review_list):
    """Return a copy of ``df_review`` extended with three per-review columns.

    Args:
        df_review: Original reviews DataFrame; it is not modified.
        neg_count_list: Negation count of each review.
        avg_senti_review_list: Average sentence sentiment of each review.

    Returns:
        New DataFrame with the added columns ``sentiment_score`` (computed by
        ``get_senti_list``), ``negations_count`` and ``average_sentiment_score``.
    """
    return df_review.assign(
        sentiment_score=get_senti_list(df_review),
        negations_count=neg_count_list,
        average_sentiment_score=avg_senti_review_list,
    )

###### Finding correlation between two variables:

In [20]:
def correlation_between_variables(x,y):
    """Return the Pearson correlation of ``x`` and ``y`` with its p-value.

    Args:
        x: First sequence of numbers.
        y: Second sequence of numbers, same length as ``x``.

    Returns:
        ``(corr, p_val)`` as computed by ``scipy.stats.pearsonr``.
    """
    coefficient, p_value = pearsonr(x, y)
    return coefficient, p_value

###### method to plot sentiment score of each review vs ratings of each review:

In [21]:
def plot_ratings_senti_reviews(df_review, senti_review_list):
    """Show a scatter plot of review sentiment (y) against review rating (x).

    Args:
        df_review: DataFrame with a ``'reviews.rating'`` column.
        senti_review_list: Sentiment score of each review, aligned with the rows.
    """
    ratings = df_review["reviews.rating"]
    plt.scatter(ratings, senti_review_list)
    plt.title('Sentiment vs Rating', fontsize=15)
    plt.xlabel('Rating',fontsize=10)
    plt.ylabel('Review_sentiment',fontsize=10)
    # The figure is only displayed here, not written to disk.
    plt.show()

###### Divide dataset based on ratings: 1 to 2: df_negative_review, 2 to 3 (2 not inclusive): df_neutral_review and 3 to 5 (3 not inclusive): df_positive_review:

In [22]:
def create_df_negative_reviews(df_original, num_rows_needed):
    """Return the first ``num_rows_needed`` reviews rated from 1 to 2 (inclusive).

    Args:
        df_original: DataFrame with a ``'reviews.rating'`` column.
        num_rows_needed: Maximum number of rows to keep.

    Returns:
        The selected rows with a fresh 0..n-1 index; the original row labels are
        kept in the ``previous_index`` column.
    """
    is_negative = df_original['reviews.rating'].between(1, 2)
    return (
        df_original.loc[is_negative]
        .head(num_rows_needed)
        .reset_index()
        .rename(columns={'index': 'previous_index'})
    )

In [23]:
def create_df_neutral_reviews(df_original, num_rows_needed):
    """Return the first ``num_rows_needed`` reviews with a rating in (2, 3].

    Args:
        df_original: DataFrame with a ``'reviews.rating'`` column.
        num_rows_needed: Maximum number of rows to keep.

    Returns:
        The selected rows with a fresh 0..n-1 index; the original row labels are
        kept in the ``previous_index`` column.
    """
    ratings = df_original['reviews.rating']
    is_neutral = ratings.gt(2) & ratings.le(3)
    return (
        df_original.loc[is_neutral]
        .head(num_rows_needed)
        .reset_index()
        .rename(columns={'index': 'previous_index'})
    )

In [24]:
def create_df_positive_reviews(df_original, num_rows_needed):
    """Return a subset of positive reviews (rating above 3, up to 5).

    Not a general-purpose sampler: the rating range is split into four bins --
    (3, 4), exactly 4, (4, 5) and exactly 5 -- and the first
    ``num_rows_needed / 4`` rows of each bin are taken, so ``num_rows_needed``
    should be a multiple of 4. A bin with fewer rows contributes fewer rows.

    Args:
        df_original: DataFrame with a ``'reviews.rating'`` column.
        num_rows_needed: Total number of rows wanted.

    Returns:
        The selected rows, bin after bin, with a fresh 0..n-1 index; the
        original row labels are kept in the ``previous_index`` column.
    """
    per_bin = int(num_rows_needed/4)
    ratings = df_original['reviews.rating']

    bins = [
        df_original.loc[(ratings > 3) & (ratings < 4)].head(per_bin),
        df_original.loc[ratings == 4].head(per_bin),
        df_original.loc[(ratings > 4) & (ratings < 5)].head(per_bin),
        df_original.loc[ratings == 5].head(per_bin),
    ]
    # pd.concat replaces the chained DataFrame.append calls, which no longer
    # exist in pandas 2.0.
    df_pos_reviews = pd.concat(bins, ignore_index=False, sort=False)
    df_pos_reviews = df_pos_reviews.reset_index()
    df_pos_reviews = df_pos_reviews.rename(columns = {'index':'previous_index'})
    return df_pos_reviews

In [25]:
def plot_senti_vs_neg_count(no_of_neg_list, senti_review_list):
    """Scatter-plot review sentiment (y) against negation count (x).

    The figure is saved as ``rat_senti.png`` in the working directory and then
    shown.

    Args:
        no_of_neg_list: Negation count of each review.
        senti_review_list: Sentiment score of each review, in the same order.
    """
    plt.scatter(no_of_neg_list, senti_review_list)
    plt.title('Sentiment vs Negation count', fontsize=15)
    plt.xlabel('Negation Count',fontsize=10)
    plt.ylabel('Review_sentiment',fontsize=10)
    # NOTE(review): the file name looks inherited from the rating plot -- confirm
    # it is intended before relying on it.
    plt.savefig("rat_senti.png", dpi=500)
    plt.show()

###### df division based on sentiment score
`-1 to 0(not inclusive) : negative ` `0: neutral `,  `0(not inclusive) to 1: positive` 

In [26]:
def get_df_neg_reviews_senti(df_original, num_rows_needed):
    """Return the first ``num_rows_needed`` reviews with sentiment in [-1, 0).

    Args:
        df_original: DataFrame with a ``'sentiment_score'`` column.
        num_rows_needed: Maximum number of rows to keep.

    Returns:
        The selected rows with a fresh 0..n-1 index; the original row labels are
        kept in the ``previous_index`` column.
    """
    scores = df_original['sentiment_score']
    is_negative = scores.lt(0) & scores.ge(-1)
    return (
        df_original.loc[is_negative]
        .head(num_rows_needed)
        .reset_index()
        .rename(columns={'index': 'previous_index'})
    )

In [27]:
def get_df_neu_reviews_senti(df_original, num_rows_needed):
    """Return the first ``num_rows_needed`` reviews whose sentiment is exactly 0.

    Args:
        df_original: DataFrame with a ``'sentiment_score'`` column.
        num_rows_needed: Maximum number of rows to keep.

    Returns:
        The selected rows with a fresh 0..n-1 index; the original row labels are
        kept in the ``previous_index`` column.
    """
    is_neutral = df_original['sentiment_score'].eq(0)
    return (
        df_original.loc[is_neutral]
        .head(num_rows_needed)
        .reset_index()
        .rename(columns={'index': 'previous_index'})
    )

In [28]:
def get_df_pos_reviews_senti(df_original, num_rows_needed):
    """Return the first ``num_rows_needed`` reviews with sentiment in (0, 1].

    Args:
        df_original: DataFrame with a ``'sentiment_score'`` column.
        num_rows_needed: Maximum number of rows to keep.

    Returns:
        The selected rows with a fresh 0..n-1 index; the original row labels are
        kept in the ``previous_index`` column.
    """
    scores = df_original['sentiment_score']
    is_positive = scores.gt(0) & scores.le(1)
    return (
        df_original.loc[is_positive]
        .head(num_rows_needed)
        .reset_index()
        .rename(columns={'index': 'previous_index'})
    )

In [29]:
def plot_two_columns(df_review_col1, df_review_col2, x_label, y_label, title,filename):
    """Scatter-plot two columns, save the figure under ``./results_pics/`` and show it.

    Args:
        df_review_col1: Values for the x axis.
        df_review_col2: Values for the y axis.
        x_label: Label of the x axis.
        y_label: Label of the y axis.
        title: Figure title.
        filename: File name of the saved image, relative to ``./results_pics/``.
    """
    import os

    output_path = "./results_pics/"+filename
    # savefig does not create missing folders; make sure the target exists so a
    # fresh checkout / fresh kernel run does not fail here.
    os.makedirs(os.path.dirname(output_path), exist_ok=True)

    plt.scatter(df_review_col1, df_review_col2)
    plt.xlabel(x_label,fontsize=10)
    plt.ylabel(y_label,fontsize=10)
    plt.title(title, fontsize=15)
    plt.savefig(output_path, dpi=500)
    plt.show()

###### For a review, returns the negation cues in three lists (standalone cues such as "not"/"no" in the first, prefix cues in the second, suffix cues in the third), and likewise three lists each for the scopes and the POS tags:

In [30]:
def get_neg_cue_scope_modified(review_sents_list):
    """Collect cues, scopes and POS tags per sentence, separately per cue detector.

    Each sentence is tokenised with ``word_tokenize`` and lower-cased, then run
    through ``identify_negative_words``, ``identify_prefix_neg_words`` and
    ``identify_suffix_neg_words`` in that order. For every cue list the scope
    and POS tags come from ``get_neg_scope_pos_ne_rule3_new``.

    Args:
        review_sents_list: List of sentence strings belonging to one review.

    Returns:
        A 10-tuple of per-sentence lists: the three cue lists, the three scope
        lists, the three POS lists (each in detector order) and finally the
        named entities returned alongside the first detector's scope.
    """
    detectors = (
        identify_negative_words,
        identify_prefix_neg_words,
        identify_suffix_neg_words,
    )
    cue_lists = ([], [], [])
    scope_lists = ([], [], [])
    pos_lists = ([], [], [])
    named_entities = []

    for sentence in review_sents_list:
        doc = nlp(sentence)
        tokens = [token.lower() for token in word_tokenize(sentence)]

        for kind, detect in enumerate(detectors):
            cues = detect(tokens)
            cue_lists[kind].append(cues)
            # The two trailing index lists returned by the helper are not used.
            scope, pos_tags, entities, _, _ = get_neg_scope_pos_ne_rule3_new(doc, cues)
            scope_lists[kind].append(scope)
            pos_lists[kind].append(pos_tags)
            if kind == 0:
                # Named entities are only kept from the first detector's call.
                named_entities.append(entities)

    return (*cue_lists, *scope_lists, *pos_lists, named_entities)

In [31]:
#a, b, c, s1,s2,s3, ne1,ne2,ne3 = get_neg_cue_scope_part1(reviews_sents_lt1[17])

###### 1st dataframe :: negation cues & scopes are listed in separate columns for operating, affix & suffix negations:
example: df_new, avg_senti_review_list = create_dataframe_neg_cues_scope_modified(df_10)

columns:

reviews,sentence_in_review, sentiment_score, negation_cues, negation_scopes, negation_affix_scopes, 
negation_suffix_scopes & neg_named_entities

In [32]:
def create_dataframe_neg_cues_scope_modified(df_review):
    """Build the per-sentence negation dataframe with cues split by detector.

    The frame has one row per sentence; the row label is the position of the
    review the sentence belongs to. It is assembled in one ``pd.DataFrame`` call
    because ``DataFrame.append`` (used previously, once per row) was removed in
    pandas 2.0 and copied the whole frame on every call.

    Args:
        df_review: DataFrame with a ``'reviews.text'`` column.

    Returns:
        ``(df_new, avg_senti_review)``. ``df_new`` has the columns ``reviews``,
        ``sentences_in_review``, ``sentiment_score``, ``named_entities``,
        ``negation_cues``, ``negation_affix_cues``, ``negation_suffix_cues``,
        ``negation_scopes``, ``negation_affix_scopes``,
        ``negation_suffix_scopes``, ``neg_scope_pos``, ``neg_affix_scope_pos``
        and ``neg_suffix_scope_pos``. ``avg_senti_review`` holds the mean
        sentence sentiment of each review; a review without any sentence is
        treated as neutral (0.0) instead of raising ``StatisticsError``.
    """
    reviews_sents_lt, reviews_lt = get_sents(df_review['reviews.text'])

    # Column names in the order get_neg_cue_scope_modified returns its values
    # (three cue lists, three scope lists, three POS lists, named entities).
    result_columns = (
        'negation_cues',
        'negation_affix_cues',
        'negation_suffix_cues',
        'negation_scopes',
        'negation_affix_scopes',
        'negation_suffix_scopes',
        'neg_scope_pos',
        'neg_affix_scope_pos',
        'neg_suffix_scope_pos',
        'named_entities',
    )
    # Final column order of the dataframe.
    column_names = (
        'reviews',
        'sentences_in_review',
        'sentiment_score',
        'named_entities',
    ) + result_columns[:-1]
    columns = {name: [] for name in column_names}
    row_labels = []
    avg_senti_review = []

    for i, sents in enumerate(reviews_sents_lt):
        # sentiment of each sentence and the review's average sentence sentiment
        sent_scores = [get_sentiment(sentence) for sentence in sents]
        avg_senti_review.append(mean(sent_scores) if sent_scores else 0.0)

        per_sentence_results = get_neg_cue_scope_modified(sents)

        for p, sentence in enumerate(sents):
            row_labels.append(i)
            columns['reviews'].append(reviews_lt[i])
            columns['sentences_in_review'].append(sentence)
            columns['sentiment_score'].append(sent_scores[p])
            for name, values in zip(result_columns, per_sentence_results):
                columns[name].append(values[p])

    df_new = pd.DataFrame(columns, index=row_labels)
    return df_new, avg_senti_review

In [33]:
#d, avg_senti= create_dataframe_neg_cues_scope_modified(df_5)

###### for a review returns:: negation cues, negation scopes with negation cue, pos and named entities:

In [34]:
def get_neg_cue_scope_with_neg(reviews_list):
    """Collect cues, scopes, POS tags and named entities via ``get_neg_scope_with_neg``.

    Args:
        reviews_list: List of sentence strings belonging to one review.

    Returns:
        Four parallel lists ``(cues, scopes, pos, named_ents)`` with one entry
        per sentence: the combined cue list (4th value) of ``identify_all_negs``
        followed by the three values returned by ``get_neg_scope_with_neg`` for
        the parsed sentence.
    """
    cues_by_sentence, scopes_by_sentence = [], []
    pos_by_sentence, ents_by_sentence = [], []
    for sentence in reviews_list:
        parsed = nlp(sentence)
        _, _, _, combined_cues = identify_all_negs(sentence)
        cues_by_sentence.append(combined_cues)
        scope, pos_tags, entities = get_neg_scope_with_neg(parsed, combined_cues)
        scopes_by_sentence.append(scope)
        pos_by_sentence.append(pos_tags)
        ents_by_sentence.append(entities)
    return cues_by_sentence, scopes_by_sentence, pos_by_sentence, ents_by_sentence

###### 1st dataframe :: negation cues, negation scopes with negation cue, pos and named entities. 
example: df_new, avg_senti_review_list = create_dataframe_neg_cues_scope_with_neg(df_10)

columns:

reviews,sentence_in_review, sentiment_score, negation_cues, negation_scopes, neg_named_entities

In [35]:
def create_dataframe_neg_cues_scope_with_neg(df_review):
    """Build the per-sentence negation dataframe using ``get_neg_cue_scope_with_neg``.

    The frame has one row per sentence; the row label is the position of the
    review the sentence belongs to. It is assembled in one ``pd.DataFrame`` call
    because ``DataFrame.append`` (used previously, once per row) was removed in
    pandas 2.0 and copied the whole frame on every call.

    Args:
        df_review: DataFrame with a ``'reviews.text'`` column.

    Returns:
        ``(df_new, avg_senti_review)``. ``df_new`` has the columns ``reviews``,
        ``sentences_in_review``, ``sentiment_score``, ``named_entities``,
        ``negation_cues``, ``negation_scopes``, ``POS_negation_scope``,
        ``negex_named_entities`` and ``negex_ne_output``. ``avg_senti_review``
        holds the mean sentence sentiment of each review; a review without any
        sentence is treated as neutral (0.0) instead of raising
        ``StatisticsError``.
    """
    reviews_sents_lt, reviews_lt = get_sents(df_review['reviews.text'])

    column_names = (
        'reviews',
        'sentences_in_review',
        'sentiment_score',
        'named_entities',
        'negation_cues',
        'negation_scopes',
        'POS_negation_scope',
        'negex_named_entities',
        'negex_ne_output',
    )
    columns = {name: [] for name in column_names}
    row_labels = []
    avg_senti_review = []

    for i, sents in enumerate(reviews_sents_lt):
        # sentiment of each sentence and the review's average sentence sentiment
        sent_scores = [get_sentiment(sentence) for sentence in sents]
        avg_senti_review.append(mean(sent_scores) if sent_scores else 0.0)

        neg_cues, neg_scope, part_of_speech, ne = get_neg_cue_scope_with_neg(sents)
        negex_ne_l, negex_out_l = get_negex_output_review(sents)

        for p, sentence in enumerate(sents):
            row_labels.append(i)
            columns['reviews'].append(reviews_lt[i])
            columns['sentences_in_review'].append(sentence)
            columns['sentiment_score'].append(sent_scores[p])
            columns['named_entities'].append(ne[p])
            columns['negation_cues'].append(neg_cues[p])
            columns['negation_scopes'].append(neg_scope[p])
            columns['POS_negation_scope'].append(part_of_speech[p])
            columns['negex_named_entities'].append(negex_ne_l[p])
            columns['negex_ne_output'].append(negex_out_l[p])

    df_new = pd.DataFrame(columns, index=row_labels)
    return df_new, avg_senti_review

###### get negation scope output of all reviews (pass reviews column of the dataset in below method)

In [36]:
def get_neg_scope_reviews(reviews_col):
    """Compute the negation count and negation scopes of every review.

    Args:
        reviews_col: Review texts, e.g. the ``reviews.text`` column.

    Returns:
        ``(neg_count_list, neg_scopes_list)``: per review, the total number of
        negation cues over all its sentences and the per-sentence scope list
        returned by ``get_neg_cue_scope_named_ent``.
    """
    reviews_sents_list, _ = get_sents(reviews_col)

    neg_count_list, neg_scopes_list = [], []
    for sentences in reviews_sents_list:
        neg_cues, neg_scopes, _, _ = get_neg_cue_scope_named_ent(sentences)
        neg_scopes_list.append(neg_scopes)
        # Total number of cues over the sentences of this review.
        neg_count_list.append(sum(len(neg_cues[j]) for j in range(len(sentences))))

    return neg_count_list, neg_scopes_list

###### get negex output of all reviews (pass reviews column of the dataset in below method)

In [37]:
def get_negex_reviews(reviews_col):
    """Run negex over all reviews and report the negated entities.

    Previously this always returned ``([], 0)``: each element of the negex
    output is a per-sentence list of flags (never ``== True``), the loop broke
    after the first sentence, and the entity list was never filled.

    Args:
        reviews_col: Review texts, e.g. the ``reviews.text`` column.

    Returns:
        ``(ne_negated_list, negex_neg_review_count)``: per review, the list of
        entity texts negex flagged as negated (empty when there are none), and
        the number of reviews with at least one negated entity.
    """
    reviews_sents_list, _ = get_sents(reviews_col)
    ne_negated_list = []
    negex_neg_review_count = 0
    for sents in reviews_sents_list:
        negex_ne_l, negex_out_l = get_negex_output_review(sents)

        # Pair every entity with its flag across all sentences of the review.
        negated_entities = [
            entity
            for entities, flags in zip(negex_ne_l, negex_out_l)
            for entity, flag in zip(entities, flags)
            if flag
        ]
        ne_negated_list.append(negated_entities)
        if negated_entities:
            negex_neg_review_count += 1
    return ne_negated_list, negex_neg_review_count