In [None]:
from random import choice
from tqdm.notebook import tqdm
from json import loads
from pprint import pprint
from textwrap import dedent
import re

from collections import Counter
import seaborn as sb
import matplotlib.pyplot as plt

In [None]:
import os 
import logging
import sys

# Setup logging

log = logging.getLogger(__name__)
logging.basicConfig(format="%(asctime)s | %(levelname)s | %(message)s", level=logging.INFO)

# Setup OPENAI_API_KEY
# The key is expected to come from the environment (e.g. exported before Jupyter
# is started). Assigning a literal here would either store a secret in the
# notebook or, with an empty string, overwrite a key that is already exported.
if not os.environ.get("OPENAI_API_KEY"):
    log.warning("OPENAI_API_KEY is not set; requests to the OpenAI API will not authenticate.")

# Update sys.path (or use PYTHONPATH)

sys.path.insert(0, '..')

#### Get Data

In [None]:
import pandas as pd

# Load the reviews and drop rows without text. The index is renumbered to
# 0..n-1 afterwards so that positional and label-based row access agree
# downstream (dropna() otherwise leaves gaps in the index labels).
df = (
    pd.read_csv("./data/input/laptop_quad_test_input.csv")
    .dropna(subset=["text"])
    .reset_index(drop=True)
)
df.head()

In [None]:
df.shape

In [None]:
def fixJSON(jsonStr):
    """Best-effort recovery of a JSON array from a noisy completion string.

    Repair steps, in order:
      1. remove every backslash;
      2. remove the padding spaces between a double quote and an adjacent
         brace, colon or comma;
      3. keep the text that follows the first '[' up to the next '[' or the
         first ']' (whichever comes first) and wrap it in '[' ... ']', so a
         completion that was cut off before its closing bracket still parses.

    Args:
        jsonStr: raw completion text expected to contain a JSON array.

    Returns:
        The parsed list, or [] when the input is not a string, contains no
        '[', or is still not valid JSON after the repairs.
    """
    if not isinstance(jsonStr, str):
        return []

    cleaned = jsonStr.replace('\\', '')

    # Literal replacements; the order matters because the three-part patterns
    # (quote-space-colon-space-quote) must be collapsed before their halves.
    whitespace_fixes = (
        ('{ "', '{"'),
        ('" }', '"}'),
        ('" : "', '":"'),
        ('" :', '":'),
        (': "', ':"'),
        ('" , "', '","'),
        ('" ,', '",'),
        (', "', ',"'),
    )
    for padded, tight in whitespace_fixes:
        cleaned = cleaned.replace(padded, tight)

    # Quotes next to '[' / ']' are deliberately left untouched: rewriting them
    # through a regex replacement containing "\[" inserts a literal backslash
    # and makes the array unparseable.
    _, opener, tail = cleaned.partition('[')
    if not opener:
        return []

    body = tail.split('[', 1)[0].split(']', 1)[0]

    try:
        return loads('[' + body + ']')
    except ValueError:
        # json.JSONDecodeError is a ValueError: the text is beyond repair.
        return []

#### Configure GPT-3

In [None]:
import openai
from textwrap import dedent

openai.api_key = os.getenv("OPENAI_API_KEY")

# Few-shot prompt: an instruction, one example review and the JSON array expected
# for it. The example array has to be valid JSON itself, because the completion
# imitates its syntax and is parsed with json.loads afterwards.
ABSA_PROMPT = dedent(
    f"""
    Please extract aspect categories, aspect terms, related segments and related sentiments from the following text and format output in JSON:

    This laptop is lightweight and has a decent keyboard but it has a slow processor.

    [
      {{ "category": "Design_Features", "aspect": "Laptop", "segment": "This laptop is lightweight", "sentiment": "positive" }},
      {{ "category": "General", "aspect": "Keyboard", "segment": "has a decent keyboard", "sentiment": "neutral" }},
      {{ "category": "Operation_Performance", "aspect": "CPU", "segment": "it has a slow processor", "sentiment": "negative" }}
    ]
"""
)

# ABSA_PROMPT = dedent(
#     f"""
#     Please extract aspect categories, aspect terms, related segments and related sentiments from the following text and format output in JSON:

#     The menu has small variety of food. The drinks are quite good though, so the restaurant is not that bad but not special either.

#     [
#       {{ "category": "Style_Options", aspect": "Food", "segment": "The menu has small variety of food, "sentiment": "negative" }},
#       {{ "category": "Quality", "aspect": "Drinks", "segment": "The drinks are quite good", "sentiment": "positive" }},
#       {{ "category": "General", "aspect": "Restaurant", "segment": "the restaurant is not that bad but not special either", "sentiment": "neutral" }}
#     ]
# """
# )



def analyze(
    text,
    prompt_text=ABSA_PROMPT,
    extra_prompt="",
    temperature=0.5,
    max_tokens=2048,
    top_p=1,
    frequency_penalty=0,
    presence_penalty=0,
):
    """Request a completion for one piece of text and return the raw API response.

    The prompt sent to the model is `prompt_text`, `extra_prompt` and `text`
    joined by newlines, in that order.

    Args:
        text: the review to analyse; placed last in the prompt.
        prompt_text: instruction / few-shot prefix (defaults to ABSA_PROMPT).
        extra_prompt: optional text inserted between the prefix and `text`.
        temperature, max_tokens, top_p, frequency_penalty, presence_penalty:
            forwarded unchanged to the completion request.

    Returns:
        Whatever `openai.Completion.create` returns for the request.
    """
    request = {
        "model": "text-davinci-002",
        "prompt": "{}\n{}\n{}".format(prompt_text, extra_prompt, text),
        "temperature": temperature,
        "max_tokens": max_tokens,
        "top_p": top_p,
        "frequency_penalty": frequency_penalty,
        "presence_penalty": presence_penalty,
    }
    return openai.Completion.create(**request)

#### Run GPT-3

In [None]:
analysis_results = []
# Reviews whose completion parsed cleanly, kept as "\n<text>\n<json>" strings.
# They are collected only; every request below is sent with an empty extra_prompt.
extra_prompts = []

logging.getLogger("openai").setLevel(logging.INFO)
logging.getLogger("requests").setLevel(logging.WARNING)

# Iterate over the values of the "text" column instead of df.loc[i, "text"]:
# rows removed by dropna() leave gaps in the index labels, so a label lookup
# for every i in range(len(df)) can raise KeyError.
for text in tqdm(df["text"], total=len(df), desc="Analyzing reviews"):
    log.info(f"Analyzing feedback - \nText: {text}\n")

    res = analyze(
        text=text,
        extra_prompt="",
        temperature=0.1,
        top_p=1,
    )

    raw_json = res["choices"][0]["text"].strip()
    try:
        json_data = loads(raw_json)
    except ValueError as e:
        # json.JSONDecodeError is a ValueError; fall back to the repair helper,
        # which returns [] when the text cannot be salvaged.
        log.error(f"Failed to parse '{raw_json}' -> {e}")
        analysis_results.append(fixJSON(raw_json))
    else:
        analysis_results.append(json_data)
        # Lazy %-formatting: nothing is rendered or printed unless DEBUG is on.
        log.debug("JSON response: %s", json_data)
        extra_prompts.append(f"\n{text}\n{raw_json}")

# One result per row, in row order, so the list lines up with the frame.
df["analysis"] = analysis_results
df.to_csv("./data/output/laptop/laptop_out_exp2.csv", index=False)


#### Re-formatting output data 

In [None]:
import pandas as pd

df = pd.read_csv("./data/output/laptop/laptop_out_exp2.csv")

df

In [None]:
from ast import literal_eval

df.analysis = df.analysis.apply(literal_eval)

analysis_results = df.analysis

analysis_results[:5]

In [None]:
# Flatten the per-review annotation lists into one list of annotation dicts.
annotations = []

for i, entry in enumerate(analysis_results):
    for a in entry:
        # Tag the annotation with the position of its review. This writes into
        # the dict held by analysis_results itself, not into a copy.
        a["review_id"] = i
        annotations.append(a)

# One row per annotation; the columns are the keys found in the dicts.
analysis_df = pd.DataFrame(annotations)

analysis_df.to_csv("./data/output/laptop/laptop_analysis_exp2.csv", index=False)

analysis_df

In [None]:
# Sample up to 10 annotations for manual inspection.
# min() keeps sample() from raising when fewer than 10 annotations exist, and
# the fixed random_state makes the saved sample reproducible across runs.
df_sample = analysis_df.sample(n=min(10, len(analysis_df)), random_state=42).reset_index()

df_sample.to_csv("./data/output/laptop/laptop_analysis_sample_exp2.csv")

In [None]:
def format_output(analysis):
    """Split a list of annotation dicts into four parallel lists.

    Args:
        analysis: iterable of dicts, each holding the keys "aspect",
            "category", "sentiment" and "segment".

    Returns:
        pd.Series of four lists, in this order: aspects, categories,
        sentiments, segments.

    Raises:
        KeyError: if an annotation lacks one of the four keys.
    """
    fields = ("aspect", "category", "sentiment", "segment")
    columns = [[item[field] for item in analysis] for field in fields]
    return pd.Series(columns)
df[["term", "cat", "pol", "seg"]] = df.apply(lambda x: format_output(x["analysis"]), axis=1)

In [None]:
df_temp = df.copy()
def change_sentiment_labels(data):
    """Translate sentiment names into integer codes.

    "negative" -> 0, "neutral" -> 1, "positive" -> 2. Entries that are not
    exactly one of these three strings are left out, so the result can be
    shorter than `data`.

    Args:
        data: iterable of sentiment labels.

    Returns:
        List of integer codes for the recognised labels, in input order.
    """
    codes = {"negative": 0, "neutral": 1, "positive": 2}
    return [
        codes[label]
        for label in data
        if isinstance(label, str) and label in codes
    ]
df_temp['pol']=df_temp['pol'].apply(lambda x: change_sentiment_labels(x))

In [None]:
df_temp

In [None]:
df_temp.to_csv("./data/output/laptop/laptop_out_final_exp2.csv", index=False)

In [None]:
df2 = pd.DataFrame()

In [None]:
# Accumulators filled by terms_pol(); re-created each time this cell runs.
list_term = []
list_pol = []
list_cat = []

def terms_pol(term, cat, pol):
    """Append one row's terms, categories and polarities to the accumulator lists.

    The three arguments are read position by position, driven by the length of
    `term`; nothing is returned.

    Raises:
        IndexError: if `cat` or `pol` is shorter than `term`.
    """
    for position, value in enumerate(term):
        list_term.append(value)
        list_cat.append(cat[position])
        list_pol.append(pol[position])
_ = df.apply(lambda x: terms_pol(x["term"], x["cat"], x["pol"]), axis=1)

In [None]:
df2 = pd.DataFrame({'terms': list_term, 'cat': list_cat, 'pol': list_pol})

In [None]:
df2.head()

#### Analyse output

In [None]:
df2['cat'].value_counts()

In [None]:
df2['terms'].value_counts()

In [None]:
df2['pol'].value_counts()

In [None]:
# df_t = df2.groupby('terms').pol.value_counts()
df_cat = df2.groupby(['cat','pol']).size().reset_index(name='counts')
df_terms = df2.groupby(['terms','pol']).size().reset_index(name='counts')
df_terms_cat = df2.groupby(['cat','terms']).size().reset_index(name='counts')

In [None]:
df_cat

In [None]:
df_terms

In [None]:
df_terms_cat

In [None]:
df_terms_cat.sort_values(by=['counts'],ascending=False)

In [None]:
df_terms_cat.groupby(["cat", "terms"])["counts"].count()

In [None]:
# Relative frequency of every distinct (terms, cat, pol) row of df2.
# NOTE(review): this is the joint distribution over all three columns, not the
# share of each category plotted below - confirm that this is what should be printed.
percent = df2.value_counts(normalize=True).sort_index()
# Bar chart with the number of annotations per category.
sb.countplot(x=df2['cat'])
plt.xticks(rotation=45, 
           horizontalalignment='right',
           fontweight='light',
           fontsize='x-large')
plt.show()
print(percent)

In [None]:
# Relative frequency of every distinct (terms, cat, pol) row of df2.
# NOTE(review): this is the joint distribution over all three columns, not the
# share of each aspect term plotted below - confirm that this is what should be printed.
percent = df2.value_counts(normalize=True).sort_index()
# Bar chart with the number of annotations per aspect term.
sb.countplot(x=df2['terms'])
plt.xticks(rotation=45, 
           horizontalalignment='right',
           fontweight='light',
           fontsize='x-large')
plt.show()
print(percent)

In [None]:
import plotly.express as px

In [None]:
fig = px.bar(
    df_cat,
    x="cat",
    y="counts",
    color="pol",
    barmode="stack",
    color_discrete_map={
        "positive": "#52AC5E",
        "negative": "#e34a2d",
        "neutral": "gray",
    },
    title="Categories vs Polarity",
    template="plotly_white",
)

fig.show()

In [None]:
fig = px.bar(
    df_terms,
    x="terms",
    y="counts",
    color="pol",
    barmode="stack",
    color_discrete_map={
        "positive": "#52AC5E",
        "negative": "#e34a2d",
        "neutral": "gray",
    },
    title="Aspect Terms vs Polarity",
    template="plotly_white",
)

fig.show()


In [None]:
# Stacked bar chart: one bar per category, one coloured slice per aspect term.
# No color_discrete_map is passed here: the bars are coloured by term, so a map
# keyed by sentiment names ("positive"/"negative"/"neutral") would not apply.
fig = px.bar(
    df_terms_cat,
    x="cat",
    y="counts",
    color="terms",
    barmode="stack",
    title="Categories vs Aspect Terms",
    template="plotly_white",
)

fig.show()

In [None]:
fig = px.pie(df_cat, values='counts', names='cat', title='Categories Count')
fig.show()

In [None]:
fig = px.pie(df_terms, values='counts', names='terms', title='Terms Count')

fig.show()

In [None]:
from wordcloud import WordCloud, STOPWORDS, ImageColorGenerator
# from nltk.corpus import stopwords
# stop_words = set(stopwords.words('english'))
# stop_words.update(["laptop", "computer"])

In [None]:
# Join the category of every df2 row whose polarity is "positive" into one
# space-separated string; the word cloud sizes each word by how often it occurs.
text = " ".join(category for category in df2.loc[df2['pol']=="positive", "cat"])

# Create and generate a word cloud image:
# Alternative with custom stop words (needs stop_words to be defined first):
# wordcloud = WordCloud(max_words=100, stopwords=stop_words).generate(text)
wordcloud = WordCloud(max_words=100).generate(text)
# Further WordCloud options that can be passed: max_font_size=50, background_color="white"

# Display the generated image:
plt.imshow(wordcloud, interpolation='bilinear')
plt.axis("off")
plt.show()

In [None]:
# Join the category of every df2 row whose polarity is "negative" into one
# space-separated string; the word cloud sizes each word by how often it occurs.
text = " ".join(category for category in df2.loc[df2['pol']=="negative", "cat"])

# Create and generate a word cloud image:
# Alternative with custom stop words (needs stop_words to be defined first):
# wordcloud = WordCloud(max_words=100, stopwords=stop_words).generate(text)
wordcloud = WordCloud(max_words=100).generate(text)
# Further WordCloud options that can be passed: max_font_size=50, background_color="white"

# Display the generated image:
plt.imshow(wordcloud, interpolation='bilinear')
plt.axis("off")
plt.show()

In [None]:
# Join the category of every df2 row whose polarity is "neutral" into one
# space-separated string; the word cloud sizes each word by how often it occurs.
text = " ".join(category for category in df2.loc[df2['pol']=="neutral", "cat"])

# Create and generate a word cloud image:
# Alternative with custom stop words (needs stop_words to be defined first):
# wordcloud = WordCloud(max_words=100, stopwords=stop_words).generate(text)
wordcloud = WordCloud(max_words=100).generate(text)
# Further WordCloud options that can be passed: max_font_size=50, background_color="white"

# Display the generated image:
plt.imshow(wordcloud, interpolation='bilinear')
plt.axis("off")
plt.show()

In [None]:
# Join the aspect term of every df2 row whose polarity is "positive" into one
# space-separated string; the word cloud sizes each word by how often it occurs.
text = " ".join(term for term in df2.loc[df2['pol']=="positive","terms"])

# Create and generate a word cloud image:
# Alternative with custom stop words (needs stop_words to be defined first):
# wordcloud = WordCloud(max_words=100, stopwords=stop_words).generate(text)
wordcloud = WordCloud(max_words=100).generate(text)
# Further WordCloud options that can be passed: max_font_size=50, background_color="white"

# Display the generated image:
plt.imshow(wordcloud, interpolation='bilinear')
plt.axis("off")
plt.show()

In [None]:
# Join the aspect term of every df2 row whose polarity is "negative" into one
# space-separated string; the word cloud sizes each word by how often it occurs.
text = " ".join(term for term in df2.loc[df2['pol']=="negative","terms"])

# Create and generate a word cloud image:
# Alternative with custom stop words (needs stop_words to be defined first):
# wordcloud = WordCloud(max_words=100, stopwords=stop_words).generate(text)
wordcloud = WordCloud(max_words=100).generate(text)
# Further WordCloud options that can be passed: max_font_size=50, background_color="white"

# Display the generated image:
plt.imshow(wordcloud, interpolation='bilinear')
plt.axis("off")
plt.show()

In [None]:
# Join the aspect term of every df2 row whose polarity is "neutral" into one
# space-separated string; the word cloud sizes each word by how often it occurs.
text = " ".join(term for term in df2.loc[df2['pol']=="neutral","terms"])

# Create and generate a word cloud image:
# Alternative with custom stop words (needs stop_words to be defined first):
# wordcloud = WordCloud(max_words=100, stopwords=stop_words).generate(text)
wordcloud = WordCloud(max_words=100).generate(text)
# Further WordCloud options that can be passed: max_font_size=50, background_color="white"

# Display the generated image:
plt.imshow(wordcloud, interpolation='bilinear')
plt.axis("off")
plt.show()

In [None]:
import plotly.graph_objects as go

fig1 = go.Figure(data=[go.Pie(labels=df_cat.pol, values=df_cat.counts, hole=.3, title='Polarity Count')])
fig1.show()

In [None]:
import plotly.graph_objects as go
from plotly.subplots import make_subplots

# Create subplots: use 'domain' type for Pie subplot
fig = make_subplots(rows=2, cols=2, specs=[[{'type':'domain'}, {'type':'domain'}], [{'type':'domain'}, {'type':'domain'}]])
fig.add_trace(go.Pie(labels=df_cat.loc[df_cat['pol']=="positive",'cat'], 
                     values=df_cat.loc[df_cat['pol']=="positive",'counts'], 
                      name="pos"), 1, 1) #scalegroup='on',
fig.add_trace(go.Pie(labels=df_cat.loc[df_cat['pol']=="neutral",'cat'], 
                     values=df_cat.loc[df_cat['pol']=="neutral",'counts'], 
                     name="neu"), 2, 1) #scalegroup='on',
fig.add_trace(go.Pie(labels=df_cat.loc[df_cat['pol']=="negative",'cat'], 
                     values=df_cat.loc[df_cat['pol']=="negative",'counts'], 
                     name="neg"), 1, 2) #scalegroup='on'
fig.add_trace(go.Pie(labels=df_cat['pol'], 
                     values=df_cat['counts'], 
                     name="pol"), 2, 2)

# Use `hole` to create a donut-like pie chart
fig.update_traces(hole=.3, hoverinfo="label+percent+name")
fig.update_layout(margin=dict(t=0, b=0, l=0, r=0))


fig.show()

In [None]:
# Create subplots: use 'domain' type for Pie subplot
fig = make_subplots(rows=2, cols=2, specs=[[{'type':'domain'}, {'type':'domain'}], [{'type':'domain'}, {'type':'domain'}]])
fig.add_trace(go.Pie(labels=df_terms.loc[df_terms['pol']=="positive",'terms'], 
                     values=df_terms.loc[df_terms['pol']=="positive",'counts'], 
                     name="pos"), 1, 1) #scalegroup='on'
fig.add_trace(go.Pie(labels=df_terms.loc[df_terms['pol']=="neutral",'terms'], 
                     values=df_terms.loc[df_terms['pol']=="neutral",'counts'], 
                     name="neu"), 2, 1) #scalegroup='on'
fig.add_trace(go.Pie(labels=df_terms.loc[df_terms['pol']=="negative",'terms'], 
                     values=df_terms.loc[df_terms['pol']=="negative",'counts'], 
                     name="neg"), 1, 2) #scalegroup='on'
fig.add_trace(go.Pie(labels=df_terms['pol'], 
                     values=df_terms['counts'], 
                     name="pol"), 2, 2)

# Use `hole` to create a donut-like pie chart
fig.update_traces(hole=.4, hoverinfo="label+percent+name")
fig.update_layout(margin=dict(t=0, b=0, l=0, r=0))


fig.show()

In [None]:
# Create subplots: use 'domain' type for Pie subplot
fig = make_subplots(rows=1, cols=2, specs=[[{'type':'domain'}, {'type':'domain'}]])

fig.add_trace(go.Pie(labels=df_terms_cat['cat'], 
                     values=df_terms_cat['counts'], 
                     name="category"), 1, 1)
fig.add_trace(go.Pie(labels=df_terms_cat['terms'], 
                     values=df_terms_cat['counts'], 
                     name="term"), 1, 2)

# Use `hole` to create a donut-like pie chart
fig.update_traces(hole=.4, hoverinfo="label+percent+name")
fig.update_layout(margin=dict(t=0, b=0, l=0, r=0))


fig.show()

In [None]:
# Relabel, in place, every df_cat row whose count is below 20 as 'Others', so
# that rarely observed categories share one pie slice. df_cat stays modified
# for every cell that runs after this one.
df_cat.loc[df_cat['counts'] < 20, 'cat'] = 'Others' # Group less frequently observed categories
fig = px.pie(df_cat, values='counts', names='cat', title='Categories Count')
fig.show()

In [None]:
# Relabel, in place, every df_terms row whose count is below 20 as 'Others', so
# that rarely observed terms share one pie slice. df_terms stays modified for
# every cell that runs after this one.
df_terms.loc[df_terms['counts'] < 20, 'terms'] = 'Others' # Group less frequently observed terms
fig = px.pie(df_terms, values='counts', names='terms', title='Terms Count')
fig.show()

In [None]:
import plotly.graph_objects as go
from plotly.subplots import make_subplots

# Create subplots: use 'domain' type for Pie subplot
fig = make_subplots(rows=2, cols=2, specs=[[{'type':'domain'}, {'type':'domain'}], [{'type':'domain'}, {'type':'domain'}]])
fig.add_trace(go.Pie(labels=df_cat.loc[df_cat['pol']=="positive",'cat'], 
                     values=df_cat.loc[df_cat['pol']=="positive",'counts'], 
                      name="pos"), 1, 1) #scalegroup='on',
fig.add_trace(go.Pie(labels=df_cat.loc[df_cat['pol']=="neutral",'cat'], 
                     values=df_cat.loc[df_cat['pol']=="neutral",'counts'], 
                     name="neu"), 2, 1) #scalegroup='on',
fig.add_trace(go.Pie(labels=df_cat.loc[df_cat['pol']=="negative",'cat'], 
                     values=df_cat.loc[df_cat['pol']=="negative",'counts'], 
                     name="neg"), 1, 2) #scalegroup='on'
fig.add_trace(go.Pie(labels=df_cat['pol'], 
                     values=df_cat['counts'], 
                     name="pol"), 2, 2)

# Use `hole` to create a donut-like pie chart
fig.update_traces(hole=.3, hoverinfo="label+percent+name")


fig.show()

In [None]:
# Create subplots: use 'domain' type for Pie subplot
fig = make_subplots(rows=2, cols=2, specs=[[{'type':'domain'}, {'type':'domain'}], [{'type':'domain'}, {'type':'domain'}]])
fig.add_trace(go.Pie(labels=df_terms.loc[df_terms['pol']=="positive",'terms'], 
                     values=df_terms.loc[df_terms['pol']=="positive",'counts'], 
                     name="pos"), 1, 1) #scalegroup='on'
fig.add_trace(go.Pie(labels=df_terms.loc[df_terms['pol']=="neutral",'terms'], 
                     values=df_terms.loc[df_terms['pol']=="neutral",'counts'], 
                     name="neu"), 2, 1) #scalegroup='on'
fig.add_trace(go.Pie(labels=df_terms.loc[df_terms['pol']=="negative",'terms'], 
                     values=df_terms.loc[df_terms['pol']=="negative",'counts'], 
                     name="neg"), 1, 2) #scalegroup='on'
fig.add_trace(go.Pie(labels=df_terms['pol'], 
                     values=df_terms['counts'], 
                     name="pol"), 2, 2)

# Use `hole` to create a donut-like pie chart
fig.update_traces(hole=.4, hoverinfo="label+percent+name")


fig.show()

In [None]:
# Create subplots: use 'domain' type for Pie subplot
fig = make_subplots(rows=1, cols=2, specs=[[{'type':'domain'}, {'type':'domain'}]])

fig.add_trace(go.Pie(labels=df_terms_cat['cat'], 
                     values=df_terms_cat['counts'], 
                     name="category"), 1, 1)
fig.add_trace(go.Pie(labels=df_terms_cat['terms'], 
                     values=df_terms_cat['counts'], 
                     name="term"), 1, 2)

# Use `hole` to create a donut-like pie chart
fig.update_traces(hole=.4, hoverinfo="label+percent+name")
fig.update_layout(margin=dict(t=0, b=0, l=0, r=0))


fig.show()

#### Display results in HTML

This displays the annotated feedback in a more readable way using HTML.

In [None]:
import re
from IPython.display import display, HTML
from html import escape

css = """
<style>
    .container {
        background-color: #fff;
        padding: 15px
    }

    p.feedback {
        margin-top: 5px;
        color: #595f6d;
        line-height: 2
    }

    .annotation {
        color: #777;
        padding: 2px;
        font-weight: bold !important;
        border-radius: 1px;
        border-bottom: 4px solid;
    }

    .aspect {
        color: #6eb2e7;
        padding-left: 10px;
        font-size: 12px;
    }
</style>
"""


def ireplace(text, old, new):
    """Replace every case-insensitive occurrence of the literal `old` with `new`.

    Both `old` and `new` are treated as plain text: `old` is escaped before it
    is compiled, so characters such as "?", "(" or "$" match themselves, and
    `new` is inserted verbatim, so backslashes in it are not read as escapes.

    Args:
        text: string to search.
        old: literal substring to look for (case-insensitively).
        new: literal replacement.

    Returns:
        The resulting string; `text` unchanged when `old` is empty, because an
        empty pattern would otherwise match between every pair of characters.
    """
    if not old:
        return text
    pattern = re.compile(re.escape(old), re.IGNORECASE)
    # A callable replacement bypasses re's template processing of `new`.
    return pattern.sub(lambda _match: new, text)


html = f"{css}"

# Underline colour per sentiment; unknown labels fall back to the neutral grey.
sentiment_colors = {"positive": "#2bbf6d", "negative": "#cf2a43", "neutral": "gray"}

for i, review in enumerate(df.to_dict("records")):
    text = escape(review["text"])

    try:
        for ann in analysis_results[i]:
            color = sentiment_colors.get(ann["sentiment"], "gray")

            # The review text is already HTML-escaped, so the segment has to be
            # escaped the same way, both to be found in it and to be re-inserted.
            segment = escape(ann["segment"])
            # Aspect and category come from the model output; escape them too
            # so they cannot inject markup into the page.
            aspect = escape(str(ann["aspect"]))
            category = escape(str(ann["category"]))

            # Three spans are opened (annotation > aspect > category) and all
            # three are closed, so the styling does not leak into the rest of
            # the paragraph.
            markup = (
                f"<span class='annotation' style='border-color: {color}'>{segment} "
                f"<span class='aspect'>{aspect}"
                f"<span class='category'>#{category}</span></span></span>"
            )

            text = ireplace(text, segment, markup)

        html += f"""

            <div class='container'>
                <p class='feedback'>{text}</p>
            </div>
        """

    except Exception as e:
        # Best effort: a review whose annotations are malformed is reported and
        # skipped instead of aborting the whole rendering.
        print(f"Failed to parse {review['text']} {e}")
        continue

display(HTML(html))


#### Accuracy

In [None]:
import pandas as pd
from ast import literal_eval

In [None]:
#loading the data
data = pd.read_csv("./data/output/laptop/laptop_out_final_exp2.csv")
data.head(5)

In [None]:
data['term_true'] = data['term_true'].apply(literal_eval)
data['cat_true'] = data['cat_true'].apply(literal_eval)
data['pol_true'] = data['pol_true'].apply(literal_eval)

data['term'] = data['term'].apply(literal_eval)
data['cat'] = data['cat'].apply(literal_eval)
data['pol'] = data['pol'].apply(literal_eval)

In [None]:
def convert_lower_str(term, cat, pol):
    """Pad three lists to a common length and lower-case their entries.

    Each list is extended with the string 'nil' up to the length of the
    longest of the three, then every entry is converted with str() and
    lower-cased. The input lists are not modified.

    Args:
        term: list of aspect terms.
        cat: list of categories.
        pol: list of polarities.

    Returns:
        pd.Series holding three equally long lists of lower-case strings, in
        the order term, cat, pol.
    """
    width = max(len(term), len(cat), len(pol))

    def _normalise(values):
        # Build a new list so that the caller's list is left untouched.
        padded = list(values) + ['nil'] * (width - len(values))
        return [str(value).lower() for value in padded]

    return pd.Series([_normalise(term), _normalise(cat), _normalise(pol)])
data[["term_true","cat_true","pol_true"]] = data.apply(lambda x: convert_lower_str(x["term_true"], x["cat_true"], x["pol_true"]), axis=1)
data[["term","cat","pol"]]= data.apply(lambda x: convert_lower_str(x["term"], x["cat"], x["pol"]), axis=1)

In [None]:
def terms_pol(term, cat, pol):
    """Combine three parallel lists into [term, category, polarity] triples.

    Every value is converted with str() and lower-cased. The number of triples
    equals len(term). Note that this definition replaces the `terms_pol`
    defined earlier in the notebook, which has a different behaviour.

    Returns:
        List of three-element lists of lower-case strings.

    Raises:
        IndexError: if `cat` or `pol` is shorter than `term`.
    """
    return [
        [str(term[k]).lower(), str(cat[k]).lower(), str(pol[k]).lower()]
        for k in range(len(term))
    ]
data["True"]= data.apply(lambda x: terms_pol(x["term_true"], x["cat_true"], x["pol_true"]), axis=1)
data["Pred"]= data.apply(lambda x: terms_pol(x["term"], x["cat"], x["pol"]), axis=1)

In [None]:
# def compare(list1, list2):
#     for val in list1:
#         if val in list2:
#             return 1
#     return 0
def compare_all(list1, list2):
    """Return 1 if both lists hold the same items the same number of times, else 0.

    Order is ignored. Items are compared with ==, so unhashable items such as
    nested lists are supported. Duplicates are counted: ["a", "b"] and
    ["a", "a"] do not match, even though every item of the second list occurs
    in the first.

    Args:
        list1: reference items.
        list2: items to compare against the reference.

    Returns:
        1 on a full match, 0 otherwise.
    """
    if len(list1) != len(list2):
        return 0

    # Cross off one reference item per candidate; a candidate without a
    # remaining counterpart means the lists differ.
    unmatched = list(list1)
    for item in list2:
        try:
            unmatched.remove(item)
        except ValueError:
            return 0
    return 1

In [None]:
data["All_Matched"]= data.apply(lambda x: compare_all(x["True"], x["Pred"]), axis=1)
data["T_All_Matched"]= data.apply(lambda x: compare_all(x["term_true"], x["term"]), axis=1)
data["C_All_Matched"]= data.apply(lambda x: compare_all(x["cat_true"], x["cat"]), axis=1)

In [None]:
print("Number of sentences: ", data.shape[0])
print("Acc of aspect term  %: ", (data["T_All_Matched"].sum()/data.shape[0])*100)
print("Acc of aspect category  %: ", (data["C_All_Matched"].sum()/data.shape[0])*100)
print("Overall acc %: ", (data["All_Matched"].sum()/data.shape[0])*100)