### Check the polarity of the inference results against the star rating of each review
##### data: 100 randomly selected reviews from 2 restaurants

In [1]:
import pandas as pd
import json

In [6]:
# open the inference results
# JSON is UTF-8 by specification; pass the encoding explicitly so the load
# does not depend on the platform's locale default.

with open('../data/yelp/INFER_yelp-food-test_2rest100.conllu.json', encoding='utf-8') as f:
    infer = json.load(f)

infer[:5]

[{'sent_id': 'LxMrMDZvwNL7btnuqKYm8g:0',
  'text': 'I agree with the previous review.',
  'opinions': [{'Source': [['I'], ['0:1']],
    'Target': [['review.'], ['26:33']],
    'Polar_expression': [['agree'], ['2:7']],
    'Polarity': 'Positive',
    'Intensity': 'Standard'}]},
 {'sent_id': 'LxMrMDZvwNL7btnuqKYm8g:1',
  'text': 'You can definitely find better restaurants in New Orleans.',
  'opinions': [{'Source': [[], []],
    'Target': [['restaurants'], ['31:42']],
    'Polar_expression': [['better'], ['24:30']],
    'Polarity': 'Positive',
    'Intensity': 'Standard'}]},
 {'sent_id': 'LxMrMDZvwNL7btnuqKYm8g:2',
  'text': 'Buyer beware: the food does not live up to its hype.',
  'opinions': [{'Source': [[], []],
    'Target': [['the food'], ['14:22']],
    'Polar_expression': [['not live up to its hype.'], ['28:52']],
    'Polarity': 'Negative',
    'Intensity': 'Standard'}]},
 {'sent_id': 'gWLiqQWybWwIZS4gT9Spbw:0',
  'text': 'Acme has a welcoming atmosphere to a city guy.',
  'opini

In [7]:
# open the raw reviews
# Review text can contain non-ASCII characters, so decode as UTF-8
# explicitly instead of relying on the platform's locale default.

with open('../data/yelp/yelp-food-test_2rest100.json', encoding='utf-8') as f:
    raw = json.load(f)

raw[:5]

[{'review_id': 'LxMrMDZvwNL7btnuqKYm8g',
  'user_id': '2ssBdVC1kKKe4wZNDzF5Vw',
  'business_id': '_ab50qdWOk0DdB6XOrBitw',
  'stars': 1.0,
  'useful': 0,
  'funny': 0,
  'cool': 0,
  'text': 'I agree with the previous review.',
  'date': '2016-08-22 19:13:22',
  'sent_id': 'LxMrMDZvwNL7btnuqKYm8g:0',
  'opinions': []},
 {'review_id': 'LxMrMDZvwNL7btnuqKYm8g',
  'user_id': '2ssBdVC1kKKe4wZNDzF5Vw',
  'business_id': '_ab50qdWOk0DdB6XOrBitw',
  'stars': 1.0,
  'useful': 0,
  'funny': 0,
  'cool': 0,
  'text': 'You can definitely find better restaurants in New Orleans.',
  'date': '2016-08-22 19:13:22',
  'sent_id': 'LxMrMDZvwNL7btnuqKYm8g:1',
  'opinions': []},
 {'review_id': 'LxMrMDZvwNL7btnuqKYm8g',
  'user_id': '2ssBdVC1kKKe4wZNDzF5Vw',
  'business_id': '_ab50qdWOk0DdB6XOrBitw',
  'stars': 1.0,
  'useful': 0,
  'funny': 0,
  'cool': 0,
  'text': 'Buyer beware: the food does not live up to its hype.',
  'date': '2016-08-22 19:13:22',
  'sent_id': 'LxMrMDZvwNL7btnuqKYm8g:2',
  'opinions'

In [19]:
# add some information (stars, business_id, ...) to the inference results
# from the raw reviews

# Index the raw reviews by sent_id once, so each lookup is a dict access
# instead of a scan over the whole `raw` list per inferred sentence.
# Iterating in reverse means that, if a sent_id occurs more than once, the
# FIRST raw entry ends up in the dict (same "first match wins" as before).
raw_by_sent_id = {entry['sent_id']: entry for entry in reversed(raw)}

for rec in infer:
    raw_one = raw_by_sent_id.get(rec['sent_id'])
    if raw_one is None:
        # Fail with the offending id rather than an anonymous IndexError.
        raise KeyError(f"sent_id {rec['sent_id']!r} not found in raw reviews")
    rec['business_id'] = raw_one['business_id']
    rec['review_id'] = raw_one['review_id']
    rec['stars'] = raw_one['stars']
    # Default value; the exported JSON is annotated afterwards (see the EDA
    # section, where the reloaded records carry polar_ans = True).
    rec['polar_ans'] = False

infer[:5]

[{'sent_id': 'LxMrMDZvwNL7btnuqKYm8g:0',
  'text': 'I agree with the previous review.',
  'opinions': [{'Source': [['I'], ['0:1']],
    'Target': [['review.'], ['26:33']],
    'Polar_expression': [['agree'], ['2:7']],
    'Polarity': 'Positive',
    'Intensity': 'Standard'}],
  'business_id': '_ab50qdWOk0DdB6XOrBitw',
  'review_id': 'LxMrMDZvwNL7btnuqKYm8g',
  'stars': 1.0,
  'polar_ans': False},
 {'sent_id': 'LxMrMDZvwNL7btnuqKYm8g:1',
  'text': 'You can definitely find better restaurants in New Orleans.',
  'opinions': [{'Source': [[], []],
    'Target': [['restaurants'], ['31:42']],
    'Polar_expression': [['better'], ['24:30']],
    'Polarity': 'Positive',
    'Intensity': 'Standard'}],
  'business_id': '_ab50qdWOk0DdB6XOrBitw',
  'review_id': 'LxMrMDZvwNL7btnuqKYm8g',
  'stars': 1.0,
  'polar_ans': False},
 {'sent_id': 'LxMrMDZvwNL7btnuqKYm8g:2',
  'text': 'Buyer beware: the food does not live up to its hype.',
  'opinions': [{'Source': [[], []],
    'Target': [['the food'], ['14

In [21]:
# output json
# Mode 'x' (exclusive creation) refuses to overwrite an existing file. This
# path is the same file that is annotated afterwards and re-loaded in the EDA
# section, so a plain 'w' would reset every polar_ans to False whenever the
# notebook is re-run. Delete the file by hand to regenerate it.
# encoding='utf-8' is required because ensure_ascii=False writes non-ASCII
# characters as-is.
try:
    with open('../data/yelp/ANS-polar-stars_2rest100.json', 'x', encoding='utf-8') as outfile:
        json.dump(infer, outfile, indent = 4, ensure_ascii=False)
except FileExistsError:
    print('ANS-polar-stars_2rest100.json already exists; not overwriting (delete it to regenerate)')

In [24]:
# tally the sentences for which the model predicted at least one opinion
# (each True in the generator adds 1 to the sum)
c = sum(len(rec['opinions']) > 0 for rec in infer)

print(f'total: {len(infer)}')
print(f'non-empty: {c}')

total: 710
non-empty: 433


##### EDA of the annotated 2rest100 data (after annotating the JSON file written above)

In [39]:
# Load the annotated copy of the exported file. Decode as UTF-8 explicitly
# (the JSON standard encoding) instead of relying on the locale default.
with open('../data/yelp/ANS-polar-stars_2rest100.json', encoding='utf-8') as f:
    infer_gold = json.load(f)

infer_gold[:5]

[{'sent_id': 'LxMrMDZvwNL7btnuqKYm8g:0',
  'text': 'I agree with the previous review.',
  'opinions': [{'Source': [['I'], ['0:1']],
    'Target': [['review.'], ['26:33']],
    'Polar_expression': [['agree'], ['2:7']],
    'Polarity': 'Positive',
    'Intensity': 'Standard'}],
  'business_id': '_ab50qdWOk0DdB6XOrBitw',
  'review_id': 'LxMrMDZvwNL7btnuqKYm8g',
  'stars': 1.0,
  'polar_ans': True},
 {'sent_id': 'LxMrMDZvwNL7btnuqKYm8g:1',
  'text': 'You can definitely find better restaurants in New Orleans.',
  'opinions': [{'Source': [[], []],
    'Target': [['restaurants'], ['31:42']],
    'Polar_expression': [['better'], ['24:30']],
    'Polarity': 'Positive',
    'Intensity': 'Standard'}],
  'business_id': '_ab50qdWOk0DdB6XOrBitw',
  'review_id': 'LxMrMDZvwNL7btnuqKYm8g',
  'stars': 1.0,
  'polar_ans': True},
 {'sent_id': 'LxMrMDZvwNL7btnuqKYm8g:2',
  'text': 'Buyer beware: the food does not live up to its hype.',
  'opinions': [{'Source': [[], []],
    'Target': [['the food'], ['14:2

In [40]:
# restaurant1: '_ab50qdWOk0DdB6XOrBitw'
# restaurant2: 'ac1AeYqs8Z4_e2X5M3if2A'

# Keep only sentences that have at least one opinion; the rest are "empty".
with_opinions = [rec for rec in infer_gold if len(rec['opinions']) != 0]
empty = len(infer_gold) - len(with_opinions)

# Everything that is not restaurant1 is assigned to restaurant2.
rest1 = [rec for rec in with_opinions if rec['business_id'] == '_ab50qdWOk0DdB6XOrBitw']
rest2 = [rec for rec in with_opinions if rec['business_id'] != '_ab50qdWOk0DdB6XOrBitw']

print(f'non-empty: {len(infer_gold)-empty}, empty: {empty}')
print(f'r1: {len(rest1)}')
print(f'r2: {len(rest2)}')

non-empty: 433, empty: 277
r1: 214
r2: 219


In [58]:
# For each restaurant, bucket the annotated sentences by the star rating of
# their review and report, per rating:
#   T / F : polar_ans is True / False
#   TP    : polar_ans is True and the first opinion's Polarity is 'Positive'
#   TN    : polar_ans is True and the first opinion's Polarity is anything else

def count_polarity_by_stars(records):
    """Tally annotation outcomes per star rating.

    Args:
        records: iterable of sentence dicts, each with 'stars', 'polar_ans'
            and a non-empty 'opinions' list (only the first opinion is read).

    Returns:
        dict mapping 1..5 to a dict with the counters 'total', 'true',
        'tp' and 'tn' described in the comment above.
    """
    counts = {star: {'total': 0, 'true': 0, 'tp': 0, 'tn': 0} for star in (1, 2, 3, 4, 5)}
    for rec in records:
        stars = rec['stars']
        # Any rating other than 1-4 is counted in the 5-star bucket
        # (this keeps the catch-all `else` of the earlier version).
        bucket = counts[int(stars) if stars in (1.0, 2.0, 3.0, 4.0) else 5]
        bucket['total'] += 1
        if rec['polar_ans']:
            bucket['true'] += 1
            if rec['opinions'][0]['Polarity'] == 'Positive':
                bucket['tp'] += 1
            else:
                bucket['tn'] += 1
    return counts


def print_star_summary(records):
    """Print one line per star rating with the counts for `records`."""
    for star, row in count_polarity_by_stars(records).items():
        total, true, tp, tn = row['total'], row['true'], row['tp'], row['tn']
        print(f'{star}s: {total:3d}(T:{true:2d}, F:{total-true:2d}) | (TP:{tp:2d}, TN:{tn:2d})')


for header, records in (
    ('---------------rest1----------------', rest1),
    ('---------------rest2----------------', rest2),
):
    print(header)
    print_star_summary(records)


---------------rest1----------------
1s:   9(T: 9, F: 0) | (TP: 3, TN: 6)
2s:  24(T:15, F: 9) | (TP: 4, TN:11)
3s:  33(T:25, F: 8) | (TP:17, TN: 8)
4s:  62(T:49, F:13) | (TP:41, TN: 8)
5s:  86(T:73, F:13) | (TP:68, TN: 5)
---------------rest2----------------
1s:   8(T: 4, F: 4) | (TP: 2, TN: 2)
2s:  12(T: 7, F: 5) | (TP: 2, TN: 5)
3s:  33(T:25, F: 8) | (TP:19, TN: 6)
4s:  62(T:52, F:10) | (TP:47, TN: 5)
5s: 104(T:94, F:10) | (TP:89, TN: 5)
