# Policy E: Sentiment and Rating Mismatch

### Import Libraries

In [6]:
from transformers import AutoTokenizer, AutoModelForSequenceClassification
import torch
import torch.nn.functional as F
import pandas as pd

### Load HuggingFace Model

In [2]:
# Hub identifier of the pre-trained sentiment checkpoint; the tokenizer and the
# classifier below are both loaded from this same name.
MODEL = "cardiffnlp/twitter-roberta-base-sentiment-latest"
# Loaded once and kept as notebook-level globals; roberta_sentiment_score reads
# `tokenizer` and `model` directly rather than taking them as arguments.
tokenizer = AutoTokenizer.from_pretrained(MODEL)
# NOTE: this load reports that the checkpoint's roberta.pooler.dense weights were
# not used when initializing RobertaForSequenceClassification (see cell output).
model = AutoModelForSequenceClassification.from_pretrained(MODEL)

Some weights of the model checkpoint at cardiffnlp/twitter-roberta-base-sentiment-latest were not used when initializing RobertaForSequenceClassification: ['roberta.pooler.dense.bias', 'roberta.pooler.dense.weight']
- This IS expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


### Sentiment Scoring Function
#### - Computes a sentiment score with the pre-trained HuggingFace model as (p_pos - p_neg + 1) / 2, or 0.5 when the neutral probability exceeds 0.75. Returns a normalized text score in [0,1].

In [3]:
def roberta_sentiment_score(text: str, neutral_threshold: float = 0.75, max_length: int = 512) -> float:
    """Score the sentiment of ``text`` on a [0, 1] scale with the loaded classifier.

    The classifier's logits are softmaxed into three probabilities, unpacked as
    (negative, neutral, positive), and collapsed to ``(p_pos - p_neg + 1) / 2``:
    0 is fully negative, 0.5 is balanced, 1 is fully positive. When the neutral
    probability dominates, the score is pinned to exactly 0.5.

    Uses the notebook-level ``tokenizer`` and ``model`` globals.

    Args:
        text: Review text to score. Anything that is not a non-blank string
            (``None``, a pandas NaN, ``""``) is treated as "no sentiment
            evidence" and scored 0.5 without running the model.
        neutral_threshold: Neutral probability above which the score is forced
            to 0.5.
        max_length: Maximum number of tokens passed to the model; longer inputs
            are truncated to this length.

    Returns:
        A Python ``float`` in [0, 1].
    """
    # Missing or blank reviews would either crash the tokenizer (NaN is a float)
    # or score an empty sequence; treat them as neutral instead.
    if not isinstance(text, str) or not text.strip():
        return 0.5

    # truncation=True without max_length falls back to whatever limit the
    # tokenizer itself declares; pass an explicit cap so long reviews are always
    # cut to a length the model can accept.
    # NOTE(review): 512 is the usual RoBERTa-base limit - confirm for this checkpoint.
    inputs = tokenizer(text, return_tensors="pt", truncation=True, max_length=max_length)
    with torch.no_grad():  # inference only, no gradients needed
        logits = model(**inputs).logits

    # Single input -> take row 0. Index order is assumed to be
    # 0=negative, 1=neutral, 2=positive - TODO confirm against model.config.id2label.
    # .tolist() yields plain Python floats so the return type is uniform.
    p_neg, p_neu, p_pos = F.softmax(logits, dim=-1)[0].tolist()

    if p_neu > neutral_threshold:
        return 0.5
    # p_pos - p_neg lies in [-1, 1]; shift and scale it into [0, 1].
    return (p_pos - p_neg + 1) / 2

### Rating Normalization Function
#### - Maps a 1-5 star rating linearly onto [0,1] via (rating - 1) / 4 and returns the normalized rating.

In [4]:
def normalize_rating(rating: int, min_rating: float = 1, max_rating: float = 5) -> float:
    """Linearly rescale a rating from ``[min_rating, max_rating]`` onto [0, 1].

    With the defaults this is ``(rating - 1) / 4``: 1 star -> 0.0, 3 stars -> 0.5,
    5 stars -> 1.0. A rating outside the scale maps outside [0, 1]; no clipping
    is applied.

    Args:
        rating: The rating to normalize.
        min_rating: Lowest value on the rating scale (maps to 0).
        max_rating: Highest value on the rating scale (maps to 1).

    Returns:
        The rating's position on the scale as a float.

    Raises:
        ValueError: If ``max_rating`` is not greater than ``min_rating``.
    """
    span = max_rating - min_rating
    if span <= 0:
        # A zero or negative span would divide by zero or invert the scale.
        raise ValueError("max_rating must be greater than min_rating")
    return (rating - min_rating) / span

### Consistency Score Function
#### - Returns a score in [0,1] indicating how well the review text matches the rating.
#### - Close to 1 = good match, around 0.5 = unclear/mixed, close to 0 = contradictory

In [5]:
def consistency_score(text_score: float, rating_score: float):
    """Measure agreement between a text sentiment score and a rating score.

    The result is one minus the absolute gap between the two inputs, so
    identical scores give 1 and, for inputs in [0, 1], opposite extremes give 0.
    """
    gap = abs(text_score - rating_score)
    return 1 - gap

### Test

In [14]:
# Location of the sampled review dataset; change this one constant to run the
# notebook against a copy stored elsewhere.
DATA_PATH = "/Users/hyoon/Desktop/final_data_sampled.csv"
# Fixed seed so the spot-check sample, and every output derived from it
# (including positional lookups into df_check), is the same on each run.
SAMPLE_SEED = 42
# Number of reviews to spot-check.
SAMPLE_SIZE = 10

df = pd.read_csv(DATA_PATH)
# Cap the sample size at the frame length so a small file does not raise;
# .copy() keeps later column additions from touching `df`.
df_check = df.sample(min(SAMPLE_SIZE, len(df)), random_state=SAMPLE_SEED).copy()
df_check

Unnamed: 0.1,Unnamed: 0,rating,text,business_name,business_category,business_description,_id
6903,570864,4,They make the ice cream in front of you. You s...,Sweet Creams,['Ice cream shop'],"Compact shop offering handcrafted, Thai-inspir...",1.1677497168727284e+20_1530371352331
6201,47265,3,Kinda small but still very pretty,Sadie Seymour Botanical Gardens and the Kona E...,"['Botanical garden', 'Thrift store', 'Tourist ...",Small botanical garden with native Hawaiian pl...,1.0976952027301549e+20_1495551385327
6024,139791,3,The meal was passable but that is all. I coul...,Restaurant Yoshiya,"['Japanese restaurant', 'Restaurant']",Traditional Japanese dishes served in Kyoto-in...,1.0407223338159412e+20_1577909970192
5345,609746,1,The Concierge said there is no restaurant at t...,Pa'akai,"['Hawaiian restaurant', 'Bar', 'Restaurant']","High-end, oceanfront restaurant featuring loca...",1.027021154156352e+20_1483487780207
557,1491057,5,Outstanding pieces of History preserved for al...,Battleship Missouri Memorial,"['Historical place', 'Historical landmark', 'T...","Historic battleship, which was the site of the...",1.028785022390011e+20_1527728307302
3437,918980,5,"Owner is awesome, food is amazing. I'll drive ...",Kiani's Original Guava Chicken,"['Restaurant', 'Barbecue restaurant']","Compact, humble, cash-only counter serve makin...",1.153001593045995e+20_1557790048837
8888,1059425,4,"Great prices, awesome new location brand new b...",Denny's,"['American restaurant', 'Breakfast restaurant'...",Casual diner chain dishing up classic American...,1.0881285283532012e+20_1541430076929
1758,494667,5,"(Translated by Google) Super shoe store, very ...",Famous Footwear,['Shoe store'],Chain store offering a wide variety of brand-n...,1.1469620546497782e+20_1549179516310
5460,910764,5,One of the best fish and chips i have ever had...,Pahoa Fresh Fish,['Seafood restaurant'],Modest eatery turning out fried seafood offeri...,1.1401888693566566e+20_1523448660256
7762,572044,5,Great ocean and sunset view. Service was frien...,ULU Ocean Grill,"['Hawaiian restaurant', 'Asian restaurant', 'B...",An inventive Hawaiian menu is showcased at thi...,1.098393813343975e+20_1573581819646


In [15]:
# Score every sampled review: sentiment of the text, normalized star rating,
# and how well the two agree. All three lists follow df_check's row order.
t_score_list = [roberta_sentiment_score(review) for review in df_check["text"]]
r_score_list = [normalize_rating(stars) for stars in df_check["rating"]]
c_score_list = [
    consistency_score(sentiment, stars_norm)
    for sentiment, stars_norm in zip(t_score_list, r_score_list)
]

In [17]:
# Attach the per-row consistency scores as a new column. c_score_list was built
# by iterating df_check in row order, so the values line up positionally.
df_check["consistency_score"] = c_score_list
# Display the sample together with its scores.
df_check

Unnamed: 0.1,Unnamed: 0,rating,text,business_name,business_category,business_description,_id,consistency_score
6903,570864,4,They make the ice cream in front of you. You s...,Sweet Creams,['Ice cream shop'],"Compact shop offering handcrafted, Thai-inspir...",1.1677497168727284e+20_1530371352331,0.76771
6201,47265,3,Kinda small but still very pretty,Sadie Seymour Botanical Gardens and the Kona E...,"['Botanical garden', 'Thrift store', 'Tourist ...",Small botanical garden with native Hawaiian pl...,1.0976952027301549e+20_1495551385327,0.558313
6024,139791,3,The meal was passable but that is all. I coul...,Restaurant Yoshiya,"['Japanese restaurant', 'Restaurant']",Traditional Japanese dishes served in Kyoto-in...,1.0407223338159412e+20_1577909970192,0.534383
5345,609746,1,The Concierge said there is no restaurant at t...,Pa'akai,"['Hawaiian restaurant', 'Bar', 'Restaurant']","High-end, oceanfront restaurant featuring loca...",1.027021154156352e+20_1483487780207,0.647083
557,1491057,5,Outstanding pieces of History preserved for al...,Battleship Missouri Memorial,"['Historical place', 'Historical landmark', 'T...","Historic battleship, which was the site of the...",1.028785022390011e+20_1527728307302,0.986916
3437,918980,5,"Owner is awesome, food is amazing. I'll drive ...",Kiani's Original Guava Chicken,"['Restaurant', 'Barbecue restaurant']","Compact, humble, cash-only counter serve makin...",1.153001593045995e+20_1557790048837,0.987711
8888,1059425,4,"Great prices, awesome new location brand new b...",Denny's,"['American restaurant', 'Breakfast restaurant'...",Casual diner chain dishing up classic American...,1.0881285283532012e+20_1541430076929,0.758707
1758,494667,5,"(Translated by Google) Super shoe store, very ...",Famous Footwear,['Shoe store'],Chain store offering a wide variety of brand-n...,1.1469620546497782e+20_1549179516310,0.953529
5460,910764,5,One of the best fish and chips i have ever had...,Pahoa Fresh Fish,['Seafood restaurant'],Modest eatery turning out fried seafood offeri...,1.1401888693566566e+20_1523448660256,0.988948
7762,572044,5,Great ocean and sunset view. Service was frien...,ULU Ocean Grill,"['Hawaiian restaurant', 'Asian restaurant', 'B...",An inventive Hawaiian menu is showcased at thi...,1.098393813343975e+20_1573581819646,0.991317


In [23]:
# Show the untruncated review text of the row at position 6 of the sample.
# Which review sits at this position depends on the rows df.sample() drew.
df_check["text"].iloc[6]

'Great prices, awesome new location brand new building.'