In [1]:
import pandas as pd
path = "DATA2_DOWNSAMPLED.csv"
final_df = pd.read_csv(path)
final_df

Unnamed: 0,user_id,username,rating,original_text,gmap_id,business_name,translated_text,wcount
0,1.021213e+20,Shana Smith,5.0,Will have to come again soon..due for another cut,0x8805428f9ca29317:0x4122efd1046de654,Supercuts,Will have to come again soon..due for another cut,9
1,1.016310e+20,Brittiny Beil,5.0,I started my run with treatment at meadow Cree...,0x52b247197357f7f7:0x66d0d92cf109a1d5,Meadow Creek,I started my run with treatment at meadow Cree...,91
2,1.056987e+20,Michael Reagan,5.0,I like the professional but also friendly staf...,0x8858272f5a118b01:0xaa680edfef593625,AFC Urgent Care - Bon Secours - Simpsonville,I like the professional but also friendly staf...,46
3,1.054290e+20,Maureen Linker,5.0,I can't give enough praise for Dr K and her wo...,0x54950e4f37f2f0e1:0x2dd2b51972a685db,Prestige Family Dentistry,I can't give enough praise for Dr K and her wo...,94
4,1.002408e+20,Adriana Bohorquez,5.0,Love the amenities. My son asks me everyday to...,0x89b7fecef6f95e49:0xac324c8a4514eab,The Y in Pasadena,Love the amenities. My son asks me everyday to...,13
...,...,...,...,...,...,...,...,...
1224692,1.015856e+20,Th Cann,1.0,“I am happy to learn that it is your professio...,0x87f75f6d6de1af63:0xc5abbc8693804d76,"Mayo Clinic Hospital, Saint Marys Campus",“I am happy to learn that it is your professio...,239
1224693,1.130505e+20,Wendy Hughes,4.0,“I love the office and detail that is given ab...,0x88f8aa36904616c1:0x88440359bfcd5334,LensCrafters,“I love the office and detail that is given ab...,17
1224694,1.130505e+20,Wendy Hughes,4.0,“I love the office and detail that is given ab...,0x88f8aa342cb2c9db:0x80d82d44eb4fccf4,Eye Associates of Columbia at the Village of S...,“I love the office and detail that is given ab...,17
1224695,1.144222e+20,F H,1.0,‼️‼️Beware they keep changing the name of the ...,0x89b708e76e18472f:0xd726e5892f7518f0,UM Charles Regional Medical Center,‼️‼️Beware they keep changing the name of the ...,107


In [None]:

from transformers import AutoTokenizer, AutoModelForSequenceClassification, TextClassificationPipeline
from tqdm import tqdm
import torch

# ;oad and clean your data
final_df['translated_text'] = final_df['translated_text'].fillna('').astype(str).str.strip()
final_df = final_df[final_df['translated_text'].str.len() > 0].copy()

# model info
model_name = 'siebert/sentiment-roberta-large-english'
device = 0 if torch.cuda.is_available() else -1

# load tokenizer and model
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForSequenceClassification.from_pretrained(model_name)
# define custom pipeline with truncation and padding
class SafeSentimentPipeline(TextClassificationPipeline):
    def __call__(self, *args, **kwargs):
        kwargs['truncation'] = True
        kwargs['max_length'] = 512
        kwargs['padding'] = True
        return super().__call__(*args, **kwargs)

# create the pipeline
sentiment_pipe = SafeSentimentPipeline(
    model=model,
    tokenizer=tokenizer,
    device=device
)
# batching function
def batch_sentiment(texts, batch_size=16):
    results = []
    for i in tqdm(range(0, len(texts), batch_size), desc="Sentiment Analysis"):
        batch = texts[i:i+batch_size]
        try:
            batch_results = sentiment_pipe(batch)
        except Exception as e:
            print(f"\nBatch failed at index {i}: {e}")
            print("Example failing texts:", batch[:2])
            batch_results = [{'label': 'error', 'score': 0.0}] * len(batch)
        results.extend(batch_results)
    return results
# run the analysis
texts = final_df['translated_text'].tolist()
results = batch_sentiment(texts, batch_size=16)
# store results in df
final_df['sentiment_label'] = [r['label'] for r in results]
final_df['sentiment_score'] = [r['score'] for r in results]

  from .autonotebook import tqdm as notebook_tqdm
Device set to use cuda:0
Sentiment Analysis:   0%|          | 10/76544 [00:08<16:52:02,  1.26it/s]You seem to be using the pipelines sequentially on GPU. In order to maximize efficiency please use a dataset
Sentiment Analysis: 100%|██████████| 76544/76544 [19:37:03<00:00,  1.08it/s]   


In [11]:
final_df.to_csv("DATA5_SIEBERT.csv", index=False)

In [2]:
import pandas as pd
path = "DATA5_SIEBERT.csv"
df = pd.read_csv(path)
df

Unnamed: 0,user_id,username,rating,original_text,gmap_id,business_name,translated_text,wcount,sentiment_label,sentiment_score
0,1.021213e+20,Shana Smith,5.0,Will have to come again soon..due for another cut,0x8805428f9ca29317:0x4122efd1046de654,Supercuts,Will have to come again soon..due for another cut,9,NEGATIVE,0.999488
1,1.016310e+20,Brittiny Beil,5.0,I started my run with treatment at meadow Cree...,0x52b247197357f7f7:0x66d0d92cf109a1d5,Meadow Creek,I started my run with treatment at meadow Cree...,91,POSITIVE,0.998918
2,1.056987e+20,Michael Reagan,5.0,I like the professional but also friendly staf...,0x8858272f5a118b01:0xaa680edfef593625,AFC Urgent Care - Bon Secours - Simpsonville,I like the professional but also friendly staf...,46,POSITIVE,0.998923
3,1.054290e+20,Maureen Linker,5.0,I can't give enough praise for Dr K and her wo...,0x54950e4f37f2f0e1:0x2dd2b51972a685db,Prestige Family Dentistry,I can't give enough praise for Dr K and her wo...,94,POSITIVE,0.998932
4,1.002408e+20,Adriana Bohorquez,5.0,Love the amenities. My son asks me everyday to...,0x89b7fecef6f95e49:0xac324c8a4514eab,The Y in Pasadena,Love the amenities. My son asks me everyday to...,13,POSITIVE,0.998877
...,...,...,...,...,...,...,...,...,...,...
1224692,1.015856e+20,Th Cann,1.0,“I am happy to learn that it is your professio...,0x87f75f6d6de1af63:0xc5abbc8693804d76,"Mayo Clinic Hospital, Saint Marys Campus",“I am happy to learn that it is your professio...,239,NEGATIVE,0.999482
1224693,1.130505e+20,Wendy Hughes,4.0,“I love the office and detail that is given ab...,0x88f8aa36904616c1:0x88440359bfcd5334,LensCrafters,“I love the office and detail that is given ab...,17,POSITIVE,0.998932
1224694,1.130505e+20,Wendy Hughes,4.0,“I love the office and detail that is given ab...,0x88f8aa342cb2c9db:0x80d82d44eb4fccf4,Eye Associates of Columbia at the Village of S...,“I love the office and detail that is given ab...,17,POSITIVE,0.998932
1224695,1.144222e+20,F H,1.0,‼️‼️Beware they keep changing the name of the ...,0x89b708e76e18472f:0xd726e5892f7518f0,UM Charles Regional Medical Center,‼️‼️Beware they keep changing the name of the ...,107,NEGATIVE,0.999492


In [13]:
label_count = df['sentiment_label'].value_counts()
label_count

sentiment_label
POSITIVE    732872
NEGATIVE    491825
Name: count, dtype: int64

In [2]:
rating_count = df['rating'].value_counts()
rating_count

rating
5.0    500000
1.0    351845
4.0    209165
3.0     84593
2.0     77393
Name: count, dtype: int64

In [3]:
# map star ratings to sentiment
def map_rating_to_sentiment(rating):
    if rating == 'POSITIVE':
        return 1
    elif rating == 'NEGATIVE':
        return 0
    else:
        return 100

df['sent_label'] = df['sentiment_label'].apply(map_rating_to_sentiment)
df

Unnamed: 0,user_id,username,rating,original_text,gmap_id,business_name,translated_text,wcount,sentiment_label,sentiment_score,sent_label
0,1.021213e+20,Shana Smith,5.0,Will have to come again soon..due for another cut,0x8805428f9ca29317:0x4122efd1046de654,Supercuts,Will have to come again soon..due for another cut,9,NEGATIVE,0.999488,0
1,1.016310e+20,Brittiny Beil,5.0,I started my run with treatment at meadow Cree...,0x52b247197357f7f7:0x66d0d92cf109a1d5,Meadow Creek,I started my run with treatment at meadow Cree...,91,POSITIVE,0.998918,1
2,1.056987e+20,Michael Reagan,5.0,I like the professional but also friendly staf...,0x8858272f5a118b01:0xaa680edfef593625,AFC Urgent Care - Bon Secours - Simpsonville,I like the professional but also friendly staf...,46,POSITIVE,0.998923,1
3,1.054290e+20,Maureen Linker,5.0,I can't give enough praise for Dr K and her wo...,0x54950e4f37f2f0e1:0x2dd2b51972a685db,Prestige Family Dentistry,I can't give enough praise for Dr K and her wo...,94,POSITIVE,0.998932,1
4,1.002408e+20,Adriana Bohorquez,5.0,Love the amenities. My son asks me everyday to...,0x89b7fecef6f95e49:0xac324c8a4514eab,The Y in Pasadena,Love the amenities. My son asks me everyday to...,13,POSITIVE,0.998877,1
...,...,...,...,...,...,...,...,...,...,...,...
1224692,1.015856e+20,Th Cann,1.0,“I am happy to learn that it is your professio...,0x87f75f6d6de1af63:0xc5abbc8693804d76,"Mayo Clinic Hospital, Saint Marys Campus",“I am happy to learn that it is your professio...,239,NEGATIVE,0.999482,0
1224693,1.130505e+20,Wendy Hughes,4.0,“I love the office and detail that is given ab...,0x88f8aa36904616c1:0x88440359bfcd5334,LensCrafters,“I love the office and detail that is given ab...,17,POSITIVE,0.998932,1
1224694,1.130505e+20,Wendy Hughes,4.0,“I love the office and detail that is given ab...,0x88f8aa342cb2c9db:0x80d82d44eb4fccf4,Eye Associates of Columbia at the Village of S...,“I love the office and detail that is given ab...,17,POSITIVE,0.998932,1
1224695,1.144222e+20,F H,1.0,‼️‼️Beware they keep changing the name of the ...,0x89b708e76e18472f:0xd726e5892f7518f0,UM Charles Regional Medical Center,‼️‼️Beware they keep changing the name of the ...,107,NEGATIVE,0.999492,0


In [5]:
label_count = df['sent_label'].value_counts()
label_count

sent_label
1    732872
0    491825
Name: count, dtype: int64

In [4]:
# map star ratings to sentiment
def map_rating_to_sentiment(rating):
    if rating in [1,2]:
        return 0
    elif rating == 3:
        return 100
    else:
        return 1

df['true_rating'] = df['rating'].apply(map_rating_to_sentiment)
df

Unnamed: 0,user_id,username,rating,original_text,gmap_id,business_name,translated_text,wcount,sentiment_label,sentiment_score,sent_label,true_rating
0,1.021213e+20,Shana Smith,5.0,Will have to come again soon..due for another cut,0x8805428f9ca29317:0x4122efd1046de654,Supercuts,Will have to come again soon..due for another cut,9,NEGATIVE,0.999488,0,1
1,1.016310e+20,Brittiny Beil,5.0,I started my run with treatment at meadow Cree...,0x52b247197357f7f7:0x66d0d92cf109a1d5,Meadow Creek,I started my run with treatment at meadow Cree...,91,POSITIVE,0.998918,1,1
2,1.056987e+20,Michael Reagan,5.0,I like the professional but also friendly staf...,0x8858272f5a118b01:0xaa680edfef593625,AFC Urgent Care - Bon Secours - Simpsonville,I like the professional but also friendly staf...,46,POSITIVE,0.998923,1,1
3,1.054290e+20,Maureen Linker,5.0,I can't give enough praise for Dr K and her wo...,0x54950e4f37f2f0e1:0x2dd2b51972a685db,Prestige Family Dentistry,I can't give enough praise for Dr K and her wo...,94,POSITIVE,0.998932,1,1
4,1.002408e+20,Adriana Bohorquez,5.0,Love the amenities. My son asks me everyday to...,0x89b7fecef6f95e49:0xac324c8a4514eab,The Y in Pasadena,Love the amenities. My son asks me everyday to...,13,POSITIVE,0.998877,1,1
...,...,...,...,...,...,...,...,...,...,...,...,...
1224692,1.015856e+20,Th Cann,1.0,“I am happy to learn that it is your professio...,0x87f75f6d6de1af63:0xc5abbc8693804d76,"Mayo Clinic Hospital, Saint Marys Campus",“I am happy to learn that it is your professio...,239,NEGATIVE,0.999482,0,0
1224693,1.130505e+20,Wendy Hughes,4.0,“I love the office and detail that is given ab...,0x88f8aa36904616c1:0x88440359bfcd5334,LensCrafters,“I love the office and detail that is given ab...,17,POSITIVE,0.998932,1,1
1224694,1.130505e+20,Wendy Hughes,4.0,“I love the office and detail that is given ab...,0x88f8aa342cb2c9db:0x80d82d44eb4fccf4,Eye Associates of Columbia at the Village of S...,“I love the office and detail that is given ab...,17,POSITIVE,0.998932,1,1
1224695,1.144222e+20,F H,1.0,‼️‼️Beware they keep changing the name of the ...,0x89b708e76e18472f:0xd726e5892f7518f0,UM Charles Regional Medical Center,‼️‼️Beware they keep changing the name of the ...,107,NEGATIVE,0.999492,0,0


In [7]:
rating_count = df['true_rating'].value_counts()
rating_count

true_rating
1      710866
0      429238
100     84593
Name: count, dtype: int64

## **CHECK NEGATIVE REVIEW**

In [7]:
neg_df = df[(df['sent_label'] == 0) & ((df['rating'] == 4)|(df['rating'] == 5))]
neg_df

Unnamed: 0,user_id,username,rating,original_text,gmap_id,business_name,translated_text,wcount,sentiment_label,sentiment_score,sent_label,true_rating
0,1.021213e+20,Shana Smith,5.0,Will have to come again soon..due for another cut,0x8805428f9ca29317:0x4122efd1046de654,Supercuts,Will have to come again soon..due for another cut,9,NEGATIVE,0.999488,0,1
22,1.025813e+20,skyler Perry,5.0,"Took me 20 minutes to get checked in, total of...",0x89e4d730992cc5bb:0xc423646626252ca8,ConvenientMD Urgent Care,"Took me 20 minutes to get checked in, total of...",24,NEGATIVE,0.999503,0,1
84,1.015563e+20,jennifer durant,5.0,(Translated by Google) No gym timing\n\n(Origi...,0x8855641cfabbb733:0xfefe9e6aecd5a05b,Planet Fitness,No gym timing,3,NEGATIVE,0.999333,0,1
123,1.162752e+20,Matt Carvell,5.0,Just a typical trader Joe's but like all of th...,0x89e363f6e54d9d3b:0x5877203d39716aca,Trader Joe's,Just a typical trader Joe's but like all of th...,15,NEGATIVE,0.987956,0,1
233,1.059714e+20,Andrew Flasch,5.0,We had been taking our pets (3 dogs) to Olin o...,0x52b2d560851c8de5:0x85deaf7c7a2c77a4,Maryland Avenue Pet Hospital,We had been taking our pets (3 dogs) to Olin o...,395,NEGATIVE,0.997014,0,1
...,...,...,...,...,...,...,...,...,...,...,...,...
1224505,1.136655e+20,Gustavo Ricardo Navarro Arcos,4.0,(Translated by Google) You get what you need\n...,0x88fb8fc00afae649:0xe81082ac5a9df5c5,Dollar General,You get what you need,5,NEGATIVE,0.990497,0,1
1224514,1.106037e+20,Hannah Burkhardt,4.0,You have to pay for parking,0x8807acdd647b3455:0x39d8011dc1836734,Trader Joe's,You have to pay for parking,6,NEGATIVE,0.999353,0,1
1224596,1.129794e+20,marky bucknam,4.0,it is okay,0x89e372051e6dda91:0x1d8d52cf707ad575,Dollar General,it is okay,3,NEGATIVE,0.991889,0,1
1224602,1.118241e+20,Kendra Jones,4.0,it was ok,0x8805191d54c3d39d:0x83f5b670292d4546,Ovation Communities,it was ok,3,NEGATIVE,0.997940,0,1


In [1]:
import pandas as pd
path = "DATA5_SIEBERT.csv"
df = pd.read_csv(path)

In [7]:
#count label
label_count = df['sentiment_label'].value_counts()
label_count

sentiment_label
POSITIVE    732872
NEGATIVE    491825
Name: count, dtype: int64

In [3]:
df

Unnamed: 0,user_id,username,rating,original_text,gmap_id,business_name,translated_text,wcount,sentiment_label,sentiment_score
0,1.021213e+20,Shana Smith,5.0,Will have to come again soon..due for another cut,0x8805428f9ca29317:0x4122efd1046de654,Supercuts,Will have to come again soon..due for another cut,9,NEGATIVE,0.999488
1,1.016310e+20,Brittiny Beil,5.0,I started my run with treatment at meadow Cree...,0x52b247197357f7f7:0x66d0d92cf109a1d5,Meadow Creek,I started my run with treatment at meadow Cree...,91,POSITIVE,0.998918
2,1.056987e+20,Michael Reagan,5.0,I like the professional but also friendly staf...,0x8858272f5a118b01:0xaa680edfef593625,AFC Urgent Care - Bon Secours - Simpsonville,I like the professional but also friendly staf...,46,POSITIVE,0.998923
3,1.054290e+20,Maureen Linker,5.0,I can't give enough praise for Dr K and her wo...,0x54950e4f37f2f0e1:0x2dd2b51972a685db,Prestige Family Dentistry,I can't give enough praise for Dr K and her wo...,94,POSITIVE,0.998932
4,1.002408e+20,Adriana Bohorquez,5.0,Love the amenities. My son asks me everyday to...,0x89b7fecef6f95e49:0xac324c8a4514eab,The Y in Pasadena,Love the amenities. My son asks me everyday to...,13,POSITIVE,0.998877
...,...,...,...,...,...,...,...,...,...,...
1224692,1.015856e+20,Th Cann,1.0,“I am happy to learn that it is your professio...,0x87f75f6d6de1af63:0xc5abbc8693804d76,"Mayo Clinic Hospital, Saint Marys Campus",“I am happy to learn that it is your professio...,239,NEGATIVE,0.999482
1224693,1.130505e+20,Wendy Hughes,4.0,“I love the office and detail that is given ab...,0x88f8aa36904616c1:0x88440359bfcd5334,LensCrafters,“I love the office and detail that is given ab...,17,POSITIVE,0.998932
1224694,1.130505e+20,Wendy Hughes,4.0,“I love the office and detail that is given ab...,0x88f8aa342cb2c9db:0x80d82d44eb4fccf4,Eye Associates of Columbia at the Village of S...,“I love the office and detail that is given ab...,17,POSITIVE,0.998932
1224695,1.144222e+20,F H,1.0,‼️‼️Beware they keep changing the name of the ...,0x89b708e76e18472f:0xd726e5892f7518f0,UM Charles Regional Medical Center,‼️‼️Beware they keep changing the name of the ...,107,NEGATIVE,0.999492


In [2]:
# List of target indices 
target_indices = [
    566, 627, 825, 1528, 2767, 2955, 3096, 3442, 5133, 5189, 5666, 5866, 5932, 6224,
    6248, 6422, 7319, 8312, 8365, 8630, 8758, 8838, 8927, 8995, 9019, 9209, 10150,
    11602, 13690, 14650, 15354, 15387, 15413, 15435, 15627, 15816, 16148, 16361,
    16597, 16741, 16799, 16812, 16826, 17289, 17857, 17880, 18053, 18333, 18530,
    18695, 19600, 19865, 20641, 20710, 20791, 21081, 21274, 23997, 26077, 27128,
    28022, 28313, 28442, 28813, 29888, 30164, 30172, 30589, 30990, 31118, 31383,
    31717, 32237, 32489, 32895, 33240, 34259, 34929, 35168, 35580, 36085, 36454,
    36819, 37221, 37379, 39492, 39574, 40370, 40464, 40833, 41122, 41811, 43830,
    44034, 44120, 44320, 44587, 44920, 45206, 45910, 46986, 47121, 47661, 48479,
    49113, 49567, 49716, 49767, 51039, 51473, 51520, 51854, 51861, 51932, 51983,
    52081, 52097, 53141, 53420, 53852, 54660, 55091, 55144, 55343, 55823, 56309,
    56577, 57091, 57704, 58069, 58134, 58459, 58505, 58508, 58759, 60142, 60583,
    61234, 61310, 61344, 61475, 61614, 61820, 62075, 62090, 62150, 62774, 63840,
    63985, 64239, 64369, 66059, 66245, 66346, 66546, 67480, 68087, 68170, 68486,
    68667, 70829, 70961, 71318, 71447, 71651, 71746, 72231, 72938, 73400, 73782,
    73849, 73990, 74045, 74886, 74912, 74948, 75021, 75198, 76090, 76164, 76584,
    76757, 77143, 77497, 77705
]

# Retrieve and display rows with those indices
filtered_df = df.loc[target_indices]
filtered_df


Unnamed: 0,user_id,username,rating,original_text,gmap_id,business_name,translated_text,wcount,sentiment_label,sentiment_score
566,1.170197e+20,Ahmad Moore,5.0,I had a really bad toothache and my dental off...,0x88582b104b1ede17:0xf75ebdc6a6ff6c2e,Greer Family Dental Care,I had a really bad toothache and my dental off...,34,POSITIVE,0.998841
627,1.110046e+20,Freddie Garnier,5.0,"Before I found this place, I had been through ...",0x89e37a0fb3130c95:0xf129d95943544283,Dental Partners of Boston at Prudential Center,"Before I found this place, I had been through ...",88,POSITIVE,0.998834
825,1.180661e+20,charles ford,5.0,Be service ever. I will never get my glasses m...,0x87d4293afd8ecbb3:0xad6b5fb65a8e0273,LensMasters,Be service ever. I will never get my glasses m...,15,NEGATIVE,0.997988
1528,1.067278e+20,Victoria Kelly,5.0,In 1982 I was admitted to the psychiatrist uni...,0x52b333c3be1df95b:0x80ffb6434e6c8fd4,North Memorial Health Hospital,In 1982 I was admitted to the psychiatrist uni...,74,POSITIVE,0.997785
2767,1.115348e+20,Heidi Petersen,5.0,They live up to the name. I’m terrified of th...,0x878c82f5ef089f7f:0x1d8b2f69ffef401a,Friendly Dental,They live up to the name. I’m terrified of th...,15,POSITIVE,0.998600
...,...,...,...,...,...,...,...,...,...,...
76584,1.098285e+20,Akemi G (Akemi G),5.0,"Several years ago, I had a treatment (root can...",0x54950dbe39dc0f3f:0x236b6242e4e5bfdb,Hillside Dental Care,"Several years ago, I had a treatment (root can...",88,POSITIVE,0.998566
76757,1.046495e+20,Bob Calhoun,5.0,I cut the tip of my finger off and went to ano...,0x88fb88f129da2557:0xe0ec92e6662646,St. Joseph's/Candler Urgent Care - Bluffton,I cut the tip of my finger off and went to ano...,82,POSITIVE,0.998783
77143,1.131961e+20,Jeffrey Comeau,5.0,"Not good burgers, GREAT burgers. They should c...",0x89e306307225fbb1:0xc636b013348936e0,B.GOOD,"Not good burgers, GREAT burgers. They should c...",13,NEGATIVE,0.999471
77497,1.182099e+20,Guillen Royce Ojastro,5.0,I received my product very early and cant want...,0x88f8b084bbae02a9:0x3ec4c8882be0d480,Muscle & Strength,I received my product very early and cant want...,12,NEGATIVE,0.999436


In [20]:
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix

#accuracy
acc = accuracy_score(df['true_rating'], df['sent_label'])
print("Accuracy:",acc)

# classification report
print("\nClassification Report:")
print(classification_report(df['true_rating'], df['sent_label']))

Accuracy: 0.9025269107379214

Classification Report:


  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


              precision    recall  f1-score   support

           0       0.85      0.97      0.91    429238
           1       0.94      0.97      0.95    710866
         100       0.00      0.00      0.00     84593

    accuracy                           0.90   1224697
   macro avg       0.60      0.65      0.62   1224697
weighted avg       0.84      0.90      0.87   1224697



  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


In [21]:
neut_NEG = df[(df['rating'] == 3) & (df['sentiment_label'] == 'NEGATIVE')].copy()
neut_NEG

Unnamed: 0,user_id,username,rating,original_text,gmap_id,business_name,translated_text,wcount,sentiment_label,sentiment_score,true_rating,sent_label
500006,1.023989e+20,Stephanie Garza,3.0,well i could’ve died (it really felt like i wa...,0x87c96ee6220bff3d:0x3ba603ed95a0b0ca,Pat Walker Health Center,well i could’ve died (it really felt like i wa...,551,NEGATIVE,0.996449,100,0
500012,1.089317e+20,Lisa H,3.0,Dr. Siems had Little to No respect for me. He ...,0x87d2be8f6ff74a71:0xd0893f742a753053,"Marty L. Siems, M.D.",Dr. Siems had Little to No respect for me. He ...,55,NEGATIVE,0.999504,100,0
500020,1.127286e+20,Akiah,3.0,Super slow every time we come,0x87c9a2aa4292efa7:0x9b366c222d303d80,Walmart Pharmacy,Super slow every time we come,6,NEGATIVE,0.999489,100,0
500054,1.056717e+20,QuirkFrame Industries,3.0,I actually enjoyed the psychiatrist that teste...,0x87d42bb04d53509b:0x9d4eb1558c5a6bb2,Families Inc Counseling Services,I actually enjoyed the psychiatrist that teste...,100,NEGATIVE,0.999472,100,0
500066,1.023254e+20,AR_Girl 82 aka LeAnn,3.0,**This is not a review regarding treatment rec...,0x87d281f2f96e2f63:0xa5708b3283d7f3c5,Landberg Karl MD,**This is not a review regarding treatment rec...,52,NEGATIVE,0.999436,100,0
...,...,...,...,...,...,...,...,...,...,...,...,...
1224629,1.069285e+20,maryann sainsbury,3.0,prices are high,0x80ca5ae4d5bce85d:0x754e125650df44e6,Lin's Market,prices are high,3,NEGATIVE,0.996259,100,0
1224657,1.002488e+20,Lisa Carpenter,3.0,typical dollar general store,0x885825ea58daf7e3:0xc1760a75f4d055d9,Dollar General,typical dollar general store,4,NEGATIVE,0.996810,100,0
1224685,1.035581e+20,Steve Raposo,3.0,you get what you pay for,0x89e4f2e19091749d:0x47ff829ae5e692ce,WOW! Work Out World Taunton,you get what you pay for,6,NEGATIVE,0.998456,100,0
1224686,1.109753e+20,faycal Ben,3.0,you get what you pay for,0x52b24d4fba6222b7:0x5d497374aeac67e5,Dollar General,you get what you pay for,6,NEGATIVE,0.998456,100,0


In [22]:
neut_POS = df[(df['rating'] == 3) & (df['sentiment_label'] == 'POSITIVE')].copy()
neut_POS

Unnamed: 0,user_id,username,rating,original_text,gmap_id,business_name,translated_text,wcount,sentiment_label,sentiment_score,true_rating,sent_label
500043,1.184322e+20,Stanley Crabb,3.0,"Mediocore... not great, but not bad either. M...",0x87d42bb04d53509b:0x9d4eb1558c5a6bb2,Families Inc Counseling Services,"Mediocore... not great, but not bad either. M...",45,POSITIVE,0.995965,100,1
500114,1.059021e+20,Giblet,3.0,"Personally, I felt that this place benefited m...",0x87d29f2424d19143:0x502aacbc2382a467,Methodist Behavioral Hospital,"Personally, I felt that this place benefited m...",687,POSITIVE,0.997072,100,1
500117,1.056790e+20,Sherree Hollingsworth,3.0,Methodist does the best they can with children...,0x87d29f2424d19143:0x502aacbc2382a467,Methodist Behavioral Hospital,Methodist does the best they can with children...,51,POSITIVE,0.997806,100,1
500142,1.092588e+20,Michael Dupslaff,3.0,Best Physician I've seen in probably 15 years.,0x87d2a47e24da7d65:0x128d8ae6795f40dc,Carter Family Medicine Clinic,Best Physician I've seen in probably 15 years.,8,POSITIVE,0.998548,100,1
500176,1.101283e+20,Leodis Randle (Psycho),3.0,My body feels good,0x87d2a0d5b1579883:0xff87bcce5352836f,Dara Thai Massage & Spa,My body feels good,4,POSITIVE,0.998621,100,1
...,...,...,...,...,...,...,...,...,...,...,...,...
1224628,1.101999e+20,S T,3.0,"prices are alright , store is clean",0x8857b227e02f7177:0x9c7c63aaaa55f7ef,Dollar General,"prices are alright , store is clean",7,POSITIVE,0.998430,100,1
1224631,1.131454e+20,leonard bayer,3.0,quick and easy,0x89c80774230d9089:0xcde7180f543a536a,Dollar General,quick and easy,3,POSITIVE,0.998734,100,1
1224673,1.014481e+20,Korben Murphy,3.0,we should have the option to box a bag for car...,0x87530f8cf3047c37:0x9418fc346aa949ff,EōS Fitness,we should have the option to box a bag for car...,28,POSITIVE,0.916588,100,1
1224674,1.014481e+20,Korben Murphy,3.0,we should have the option to box a bag for car...,0x875303bc54ee0f6d:0x3d7a2cee50435bb4,EōS Fitness,we should have the option to box a bag for car...,28,POSITIVE,0.916588,100,1
