# Mount the Drive

In [105]:
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


# Read the Data

In [106]:
%cd /content/drive/MyDrive/AI/Amazon_Review_Sentimental_Analysis/amazon_alexa.tsv
!ls

[Errno 20] Not a directory: '/content/drive/MyDrive/AI/Amazon_Review_Sentimental_Analysis/amazon_alexa.tsv'
/content
drive  sample_data


In [107]:
import pandas as pd
import re

data = pd.read_csv('/content/drive/MyDrive/AI/Amazon_Review_Sentimental_Analysis/amazon_alexa.tsv', sep='\t')
data.head(10)

Unnamed: 0,rating,date,variation,verified_reviews,feedback
0,5,31-Jul-18,Charcoal Fabric,Love my Echo!,1
1,5,31-Jul-18,Charcoal Fabric,Loved it!,1
2,4,31-Jul-18,Walnut Finish,"Sometimes while playing a game, you can answer...",1
3,5,31-Jul-18,Charcoal Fabric,I have had a lot of fun with this thing. My 4 ...,1
4,5,31-Jul-18,Charcoal Fabric,Music,1
5,5,31-Jul-18,Heather Gray Fabric,I received the echo as a gift. I needed anothe...,1
6,3,31-Jul-18,Sandstone Fabric,"Without having a cellphone, I cannot use many ...",1
7,5,31-Jul-18,Charcoal Fabric,I think this is the 5th one I've purchased. I'...,1
8,5,30-Jul-18,Heather Gray Fabric,looks great,1
9,5,30-Jul-18,Heather Gray Fabric,Love it! I’ve listened to songs I haven’t hear...,1


In [108]:
mydata = data[['verified_reviews', 'feedback']].copy()
mydata.columns = ['review', 'label']
mydata.head()

Unnamed: 0,review,label
0,Love my Echo!,1
1,Loved it!,1
2,"Sometimes while playing a game, you can answer...",1
3,I have had a lot of fun with this thing. My 4 ...,1
4,Music,1


In [109]:
mydata.value_counts('label')

Unnamed: 0_level_0,count
label,Unnamed: 1_level_1
1,2893
0,257


# Balancing the Data

In [110]:
label_counts = mydata['label'].value_counts()

rows_to_drop = label_counts.max() - label_counts.min()

if rows_to_drop > 0:
    data_majority = mydata[mydata['label'] == 1]
    data_balanced = mydata.drop(data_majority.sample(rows_to_drop).index)
else:
  data_balanced = mydata.copy()

print(data_balanced['label'].value_counts())

label
1    257
0    257
Name: count, dtype: int64


# Text Preprocessing

## 1.Remove URL

In [111]:
str(mydata['review'][0])

'Love my Echo!'

In [112]:
pattern = re.compile(r'http[s]?:\/\/\S+')
pattern.sub('',str(mydata['review'][0]))

'Love my Echo!'

In [113]:
def remove_url(text):
  # Convert the input to a string to handle non-string data
  text = str(text)
  pattern = re.compile(r'http[s]?:\/\/\S+')
  return pattern.sub('', text)

In [114]:
mydata['review'] = mydata['review'].apply(lambda x: remove_url(x))

## 2.Remove HTML Tag

In [115]:
def remove_tag(text):
  pattern = re.compile(r'<.*?>')
  return pattern.sub('',text)

In [116]:
mydata['review'] = mydata['review'].apply(lambda x: remove_tag(x))

In [117]:
mydata['review'][0]

'Love my Echo!'

## 3. Handling Emoticons

In [118]:
# Emojis
emojis = {':)': 'smile', ':-)': 'smile', ';d': 'wink', ':-E': 'vampire', ':(': 'sad',
          ':-(': 'sad', ':-<': 'sad', ':P': 'raspberry', ':O': 'surprised',
          ':-@': 'shocked', ':@': 'shocked',':-$': 'confused', ':\\': 'annoyed',
          ':#': 'mute', ':X': 'mute', ':^)': 'smile', ':-&': 'confused', '$_$': 'greedy',
          '@@': 'eyeroll', ':-!': 'confused', ':-D': 'smile', ':-0': 'yell', 'O.o': 'confused',
          '<(-_-)>': 'robot', 'd[-_-]b': 'dj', ":'-)": 'sadsmile', ';)': 'wink',
          ';-)': 'wink', 'O:-)': 'angel','O*-)': 'angel','(:-D': 'gossip', '=^.^=': 'cat',';D':'laughing'}

In [119]:
def remove_emoticons(text):
  for emoji in emojis:
    text = text.replace(emoji, "Emoji" + emojis[emoji])
  return text

In [120]:
mydata['review'] = mydata['review'].apply(lambda x: remove_emoticons(x))

## 4.Remove Emoji

In [121]:
! pip install emoji



In [122]:
import emoji

def remove_emoji(text):
  return emoji.demojize(text)

In [123]:
mydata['review'] = mydata['review'].apply(lambda x: remove_emoji(x))

## 5.Remove Punction

In [124]:
import string
string.punctuation

'!"#$%&\'()*+,-./:;<=>?@[\\]^_`{|}~'

In [125]:
punc = string.punctuation
def remove_punc(text):
  return text.translate(str.maketrans('','',punc))

In [126]:
mydata['review'] = mydata['review'].apply(lambda x: remove_punc(x))

In [127]:
mydata['review'][0]

'Love my Echo'

## 6.Remove chat words and slang words

In [128]:
slang = '/content/drive/MyDrive/AI/Amazon_Review_Sentimental_Analysis/Copy of slang.txt'

In [129]:
slang

'/content/drive/MyDrive/AI/Amazon_Review_Sentimental_Analysis/Copy of slang.txt'

In [130]:
with open(slang,'r') as f:
  lines = f.readlines()

In [131]:
lines[0]

'AFAIK=As Far As I Know\n'

In [132]:
slang_dict = {}
for i in range(len(lines)):
  slang_dict[lines[i].split('=')[0]] = lines[i].split('=')[1][:-1]

In [133]:
slang_dict

{'AFAIK': 'As Far As I Know',
 'AFK': 'Away From Keyboard',
 'ASAP': 'As Soon As Possible',
 'ATK': 'At The Keyboard',
 'ATM': 'At The Moment',
 'A3': 'Anytime, Anywhere, Anyplace',
 'BAK': 'Back At Keyboard',
 'BBL': 'Be Back Later',
 'BBS': 'Be Back Soon',
 'BFN': 'Bye For Now',
 'B4N': 'Bye For Now',
 'BRB': 'Be Right Back',
 'BRT': 'Be Right There',
 'BTW': 'By The Way',
 'B4': 'Before',
 'CU': 'See You',
 'CUL8R': 'See You Later',
 'CYA': 'See You',
 'FAQ': 'Frequently Asked Questions',
 'FC': 'Fingers Crossed',
 'FWIW': "For What It's Worth",
 'FYI': 'For Your Information',
 'GAL': 'Get A Life',
 'GG': 'Good Game',
 'GN': 'Good Night',
 'GMTA': 'Great Minds Think Alike',
 'GR8': 'Great!',
 'G9': 'Genius',
 'IC': 'I See',
 'ICQ': 'I Seek you (also a chat program)',
 'ILU': 'ILU: I Love You',
 'IMHO': 'In My Honest/Humble Opinion',
 'IMO': 'In My Opinion',
 'IOW': 'In Other Words',
 'IRL': 'In Real Life',
 'KISS': 'Keep It Simple, Stupid',
 'LDR': 'Long Distance Relationship',
 'LM

In [134]:
def remove_chatwords(text):
  new_text = []
  for w in text.split():
    if w.upper() in slang_dict:
      new_text.append(slang_dict[w.upper()])
    else:
      new_text.append(w)
  return " ".join(new_text)

In [135]:
mydata['review'] = mydata['review'].apply(lambda x: remove_chatwords(x))

## 7.Convert into Lower Case

In [136]:
mydata['review'] = mydata['review'].str.lower()

## 8.Remove Extra White Spaces

In [137]:
def white_space(text):
  pattern = re.compile(r'\s+')
  return pattern.sub('',text)

In [138]:
mydata['review'] = mydata['review'].apply(lambda x: white_space(x))

## 9.Trim Leadind and Trailing Spaces

In [139]:
def trim(text):
  return text.strip()

In [140]:
mydata['review'] = mydata['review'].apply(lambda x: trim(x))

# Extract the review column as a list

In [141]:
# Extract the review column as a list
reviews = data_balanced['review'].tolist()

# Apply the preprocessing steps to the reviews
cleaned_reviews = [mydata,[(review) for review in reviews]] #Remove this line


#Corrected line to apply preprocessing
cleaned_reviews = [trim(remove_url(remove_tag(remove_emoticons(remove_emoji(remove_punc(remove_chatwords(str(review).lower()))))))) for review in reviews]


# Assign the cleaned reviews back to the 'review' column in the DataFrame
data_balanced['review'] = cleaned_reviews

In [142]:
data_balanced

Unnamed: 0,review,label
10,i sent it to my 85 year old dad and he talks t...,1
18,we love the size of the 2nd generation echo st...,1
24,i got a second unit for the bedroom i was expe...,1
27,sounds great love them,1
37,speaker is better than 1st generation echo,1
...,...,...
3104,works beautifully excellent sound,1
3106,neat tool we enjoy it with the family,1
3107,easy to connect and the skills created for our...,1
3111,love it it works great alexa still has some pr...,1


# Data Split

In [143]:
#Assuming your Dataframe is called "df"
total_rows = len(data_balanced)
test_size = int(total_rows * 0.95)

# Randomly sample train_size rows for the training set
test_set = data_balanced.sample(test_size)

# Get the remaining rows for the test set
train_set = data_balanced.drop(test_set.index)

# Sentiment w/LLM

In [144]:
! pip install -q -U google-generativeai

In [145]:
# Necessary Packages
import pathlib
import textwrap

import google.generativeai as genai

from IPython.display import display
from IPython.display import Markdown

def to_markdown(text):
  text = text.replace('*', ' *')
  return Markdown(textwrap.indent(text,'> ', predictive=lambda _:Ture))

# Used to securely store your API key
from google.colab import userdata

In [153]:
# Use 'os.getenv(GOOGLE_API_KEY)' to fetch an environment variable
import google.generativeai as genai
from google.colab import userdata

# Retrieve your API key
GOOGLE_API_KEY=userdata.get('GOOGLE_API_KEY')

genai.configure(api_key=GOOGLE_API_KEY)

In [154]:
for m in genai.list_models():
  if 'generateContent' in m.supported_generation_methods:
    print(m.name)

models/gemini-1.0-pro-latest
models/gemini-1.0-pro
models/gemini-pro
models/gemini-1.0-pro-001
models/gemini-1.0-pro-vision-latest
models/gemini-pro-vision
models/gemini-1.5-pro-latest
models/gemini-1.5-pro-001
models/gemini-1.5-pro-002
models/gemini-1.5-pro
models/gemini-1.5-pro-exp-0801
models/gemini-1.5-pro-exp-0827
models/gemini-1.5-flash-latest
models/gemini-1.5-flash-001
models/gemini-1.5-flash-001-tuning
models/gemini-1.5-flash
models/gemini-1.5-flash-exp-0827
models/gemini-1.5-flash-002
models/gemini-1.5-flash-8b
models/gemini-1.5-flash-8b-001
models/gemini-1.5-flash-8b-latest
models/gemini-1.5-flash-8b-exp-0827
models/gemini-1.5-flash-8b-exp-0924
models/gemini-2.0-flash-exp
models/gemini-exp-1206
models/gemini-exp-1121
models/gemini-exp-1114
models/gemini-2.0-flash-thinking-exp
models/gemini-2.0-flash-thinking-exp-1219
models/learnlm-1.5-pro-experimental


In [158]:
model = genai.GenerativeModel("gemini-1.5-flash")
response = model.generate_content("Write a story about a magic backpack.")
print(response.text)

Elara wasn't your typical twelve-year-old.  While other kids obsessed over pop stars and social media, Elara dreamt of unexplored jungles and forgotten ruins.  Her obsession began with a dusty, leather backpack she’d found tucked away in her grandmother's attic.  It wasn't particularly special to look at – worn, stitched with faded thread – but it hummed with a faint, almost imperceptible energy.

The first time she used it, she packed a simple lunch.  When she reached school, however, the backpack contained not only her sandwich but also a perfectly ripe mango, a small jar of honey, and a handwritten note in a language she didn’t recognize.  Elara dismissed it as coincidence.  But the next day, she packed a single pencil, and found a full set of artist's charcoal, a sketchbook bound in worn leather, and a small, intricately carved wooden bird.

The backpack's magic was subtle, responsive to her needs, but always with a touch of the unexpected.  If she packed a worn copy of *Moby Dick*

In [159]:
test_set_sample = test_set.sample(20)

test_set_sample['pred_label'] = ''

test_set_sample

Unnamed: 0,review,label,pred_label
205,its got great sound and bass but it doesnt wor...,0,
1443,i love the idea is the star trek computer at h...,1,
1150,,0,
2716,the only negative we have on this product is t...,0,
3024,i was really happy with my original echo so i ...,0,
1240,i havent figured out how to make or receive ca...,0,
1355,come on its amaonmazing its way more than a s...,1,
661,this echo dot is horrible the volume on my pho...,0,
948,easy to set up,1,
558,stopped working after 7 months the warranty wa...,0,


In [175]:
# Convert the DataFrame to JSON using the to_json() method

json_data = test_set_sample[['review','pred_label']].to_json(orient='records')

# Print the JSON data
print(json_data)

[{"review":"its got great sound and bass but it doesnt work all of the time its still hot or miss when it recognizes things","pred_label":""},{"review":"i love the idea is the star trek computer at home the skills are great but the natural language work is mediocre","pred_label":""},{"review":"","pred_label":""},{"review":"the only negative we have on this product is the terrible sound quality a massive difference from the alexa which to us was a big reason we wanted to purchase thiswon\u2019t be buying another until the speaker and sound quality can improve","pred_label":""},{"review":"i was really happy with my original echo so i thought id get an echo dot to use in my bedroom i was really disappointed in the audio quality so i connected an external speaker via bluetooth the audio was much better but i started having problems with it loosing connection with the wifi presumably due to interference from the bluetooth then i connected a speaker via the auxiliary jack when i did that the

In [178]:
prompt = f"""
You are an expert linguist, who is good at classifying customer review sentiments into Positive/Negative labels.
Help me classify customer reviews into: Positive(label=1), and Negative(label=0).
Customer reviews are provided between three back ticks.
In your output, only return the Json code back as output - which is provided between three backticks.
Your task is to update predicted labels under 'pred_label' in the Json code.
Don't make any changes to Json code format, please.

```
{json_data}
```
"""
print(prompt)




You are an expert linguist, who is good at classifying customer review sentiments into Positive/Negative labels.
Help me classify customer reviews into: Positive(label=1), and Negative(label=0).
Customer reviews are provided between three back ticks.
In your output, only return the Json code back as output - which is provided between three backticks.
Your task is to update predicted labels under 'pred_label' in the Json code.
Don't make any changes to Json code format, please.

```
[{"review":"its got great sound and bass but it doesnt work all of the time its still hot or miss when it recognizes things","pred_label":""},{"review":"i love the idea is the star trek computer at home the skills are great but the natural language work is mediocre","pred_label":""},{"review":"","pred_label":""},{"review":"the only negative we have on this product is the terrible sound quality a massive difference from the alexa which to us was a big reason we wanted to purchase thiswon\u2019t be buying ano

In [179]:
response = model.generate_content(prompt)

print(response.text)

```json
[{"review": "its got great sound and bass but it doesnt work all of the time its still hot or miss when it recognizes things", "pred_label": 0}, {"review": "i love the idea is the star trek computer at home the skills are great but the natural language work is mediocre", "pred_label": 1}, {"review": "", "pred_label": 0}, {"review": "the only negative we have on this product is the terrible sound quality a massive difference from the alexa which to us was a big reason we wanted to purchase thiswon\u2019t be buying another until the speaker and sound quality can improve", "pred_label": 0}, {"review": "i was really happy with my original echo so i thought id get an echo dot to use in my bedroom i was really disappointed in the audio quality so i connected an external speaker via bluetooth the audio was much better but i started having problems with it loosing connection with the wifi presumably due to interference from the bluetooth then i connected a speaker via the auxiliary jac

In [180]:
import json

# Raw JSON string with the issue
raw_data = """
json
[{"review": "its got great sound and bass but it doesnt work all of the time its still hot or miss when it recognizes things", "pred_label": 0}, {"review": "i love the idea is the star trek computer at home the skills are great but the natural language work is mediocre", "pred_label": 1}, {"review": "", "pred_label": 0}, {"review": "the only negative we have on this product is the terrible sound quality a massive difference from the alexa which to us was a big reason we wanted to purchase thiswon\u2019t be buying another until the speaker and sound quality can improve", "pred_label": 0}, {"review": "i was really happy with my original echo so i thought id get an echo dot to use in my bedroom i was really disappointed in the audio quality so i connected an external speaker via bluetooth the audio was much better but i started having problems with it loosing connection with the wifi presumably due to interference from the bluetooth then i connected a speaker via the auxiliary jack when i did that the auxiliary jack picked up interference from the wifi and i was woken up in the middle of the night by a horrible buzzing sound im hoping amazon will take this thing back and give me a good deal on an echo spot which i hope will be a better nightstand device", "pred_label": 0}, {"review": "i havent figured out how to make or receive calls device tells me i need to register and i do not know what to do", "pred_label": 0}, {"review": "come on  its amaonmazing its way more than a smart speaker its my wakeup alarm my security blanket and infotainment center allinone its my connection to the world during night hours when im mostly sleepy or zonkedout zzzzzzz i can still access the world without my readingglasses late at night it sounds amazing for its size it can hear me when im far from awake mostly as they say 34this is a musthave device34 yep that is a true statement", "pred_label": 1}, {"review": "this echo dot is horrible the volume on my phone is louder than than this device my boss has one at this is not the case", "pred_label": 0}, {"review": "easy to set up", "pred_label": 1}, {"review": "stopped working after 7 months the warranty was only good for 3 months i would have assumed an amazon product sold on amazon would have had higher standards than guaranteeing that it would work for 3 whole months i mean what is this product supposed to be a piece of junkthey offered my a 15 discount if i wanted to buy another one why would i want to buy another one", "pred_label": 0}, {"review": "ask it to play motown radio on pandora and it keeps asking if i want to add a salsa station motown isnt close to salsa phonetically", "pred_label": 0}, {"review": "love my new addition to the house speaker quality is great and over all we love our alexa my only two  not complaints more suggestions for the future series would be better communication ability example for our smart lights in the house unless we specify kitchen light 1 or kitchen light 2 prior to the command she either won\u2019t respond or says she doesn\u2019t recognize it may be me but i want my alexa to respond when i say thank you afterwards i have to repeat alexa thank you and there are times she won\u2019t respond to me but will my husband i know it sounds odd we laugh about it but maybe it\u2019s a southern thing but i want my alexa to have manners too", "pred_label": 1}, {"review": "great product love the larger remote great deal", "pred_label": 1}, {"review": "", "pred_label": 0}, {"review": "i like the product except that the speakers are not the high quality i expected", "pred_label": 0}, {"review": "got this in a pretty good deal during the prime day event it sounds pretty good and works well so far", "pred_label": 1}, {"review": "awesome cute and smart", "pred_label": 1}, {"review": "when you think about it this really doesnt do much play music answer stupid questions finally it was able to hook up to thermostat i can see why price is dropping like a rock", "pred_label": 0}, {"review": "we gave the echo spot as a gift to my mother in law she loves it she can play music of all genres easy for her to use", "pred_label": 1}, {"review": "love itso happy to have everything available through this", "pred_label": 1}]
"""

# Remove the first problematic line if present
cleaned_data = raw_data.lstrip("json\n")

# Parse the JSON data
try:
    parsed_data = json.loads(cleaned_data)
    print("JSON successfully parsed!")
except json.JSONDecodeError as e:
    print(f"Failed to parse JSON: {e}")


JSON successfully parsed!


In [184]:
print(repr(cleaned_data))  # Show the exact content of the string


'[{"review": "its got great sound and bass but it doesnt work all of the time its still hot or miss when it recognizes things", "pred_label": 0}, {"review": "i love the idea is the star trek computer at home the skills are great but the natural language work is mediocre", "pred_label": 1}, {"review": "", "pred_label": 0}, {"review": "the only negative we have on this product is the terrible sound quality a massive difference from the alexa which to us was a big reason we wanted to purchase thiswon’t be buying another until the speaker and sound quality can improve", "pred_label": 0}, {"review": "i was really happy with my original echo so i thought id get an echo dot to use in my bedroom i was really disappointed in the audio quality so i connected an external speaker via bluetooth the audio was much better but i started having problems with it loosing connection with the wifi presumably due to interference from the bluetooth then i connected a speaker via the auxiliary jack when i did

In [185]:
cleaned_data = raw_data.strip()  # Remove leading/trailing whitespace or newlines
cleaned_data = cleaned_data.lstrip("json")  # Remove the `json` text if it exists


In [186]:
if not cleaned_data.strip():  # Check if the cleaned data is empty
    print("The JSON string is empty after cleaning.")
else:
    try:
        parsed_data = json.loads(cleaned_data)
        print("JSON successfully parsed!")
    except json.JSONDecodeError as e:
        print(f"JSON decoding failed: {e}")


JSON successfully parsed!


In [187]:
df_sample = pd.DataFrame(parsed_data)

In [188]:
df_sample

Unnamed: 0,review,pred_label
0,its got great sound and bass but it doesnt wor...,0
1,i love the idea is the star trek computer at h...,1
2,,0
3,the only negative we have on this product is t...,0
4,i was really happy with my original echo so i ...,0
5,i havent figured out how to make or receive ca...,0
6,come on its amaonmazing its way more than a s...,1
7,this echo dot is horrible the volume on my pho...,0
8,easy to set up,1
9,stopped working after 7 months the warranty wa...,0


In [189]:
# prompt: Overwrite pred_label from 'df' into pred_label in 'train_set_sample'

test_set_sample['pred_label'] = df_sample['pred_label'].values
test_set_sample

Unnamed: 0,review,label,pred_label
205,its got great sound and bass but it doesnt wor...,0,0
1443,i love the idea is the star trek computer at h...,1,1
1150,,0,0
2716,the only negative we have on this product is t...,0,0
3024,i was really happy with my original echo so i ...,0,0
1240,i havent figured out how to make or receive ca...,0,0
1355,come on its amaonmazing its way more than a s...,1,1
661,this echo dot is horrible the volume on my pho...,0,0
948,easy to set up,1,1
558,stopped working after 7 months the warranty wa...,0,0


In [190]:
# Plotting confusion matrix on the predictions

from sklearn.metrics import confusion_matrix

y_true = test_set_sample["label"]
y_pred = test_set_sample["pred_label"]

confusion_matrix(y_true, y_pred)

array([[10,  0],
       [ 1,  9]])

In [192]:
from sklearn.metrics import accuracy_score

In [193]:
print(f"\nAccuracy: {accuracy_score(y_true, y_pred)}")


Accuracy: 0.95
