In [51]:
import pandas as pd
import re, os, json
from dotenv import load_dotenv
import google.generativeai as genai
import textwrap
from IPython.display import Markdown
from IPython.display import display

In [2]:
amazon_alexa = pd.read_csv('amazon_alexa.tsv', delimiter="\t")
df =amazon_alexa[['verified_reviews', 'feedback']]
df.columns= ['reviews', 'feedback']

In [3]:
df.head(10)

Unnamed: 0,reviews,feedback
0,Love my Echo!,1
1,Loved it!,1
2,"Sometimes while playing a game, you can answer...",1
3,I have had a lot of fun with this thing. My 4 ...,1
4,Music,1
5,I received the echo as a gift. I needed anothe...,1
6,"Without having a cellphone, I cannot use many ...",1
7,I think this is the 5th one I've purchased. I'...,1
8,looks great,1
9,Love it! I’ve listened to songs I haven’t hear...,1


In [4]:
rows_to_drop = df['feedback'].value_counts().max() - df['feedback'].value_counts().min()

In [5]:
if rows_to_drop > 0:
    df_1 = df[df['feedback'] == 1]
    df_balanced = df.drop(df_1.sample(rows_to_drop).index)
 

In [6]:
df_balanced.feedback.value_counts()


feedback
1    257
0    257
Name: count, dtype: int64

In [7]:
""" \w matches with words a-zA-Z, 0-1 and _(underscore)
\s gives only space removing all the word
inthis case \w gives words after removing space
"""
word ="1. I am Ajeet and this is Me Ajeet!" 

In [8]:
def input_format(word):
    word = re.sub(r"[^\w\s]", "", word)
    word = re.sub(r"\b[a-zA-Z]\b", " ", word)        
    word = word.lower()
    word = re.sub(r"\s+"," " ,word)
    word = re.sub(r"<[^>]*>", " ", word)
    word = word.strip()
    return word

In [9]:
word = re.sub(r"[^\w\s]", "", word)
word

'1 I am Ajeet and this is Me Ajeet'

In [10]:
word = re.sub(r"\b[a-zA-Z]\b", " ", word)
word

'1   am Ajeet and this is Me Ajeet'

In [11]:
word = word.lower()
word

'1   am ajeet and this is me ajeet'

In [12]:
word = re.sub(r"\s+"," " ,word)
word

'1 am ajeet and this is me ajeet'

In [13]:
word = re.sub(r"<[^>]*>", " ", word)

In [14]:
word = word.strip()
word

'1 am ajeet and this is me ajeet'

In [15]:
df_balanced['reviews']

24      I got a second unit for the bedroom, I was exp...
31                                              I like it
46      It's like Siri, in fact, Siri answers more acc...
82      Great technology!! Can't believe the speaker v...
111     Sound is terrible if u want good music too get...
                              ...                        
3105    Still learning everything she can do.  Good fo...
3110    Love it! I personally prefer Spotify music, so...
3115    It is just not as loud as I thought it was goi...
3116              I enjoy it. Still discovering new uses.
3144                                              love it
Name: reviews, Length: 514, dtype: object

In [16]:
df_balanced['reviews'] = df_balanced['reviews'].astype(str)

In [17]:
reviews = df_balanced['reviews'].to_list()

In [18]:
[input_format(review) for review in reviews]


['got second unit for the bedroom was expecting the sounds to be improved but didnt really see difference at all overall not big improvement over the 1st generation',
 'like it',
 'its like siri in fact siri answers more accurately then alexa dont see real need for it in my household though it was good bargain on prime day deals',
 'great technology cant believe the speaker volume and quality is so wonderful wish had bought another one',
 'sound is terrible if want good music too get bose',
 'love my echo smart speaker love the volume and clarity it was easy to set up its wonderful gift for any age',
 'very cool product speaker sounds good with my spotify blasting through it',
 'not much features',
 'doesnt know half the things asked cant recognize my sprinkler wifi controller even though product details says it is compatible cant program or change my wifi thermostat settings and its matched up on same wifi router just handy music player thats if you have amazon music',
 'during prime 

In [19]:
df_balanced['reviews'] = [input_format(review) for review in reviews]
df_balanced

Unnamed: 0,reviews,feedback
24,got second unit for the bedroom was expecting ...,1
31,like it,1
46,its like siri in fact siri answers more accura...,0
82,great technology cant believe the speaker volu...,1
111,sound is terrible if want good music too get bose,0
...,...,...
3105,still learning everything she can do good for ...,1
3110,love it personally prefer spotify music so its...,1
3115,it is just not as loud as thought it was going...,1
3116,enjoy it still discovering new uses,1


In [20]:
len(df_balanced)
test_size = int(len(df_balanced) * 0.90)
train_df = df_balanced.sample(test_size)
test_df = df_balanced.drop(train_df.index)

In [21]:
train_df

Unnamed: 0,reviews,feedback
558,stopped working after 7 months the warranty wa...,0
369,not working,0
1643,love it,1
1906,it does not speak in spanishi bought it for my...,0
2488,had some trouble with my echo dot when first g...,1
...,...,...
256,have the original alexa and the tap and so far...,1
434,34never buy certified and refurbished echo dot...,0
1182,fun to work with still learning everything she...,1
1511,upgraded from an original echo on prime day an...,1


In [22]:
load_dotenv()
genai.configure(api_key=os.getenv("GOOGLE_API_KEY"))

In [23]:
# def to_markdown(text):


In [24]:
def to_markdown(text):
    text = text.replace('', '')
    return Markdown(textwrap.indent(text, '>' , predicate=lambda _: True))

In [25]:
model = genai.GenerativeModel("gemini-pro")
response = model.generate_content('What is life?')
to_markdown(response.text)

>The nature of life is one of the most fundamental and enduring questions in philosophy, science, and religion. Life is generally defined as the condition of an organism that demonstrates biological processes, such as metabolism, growth, reproduction, and response to stimuli. However, there is no scientific consensus on a single, universal definition of life. The Scientific American article "What is Life?" by Michael Hanlon, argues that the closest we have to a universal definition is "living things are open dissipative systems that maintain homeostasis." In other words, living things are able to take in energy from their environment and use it to maintain their own internal organization.
>
>Some common characteristics of living organisms include:
>
>* **Organization:** Living organisms are highly organized structures, with many levels of complexity, from the molecular to the macroscopic.
>* **Metabolism:** Living organisms use energy to maintain their internal organization and to carry out life processes.
>* **Growth:** Living organisms grow and develop over time, increasing in size and complexity.
>* **Reproduction:** Living organisms reproduce, creating new individuals that share their genetic material.
>* **Response to stimuli:** Living organisms respond to changes in their environment, such as changes in temperature, light, or food availability.
>
>The study of life is called biology, and it is a vast and complex field. Biologists study all aspects of living organisms, from their molecular structure to their behavior and ecology. The goal of biology is to understand the fundamental principles of life and to explain the diversity and complexity of the living world.

In [36]:
df_balanced

Unnamed: 0,reviews,feedback,pred
24,got second unit for the bedroom was expecting ...,1,
31,like it,1,
46,its like siri in fact siri answers more accura...,0,
82,great technology cant believe the speaker volu...,1,
111,sound is terrible if want good music too get bose,0,
...,...,...,...
3105,still learning everything she can do good for ...,1,
3110,love it personally prefer spotify music so its...,1,
3115,it is just not as loud as thought it was going...,1,
3116,enjoy it still discovering new uses,1,


In [43]:
df_balanced['pred'] = ''
df_json = df_balanced[['reviews', 'pred', 'feedback']].sample(20)
prompt = f""" You are an expert linguist, who is good at classifying review Sentiment into Positive/Negative labels. 
Help me classify customer reviews into Positive(label = 1) or Negative(label = 0) Customer reviews are provided between three back ticks.
In your output, only return the json code back as output- which is provided between three back ticks.
Your task is to update predicted labels under 'pred_labels' in the json code.
Don't make any changes to the Json code format, please
```
{df_json}
```
"""
response = model.generate_content(prompt)

```
{
 "reviews": [
  "dont like that it goes into sleep mode after 2...",
  "bought 2 and both quit connecting to wifi and ...",
  "it is very slow compared to the echo",
  "very excited to see what it can do",
  "this was my 4th echo device 1st echo show it s...",
  "the only thing dont like is it shuts off by it...",
  "when you think about it this really doesnt do ...",
  "not working",
  "bought an echo dot that had been refurbished b...",
  "need to be able to connect to more 3rd party v...",
  "",
  "the only negative we have on this product is t...",
  "am not tec smart enough to make it work at all...",
  "wanted to use this as bedside clock with the a...",
  "easy to set up still trying to get it to conne...",
  "",
  "so far the certified refurbished echo dot work...",
  "love it",
  "have the original alexa and the tap and so far...",
  "good"
 ],
 "pred_labels": [
  0,
  0,
  0,
  1,
  0,
  1,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  1,
  0,
  1,
  1,
  1,
  1
 ]
}
```


In [59]:
response_data = pd.DataFrame(json.loads(response.text.strip("`")))
response_data

Unnamed: 0,reviews,pred_labels
0,dont like that it goes into sleep mode after 2...,0
1,bought 2 and both quit connecting to wifi and ...,0
2,it is very slow compared to the echo,0
3,very excited to see what it can do,1
4,this was my 4th echo device 1st echo show it s...,0
5,the only thing dont like is it shuts off by it...,1
6,when you think about it this really doesnt do ...,0
7,not working,0
8,bought an echo dot that had been refurbished b...,0
9,need to be able to connect to more 3rd party v...,0


In [48]:
df_json
# count = ''
# for words in df_json['reviews']:
#     count = count + ' '  + words
# print(count)   

Unnamed: 0,reviews,pred,feedback
2349,dont like that it goes into sleep mode after 2...,,0
562,bought 2 and both quit connecting to wifi and ...,,0
1070,it is very slow compared to the echo,,0
1441,very excited to see what it can do,,1
1744,this was my 4th echo device 1st echo show it s...,,0
449,the only thing dont like is it shuts off by it...,,1
2594,when you think about it this really doesnt do ...,,0
369,not working,,0
396,bought an echo dot that had been refurbished b...,,0
1716,need to be able to connect to more 3rd party v...,,0


In [35]:
len(count)

89559

In [61]:
from sklearn.metrics import confusion_matrix
y = df_json['feedback']
y_hat = response_data['pred_labels']
confusion_matrix(y,y_hat)

array([[13,  0],
       [ 0,  7]])