# Get Data

In [None]:
import pandas as pd

In [None]:
!pip install kaggle

In [None]:
from google.colab import files

uploaded = files.upload()

for fn in uploaded.keys():
  print('User uploaded file "{name}" with length {length} bytes'.format(
      name=fn, length=len(uploaded[fn])))
  
# Then move kaggle.json into the folder where the API expects to find it.
!mkdir -p ~/.kaggle/ && mv kaggle.json ~/.kaggle/ && chmod 600 ~/.kaggle/kaggle.json

Saving kaggle.json to kaggle.json
User uploaded file "kaggle.json" with length 66 bytes


In [None]:
# Fetch the Amazon Alexa reviews dataset with the Kaggle CLI; this saves
# amazon-alexa-reviews.zip into the current working directory.
# Relies on the kaggle.json credentials placed in ~/.kaggle/ by the cell above.
!kaggle datasets download "sid321axn/amazon-alexa-reviews"

Downloading amazon-alexa-reviews.zip to /content
  0% 0.00/164k [00:00<?, ?B/s]
100% 164k/164k [00:00<00:00, 80.2MB/s]


In [None]:
!unzip amazon-alexa-reviews.zip

Archive:  amazon-alexa-reviews.zip
  inflating: amazon_alexa.tsv        


In [None]:
# Read the extracted tab-separated review file into a DataFrame.
df = pd.read_csv(
    'amazon_alexa.tsv',
    sep='\t',
)

# Bare trailing expression so the notebook renders the frame.
df

Unnamed: 0,rating,date,variation,verified_reviews,feedback
0,5,31-Jul-18,Charcoal Fabric,Love my Echo!,1
1,5,31-Jul-18,Charcoal Fabric,Loved it!,1
2,4,31-Jul-18,Walnut Finish,"Sometimes while playing a game, you can answer...",1
3,5,31-Jul-18,Charcoal Fabric,I have had a lot of fun with this thing. My 4 ...,1
4,5,31-Jul-18,Charcoal Fabric,Music,1
...,...,...,...,...,...
3145,5,30-Jul-18,Black Dot,"Perfect for kids, adults and everyone in betwe...",1
3146,5,30-Jul-18,Black Dot,"Listening to music, searching locations, check...",1
3147,5,30-Jul-18,Black Dot,"I do love these things, i have them running my...",1
3148,5,30-Jul-18,White Dot,Only complaint I have is that the sound qualit...,1


# Topic Modeling

In [None]:
!pip install bertopic

In [None]:
# Re-display the reviews frame loaded in the "Get Data" section; the
# 'verified_reviews' column is the text that gets modeled below.
df

Unnamed: 0,rating,date,variation,verified_reviews,feedback
0,5,31-Jul-18,Charcoal Fabric,Love my Echo!,1
1,5,31-Jul-18,Charcoal Fabric,Loved it!,1
2,4,31-Jul-18,Walnut Finish,"Sometimes while playing a game, you can answer...",1
3,5,31-Jul-18,Charcoal Fabric,I have had a lot of fun with this thing. My 4 ...,1
4,5,31-Jul-18,Charcoal Fabric,Music,1
...,...,...,...,...,...
3145,5,30-Jul-18,Black Dot,"Perfect for kids, adults and everyone in betwe...",1
3146,5,30-Jul-18,Black Dot,"Listening to music, searching locations, check...",1
3147,5,30-Jul-18,Black Dot,"I do love these things, i have them running my...",1
3148,5,30-Jul-18,White Dot,Only complaint I have is that the sound qualit...,1


In [None]:
from bertopic import BERTopic
from sklearn.feature_extraction.text import CountVectorizer

# Build topic keywords from unigrams and bigrams with English stop words
# removed; on lower volumes of data stop words can otherwise cause issues.
vectorizer_model = CountVectorizer(ngram_range=(1, 2), stop_words="english")

# Build the list of documents handed to the model.
# - pd.Series(...) accepts either a DataFrame column or a plain list stored
#   under df['verified_reviews'], replacing the old `type(...) is list` check
#   (which is never true for a DataFrame column).
# - Missing reviews are replaced by "" instead of being dropped, so position i
#   in `text` / `topics` still corresponds to row i of `df`.
# - Everything is cast to str because BERTopic requires string documents.
text = pd.Series(df['verified_reviews']).fillna("").astype(str).tolist()

# initialize the model
# NOTE(review): no random seed is fixed here, so topic ids and sizes may
# differ between runs — confirm against the BERTopic docs whether a seeded
# UMAP model should be supplied for reproducibility.
model = BERTopic(
    vectorizer_model=vectorizer_model,
    language='english', calculate_probabilities=True,
    verbose=True
)

# Fit on the reviews: `topics` holds one topic id per document, `probs` the
# topic probabilities requested via calculate_probabilities=True.
topics, probs = model.fit_transform(text)

Downloading:   0%|          | 0.00/1.18k [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/190 [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/10.6k [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/612 [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/116 [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/39.3k [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/90.9M [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/53.0 [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/112 [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/466k [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/350 [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/13.2k [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/232k [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/349 [00:00<?, ?B/s]

Batches:   0%|          | 0/99 [00:00<?, ?it/s]

2023-01-18 14:10:30,244 - BERTopic - Transformed documents to Embeddings
2023-01-18 14:10:51,685 - BERTopic - Reduced dimensionality
2023-01-18 14:10:52,707 - BERTopic - Clustered reduced embeddings


In [None]:
# Preview the topic id assigned to each of the first ten reviews.
# `topics` is positional (one entry per element of `text`), so it is paired
# with the exact list passed to fit_transform rather than a label-based lookup
# on df, which would misalign if df's index were not 0..n-1. Slicing also
# copes with a corpus of fewer than ten documents.
for topic_id, review in zip(topics[:10], text[:10]):
    print(f"{topic_id}: {review}")

73: Love my Echo!
18: Loved it!
0: Sometimes while playing a game, you can answer a question correctly but Alexa says you got it wrong and answers the same as you.  I like being able to turn lights on and off while away from home.
-1: I have had a lot of fun with this thing. My 4 yr old learns about dinosaurs, i control the lights and play games like categories. Has nice sound when playing music as well.
2: Music
-1: I received the echo as a gift. I needed another Bluetooth or something to play music easily accessible, and found this smart speaker. Can’t wait to see what else it can do.
53: Without having a cellphone, I cannot use many of her features. I have an iPad but do not see that of any use.  It IS a great alarm.  If u r almost deaf, you can hear her alarm in the bedroom from out in the living room, so that is reason enough to keep her.It is fun to ask random questions to hear her response.  She does not seem to be very smartbon politics yet.
6: I think this is the 5th one I've 

In [None]:
# Per-topic summary table from the fitted model (columns: Topic, Count, Name).
freq = model.get_topic_info()

# Show its first ten rows. Topic -1 is listed first — presumably BERTopic's
# outlier bucket; confirm in the BERTopic docs.
freq.head(n=10)

Unnamed: 0,Topic,Count,Name
0,-1,633,-1_music_love_great_play
1,0,284,0_alexa_love alexa_love_like
2,1,103,1_dot_echo dot_echo_dots
3,2,102,2_music_music great_listen_listen music
4,3,82,3_smart_hub_home_smart home
5,4,79,4_loves_gift_bought_loved
6,5,67,5_barry_ight barry_ight_barry barry
7,6,62,6_echo_love echo_screen_video
8,7,61,7_cool amazing_cool_amazing_cool terrific
9,8,59,8_dot_echo dot_speaker_echo


In [None]:
# Render BERTopic's topic overview figure for the fitted model as this cell's
# output (presumably the interactive inter-topic distance map — confirm).
model.visualize_topics()

In [None]:
# Render BERTopic's hierarchy figure for the fitted model, showing how the
# topics relate to one another (presumably a dendrogram — confirm).
model.visualize_hierarchy()

In [None]:
# Render BERTopic's bar-chart figure for the fitted model (presumably the
# top-scoring terms of the leading topics — confirm).
model.visualize_barchart()