In [1]:
import nltk
from summarizer import Summarizer
from sklearn.feature_extraction.text import TfidfVectorizer
from textblob import TextBlob


In [2]:
# Sample review data
review_text = 'They are not reliable, dishonest and not operating with integrity I recommend them to anyone who is looking for some great mechanics. Tony always takes the time to answer any questions I may have, and provides me with all the options available- from repair to replacement. As a woman, it can be difficult to find mechanics you can trust with your vehicle, so Im very happy to have been referred to GT Auto. Tony and John are the best!'


In [3]:
### Step 3: Text Summarization
#  Extractive Summarization using BERT
bert_model = Summarizer()
summary = bert_model(review_text)


Downloading model.safetensors:   0%|          | 0.00/1.34G [00:00<?, ?B/s]

Some weights of the model checkpoint at bert-large-uncased were not used when initializing BertModel: ['cls.predictions.transform.dense.bias', 'cls.seq_relationship.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.bias', 'cls.seq_relationship.weight', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
  super()._check_params_vs_input(X, default_n_init=10)


In [4]:
## Step 4: Keyword Extraction
# TF-IDF Keyword Extraction using scikit-learn
tfidf_vectorizer = TfidfVectorizer(stop_words='english')
tfidf_matrix = tfidf_vectorizer.fit_transform([summary])
feature_names = tfidf_vectorizer.get_feature_names_out()


In [6]:
# Get the top N keywords
top_keywords = [feature_names[i] for i in tfidf_matrix.sum(axis=0).argsort()[0, ::-1][:5]]


In [7]:
### Step 5: Sentiment Analysis
# Sentiment analysis using TextBlob
blob = TextBlob(summary)
sentiment = blob.sentiment.polarity  # Range between -1 and 1


In [8]:
sentiment

0.3

In [9]:
# Interpret sentiment
if sentiment > 0:
    sentiment_label = "Positive"
elif sentiment < 0:
    sentiment_label = "Negative"
else:
    sentiment_label = "Neutral"


In [10]:
### Step 6: Output
print("Original Review:\n", review_text)
print("\nSummarized Review:\n", summary)
print("\nTop Keywords:", top_keywords)
print("\nSentiment:", sentiment_label)


Original Review:
 They are not reliable, dishonest and not operating with integrity I recommend them to anyone who is looking for some great mechanics. Tony always takes the time to answer any questions I may have, and provides me with all the options available- from repair to replacement. As a woman, it can be difficult to find mechanics you can trust with your vehicle, so Im very happy to have been referred to GT Auto. Tony and John are the best!

Summarized Review:
 They are not reliable, dishonest and not operating with integrity I recommend them to anyone who is looking for some great mechanics. Tony always takes the time to answer any questions I may have, and provides me with all the options available- from repair to replacement.

Top Keywords: [array([['tony', 'time', 'available', 'dishonest', 'great', 'integrity',
        'looking', 'mechanics', 'operating', 'options', 'provides',
        'questions', 'recommend', 'reliable', 'repair', 'replacement',
        'takes', 'answer']

In [11]:
#Certainly! If you want to categorize sentiments into specific aspects or categories like "Ease of use" or "Value for money," you can use a more targeted approach.
#One common way to achieve this is by performing aspect-based sentiment analysis.
#This involves identifying specific aspects or topics within the text and then determining the sentiment associated with each aspect.

