In [None]:
%%capture
!pip install --upgrade git+https://github.com/UKPLab/sentence-transformers
!pip install keybert ctransformers[cuda]
!pip install --upgrade git+https://github.com/huggingface/transformers

## Loading the model

In [None]:
from ctransformers import AutoModelForCausalLM

# Load the 4-bit (Q4_K_M) GGUF file of Mistral-7B-Instruct v0.1.
# `gpu_layers` is the number of layers offloaded to the GPU; use 0 when no GPU
# acceleration is available on your system.
model = AutoModelForCausalLM.from_pretrained(
    "TheBloke/Mistral-7B-Instruct-v0.1-GGUF",
    hf=True,
    gpu_layers=50,
    model_type="mistral",
    model_file="mistral-7b-instruct-v0.1.Q4_K_M.gguf",
)

In [None]:
from transformers import AutoTokenizer, pipeline

# Tokenizer is taken from the mistralai/Mistral-7B-Instruct-v0.1 repository.
tokenizer = AutoTokenizer.from_pretrained("mistralai/Mistral-7B-Instruct-v0.1")

# Text-generation pipeline around the loaded model: at most 50 new tokens per
# call, with a repetition penalty of 1.1.
generator = pipeline(
    task="text-generation",
    model=model,
    tokenizer=tokenizer,
    repetition_penalty=1.1,
    max_new_tokens=50,
)

## Load Dataset

In [None]:
import pandas as pd

# Read the review dataset from the working directory and preview the first rows.
df = pd.read_csv(filepath_or_buffer="Starbucks_reviews.csv")
df.head(n=5)

In [None]:
# Number of reviews per state, most frequent first.
df["state"].value_counts()

In [None]:
# Review texts of the rows whose state is Florida, as a plain Python list.
florida_reviews = df.loc[df["state"] == "FL", "review"].tolist()

In [None]:
# Review texts of the rows whose state is New Hampshire, as a plain Python list.
newhamphire_reviews = df.loc[df["state"] == "NH", "review"].tolist()

In [None]:
# Review texts of the rows whose state is New Jersey, as a plain Python list.
newjersey_reviews = df.loc[df["state"] == "NJ", "review"].tolist()

In [None]:
# Review texts of the rows whose state is Arkansas, as a plain Python list.
arkansas_reviews = df.loc[df["state"] == "AR", "review"].tolist()

In [None]:
# Sanity check on a single document: ask the model for the keywords of the
# first Florida review and print the raw generation.
# The request is wrapped in [INST] ... [/INST], the same instruction markers the
# KeyLLM prompt templates in this notebook use for this instruct model; without
# them the model is free to continue the text instead of answering the request.
prompt = f"""
[INST]
I have the following document:

* {florida_reviews[0]}

Please give me the keywords that are present in this document and separate them with commas.
Make sure you to only return the keywords and say nothing else. For example, don't say:
"Here are the keywords present in the document"
[/INST]
"""
response = generator(prompt)
# The pipeline returns a list with one dict per generated sequence.
print(response[0]["generated_text"])

## Keyword Extraction with KeyLLM

In [None]:
# One-shot demonstration shown to the model before the real request.
# The answer after [/INST] lists only terms that actually occur in the example
# document, so the demonstration teaches "extract what is there" rather than
# unrelated words.
example_prompt = """
<s>[INST]
I have the following document:
- The website mentions that it only takes a couple of days to deliver but I still have not received mine.

Please give me the keywords that are present in this document and separate them with commas.
Make sure you to only return the keywords and say nothing else. For example, don't say:
"Here are the keywords present in the document"
[/INST] website, deliver, couple of days, not received</s>"""


# Template for the real request; it contains the placeholder [DOCUMENT] where
# the document text belongs.
keyword_prompt = """
[INST]

I have the following document:
- [DOCUMENT]

Please give me the keywords that are present in this document and separate them with commas.
Make sure you to only return the keywords and say nothing else. For example, don't say:
"Here are the keywords present in the document"
[/INST]
"""

# Full prompt = worked example followed by the request template.
prompt = example_prompt + keyword_prompt

In [None]:
from keybert import KeyLLM
from keybert.llm import TextGeneration

# Wrap the generation pipeline together with the prompt template, then build
# the KeyLLM extractor on top of it.
llm = TextGeneration(generator, prompt=prompt)
kw_model = KeyLLM(llm)

# Extract keywords for the Arkansas reviews; the bare name on the last line
# makes the notebook display the result.
keywords = kw_model.extract_keywords(arkansas_reviews)
keywords

In [None]:
!pip install wordcloud matplotlib numpy

In [None]:
from collections import Counter

from wordcloud import WordCloud
import matplotlib.pyplot as plt
import numpy as np

# Flatten the per-document keyword lists into a single list.
flattened_data = [word for sublist in keywords for word in sublist]

# Space-joined form, kept for any later cell that relies on `text`; it is not
# used to build the cloud (see below).
text = " ".join(flattened_data)

# Count every keyword/keyphrase as ONE unit. Feeding the space-joined string to
# `WordCloud.generate` would re-tokenize it, splitting multi-word keyphrases and
# pairing words from neighbouring keywords into bigrams that never occurred.
# Keywords are stripped and lower-cased so trivial variants are merged, and
# empty strings are dropped.
keyword_counts = Counter(
    keyword.strip().lower()
    for keyword in flattened_data
    if keyword and keyword.strip()
)

if keyword_counts:
    # Size each keyword by how many times it was extracted.
    wordcloud = WordCloud(
        width=800, height=400, background_color="white"
    ).generate_from_frequencies(keyword_counts)

    # Display the word cloud using matplotlib.
    plt.figure(figsize=(10, 5))
    plt.imshow(wordcloud, interpolation="bilinear")
    plt.axis("off")
    plt.show()
else:
    # WordCloud raises on an empty vocabulary; report the situation instead.
    print("No keywords were extracted; nothing to plot.")