In [None]:
from wordcloud import WordCloud
from matplotlib import pyplot as plt

from utils.file_utils import get_cleaned_tickets_as_df
from utils.text_utils import get_pos_tags, lemmmatize_text, clean_text, clean_stop_words

In [None]:
# Load the tickets DataFrame via the project helper imported from utils.file_utils.
# Later cells read its `message` and `tags` columns.
tickets_df = get_cleaned_tickets_as_df()
# Show only the first rows as this cell's output.
tickets_df.head()

In [None]:
# Text preprocessing pipeline: every stage reads the column written by the
# previous stage and stores its result in a new column of `tickets_df`.
# Each entry is (source column, target column, function applied per row).
preprocessing_stages = [
    ("message", "message_cleaned", clean_text),
    ("message_cleaned", "message_lemmatized", lemmmatize_text),
    ("message_lemmatized", "message_pos", get_pos_tags),
    ("message_pos", "message_stop", clean_stop_words),
]

for source_column, target_column, stage_function in preprocessing_stages:
    tickets_df[target_column] = tickets_df[source_column].apply(stage_function)

# Display the frame with the newly added columns.
tickets_df

In [None]:
# Word frequencies per tag, computed from the lemmatized messages.

from collections import Counter

# Step 1: concatenate all lemmatized messages that share a tag into a single
# space-separated string, giving one row per tag.
joined_by_tag = tickets_df.groupby("tags")["message_lemmatized"].apply(" ".join)
grouped_tags_df = joined_by_tag.reset_index()

# Step 2: split each tag's text on whitespace and count the occurrences of
# every word, keyed by tag.
word_counts = {
    tag: dict(Counter(joined_text.split()))
    for tag, joined_text in zip(
        grouped_tags_df["tags"], grouped_tags_df["message_lemmatized"]
    )
}

# Step 3: print every word with its count, grouped under its tag.
for tag, frequencies in word_counts.items():
    print(f"Category: {tag}")
    for token, occurrences in frequencies.items():
        print(f"{token}: {occurrences}")

In [None]:
# One word cloud per ticket category, built from the `message_stop` column.
#
# The text is assembled from the column's *values*. Calling `str()` on a
# pandas Series returns its printed representation instead: index labels,
# values shortened with "...", rows elided for long Series, and a trailing
# "Name: message_stop, dtype: object" line. A cloud generated from that string
# reflects display artefacts and only a fraction of the messages.

WORD_CLOUD_TAGS = ["Spam", "Bug", "Feature Request", "Product Question", "Sales"]


def _as_text(value):
    """Return `value` as plain text.

    Strings pass through unchanged, any other iterable is joined with spaces
    (each item converted with `str`), and non-iterables fall back to
    `str(value)`.
    """
    if isinstance(value, str):
        return value
    try:
        # NOTE(review): the element type produced by clean_stop_words is not
        # visible from this notebook; presumably a string or a list of tokens.
        return " ".join(str(token) for token in value)
    except TypeError:
        return str(value)


def build_category_text(tickets, tag, column="message_stop"):
    """Join every non-null `column` value of the rows whose `tags` equals `tag`.

    Args:
        tickets: DataFrame with a `tags` column and the requested `column`.
        tag: Category label to select.
        column: Name of the column holding the preprocessed messages.

    Returns:
        A single space-separated string; empty when no row matches.
    """
    values = tickets.loc[tickets["tags"] == tag, column].dropna()
    return " ".join(_as_text(value) for value in values)


def plot_category_word_cloud(tickets, tag, max_words=40, figsize=(5, 3)):
    """Render the word cloud of one category, titled with the category name.

    Categories without any usable text are reported and skipped so that the
    remaining categories are still plotted.

    Args:
        tickets: DataFrame with `tags` and `message_stop` columns.
        tag: Category label to plot.
        max_words: Maximum number of words shown in the cloud.
        figsize: Figure size passed to matplotlib, in inches.
    """
    text = build_category_text(tickets, tag)
    if not text.strip():
        print(f"No text available for category '{tag}'; skipping word cloud.")
        return

    try:
        word_cloud = WordCloud(max_words=max_words).generate(text)
    except ValueError as error:
        # WordCloud raises ValueError when no word survives its own filtering.
        print(f"Could not build a word cloud for category '{tag}': {error}")
        return

    plt.figure(figsize=figsize)
    plt.imshow(word_cloud)
    plt.axis("off")
    plt.title(tag)
    plt.show()


for tag in WORD_CLOUD_TAGS:
    plot_category_word_cloud(tickets_df, tag)