## Confluence Scraper

Scrape a user's Confluence pages for content, run basic NLP processing on it, generate word clouds, and compare users.

In [None]:
# Ids of the two users whose content is scraped and compared below.
# Replace each placeholder with the <id> segment of that user's
# wiki/people/<id> URL before running the remaining cells.
USER_1_ID = "User id from wiki/people/<id> url"
USER_2_ID = "User id from wiki/people/<id> url"

In [None]:
# Scrape content for user 1 (min_words=5000) and store it under the
# 'user_1' cache name so that it can be read back from file later.
from qmenzi.content import confluence as conf
from qmenzi.content.utils import write_content_to_file

user_1 = conf.scrap_content_for_user_id(
    USER_1_ID,
    min_words=5000,
)
write_content_to_file('user_1', user_1)

In [None]:
# Read user 1's content back from the 'user_1' cache file, turn it into
# tokens, and print the tokens for inspection.
from qmenzi.content.nlp import get_tokens
from qmenzi.content.utils import read_content_from_file

user_1 = read_content_from_file('user_1')
tokens = get_tokens(user_1)

print(tokens)

In [None]:
# Topic modeling over the tokens: for each topic, print the first element
# of every entry (presumably the term itself - confirm against get_topics).
from qmenzi.content.nlp import get_topics

topics = get_topics(tokens)

for topic_entries in topics:
    leading_items = [entry[0] for entry in topic_entries]
    print(leading_items)

In [None]:
# Word cloud for user 1: every content piece is terminated with a newline,
# the pieces are concatenated, and the resulting cloud is shown.
from qmenzi.quizzes.wordcloud import create_wordcloud_for_content

create_wordcloud_for_content(
    ''.join(piece + '\n' for piece in user_1)
).show()

In [None]:
# Generate word cloud for user 2
# Both imports are stated here so the cell also works when run on its own;
# previously create_wordcloud_for_content was only available if the user 1
# word cloud cell had already been executed.
import qmenzi.content.confluence as conf
from qmenzi.quizzes.wordcloud import create_wordcloud_for_content

# User 2's content is scraped directly in this cell and is not written to a
# cache file.
user_2 = conf.scrap_content_for_user_id(USER_2_ID, min_words=5000)
# Terminate every content piece with a newline before building the cloud.
create_wordcloud_for_content(''.join(c + '\n' for c in user_2)).show()

In [None]:
# Sentiment analysis for user 1: print each score that get_sentiment yields
# for the user's content, one per line.
from qmenzi.content.nlp import get_sentiment

for sentiment_score in get_sentiment(user_1):
    print(sentiment_score)

In [None]:
# Sentiment analysis for user 2, printed one score per line so the output
# can be compared with user 1's scores.
from qmenzi.content.nlp import get_sentiment

for sentiment_score in get_sentiment(user_2):
    print(sentiment_score)

In [None]:
# Generate poem for a topic using llama 3 model on replicate
import replicate
from qmenzi.quizzes.replicate import authenticate

# Presumably configures the Replicate credentials - confirm in
# qmenzi.quizzes.replicate.
authenticate()

# Take the first element of each entry of the first topic and separate them
# with ', '. Using the separator form of join avoids the dangling ', ' that
# previously ended up at the end of the prompt.
words = ', '.join(c[0] for c in topics[0])
prompt = f'Write me a poem about system architecture containing words {words}'
print(prompt)

# Named model_input rather than `input` so the built-in input() is not
# shadowed for the rest of the session.
model_input = {
    "prompt": prompt,
    # Plain string (not an f-string): the {prompt} placeholder is passed on
    # literally, presumably to be filled in on the Replicate side - confirm.
    "prompt_template": "<|begin_of_text|><|start_header_id|>system<|end_header_id|>\n\nYou are a helpful assistant<|eot_id|><|start_header_id|>user<|end_header_id|>\n\n{prompt}<|eot_id|><|start_header_id|>assistant<|end_header_id|>\n\n"
}

output = replicate.run(
    "meta/meta-llama-3-70b-instruct",
    input=model_input
)
# The output is consumed as an iterable of text chunks and printed as one string.
print(''.join(output))
