### Create Embeddings

In [2]:
import numpy as np
import pandas as pd
from sentence_transformers import SentenceTransformer
from tqdm import tqdm

# Define file paths
data_path = 'attachment/attach_processed_length10.csv'
embeddings_save_path = 'attachment/models/short/attach_doc_embeddings1.npy'

print("Step 1: Loading the data...")
# Load the data
df = pd.read_csv(data_path, usecols=['text'], low_memory=False)

print("Step 2: Preparing the documents...")
# Specify what the 'docs' are
docs = df['text'].tolist()

# Create embeddings
model = SentenceTransformer('thenlper/gte-large')
embeddings = model.encode(docs, show_progress_bar=True)

# Save Embeddings
with open(embeddings_save_path, 'wb') as f:
    np.save(f, embeddings)

Step 1: Loading the data...
Step 2: Preparing the documents...


Batches: 100%|██████████| 63/63 [00:00<00:00, 66.52it/s]


### Run BERTopic

Struggling here because of the dimensionality stuff - I think

In [5]:
import pickle
from umap import UMAP
from hdbscan import HDBSCAN
import numpy as np
from nltk.tokenize import sent_tokenize
from bertopic import BERTopic
from sklearn.feature_extraction.text import CountVectorizer
import pandas as pd
from sentence_transformers import SentenceTransformer
import collections
from bertopic.representation import PartOfSpeech
from bertopic.representation import MaximalMarginalRelevance
from bertopic.representation import KeyBERTInspired
from tqdm import tqdm
from bertopic.cluster import BaseCluster

# Define file paths
data_path = 'attachment/attach_short.csv'
embeddings_save_path = 'attachment/models/short/attach_short_embeddings.npy'
model_save_path = 'attachment/models/short/short_model1_dir'
data_save_path = "attachment/models/short/short_dataframe.csv"

print("Step 1: Loading the data {data_path}")
# Load the data
df = pd.read_csv(data_path, usecols=['text'], low_memory=False)

print("Step 2: Preparing the documents...")
# Specify what the 'docs' are
docs = df['text'].tolist()

print("Now: Loading embeddings")
# Load Embeddings
with open(embeddings_save_path, 'rb') as f:
    embeddings = np.load(f)

print("Now: Extracting Vocab")
###### Extract vocab to be used in BERTopic
vocab = collections.Counter()
tokenizer = CountVectorizer(ngram_range=(1, 3)).build_tokenizer()
for doc in tqdm(docs):
    vocab.update(tokenizer(doc))
vocab = [word for word, frequency in vocab.items() if frequency >= 100]; len(vocab)

print("Now: UMAP time ")
# Train model and reduce dimensionality of embeddings
umap_model = UMAP(
        n_components=10,  # has a wild impact hard to predict
        n_neighbors=20,  # Higher is a more gloabl strcture
        min_dist=0.1,   # Lower value means more dense packing
        random_state=42, # Reproducability
        metric="cosine", # have to pick something
        n_jobs=-1        # speed
        )
reduced_embeddings = umap_model.fit_transform(embeddings)

print("Now: HDBSCAN Time")
# Find clusters of semantically similar documents
hdbscan_model = HDBSCAN(
            min_cluster_size=200,           # smallest size group considered
            min_samples=20,                 # larger is more conservative - more noise
            leaf_size=40,                   # number of points per leaf node in the tree - default 40
            gen_min_span_tree=False,        # True creates minimum spanning trees - increasing RAM
            prediction_data=True,           # generates extra cached data of prediction labels for new data or reuse
            cluster_selection_method='eom', # eom is normal - leaf might get more homogeneous clusters
            cluster_selection_epsilon=0.5,  # default - merges clusters below threshold
            core_dist_n_jobs=-1,            # For speed
            )
clusters = hdbscan_model.fit(reduced_embeddings)

print("Now: Set Representation {main_representation}")
# Set the main_representation for the model
main_representation = KeyBERTInspired()

print("Now: Aspect model")
# Additional ways of representing a topic
aspect_model = [KeyBERTInspired(top_n_words=10), MaximalMarginalRelevance(diversity=.3)]

# Prepare sub-models
embedding_model = SentenceTransformer('thenlper/gte-large')
umap_model = Dimensionality(reduced_embeddings)
hdbscan_model = BaseCluster()
vectorizer_model = CountVectorizer(vocabulary=vocab, stop_words="english")
representation_model = {
    "Main": main_representation,
    "Aspect1": aspect_model,
}

print("Now: Fit BERTopic Model")
# Fit BERTopic without actually performing any clustering
topic_model= BERTopic(
    embedding_model=embedding_model,
    umap_model=umap_model,
    hdbscan_model=hdbscan_model,
    vectorizer_model=vectorizer_model,
    representation_model=representation_model,
    verbose=True
)

# Fit model and transform documents
topics, _= topic_model.fit_transform(docs, embeddings=embeddings)

print("Now: Loading full dataset adding 'topics'")
# Load the full dataset
full_data = pd.read_csv(data_path, low_memory=False)
# Add the topics to the full dataset
full_data['topics'] = topics

print("Now: Saving model as {model_save_path}")
# Save the BERTopic model as a .safetensors file
topic_model.save(model_save_path, serialization="safetensors", save_ctfidf=True, save_embedding_model=embedding_model)

print("Now: Saving data as {data_save_path}")
# Save the full dataset with topics as a CSV file
full_data.to_csv(data_save_path, index=False)

print("Data and model saved successfully.")

Step 1: Loading the data {data_path}
Step 2: Preparing the documents...
Now: Loading embeddings
Now: Extracting Vocab


100%|██████████| 1992/1992 [00:00<00:00, 995953.46it/s]

Now: UMAP time 





Now: HDBSCAN Time
Now: Set Representation {main_representation}
Now: Aspect model


2024-05-04 23:24:42,560 - BERTopic - Dimensionality - Fitting the dimensionality reduction algorithm
2024-05-04 23:24:42,560 - BERTopic - Dimensionality - Completed ✓
2024-05-04 23:24:42,561 - BERTopic - Cluster - Start clustering the reduced embeddings


Now: Fit BERTopic Model


TypeError: int() argument must be a string, a bytes-like object or a real number, not 'NoneType'

### Reduce Outliers?

In [2]:
from bertopic import BERTopic
import pandas as pd

print("Step 1: Loading the model...")
# Load the BERTopic model
topic_model = BERTopic.load('attachment/models/doc1_attach_doc1_model_dir', local_files_only=True)
                            
print("Step 2: Preparing the documents...")
# Load the data
doc_file = 'attachment/attach_short'
df = pd.read_csv(doc_file, usecols=['text', 'created_utc'], low_memory=False)

docs = df['text'].tolist()
created_utc = df['created_utc'].tolist()

print("Step 3: Reducing outliers...")
# Use the "c-TF-IDF" strategy with a threshold
new_topics, _ = topic_model.reduce_outliers(docs, topics, strategy="c-tf-idf", threshold=0.1)

print("Step 4: Reducing outliers...")
# Reduce all outliers that are left with the "distributions" strategy
final_topics, _ = topic_model.reduce_outliers(docs, new_topics, strategy="distributions")

print("Step 5: Updating the topic representations...")
# Update the topic representations with the new topics
topic_model.update_topics(docs, topics=final_topics)

print("Step 6: Performing topics over time analysis...")
# Perform topics over time analysis
topics_over_time = topic_model.topics_over_time(docs, final_topics, created_utc, nr_bins=40)

print("Step 7: Saving the results...")
# Save the topics and probabilities to the DataFrame
df['Topic'] = final_topics

# Save the DataFrame with the updated topics and probabilities
df.to_excel('G:/BERTopic/attachment/analysis/doc1/attach_doc1_reduced.xlsx', index=False)

# Save the topics over time results
topics_over_time.to_excel('G:/BERTopic/attachment/analysis/attach_doc1_reduced_ToT.xlsx', index=False)

Step 1: Loading the model...


TypeError: BERTopic.load() got an unexpected keyword argument 'local_files_only'

# Analysis

## Text output

In [5]:
from bertopic import BERTopic

# Extract the results
topics = topic_model.get_topics()
topic_freq = topic_model.get_topic_freq()
topic_info = topic_model.get_topic_info()
representative_docs = topic_model.get_representative_docs()

# Assuming 'topic_info' is already defined and includes topic representations
csv_file_path = 'attachment/analysis/attach_doc1_analysis.csv'  # Hardcoded save location for CSV

# Remove the 'Representative_Docs' column from topic_info DataFrame
topic_info = topic_info.drop(columns=['Representative_Docs'])

# Save the results in a more structured and readable manner
with open('attachment/analysis/attach_doc1_analysis.txt', 'w') as f:
    # Topics
    f.write("TOPICS:\n")
    for topic_num, terms in topics.items():
        terms_str = ', '.join([term[0] for term in terms])
        f.write(f"Topic {topic_num}: {terms_str}\n")
    f.write("\n")

    # Topic Frequency
    f.write("TOPIC FREQUENCY:\n")
    for index, row in topic_freq.iterrows():
        f.write(f"Topic {row['Topic']}: {row['Count']} entries\n")
    f.write("\n")

    # Topic Info
    f.write("TOPIC INFO:\n")
    for index, row in topic_info.iterrows():
        f.write(f"Topic {row['Topic']}\n")
        f.write(f" - Name: {row['Name']}\n")
        f.write(" - Representation:\n")
        for term in row['Representation']:
            f.write(f"   * {term}\n")
        f.write("\n")

    # Representative Docs
    f.write("REPRESENTATIVE DOCS:\n")
    for topic_num, docs in representative_docs.items():
        f.write(f"Topic {topic_num} representative docs:\n")
        for doc in docs:
            f.write(f" - {doc}\n")
        f.write("\n")

# Convert 'topic_info' DataFrame directly to CSV
topic_info.to_csv(csv_file_path, index=False)

## Visualizations

In [9]:
topic_model.visualize_topics()

ValueError: Mime type rendering requires nbformat>=4.2.0 but it is not installed

In [None]:
hierarchical_topics = topic_model.hierarchical_topics(docs)
tree = topic_model.get_topic_tree(hierarchial_topics)
print(tree)


## Searching for topics

In [4]:
import pandas as pd

def explore_relevant_topics_to_file(topic_model, search_terms, txt_filename, csv_filename, top_n=5):
    """
    Find and save topics related to a list of search terms to a .txt file and a .csv file,
    along with representative documents for the topics.

    Parameters:
    - topic_model: The trained BERTopic model.
    - search_terms: A list of search terms/phrases related to the desired topics.
    - txt_filename: Name of the .txt file to save the results.
    - csv_filename: Name of the .csv file to save the topic information.
    - top_n: Number of top similar topics to retrieve for each search term.

    Returns:
    - None (writes the relevant topics, their terms, and representative docs to a .txt file
             and topic information to a .csv file)
    """
    topics_covered = set()  # To keep track of topics we've added representative docs for
    all_relevant_topics = set()  # To gather all unique topics from the search results
    topic_info_data = []  # To store topic information for the CSV file

    with open(txt_filename, 'w') as file:
        # Display search terms and their related topics at the top
        for term in search_terms:
            file.write(f"Searching for topics related to: '{term}'\n\n")
            topics, similarity = topic_model.find_topics(term, top_n=top_n)
            for topic, score in zip(topics, similarity):
                file.write(f"Topic {topic} (Similarity: {score:.4f})\n")
                all_relevant_topics.add(topic)  # Add topic to the set
                topic_info_data.append({'Topic': topic, 'Representation': tuple(topic_model.get_topic(topic)), 'Search Term': term})
            file.write("\n" + "-" * 50 + "\n")

        # Append topic details and representative documents at the end in numerical order
        for topic in sorted(all_relevant_topics):  # Sort topics numerically
            if topic not in topics_covered:
                topic_terms = topic_model.get_topic(topic)
                formatted_terms = ', '.join([f"{word[0]} ({word[1]:.4f})" for word in topic_terms])
                file.write(f"\nTopic {topic} Details: {formatted_terms}\n\n")
                reps = topic_model.get_representative_docs(topic)
                file.write(f"Representative Documents for Topic {topic}:\n")
                for doc in reps:
                    file.write("\n" + "-" * 30 + "\n")
                    file.write(f"{doc}\n")
                    file.write("-" * 30 + "\n")
                topics_covered.add(topic)
            file.write("-" * 50 + "\n")

    # Create a DataFrame from the topic information data
    topic_info_df = pd.DataFrame(topic_info_data)
    
    # Aggregate search terms for each topic
    topic_info_df = topic_info_df.groupby(['Topic', 'Representation'])['Search Term'].apply(', '.join).reset_index()
    
    # Save the topic information to a CSV file
    topic_info_df.to_csv(csv_filename, index=False)

# Updated list of search terms related to your research question
search_terms = [
    "Boundaries", "Limits", "Personal space", "Assertiveness", "Saying no",
    "Interpersonal boundaries", "Relationship limits", "Healthy relationships", "Assertive communication",
    "Personal growth", "Self-improvement", "Interpersonal skills", "Relationship building",
    "Communication skills", "Active listening", "Expressing emotions", "Nonverbal communication",
    "Self-care", "Self-compassion", "Mental health", "Emotional well-being", "Self-love",
    "Social connection", "Belonging", "Interpersonal relationships", "Social support", "Emotional intimacy",
    "Roleplaying", "Immersion", "Character development", "Alternate persona", "Escapism",
    "Authenticity", "Self-expression", "Identity exploration", "True self",
    "Anxiety relief", "Depression relief", "Therapeutic gaming", "Relaxation",
    "Player growth", "Personal development", "Supportive environment", "Encouraging rules",
    "Player education", "Mentoring", "Skill development", "Collaborative learning",
    "Flexibility", "Adaptability", "Open-mindedness", "Embracing change",
    "Time management", "Session planning", "Consistency", "Commitment",
    "Responsibility", "Maturity", "Life skills", "Independence",
    "Emotional intelligence", "Emotional regulation", "Self-awareness", "Empathy",
    "Safety", "Security", "Trust", "Comfort", "Supportive environment",
    "Trauma recovery", "Emotional healing", "Therapeutic roleplaying", "Coping mechanisms",
    "Genuineness", "Honesty",
    "Recognition", "Acknowledgment", "Validation", "Acceptance",
    "Inclusivity", "Self-acceptance",
    "Affirmation", "Support", "Encouragement", "Understanding",
    "Resilience", "Perseverance", "Problem-solving", "Determination", "Growth mindset"
]

# Use the function to explore the relevant topics and save to a .txt file and a .csv file
txt_filename = "rpdavavrelevant_topics.txt"
csv_filename = "rascdfvdfvaocessed2_relevant_topics.csv"
explore_relevant_topics_to_file(topic_model, search_terms, txt_filename, csv_filename)

This needs to properly load the documents - causing an error now.

In [None]:
import pandas as pd

# Assuming you've already loaded the BERTopic model and have the docs list
# Step 1: Get the document info
document_info = topic_model.get_document_info(docs)

# List of topics you want to extract
topics_to_include = [82, 332]

# Step 2: Filter the DataFrame by the given set of topics
filtered_df = document_info[document_info['Topic'].isin(topics_to_include)]

# Step 3: Select only the relevant columns
selected_df = filtered_df[['Document', 'Topic', 'Probability', 'Representation']]

# Step 4: Save the selected DataFrame to a .csv file with all documents
selected_df.to_csv("test.csv", index=False)

# Step 5: Create a DataFrame with only 200 documents for each topic
limited_df = pd.concat([filtered_df[filtered_df['Topic'] == topic].sample(min(len(filtered_df[filtered_df['Topic'] == topic]), 200)) 
                        for topic in topics_to_include])

# Step 6: Save the limited DataFrame to a .csv file with only 200 documents per topic
limited_df.to_csv("test.csv", index=False)

In [5]:
topics, similarity = topic_model.find_topics("Healing through my trauma.", top_n=5)
for topic, score in zip(topics, similarity):
    print(f"Topic {topic} (Similarity: {score:.4f}): {topic_model.get_topic(topic)}")


Topic 277 (Similarity: 0.8759): [('therapy', 0.9414172), ('therapist', 0.91887456), ('therapists', 0.91092235), ('treatment', 0.8863071), ('counseling', 0.8857188), ('counselor', 0.86533266), ('disorder', 0.8632303), ('depressed', 0.86154974), ('need', 0.8600832), ('depression', 0.85903966)]
Topic 411 (Similarity: 0.8633): [('abuse', 0.9436456), ('abusive', 0.9357648), ('abused', 0.9354367), ('abusers', 0.92333883), ('abuser', 0.9179987), ('abusing', 0.9120985), ('violence', 0.87317586), ('humiliation', 0.865414), ('narcissistic', 0.86392856), ('bullies', 0.8634559)]
Topic 707 (Similarity: 0.8620): [('leaving', 0.8951088), ('glad', 0.88974667), ('leave', 0.8869854), ('away', 0.87086636), ('damn', 0.85742104), ('stayed', 0.8564422), ('kudos', 0.8556378), ('wow', 0.84708214), ('congrats', 0.843223), ('left', 0.8405669)]
Topic 791 (Similarity: 0.8591): [('hurts', 0.9543725), ('painfully', 0.94996023), ('painful', 0.9490411), ('hurt', 0.9475064), ('pain', 0.9206236), ('pains', 0.9180995), 

In [6]:
topics, similarity = topic_model.find_topics("Finding out my real identity, sense of self, and who I am.", top_n=5)
for topic, score in zip(topics, similarity):
    print(f"Topic {topic} (Similarity: {score:.4f}): {topic_model.get_topic(topic)}")


Topic 277 (Similarity: 0.8727): [('therapy', 0.9414172), ('therapist', 0.91887456), ('therapists', 0.91092235), ('treatment', 0.8863071), ('counseling', 0.8857188), ('counselor', 0.86533266), ('disorder', 0.8632303), ('depressed', 0.86154974), ('need', 0.8600832), ('depression', 0.85903966)]
Topic 707 (Similarity: 0.8699): [('leaving', 0.8951088), ('glad', 0.88974667), ('leave', 0.8869854), ('away', 0.87086636), ('damn', 0.85742104), ('stayed', 0.8564422), ('kudos', 0.8556378), ('wow', 0.84708214), ('congrats', 0.843223), ('left', 0.8405669)]
Topic 561 (Similarity: 0.8672): [('guy', 0.9382906), ('dude', 0.9182868), ('guys', 0.91207933), ('dudes', 0.90497136), ('man', 0.8980681), ('lad', 0.8630356), ('lol', 0.8627504), ('gotta', 0.86112446), ('similiar', 0.8603344), ('shit', 0.85827065)]
Topic 260 (Similarity: 0.8621): [('naming', 0.91278374), ('names', 0.90596676), ('rename', 0.8928071), ('alphabet', 0.8837673), ('named', 0.8835945), ('identity', 0.87491393), ('nicknames', 0.8720069), 

In [7]:
topics, similarity = topic_model.find_topics("Learning how to set boundaries and love myself.", top_n=5)
for topic, score in zip(topics, similarity):
    print(f"Topic {topic} (Similarity: {score:.4f}): {topic_model.get_topic(topic)}")

Topic 326 (Similarity: 0.8852): [('advice', 0.9431957), ('advise', 0.9244689), ('advices', 0.91935015), ('tips', 0.89509547), ('helpful', 0.8796486), ('helped', 0.8678317), ('guide', 0.86672187), ('guideline', 0.8613769), ('practice', 0.86032736), ('useful', 0.8595037)]
Topic 277 (Similarity: 0.8787): [('therapy', 0.9414172), ('therapist', 0.91887456), ('therapists', 0.91092235), ('treatment', 0.8863071), ('counseling', 0.8857188), ('counselor', 0.86533266), ('disorder', 0.8632303), ('depressed', 0.86154974), ('need', 0.8600832), ('depression', 0.85903966)]
Topic 37 (Similarity: 0.8649): [('wife', 0.9342551), ('spouse', 0.9157546), ('married', 0.9155922), ('husband', 0.9071674), ('divorced', 0.88828653), ('marriage', 0.8837887), ('divorce', 0.87374574), ('woman', 0.8693119), ('marry', 0.8669511), ('shes', 0.8637876)]
Topic 132 (Similarity: 0.8614): [('sessions', 0.9506233), ('session', 0.9468782), ('schedule', 0.8829769), ('scheduled', 0.87409407), ('scheduling', 0.872935), ('hour', 0.

In [8]:
topics, similarity = topic_model.find_topics("No DnD is better than bad DnD", top_n=5)
for topic, score in zip(topics, similarity):
    print(f"Topic {topic} (Similarity: {score:.4f}): {topic_model.get_topic(topic)}")

Topic 60 (Similarity: 0.9535): [('dnd', 0.9541751), ('5e', 0.8953159), ('tabletop', 0.8872134), ('rpg', 0.8843616), ('dice', 0.8782903), ('roleplaying', 0.8761204), ('rpgs', 0.8753911), ('dungeons', 0.860209), ('skyrim', 0.85868824), ('game', 0.85015374)]
Topic 671 (Similarity: 0.9235): [('dnd', 0.92271566), ('dungeons', 0.9149616), ('dungeon', 0.8992481), ('tabletop', 0.8937398), ('roleplaying', 0.8739946), ('dragons', 0.86420673), ('games', 0.8636795), ('fantasy', 0.8614767), ('boardgames', 0.8607818), ('adventures', 0.8566214)]
Topic 0 (Similarity: 0.9076): [('dm', 0.9357201), ('dming', 0.9258032), ('dms', 0.9189105), ('dnd', 0.8966824), ('having', 0.8559009), ('session', 0.84839475), ('campaign', 0.84729385), ('did', 0.8470398), ('playing', 0.8466874), ('player', 0.8430222)]
Topic 384 (Similarity: 0.9047): [('5e', 0.93398345), ('dd', 0.9147495), ('4e', 0.9013214), ('d20', 0.88337034), ('3e', 0.8677), ('2e', 0.86590004), ('5th', 0.86286473), ('rpg', 0.8582752), ('roleplaying', 0.857

In [9]:
topics, similarity = topic_model.find_topics("How can I learn to talk with problematic players?", top_n=5)
for topic, score in zip(topics, similarity):
    print(f"Topic {topic} (Similarity: {score:.4f}): {topic_model.get_topic(topic)}")

Topic 2 (Similarity: 0.9107): [('kicked', 0.86845917), ('behavior', 0.86153066), ('behaviour', 0.85938233), ('kicking', 0.8577809), ('kick', 0.85508406), ('player', 0.85335755), ('situation', 0.84882915), ('players', 0.84702146), ('playing', 0.8460915), ('game', 0.84607625)]
Topic 556 (Similarity: 0.8974): [('autism', 0.9122175), ('autistic', 0.9102429), ('behaviour', 0.84673524), ('behaviours', 0.84385234), ('interaction', 0.84352696), ('playing', 0.8387015), ('disability', 0.8383012), ('character', 0.837766), ('mental', 0.8337878), ('player', 0.83329576)]
Topic 519 (Similarity: 0.8951): [('mute', 0.9486581), ('silence', 0.8751019), ('hearing', 0.87366223), ('deaf', 0.8679966), ('voice', 0.86521035), ('speak', 0.86410564), ('hear', 0.86345446), ('spellcaster', 0.86237115), ('deafened', 0.86232257), ('silent', 0.86194825)]
Topic 0 (Similarity: 0.8916): [('dm', 0.9357201), ('dming', 0.9258032), ('dms', 0.9189105), ('dnd', 0.8966824), ('having', 0.8559009), ('session', 0.84839475), ('cam

In [10]:
topics, similarity = topic_model.find_topics("Red flags and problem players", top_n=5)
for topic, score in zip(topics, similarity):
    print(f"Topic {topic} (Similarity: {score:.4f}): {topic_model.get_topic(topic)}")

Topic 2 (Similarity: 0.9206): [('kicked', 0.86845917), ('behavior', 0.86153066), ('behaviour', 0.85938233), ('kicking', 0.8577809), ('kick', 0.85508406), ('player', 0.85335755), ('situation', 0.84882915), ('players', 0.84702146), ('playing', 0.8460915), ('game', 0.84607625)]
Topic 3 (Similarity: 0.9029): [('gm', 0.9200864), ('gming', 0.88643324), ('gms', 0.8801797), ('players', 0.8622217), ('player', 0.85732067), ('game', 0.8488846), ('playing', 0.8462153), ('play', 0.840186), ('try', 0.83492213), ('run', 0.8348463)]
Topic 738 (Similarity: 0.8994): [('problems', 0.938794), ('problem', 0.9355047), ('problematic', 0.92335534), ('issue', 0.89789104), ('solution', 0.895911), ('issues', 0.8845091), ('question', 0.8794465), ('frustrations', 0.86549926), ('answer', 0.8557353), ('response', 0.85545033)]
Topic -1 (Similarity: 0.8975): [('rpg', 0.8760139), ('dm', 0.85331005), ('play', 0.8500152), ('playing', 0.84960186), ('combat', 0.8485923), ('character', 0.84767306), ('dice', 0.84736276), ('c