In [1]:
import numpy as np
import pandas as pd
import json
import os
# Cap OpenMP at one thread for this process.
# NOTE(review): presumably added to silence scikit-learn's KMeans warning on Windows — TODO confirm.
os.environ["OMP_NUM_THREADS"] = '1'


from langchain.prompts import PromptTemplate
from llama_index.embeddings.langchain import LangchainEmbedding
from langchain.embeddings import HuggingFaceEmbeddings

from sklearn.decomposition import PCA
from sklearn.manifold import TSNE

from umap import umap_ as UMAP
from sklearn.cluster import KMeans
from matplotlib import pyplot as plt


In [2]:
from dotenv import load_dotenv
load_dotenv()  # pull OPENAI_API_KEY (and any other settings) from a local .env file

import os
import openai

# --- OpenAI client -----------------------------------------------------------
openai_api_key = os.getenv("OPENAI_API_KEY")

openai.api_key = openai_api_key
client = openai.Client()

chat_model_name = 'gpt-3.5-turbo'
embedding_model_name = 'sentence-transformers/all-mpnet-base-v2'

# --- Paths and run options ---------------------------------------------------
n_pick = 150  # the number of reviews picked for each game

# Project root. The default is the original author's machine; set the
# CLUSTER_ANALYSIS_ROOT environment variable (e.g. in .env) to run elsewhere
# without editing the notebook.
s_root = os.getenv(
    "CLUSTER_ANALYSIS_ROOT",
    r'C:\Users\fbohm\Desktop\Projects\DataScience\cluster_analysis/',
)
# Other cells build paths as `s_root + 'Data/...'`, so the root must end with
# a path separator.
if not s_root.endswith(('/', '\\')):
    s_root += '/'

# File names of the intermediate artifacts.
s_db_json = 'review_db.json'
s_db_embed_json = 'review_db_embed.json'
s_db_table_json = 'review_db_table.json'
s_db_table_xlsx = 'review_db_table.xlsx'
s_db_table_pca_json = 'review_db_table_pca.json'
s_db_table_pca_xlsx = 'review_db_table_pca.xlsx'
s_kmeans_centers = 'kmeans_centers.json'
b_override = False  # NOTE(review): presumably "overwrite existing artifacts" — confirm at the use site

In [3]:

# Few-shot prompt asking whether the REASON / WISH sections of a review are in
# English and, if not, for an English translation of each section.
# Placeholders: {reason}, {wish}.
# The literal is a raw string because the "[\h0]" separators contain a
# backslash: in a normal string "\h" is an invalid escape sequence, which
# triggers a DeprecationWarning (SyntaxWarning on Python 3.12+). The resulting
# prompt text is unchanged.
prompt_template_translation = PromptTemplate.from_template(
r'''Please tell if the following game review sections are written in English. Respond with "Yes." or "No."
Furthermore, please translate each section into English if it is not. The sections are separated by labels "REASON" and "WISH".

[h0]==================================================================[\h0]
REASON: "兄弟们，我把星空退款的钱拿来买这个了，我做的对吗"
WISH: "加动态模糊和垂直同步选项"

IS ENGLISH:

No.

TRANSLATION:

REASON: "Brothers, I used the refund money from the stars to buy this. Did I do the right thing?"
WISH: "Add dynamic blur and vertical sync options."


[h0]==================================================================[\h0]
REASON: "My first D&D experience and I'm enjoying it a lot."
WISH: "I would like more guidance in the game."

IS ENGLISH: 

Yes.


[h0]==================================================================[\h0]
REASON: "{reason}"
WISH: "{wish}"

IS ENGLISH: 
'''
)

# Few-shot prompt that asks for up to 10 topics of a review as a JSON list of
# {"Topic", "Category", "Context"} objects. Placeholder: {review}.
# Doubled braces ({{ }}) are literal braces in the rendered prompt.
# The literal is a raw string because the "[\h0]" separators contain a
# backslash: in a normal string "\h" is an invalid escape sequence, which
# triggers a DeprecationWarning (SyntaxWarning on Python 3.12+). The resulting
# prompt text is unchanged.
prompt_template_topic = PromptTemplate.from_template(
r'''Please list the most important topics and their respective original context in the review of a game in a json format with "Topic", "Category", "Context" arguments.  No more than 10 topics.
Topics should be game features.  A feature in the game should be a noun rather than a verb or an adjective.
Each topic should be categorized as a "fact" or a "request".

[h0]==================================================================[\h0]
REVIEW: 

"The weapon durability in this game is frustrating; my sword breaks after just a few swings. The combat itself is fun, but I wish the durability lasted longer. Also, the audio effects are very immersive during battles."

TOPICS:

[
    {{
        "Topic": "Weapon Durability",
        "Category": "request",
        "Context": "My sword breaks after just a few swings. I wish the durability lasted longer."
    }},
    {{
        "Topic": "Combat and Fighting",
        "Category": "fact",
        "Context": "The combat itself is fun."
    }},
    {{
        "Topic": "Audio",
        "Category": "fact",
        "Context": "The audio effects are very immersive during battles."
    }}
]

[h0]==================================================================[\h0]
REVIEW: 

"Playing during the night adds a thrilling layer to the game. The lack of a proper save feature makes it hard to enjoy it though. Also, there are way too many random encounters that make progress difficult."

TOPICS:

[
    {{
        "Topic": "Night",
        "Category": "fact",
        "Context": "Playing during the night adds a thrilling layer to the game."
    }},
    {{
        "Topic": "Save Feature",
        "Category": "request",
        "Context": "The lack of a proper save feature makes it hard to enjoy fully."
    }},
    {{
        "Topic": "Randomness",
        "Category": "request",
        "Context": "There are way too many random encounters that make progress difficult."
    }}
]

[h0]==================================================================[\h0]
REVIEW: 

"{review}"

TOPICS:

'''
)

# Sentiment prompt: given a review snippet and a topic, the model is asked to
# answer with exactly one of Positive / Negative / Inconclusive.
# Placeholders: {review}, {topic}.
prompt_template_topic_view = PromptTemplate.from_template(
    template='''What's the sentiment of the review with regard to the topic?
Always answer with 'Positive' or 'Negative' or 'Inconclusive'

REVIEW: My first D&D experience and I'm enjoying it a lot.
TOPIC: D&D
SENTIMENT: Positive 

REVIEW: This game lacks a proper ending or epilog
TOPIC: epilogue
SENTIMENT: Negative

REVIEW: Posted: August 8
TOPIC: release date
SENTIMENT: Inconclusive 

REVIEW: {review}
TOPIC: {topic}
SENTIMENT: ''',
)

In [14]:
# Read in the JSON file with the cleaned survey results.
# The encoding is stated explicitly: the translation step reads this same file
# as UTF-8, and the platform default (e.g. cp1252 on Windows) would mis-decode
# or reject non-ASCII review text.
with open(s_root + 'Data/survey_results_clean.json', 'r', encoding='utf-8') as f:
    db = json.load(f)

## Translate reviews

In [8]:
import json
from lingua import Language, LanguageDetectorBuilder

# The survey question texts double as the JSON keys of every entry.
reason_key = "Please tell us why you chose the rating above:"
wish_key = "If you had a magic wand and you could change, add, or remove anything from the game, what would it be and why?"

# Resolve both files under s_root, like every other cell, so the Topic
# Extraction cell reads exactly the file written here regardless of the
# kernel's working directory.
input_file_path = s_root + 'Data/survey_results_clean.json'
output_file_path = s_root + 'Data/survey_results_trans.json'  # New JSON with language and translations

# Initialize the language detector (restricted to the languages expected in the survey)
detector = LanguageDetectorBuilder.from_languages(
    Language.ENGLISH, Language.SPANISH, Language.CHINESE, Language.GERMAN, Language.FRENCH
).build()


def _detect_language_name(text):
    """Return the lower-cased Lingua language name of `text`, or "unknown".

    Blank text and text the detector cannot classify both map to "unknown".
    """
    if not text.strip():
        return "unknown"
    language = detector.detect_language_of(text)
    return language.name.lower() if language else "unknown"


def _unquote(text):
    """Strip whitespace and ONE pair of enclosing double quotes from `text`.

    The prompt wraps each section in double quotes and the model mirrors that,
    so without this the stored translation would carry literal quote marks.
    """
    text = text.strip()
    if len(text) >= 2 and text[0] == '"' and text[-1] == '"':
        text = text[1:-1].strip()
    return text


# Load JSON data
with open(input_file_path, 'r', encoding='utf-8') as json_file:
    data = json.load(json_file)

# Process each entry
for entry in data:
    # `or ""` also covers JSON nulls, which .get() returns as None.
    reason_text = entry.get(reason_key) or ""
    wish_text = entry.get(wish_key) or ""

    # Detect each field once; an empty/undetectable field must not turn an
    # otherwise single-language entry into "mixed".
    field_languages = {
        _detect_language_name(reason_text),
        _detect_language_name(wish_text),
    } - {"unknown"}
    if not field_languages:
        detected_language = "unknown"
    elif len(field_languages) == 1:
        detected_language = field_languages.pop()
    else:
        detected_language = "mixed"
    entry["language"] = detected_language

    # Skip the API call when the entry is English or has no text at all.
    if detected_language == "english" or not (reason_text.strip() or wish_text.strip()):
        continue

    # Format the prompt for translation with both texts
    prompt_translation = prompt_template_translation.format(reason=reason_text, wish=wish_text)

    # Make the OpenAI API call to translate the review
    response = client.chat.completions.create(
        model=chat_model_name,
        messages=[
            {"role": "system", "content": "You are a helpful assistant expertised in game review analysis."},
            {"role": "user", "content": prompt_translation},
        ],
        max_tokens=1024,
    )

    # message.content can be None; treat that as "no translation returned".
    translation_response = response.choices[0].message.content or ""

    # Expected shape: "... REASON: <text> WISH: <text>". partition() splits on
    # the first marker only, so the section text is never silently truncated.
    _, reason_marker, after_reason = translation_response.partition("REASON:")
    reason_part, wish_marker, wish_part = after_reason.partition("WISH:")

    if reason_marker and wish_marker:
        # Overwrite the original fields with translations
        entry[reason_key] = _unquote(reason_part)
        entry[wish_key] = _unquote(wish_part)
    else:
        print(f"Translation not available for entry: {entry}")

# Save the modified data with translations
with open(output_file_path, 'w', encoding='utf-8') as json_file:
    json.dump(data, json_file, indent=4, ensure_ascii=False)

print(f"Translated data saved to {output_file_path}")


Translated data saved to Data/survey_results_trans.json


## Topic Extraction

In [10]:
# Load the translated survey. It was written as UTF-8 with ensure_ascii=False,
# so it must be read back as UTF-8 rather than with the platform default
# encoding.
with open(s_root + 'Data/survey_results_trans.json', 'r', encoding='utf-8') as f:
    db = json.load(f)

# Work on the first entry only; this cell is a single-entry walkthrough.
entry = db[0]

# The two free-text answers are stored under their full survey question text.
review_text = entry["Please tell us why you chose the rating above:"]
additional_feedback = entry["If you had a magic wand and you could change, add, or remove anything from the game, what would it be and why?"]

# Combine both into a single review input for the prompt
combined_review = f"{review_text} {additional_feedback}"

# Format the prompt for the LLM
prompt_topic = prompt_template_topic.format(review=combined_review)

# Make the OpenAI API call
response = client.chat.completions.create(
    model=chat_model_name,
    messages=[
        {"role": "system", "content": "You are a helpful assistant expertised in game review analysis."},
        {"role": "user", "content": prompt_topic},
    ],
    max_tokens=1024,
)

# Print the response content (expected to be a JSON list of topics)
print(response.choices[0].message.content)

[
    {
        "Topic": "Survival Gameplay",
        "Category": "fact",
        "Context": "The game promises to be a survival zombie game with unique and interesting gameplay."
    },
    {
        "Topic": "Storyline",
        "Category": "fact",
        "Context": "The demo left me wanting more."
    },
    {
        "Topic": "Challenging Mechanics",
        "Category": "fact",
        "Context": "It is a challenging game to survive and be concerned about resources."
    }
]


## Sentiment Analysis

In [11]:
# Raw model reply; message.content can be None, so fall back to an empty
# string to get a clear JSONDecodeError below instead of a TypeError.
topic_response = response.choices[0].message.content or ""

# Chat models occasionally wrap JSON in a Markdown code fence
# (```json ... ```); peel that off before parsing.
topic_json = topic_response.strip()
if topic_json.startswith("```"):
    topic_json = topic_json.strip("`").strip()
    if topic_json[:4].lower() == "json":
        topic_json = topic_json[4:]

# List of {"Topic", "Category", "Context"} dicts, as requested by the prompt.
topics = json.loads(topic_json)

In [22]:
# Display the topic_response string for inspection.
topic_response

'[\n    {\n        "Topic": "gameplay",\n        "Category": "fact",\n        "Context": "simple but engaging gameplay."\n    },\n    {\n        "Topic": "inventory system",\n        "Category": "fact",\n        "Context": "Easy to use inventory system."\n    },\n    {\n        "Topic": "tactical decision making",\n        "Category": "fact",\n        "Context": "Tactical decision making."\n    },\n    {\n        "Topic": "distractions",\n        "Category": "fact",\n        "Context": "if there is distractions in game then Good Job :)"\n    }\n]'

In [13]:
# Dump every extracted topic with its supporting context, one record per block.
for topic in topics:
    topic_text = topic["Topic"]
    topic_context = topic["Context"]
    print(
        f'topic text: {topic_text}',
        f'topic context: {topic_context}',
        '##################',
        sep='\n',
    )
    

topic text: Survival Gameplay
topic context: The game promises to be a survival zombie game with unique and interesting gameplay.
##################
topic text: Storyline
topic context: The demo left me wanting more.
##################
topic text: Challenging Mechanics
topic context: It is a challenging game to survive and be concerned about resources.
##################


In [20]:
# Classify the sentiment of each extracted topic and attach the results to
# `entry`. Results are collected in a fresh list and assigned once at the end,
# so re-running this cell replaces entry["topics"] instead of appending
# duplicate records to it.
topic_sentiments = []
for topic in topics:
    topic_text = topic["Topic"]
    topic_context = topic["Context"]

    # Format the prompt for sentiment analysis
    prompt_sentiment = prompt_template_topic_view.format(review=topic_context, topic=topic_text)

    # Call the API for sentiment analysis
    sentiment_response = client.chat.completions.create(
        model=chat_model_name,
        messages=[
            {"role": "system", "content": "You are a helpful assistant expertised in sentiment analysis."},
            {"role": "user", "content": prompt_sentiment},
        ],
        max_tokens=1024,
    )

    # message.content can be None; store an empty sentiment rather than crash.
    sentiment = (sentiment_response.choices[0].message.content or "").strip()

    # Record the topic together with its sentiment and supporting sentence
    topic_sentiments.append({
        "topic": topic_text,
        "sentiment": sentiment,
        "sentence": topic_context
    })
    print(f"Topic: {topic_text}\nContext: {topic_context}\nSentiment: {sentiment}\n")

entry["topics"] = topic_sentiments

Topic: Survival Gameplay
Context: The game promises to be a survival zombie game with unique and interesting gameplay.
Sentiment: Positive

Topic: Storyline
Context: The demo left me wanting more.
Sentiment: Positive

Topic: Challenging Mechanics
Context: It is a challenging game to survive and be concerned about resources.
Sentiment: Positive



In [22]:
# Persist the survey entries (including any attached topics) as readable UTF-8 JSON.
output_file_path = f'{s_root}Data/survey_results_with_topics.json'
with open(output_file_path, mode='w', encoding='utf-8') as topics_file:
    json.dump(obj=db, fp=topics_file, indent=4, ensure_ascii=False)

## Everything put together 
### in a loop