In [None]:
import json

# Relative path of the JSON file holding the game data.
data_file = '../game_data.json'

# Read the file as UTF-8 text and parse it into `game_data`.
with open(data_file, encoding="utf-8") as f:
    game_data = json.loads(f.read())

In [None]:
import matplotlib.pyplot as plt

def create_histogram(data, title, xlabel, ylabel, color='tab:blue', figuresize=(10, 6)):
    """
    Draw a bar chart with one bar per entry of ``data`` and show it.

    :param data: Dictionary mapping a numeric x position to the bar height
    :param title: Title shown above the plot
    :param xlabel: Label of the x axis
    :param ylabel: Label of the y axis
    :param color: Fill colour of the bars (default 'tab:blue')
    :param figuresize: Figure size handed to ``plt.figure`` (default (10, 6))
    """
    labels = list(data.keys())
    values = list(data.values())

    # Bars share the covered x range, with gaps left between them. With fewer
    # than two distinct positions that range is zero (or undefined for empty
    # data), which would give invisible zero-width bars or raise, so fall back
    # to matplotlib's default bar width instead.
    default_bar_width = 0.8
    x_span = max(labels) - min(labels) if labels else 0
    bar_width = x_span / (1.5 * len(labels)) if x_span > 0 else default_bar_width

    plt.figure(figsize=figuresize)
    plt.bar(labels, values, color=color, edgecolor='black', width=bar_width)
    plt.title(title)
    plt.xlabel(xlabel)
    plt.ylabel(ylabel)
    plt.show()

In [None]:
from vaderSentiment.vaderSentiment import SentimentIntensityAnalyzer

# Shared VADER analyzer instance used by get_sentiment_scores.
analyzer = SentimentIntensityAnalyzer()
# Weights of the combined "neg_compound" score built in get_sentiment_scores:
# the negated "neg" score is scaled by weight_neg and the "compound" score by
# weight_compound; the two weights sum to 1.
weight_neg = 0.5
weight_compound = 1 - weight_neg

def get_sentiment_scores(data, subsections = ["gameplay"]):
    """
    Compute sentiment scores for the selected text subsections of every game.

    :param data: Dictionary mapping a game title to its content; every content
        must hold a 'text' dictionary keyed by subsection name
    :param subsections: Names of the subsections whose texts are joined with
        newlines before scoring (missing subsections are skipped); a single
        name may also be given as a plain string
    :return: Dictionary mapping each title to the analyzer's score dictionary,
        extended with a weighted 'neg_compound' entry
    """
    # A bare string would otherwise be iterated character by character, match
    # no subsection and silently score an empty text.
    if isinstance(subsections, str):
        subsections = [subsections]

    sentiment_scores = {}
    for title, content in data.items():
        sections = content['text']
        text = "\n".join(sections[name] for name in subsections if name in sections)
        sentiment = analyzer.polarity_scores(text)
        # Combined score: penalise negativity, reward the overall compound score.
        sentiment["neg_compound"] = -sentiment["neg"] * weight_neg + sentiment["compound"] * weight_compound
        sentiment_scores[title] = sentiment
    return sentiment_scores

# Function to create a histogram of sentiment scores
def create_sentiment_histograms(sentiment_data, sentiments_to_plot=None):
    """
    Plot one histogram per requested sentiment type in a 2-column grid.

    :param sentiment_data: Dictionary mapping a game to its score dictionary
    :param sentiments_to_plot: Score keys to plot; defaults to
        ['pos', 'neu', 'neg', 'compound']. Nothing is drawn for an empty list.
    """
    # Default to all sentiment types if none are specified
    if sentiments_to_plot is None:
        sentiments_to_plot = ['pos', 'neu', 'neg', 'compound']

    num_plots = len(sentiments_to_plot)
    if num_plots == 0:
        # A grid with zero rows cannot be created; there is nothing to show.
        return

    cols = 2  # We prefer a 2-column layout
    rows = (num_plots + cols - 1) // cols  # Ceiling division: rows needed

    # squeeze=False always yields a 2-D array of axes, so flattening works the
    # same for a single row (1 or 2 plots) as for several rows.
    fig, axes = plt.subplots(rows, cols, figsize=(15, 5 * rows), squeeze=False)
    axes = axes.flatten()

    fig.suptitle('Sentiment Analysis Histograms')

    # Plotting each requested sentiment
    for ax, sentiment in zip(axes, sentiments_to_plot):
        scores = [details[sentiment] for details in sentiment_data.values()]
        ax.hist(scores, bins=200, color='tab:blue', edgecolor='black')
        ax.set_title(f'{sentiment.capitalize()} Sentiment Score')
        ax.set_xlabel('Sentiment Score')
        ax.set_ylabel('Number of Games')

    # Turn off any unused subplots
    for unused_ax in axes[num_plots:]:
        unused_ax.axis('off')

    # Adjust layout for better spacing (leaves room for the figure title)
    plt.tight_layout(rect=[0, 0.03, 1, 0.95])
    plt.show()

def print_top_bottom_sentiment_games(sentiment_scores, key='compound', n = 10):
    """
    Print the n lowest- and n highest-scoring games for one sentiment key.

    :param sentiment_scores: Dictionary mapping a game to its score dictionary
    :param key: Score key to rank by (default 'compound')
    :param n: Number of games to list at each end (default 10)
    """
    # Sorting the games by the chosen score, lowest first
    sorted_games = sorted(sentiment_scores.items(), key=lambda item: item[1][key])

    # Guard the slices: [-0:] would select every game and a negative n would
    # select the wrong end, so anything below 1 lists no games.
    count = max(n, 0)
    most_negative = sorted_games[:count]
    most_positive = sorted_games[-count:] if count > 0 else []

    # Printing the n most negative games
    print(f"{n} Most Negative Games in terms of {key} sentiment:")
    for game, score in most_negative:
        print(f"{game}: {score}")

    print("\n")  # Adding a newline for better readability

    # Printing the n most positive games (in ascending score order)
    print(f"{n} Most Positive Games in terms of {key} sentiment:")
    for game, score in most_positive:
        print(f"{game}: {score}")

def is_subsection_length_valid(data, subsections, min_length, max_length):
    """
    Check if the combined word count of the given subsections is within range.

    :param data: The data dictionary of a game; texts are read from data['text']
    :param subsections: Names of the subsections whose texts are combined
        (missing ones count as empty); a single name may be given as a string
    :param min_length: The minimum number of words (inclusive)
    :param max_length: The maximum number of words (inclusive)
    :return: True if the word count is within range, False otherwise
    """
    # A bare string would otherwise be iterated character by character and
    # never match a subsection name.
    if isinstance(subsections, str):
        subsections = [subsections]

    texts = data.get('text', {})
    combined_text = "\n".join(texts.get(name, "") for name in subsections)
    word_count = len(combined_text.split())
    return min_length <= word_count <= max_length

def count_long_subsections(game_data, subsection='gameplay', min_length=0, max_length=float('inf')):
    """
    Counts the entries in the game_data dictionary whose given subsection has a
    word count within the given range.

    :param game_data: Dictionary containing game information
    :param subsection: The subsection name to look for within the entries
        (default is 'gameplay'); a list of names is also accepted
    :param min_length: The minimum number of words to count (default is 0)
    :param max_length: The maximum number of words to count (default is infinity)
    :return: The count of entries whose word count is within the specified range
    """
    # is_subsection_length_valid iterates over subsection names; handing it the
    # bare string would make it look up single characters instead of the name.
    subsections = [subsection] if isinstance(subsection, str) else subsection
    return sum(
        is_subsection_length_valid(data, subsections, min_length, max_length)
        for data in game_data.values()
    )

def filter_entries_by_length(game_data, subsections=['gameplay'], min_length=0, max_length=float('inf')):
    """
    Build a dictionary holding only the entries of game_data that
    is_subsection_length_valid accepts for the given subsections and bounds.

    :param game_data: Dictionary containing game information
    :param subsections: The subsection names to check within each entry (default is ['gameplay'])
    :param min_length: The lower bound passed to the length check (default is 0)
    :param max_length: The upper bound passed to the length check (default is infinity)
    :return: A new dictionary with the entries that passed the check
    """
    kept_entries = {}
    for title, entry in game_data.items():
        if not is_subsection_length_valid(entry, subsections, min_length, max_length):
            continue
        kept_entries[title] = entry
    return kept_entries




In [None]:
from collections import Counter


# Tally in how many games each subsection name occurs, and count the games
# that have both a "plot" and a "gameplay" subsection.
unique_keys = Counter()
intersection = 0
for game, content in game_data.items():
    unique_keys.update(content['text'].keys())
    if "plot" in content['text'] and "gameplay" in content['text']:
        intersection += 1

# Display the per-subsection counts
unique_keys


In [None]:
import os
import json

def get_or_create_value(function_to_apply, file_path):
    """
    Return the value cached as JSON at file_path, computing and caching it first if needed.

    :param function_to_apply: Zero-argument callable producing a JSON-serializable value;
        only called when the cache file does not exist
    :param file_path: Path of the JSON cache file
    :return: The value loaded from the file if it exists, otherwise the freshly computed value
    :raises TypeError: If the computed value cannot be serialized to JSON
        (no cache file is written in that case)
    """
    # Reuse the cached value when the file is already there
    if os.path.exists(file_path):
        with open(file_path, 'r', encoding='utf-8') as file:
            return json.load(file)

    # Call the function to create the value
    value = function_to_apply()

    # Serialize before opening the file: dumping straight into a file opened
    # for writing would leave an empty or truncated cache behind if the value
    # is not serializable, and every later call would then fail on loading it.
    serialized = json.dumps(value)
    with open(file_path, 'w', encoding='utf-8') as file:
        file.write(serialized)

    return value

# Example usage (get_sentiment_scores is defined in an earlier cell):
#   scores = get_or_create_value(lambda: get_sentiment_scores(game_data), 'scores.json')

In [None]:
# Score every subsection type on its own. Each key is wrapped in a one-element
# list: list(key) would split the name into single characters, none of which
# is a subsection name, so every game would be scored on empty text.
# NOTE: get_or_create_value reuses 'vader_sentiment_scores.json' when it exists;
# delete a cache file written with the character-split keys to recompute.
sentiment_scores = get_or_create_value(
    lambda: {key: get_sentiment_scores(game_data, [key]) for key in unique_keys.keys()},
    'vader_sentiment_scores.json',
)


In [None]:
# Score distributions for the "plot" subsection.
create_sentiment_histograms(sentiment_data=sentiment_scores["plot"])

In [None]:
# Score distributions for the "gameplay" subsection.
create_sentiment_histograms(sentiment_data=sentiment_scores["gameplay"])

In [None]:
# Score distributions for the "reception" subsection.
create_sentiment_histograms(sentiment_data=sentiment_scores["reception"])

In [None]:
# Lowest- and highest-scoring games for the "plot" subsection (default key and n).
print_top_bottom_sentiment_games(sentiment_scores=sentiment_scores["plot"])


In [None]:
# Spot-check the gameplay scores of a few selected titles (shown as a tuple).
(
    sentiment_scores["gameplay"]["Tetris"],
    sentiment_scores["gameplay"]["God of War (2005 video game)"],
    sentiment_scores["gameplay"]["Minecraft"],
    sentiment_scores["gameplay"]["Mortal Kombat (1992 video game)"],
)

In [None]:
# Rank the games by the magnitude of neg * compound, largest first.
sorted(
    sentiment_scores["gameplay"].items(),
    key=lambda item: -abs(item[1]["neg"] * item[1]["compound"]),
)

In [None]:
# Word-count bounds applied to the combined text of the subsections below.
constrains = {
    "min_length": 100,
    "max_length": 10000
}
# Each name is listed exactly once: a repeated name makes the same text get
# joined (and word-counted) twice, which skews both the length filter and the
# sentiment score.
# TODO confirm whether another variant (e.g. 'gameplay and plot') was meant to
# be listed as well.
subsections = ['gameplay', 'plot', 'story', 'synopsis', 'plot and gameplay']

filtered_by_length = filter_entries_by_length(game_data, subsections=subsections, **constrains)

In [None]:
# Score the combined subsection text of the length-filtered games; the result
# is cached in "filtered_gameplay_sentiment.json".
sentiment_filtered_by_length = get_or_create_value(
    function_to_apply=lambda: get_sentiment_scores(filtered_by_length, subsections),
    file_path="filtered_gameplay_sentiment.json",
)

In [None]:
# Score distributions of the filtered games, including the combined neg_compound score.
create_sentiment_histograms(
    sentiment_data=sentiment_filtered_by_length,
    sentiments_to_plot=['pos', 'neu', 'neg', 'compound', "neg_compound"],
)

In [None]:
# 50 lowest- and highest-scoring filtered games by compound score.
print_top_bottom_sentiment_games(
    sentiment_scores=sentiment_filtered_by_length,
    key="compound",
    n=50,
)

In [None]:
# Copy each filtered game's data and add its compound score under 'sentiment';
# the result is cached in "game_data_with_vader_sentiment_on_gameplay.json".
game_data_with_vader_sentiment_on_gameplay = get_or_create_value(
    lambda: {
        game: dict(data, sentiment=sentiment_filtered_by_length[game]["compound"])
        for game, data in filtered_by_length.items()
    },
    "game_data_with_vader_sentiment_on_gameplay.json",
)

In [None]:
# 50 lowest- and highest-scoring filtered games by the combined neg_compound score.
print_top_bottom_sentiment_games(
    sentiment_scores=sentiment_filtered_by_length,
    key="neg_compound",
    n=50,
)

In [None]:
# Number of games that have a sentiment score after length filtering.
len(sentiment_filtered_by_length)