In [None]:
import json

# Location of the game dataset (JSON), relative to the notebook's working directory.
data_file = '../game_data.json'


# Parse the whole file into `game_data`. Later cells iterate it with `.items()`
# and read a 'text' mapping (section name -> section text) from each entry.
with open(data_file, encoding="utf-8") as f:
    game_data = json.load(f)

In [None]:
import matplotlib.pyplot as plt

def create_histogram(data, title, xlabel, ylabel, color='tab:blue', figuresize=(10, 6)):
    """
    Draw a bar chart with one bar per entry of ``data`` and show it.

    The bar width is derived from the spread of the keys so that bars do not
    overlap: ``(max key - min key) / (1.5 * number of bars)``.

    :param data: Mapping of x positions (keys must support ``max``/``min`` and subtraction) to bar heights
    :param title: Title of the figure
    :param xlabel: Label for the x axis
    :param ylabel: Label for the y axis
    :param color: Fill color of the bars (default is 'tab:blue')
    :param figuresize: Figure size passed to ``plt.figure`` (default is (10, 6))
    :return: None; the figure is displayed with ``plt.show()``
    """
    # matplotlib's own default bar width, used when no spread can be computed
    fallback_bar_width = 0.8

    labels = list(data.keys())
    values = list(data.values())

    n_bars = len(labels)
    # max()/min() raise on an empty sequence, so treat "no data" as zero spread
    figure_width = max(labels) - min(labels) if labels else 0

    # Zero spread (a single bar, or no bars) would give width 0 and therefore
    # invisible bars; fall back to the default width in that case.
    if figure_width:
        bar_width = figure_width / (1.5 * n_bars)
    else:
        bar_width = fallback_bar_width

    plt.figure(figsize=figuresize)
    plt.bar(labels, values, color=color, edgecolor='black', width=bar_width)
    plt.title(title)
    plt.xlabel(xlabel)
    plt.ylabel(ylabel)
    plt.show()

In [None]:
from vaderSentiment.vaderSentiment import SentimentIntensityAnalyzer

# Single shared VADER analyzer; get_sentiment_scores below calls its polarity_scores().
analyzer = SentimentIntensityAnalyzer()
# Weights of the custom "neg_compound" score computed in get_sentiment_scores:
#   neg_compound = -neg * weight_neg + compound * weight_compound
weight_neg = 0.5
weight_compound = 1 - weight_neg

def get_sentiment_scores(data, subsections=("gameplay",)):
    """
    Compute VADER sentiment scores for every entry of ``data``.

    For each entry, the texts of the requested subsections that exist in the
    entry's 'text' mapping are joined with newlines and scored with the
    module-level ``analyzer``. A custom "neg_compound" score, built from the
    module-level ``weight_neg`` / ``weight_compound``, is added to the result.

    :param data: Dictionary mapping a title to an entry with a 'text' mapping
    :param subsections: Subsection name, or iterable of names, to score (default is 'gameplay')
    :return: Dictionary mapping each title to its score dictionary
             (the analyzer's scores plus 'neg_compound')
    """
    # A bare string would otherwise be iterated character by character and
    # match no subsection at all.
    if isinstance(subsections, str):
        subsections = [subsections]
    else:
        subsections = list(subsections)

    sentiment_scores = {}
    for title, content in data.items():
        # Entries without any text are scored on the empty string instead of raising
        sections = content.get('text', {})
        text = "\n".join(sections[name] for name in subsections if name in sections)
        sentiment = analyzer.polarity_scores(text)
        sentiment["neg_compound"] = -sentiment["neg"] * weight_neg + sentiment["compound"] * weight_compound
        sentiment_scores[title] = sentiment
    return sentiment_scores

# Function to create a histogram of sentiment scores
def create_sentiment_histograms(sentiment_data, sentiments_to_plot=None):
    """
    Plot one histogram per requested sentiment score in a 2-column grid.

    :param sentiment_data: Dictionary mapping a game to its score dictionary
    :param sentiments_to_plot: Score names to plot
                               (default is ['pos', 'neu', 'neg', 'compound'])
    :return: None; the figure is displayed with ``plt.show()``.
             Nothing is drawn when no score names are requested.
    """
    # Default to all sentiment types if none are specified
    if sentiments_to_plot is None:
        sentiments_to_plot = ['pos', 'neu', 'neg', 'compound']

    num_plots = len(sentiments_to_plot)
    if num_plots == 0:
        # A grid with zero rows cannot be created; there is nothing to show
        return

    cols = 2  # We prefer a 2-column layout
    rows = (num_plots + cols - 1) // cols  # Ceiling division

    # squeeze=False makes subplots() always return a 2-D array of Axes, so the
    # single-row case (1 or 2 plots) flattens exactly like the multi-row case.
    fig, axes = plt.subplots(rows, cols, figsize=(15, 5 * rows), squeeze=False)
    axes = axes.flatten()

    fig.suptitle('Sentiment Analysis Histograms')

    # Plotting each requested sentiment
    for ax, sentiment in zip(axes, sentiments_to_plot):
        scores = [details[sentiment] for details in sentiment_data.values()]
        ax.hist(scores, bins=200, color='tab:blue', edgecolor='black')
        ax.set_title(f'{sentiment.capitalize()} Sentiment Score')
        ax.set_xlabel('Sentiment Score')
        ax.set_ylabel('Number of Games')

    # Turn off any unused subplots (odd number of plots leaves one empty cell)
    for unused_ax in axes[num_plots:]:
        unused_ax.axis('off')

    # Adjust layout for better spacing; the rect leaves room for the suptitle
    plt.tight_layout(rect=[0, 0.03, 1, 0.95])
    plt.show()

def print_top_bottom_sentiment_games(sentiment_scores, key='compound', n = 10):
    """
    Print the ``n`` lowest-scoring and the ``n`` highest-scoring games.

    Both lists are printed in ascending order of the chosen score.

    :param sentiment_scores: Dictionary mapping a game to its score dictionary
    :param key: Name of the score to sort by (default is 'compound')
    :param n: Number of games to print at each end (default is 10)
    :return: None; the lists are written to standard output
    """
    # Sorting the games by the requested score, lowest first
    sorted_games = sorted(sentiment_scores.items(), key=lambda x: x[1][key])

    count = max(n, 0)
    most_negative = sorted_games[:count]
    # A plain sorted_games[-n:] returns the WHOLE list when n == 0 (since -0 == 0),
    # so the zero case has to be handled explicitly.
    most_positive = sorted_games[-count:] if count else []

    # Printing the n most negative games
    print(f"{n} Most Negative Games in terms of {key} sentiment:")
    for game, score in most_negative:
        print(f"{game}: {score}")

    print("\n")  # Adding a newline for better readability

    # Printing the n most positive games
    print(f"{n} Most Positive Games in terms of {key} sentiment:")
    for game, score in most_positive:
        print(f"{game}: {score}")

def is_subsection_length_valid(data, subsections, min_length, max_length):
    """
    Check if the combined word count of the given subsections is within range.

    Subsections missing from the entry contribute no words.

    :param data: The data dictionary of a game (its 'text' mapping is read)
    :param subsections: Subsection name, or iterable of names, whose texts are combined
    :param min_length: The minimum number of words (inclusive)
    :param max_length: The maximum number of words (inclusive)
    :return: True if the word count is within range, False otherwise
    """
    # A bare string would otherwise be iterated character by character and
    # never match a real subsection name.
    if isinstance(subsections, str):
        subsections = [subsections]

    sections = data.get('text', {})
    combined_text = "\n".join(sections.get(name, "") for name in subsections)
    # split() with no argument splits on any run of whitespace, i.e. counts words
    word_count = len(combined_text.split())
    return min_length <= word_count <= max_length

def count_long_subsections(game_data, subsection='gameplay', min_length=0, max_length=float('inf')):
    """
    Counts the entries of game_data whose subsection text has a word count
    within the given range (as decided by is_subsection_length_valid).

    :param game_data: Dictionary containing game information
    :param subsection: Subsection name, or iterable of names, to look at (default is 'gameplay')
    :param min_length: The minimum word count to accept (default is 0)
    :param max_length: The maximum word count to accept (default is infinity)
    :return: The number of entries whose word count is within the specified range
    """
    # is_subsection_length_valid loops over its `subsections` argument, so a
    # single name must be wrapped in a list; passing the bare string would make
    # it look up every character of the name as if it were a subsection.
    if isinstance(subsection, str):
        subsections = [subsection]
    else:
        subsections = list(subsection)

    return sum(
        is_subsection_length_valid(data, subsections, min_length, max_length)
        for data in game_data.values()
    )

def filter_entries_by_length(game_data, subsections=['gameplay'], min_length=0, max_length=float('inf')):
    """
    Build a new dictionary holding only the entries of game_data that pass
    is_subsection_length_valid for the given subsections and bounds.

    :param game_data: Dictionary containing game information
    :param subsections: The subsections handed to the length check (default is ['gameplay'])
    :param min_length: Lower bound handed to the length check (default is 0)
    :param max_length: Upper bound handed to the length check (default is infinity)
    :return: A new dictionary with the accepted entries, in their original order;
             the entry values themselves are not copied
    """
    kept = {}
    for name, entry in game_data.items():
        if is_subsection_length_valid(entry, subsections, min_length, max_length):
            kept[name] = entry
    return kept




In [None]:
from collections import Counter


# Tally how many games carry each text section name, and separately count
# the games that have both a "plot" and a "gameplay" section.
unique_keys = Counter()
intersection = 0
for game, content in game_data.items():
    section_names = content['text'].keys()
    if "plot" in section_names and "gameplay" in section_names:
        intersection += 1
    unique_keys.update(section_names)

# Show the per-section counts as the cell output
unique_keys


In [None]:
import os
import json

def get_or_create_value(function_to_apply, file_path):
    """
    Load a JSON-cached value from ``file_path``, or compute and cache it.

    If the file exists its content is returned and ``function_to_apply`` is
    not called. Otherwise the function is called, its result is written to the
    file as JSON and returned. Note that an existing file is reused as-is;
    delete it to force a recomputation.

    :param function_to_apply: Zero-argument callable producing a JSON-serializable value
    :param file_path: Path of the JSON cache file
    :return: The cached or freshly computed value
    :raises TypeError: If the computed value is not JSON-serializable
                       (no cache file is created in that case)
    """
    # Check if the file exists
    if os.path.exists(file_path):
        # Load the value from the file
        with open(file_path, 'r', encoding='utf-8') as file:
            return json.load(file)

    # Call the function to create the value
    value = function_to_apply()

    # Serialize before touching the disk: a value that cannot be serialized
    # must not leave a truncated cache file behind, because that file would
    # make every later call fail while loading it.
    serialized = json.dumps(value)

    # Write to a temporary sibling and rename it into place, so an interrupted
    # write never produces a half-written cache file under the final name.
    temp_path = f"{file_path}.tmp"
    with open(temp_path, 'w', encoding='utf-8') as file:
        file.write(serialized)
    os.replace(temp_path, file_path)

    return value

# Example usage:
#   scores = get_or_create_value(lambda: get_sentiment_scores(game_data), 'scores.json')
# (get_sentiment_scores is assumed to be defined elsewhere in the notebook.)

In [None]:
# Score every section name separately: sentiment_scores[section][game] -> scores.
# Each name is wrapped in a one-element list; list(key) would split the name
# into single characters (list("plot") == ['p', 'l', 'o', 't']), none of which
# is a section, so every game would be scored on empty text.
# NOTE: an existing 'vader_sentiment_scores.json' is reloaded as-is; delete a
# file produced before this fix to recompute the scores.
sentiment_scores = get_or_create_value(lambda: {key: get_sentiment_scores(game_data, [key]) for key in unique_keys.keys()} , 'vader_sentiment_scores.json')


In [None]:
# Histograms of the default score types (pos, neu, neg, compound) for the "plot" section scores.
create_sentiment_histograms(sentiment_scores["plot"])

In [None]:
# Histograms of the default score types (pos, neu, neg, compound) for the "gameplay" section scores.
create_sentiment_histograms(sentiment_scores["gameplay"])

In [None]:
# Histograms of the default score types (pos, neu, neg, compound) for the "reception" section scores.
create_sentiment_histograms(sentiment_scores["reception"])

In [None]:
# Print both ends of the "plot" ranking using the function defaults (key='compound', n=10).
print_top_bottom_sentiment_games(sentiment_scores["plot"])


In [None]:
# Spot-check the gameplay scores of four specific titles; the comma-separated
# lookups form a tuple, which is shown as the cell output.
sentiment_scores["gameplay"]["Tetris"], sentiment_scores["gameplay"]["God of War (2005 video game)"], sentiment_scores["gameplay"]["Minecraft"], sentiment_scores["gameplay"]["Mortal Kombat (1992 video game)"],

In [None]:
# Order the gameplay scores by the magnitude of neg * compound, largest magnitude first
# (the key is negated so the default ascending sort yields a descending order).
sorted(sentiment_scores["gameplay"].items(), key=lambda x: -abs(((x[1]["neg"]) * x[1]["compound"])))

In [None]:
# Word-count bounds applied to the combined text of the selected subsections.
constrains = {
    "min_length": 100,
    "max_length": 10000
}
# Each name must appear only once: the helpers concatenate the text of every
# listed subsection, so a repeated name counts that section's words twice and
# feeds its text to the sentiment analyzer twice.
# NOTE(review): 'plot and gameplay' was listed twice; the second occurrence may
# have been meant as a different section name - confirm against unique_keys.
subsections = ['gameplay', 'plot', 'story', 'synopsis', 'plot and gameplay']

filtered_by_length = filter_entries_by_length(game_data, subsections=subsections, **constrains)

In [None]:
# Score the length-filtered games on the combined text of `subsections`.
# The result is cached as JSON: if the file already exists it is reused as-is,
# even when the filter settings or subsections have changed since it was written.
sentiment_filtered_by_length = get_or_create_value( lambda: get_sentiment_scores(filtered_by_length, subsections), file_path= "filtered_gameplay_sentiment.json")

In [None]:
# Plot the four analyzer scores plus the custom "neg_compound" score
# (five plots in a 2-column grid; the unused sixth cell is switched off).
create_sentiment_histograms(sentiment_data=sentiment_filtered_by_length, sentiments_to_plot= ['pos', 'neu', 'neg', 'compound', "neg_compound"])

In [None]:
# Print the 50 lowest and 50 highest length-filtered games by compound score.
print_top_bottom_sentiment_games(sentiment_filtered_by_length, "compound", n = 50)

In [None]:
# Build a copy of each length-filtered entry with its compound score added under
# a new 'sentiment' key ({**data, ...} creates a new dict, leaving the original
# entry untouched), and cache the result as JSON. An existing cache file is reused as-is.
game_data_with_vader_sentiment_on_gameplay = get_or_create_value(lambda: {
    game: {**data, 'sentiment': sentiment_filtered_by_length[game]["compound"]}
    for game, data in filtered_by_length.items() }, "game_data_with_vader_sentiment_on_gameplay.json")

In [None]:
# Print the 50 lowest and 50 highest length-filtered games by the custom "neg_compound" score.
print_top_bottom_sentiment_games(sentiment_filtered_by_length, "neg_compound", n = 50)

In [None]:
import pandas as pd

def compare_violence_scores(data, most_violent, least_violent):
    """
    Tabulate the average sentiment scores of two groups of games.

    Games that are not present in ``data`` are ignored; a group with no
    matching game gets an average of 0 for every score.

    :param data: Dictionary mapping a game to its score dictionary
                 (keys 'neg', 'neu', 'pos', 'compound', 'neg_compound')
    :param most_violent: Game titles forming the "most violent" group
    :param least_violent: Game titles forming the "least violent" group
    :return: DataFrame with one row per score and the columns 'Score',
             'Average Most Violent', 'Average Least Violent' and
             'Difference (Most - Least)'
    """
    score_names = ['neg', 'neu', 'pos', 'compound', "neg_compound"]

    def mean_of(values):
        # An empty group averages to 0 instead of dividing by zero
        if not values:
            return 0
        return sum(values) / len(values)

    def group_means(games):
        # Keep only the games that actually have scores, then average per score
        present = [data[game] for game in games if game in data]
        return [mean_of([entry[name] for entry in present]) for name in score_names]

    table = pd.DataFrame({
        'Score': score_names,
        'Average Most Violent': group_means(most_violent),
        'Average Least Violent': group_means(least_violent),
    })

    # Positive values mean the "most violent" group scores higher
    table['Difference (Most - Least)'] = table['Average Most Violent'] - table['Average Least Violent']

    return table

# Hand-picked reference titles for the two comparison groups. Titles that are
# missing from the scored data are skipped by compare_violence_scores.
most_violent = ["Doom (2016 video game)", "Grand Theft Auto V", "Mortal Kombat (1992 video game)", "God of War (2005 video game)", "Manhunt", "Gears of War (video game)", "Call of Duty 4: Modern Warfare", "Dead Space (2008 video game)", "Resident Evil (1996 video game)", "Hotline Miami"]
least_violent = ["Animal Crossing: New Horizons", "Stardew Valley", "The Sims 4", "Minecraft", "Tetris", "Monument Valley (video game)", "Super Mario Odyssey", "Journey (2012 video game)", "Katamari Damacy", "Fez (video game)"]
# The returned DataFrame is the cell output.
compare_violence_scores(sentiment_filtered_by_length, most_violent, least_violent)

In [None]:
def rank_games_by_scores(data, game_lists):
    """
    Look up where the given games rank among all scored games.

    Ranks are 1-based positions in ascending order of the score, so rank 1 is
    the game with the lowest (most negative) score. Games with equal scores
    keep the order they have in ``data``.

    :param data: Dictionary mapping a game to its score dictionary
                 (keys 'neg_compound' and 'compound' are read)
    :param game_lists: Iterable of lists of game titles to look up
    :return: DataFrame with one row per requested game and the columns 'Game',
             'Neg Rank' (rank by 'neg_compound') and 'Compound Rank'
             (rank by 'compound'); ranks are missing for games not in ``data``
    """
    # Both orderings are ascending; sorted() is stable, so ties keep data order
    by_neg_compound = sorted(data, key=lambda game: data[game]['neg_compound'])
    by_compound = sorted(data, key=lambda game: data[game]['compound'])

    # Precompute game -> rank once instead of scanning the sorted list per game
    neg_ranks = {game: rank for rank, game in enumerate(by_neg_compound, 1)}
    compound_ranks = {game: rank for rank, game in enumerate(by_compound, 1)}

    rank_data = [
        {
            'Game': game,
            # Despite the column name, this rank is based on 'neg_compound'
            'Neg Rank': neg_ranks.get(game),
            'Compound Rank': compound_ranks.get(game),
        }
        for game_list in game_lists
        for game in game_list
    ]

    # Create the DataFrame from the list of rank data
    return pd.DataFrame(rank_data)

# Rank the titles of both hand-picked lists among the length-filtered scores;
# the returned DataFrame is the cell output.
rank_games_by_scores(sentiment_filtered_by_length, [most_violent, least_violent])


In [None]:
import matplotlib.pyplot as plt
import seaborn as sns

# Function to plot score distributions for the most and least violent games.
def plot_score_distributions(data, most_violent, least_violent):
    """
    Plot score distributions for the most and least violent games.

    Two density histograms are shown, one for the 'neg' score and one for the
    'neg_compound' score, each split by group. ``data`` is not modified.

    :param data: Dictionary mapping a game to its score dictionary
    :param most_violent: Game titles forming the "most violent" group
    :param least_violent: Game titles forming the "least violent" group
    :return: None; the figures are displayed with ``plt.show()``
    """
    # Collect one record per game that belongs to either group
    scores_list = []
    for game, scores in data.items():
        if game in most_violent:
            group = 'Most Violent'
        elif game in least_violent:
            group = 'Least Violent'
        else:
            continue
        # Build a new record rather than adding keys to data[game]: writing
        # into the caller's dict would leave 'Game'/'Violence' entries in the
        # shared score data for every later cell.
        scores_list.append({**scores, 'Game': game, 'Violence': group})

    scores_df = pd.DataFrame(scores_list)

    # Plotting the negativity score distribution
    plt.figure(figsize=(10, 5))
    sns.histplot(data=scores_df, x='neg', hue='Violence', element='step', stat='density', common_norm=False)
    plt.title('Negativity Score Distribution')
    plt.show()

    # Plotting the neg_compound score distribution (title names the plotted column)
    plt.figure(figsize=(10, 5))
    sns.histplot(data=scores_df, x='neg_compound', hue='Violence', element='step', stat='density', common_norm=False)
    plt.title('Neg-Compound Score Distribution')
    plt.show()

# Plot the distributions for the hand-picked lists, using the length-filtered sentiment scores
plot_score_distributions(sentiment_filtered_by_length, most_violent, least_violent)
