In [None]:
# Import necessary libraries
import pandas as pd
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity
import matplotlib.pyplot as plt
from tabulate import tabulate

# Read the games table from disk (file is ISO-8859-1 encoded)
data = pd.read_csv('games_50k.csv', encoding='ISO-8859-1')

# AppIDs are compared as text everywhere below, so store them as strings
data['AppID'] = data['AppID'].astype(str)

# Build one text field per game: Tags, Genres and description joined by single spaces,
# with missing values treated as empty strings
data['Combined'] = data['Tags'].fillna('').str.cat(
    [data['Genres'].fillna(''), data['About the game'].fillna('')],
    sep=' ',
)

# TF-IDF vectorizer that drops English stop words
vectorizer = TfidfVectorizer(stop_words='english')

# Learn the vocabulary and encode every game's combined text as a TF-IDF row
X = vectorizer.fit_transform(data['Combined'])


In [None]:
# Show the dtype of 'AppID' followed by its first ten distinct values, one per line
print(data['AppID'].dtype, data['AppID'].unique()[:10], sep='\n')

In [None]:
# Verify that a specific test AppID exists. A direct membership check replaces
# printing every AppID, which floods the cell output and is impractical to scan by eye.
test_app_id = '240'
print(f"AppID {test_app_id} present in dataset: {test_app_id in set(data['AppID'])}")

Genre-Tag Map

In [4]:
# Define the genre-tag map.
# Each key is a genre name and each value is the list of tag names associated
# with that genre. recommend_games() looks up the input game's primary genre
# here and counts how many of the listed tags a candidate game carries.
# Tags are unique within each genre (a tag may still appear under several genres).
genre_tag_map = {
    "Action": [
        "Third-Person Shooter", "Point & Click", "Action-Adventure", "Shooter",
        "Hack and Slash", "Combat", "Arena Shooter", "Character Action Game",
        "Looter Shooter", "Bullet Hell", "Top-Down Shooter", "Violent", "Open World",
        "Souls-like", "Superhero"
    ],
    "Sports": [
        "Tennis", "Football", "Cricket", "Baseball", "PvP", "Realistic", "Multiplayer",
        "Local Co-Op", "Racing", "Online Co-Op", "3D", "Short", "Singleplayer", "Simulation"
    ],
    "RPG": [
        "Turn-Based Tactics", "Tactical RPG", "Action RPG", "Strategy RPG",
        "Party-Based RPG", "JRPG", "Character Customization", "Multiple Endings",
        "Magic", "Fantasy", "Turn-Based Combat", "Choices Matter"
    ],
    "Indie": [
        "Pixel Graphics", "Roguelike", "Survival", "Crafting", "Narrative",
        "Metroidvania", "Minimalist", "Hand-drawn", "Visual Novel", "Idler",
        "Art", "RPG Maker", "Simulation"
    ],
    "Casual": [
        "Casual", "Puzzle", "Family Friendly", "Trivia", "Relaxing", "Clicker",
        "Idle", "Mouse Only", "Party Game", "Free to Play", "Word Game",
        "Controller", "Tap", "Simple"
    ],
    "Simulation": [
        "2D Platformer", "3D Platformer", "Base-Building", "Life Simulation",
        "Time Management", "Sandbox", "City Builder", "Crafting", "Sports",
        "Farming", "Economic", "Management", "Programming", "VR"
    ],
    "Strategy": [
        "RTS", "Tower Defense", "Turn-Based Strategy", "Grid-Based Movement",
        "Resource Management", "Wargame", "Base-Building", "Simulation",
        "Tactical", "Political", "Grand Strategy", "Economic"
    ],
    "Horror": [
        "Psychological Horror", "Survival Horror", "Atmospheric", "Demons",
        "Zombies", "Dark Fantasy", "Gothic", "Supernatural", "Spooky",
        "Thriller", "Horror", "Haunted", "Psychological"
    ],
    "Adventure": [
        "Action-Adventure", "Exploration", "Story Rich", "Mystery", "Open World",
        "Interactive Fiction", "Point & Click", "Text-Based", "Hidden Object",
        "Adventure", "Classic", "Survival", "Narrative"
    ],
    "Action-Adventure": [
        "Action-Adventure", "Quick-Time Events", "Exploration", "Puzzle",
        "Combat", "Platformer", "Open World", "Adventure", "Mystery",
        "Survival", "Story Rich", "Hidden Object", "Action"
    ],
    "Action-RPG": [
        "Action RPG", "Turn-Based Combat", "Character Customization",
        "Roguelike", "Fantasy", "Multiple Endings", "Dark Fantasy",
        "Story Rich", "Exploration", "Crafting", "Choices Matter",
        "Skill Tree", "Action"
    ]
}


In [5]:
def _split_field(value):
    """Return the stripped, lower-cased, non-empty items of a comma-separated field."""
    if pd.isna(value):
        return []
    return [item.strip().lower() for item in str(value).split(',') if item.strip()]


def _text_or_empty(value):
    """Return ``value`` as a string, mapping a missing value to ''."""
    return '' if pd.isna(value) else str(value)


def recommend_games(app_id, num_recommendations = 10):
    """Recommend games similar to the game identified by ``app_id``.

    The ``num_recommendations`` games with the highest TF-IDF cosine similarity
    to the input game (the input game itself excluded) are selected and then
    re-ordered by a combined score:

        0.4 * (candidate tags listed in genre_tag_map for the input's first genre)
      + 0.9 * cosine similarity
      + 0.3 * (tags shared with the input game)
      + 0.7 * (input's first three tags present on the candidate)
      + 0.2 * (title words shared with the input game)
      + 5 if the first genres match, + 5 if the first tags match.

    All genre/tag comparisons are case-insensitive.

    Parameters
    ----------
    app_id : str or int
        AppID of the input game; converted to ``str`` and stripped.
    num_recommendations : int, default 10
        Number of games to return; values below 1 yield an empty table.

    Returns
    -------
    dict
        ``{"error": "Game ID not found"}`` when the AppID is unknown; otherwise
        ``{"input_game": {...}, "recommendations": DataFrame}`` where
        ``input_game`` holds 'Name' (lower-cased), 'Genres' and 'Tags' strings
        and ``recommendations`` has the columns 'Name', 'Genres', 'Tags'
        (values as stored in ``data``, so missing entries stay NaN).
    """
    app_id = str(app_id).strip()  # Ensure proper format

    # Row *position* of the game: X is indexed by position, not by index label
    id_matches = (data['AppID'].astype(str) == app_id).to_numpy().nonzero()[0]
    if len(id_matches) == 0:
        return {"error": "Game ID not found"}
    pos = int(id_matches[0])

    # Input game's fields, with missing values normalised to ''
    input_row = data.iloc[pos]
    input_game = {
        'Name': _text_or_empty(input_row['Name']).lower(),
        'Genres': _text_or_empty(input_row['Genres']),
        'Tags': _text_or_empty(input_row['Tags']),
    }

    input_genres = _split_field(input_game['Genres'])
    input_tag_list = _split_field(input_game['Tags'])
    primary_genre = input_genres[0] if input_genres else ''  # Only first genre
    primary_tag = input_tag_list[0] if input_tag_list else ''  # Only first tag
    input_tags = set(input_tag_list)
    top_tags = input_tag_list[:3]
    input_title_words = set(input_game['Name'].split())

    # Tags expected for the primary genre. The lookup is case-insensitive so that
    # keys such as "RPG" or "Action-Adventure" are found, and the tags are
    # lower-cased so they can intersect with the lower-cased candidate tags.
    genre_specific_tags = set()
    for genre_name, mapped_tags in genre_tag_map.items():
        if genre_name.lower() == primary_genre:
            genre_specific_tags = {tag.lower() for tag in mapped_tags}
            break

    # Cosine similarity between the input game and every game
    cosine_sim = cosine_similarity(X[pos:pos + 1], X).flatten()

    # Most similar first; drop the input game explicitly instead of assuming it
    # sorts last (ties with identical texts would otherwise leak it into the results)
    wanted = max(int(num_recommendations), 0)
    ranked = cosine_sim.argsort()[::-1]
    candidates = ranked[ranked != pos][:wanted]

    # Score each candidate on tag priority, title similarity and first genre/tag match
    scored = []
    for i in candidates:
        candidate = data.iloc[i]
        candidate_genres = _split_field(candidate['Genres'])
        candidate_tag_list = _split_field(candidate['Tags'])
        candidate_tags = set(candidate_tag_list)
        candidate_title_words = set(_text_or_empty(candidate['Name']).lower().split())

        # Empty fields never count as a match
        genre_match = bool(primary_genre) and bool(candidate_genres) and primary_genre == candidate_genres[0]
        tag_match = bool(primary_tag) and bool(candidate_tag_list) and primary_tag == candidate_tag_list[0]

        tag_overlap = len(input_tags & candidate_tags)
        top_tag_priority = sum(1 for tag in top_tags if tag in candidate_tags)
        title_overlap = len(input_title_words & candidate_title_words)
        genre_specific_tag_match = len(candidate_tags & genre_specific_tags)

        # Combine scores
        score = (genre_specific_tag_match * 0.4) + (cosine_sim[i] * 0.9) + (tag_overlap * 0.3) + (top_tag_priority * 0.7) + (title_overlap * 0.2)
        if genre_match:
            score += 5  # Extra weight for matching first genre
        if tag_match:
            score += 5  # Extra weight for matching first tag

        scored.append((score, int(i)))

    # Highest combined score first; the sort is stable, so equal scores keep
    # their similarity order
    scored.sort(key=lambda pair: pair[0], reverse=True)
    top_positions = [position for _, position in scored]

    # Reformat recommendations to Name, Genres, Tags
    formatted_recommendations = data.iloc[top_positions][['Name', 'Genres', 'Tags']]

    return {
        'input_game': input_game,
        'recommendations': formatted_recommendations
    }


Game Recommender Test

In [None]:
from tabulate import tabulate

# Call the function with an ID (valid or invalid)
result = recommend_games('240')  # Replace with a test ID

# An unknown ID yields a dict with an 'error' key instead of results
if 'error' in result:
    print(result['error'])  # Print the error message
else:
    # Show the input game's fields as a two-column table
    input_game_data = result['input_game']
    input_game_table = [[field, value] for field, value in input_game_data.items()]
    print(tabulate(input_game_table, headers=["Field", "Value"], tablefmt="grid"))

    # Drop recommendations without tags, then remove repeated names.
    # drop_duplicates returns a new frame, so its result must be kept.
    filtered_recommendations = (
        result['recommendations']
        .dropna(subset=['Tags'])
        .drop_duplicates(subset=['Name'])
    )

    # Display recommended games
    print("\nRecommended Games:")
    print(tabulate(filtered_recommendations, headers="keys", tablefmt="grid"))


Printing Main Genre and Main Tags

In [None]:
def print_main_genre_and_tags(app_id, num_tags=5):
    """Print a game's name, its first genre and its first ``num_tags`` tags.

    ``app_id`` is converted to a stripped string and looked up in ``data['AppID']``.
    Prints "Game ID not found" and returns if there is no such game. Missing
    genres/tags are reported with placeholder text. Returns None.
    """
    wanted_id = str(app_id).strip()  # IDs are matched as strings
    id_column = data['AppID'].astype(str)

    # Bail out early when the ID is not in the dataset
    if wanted_id not in id_column.values:
        print("Game ID not found")
        return

    # First row whose AppID matches
    row_label = data.index[id_column == wanted_id].tolist()[0]
    record = data.loc[row_label, ['Name', 'Genres', 'Tags']]

    # Treat missing values as empty text
    raw_genres = record['Genres'] if pd.notna(record['Genres']) else ''
    raw_tags = record['Tags'] if pd.notna(record['Tags']) else ''

    # First genre, lower-cased
    if raw_genres:
        main_genre = raw_genres.split(',')[0].strip().lower()
    else:
        main_genre = 'No genre available'

    # Up to num_tags leading tags, lower-cased
    if raw_tags:
        main_tags = [piece.strip().lower() for piece in raw_tags.split(',')[:num_tags]]
    else:
        main_tags = ['No tags available']

    print(f"Game: {record['Name']}")
    print(f"Main Genre: {main_genre}")
    print(f"Top {len(main_tags)} Tags: {', '.join(main_tags)}")

# Example call: print the main genre and up to five tags for AppID '550'
print_main_genre_and_tags('550', num_tags=5)


Genre-Tag Match Rate

In [None]:
def genre_tag_matching_rate(app_id, max_recommendations=10):
    """Measure how often recommendations share the input game's genre and first tag.

    For every list size n from 1 to ``max_recommendations`` this calls
    ``recommend_games(app_id, num_recommendations=n)`` and computes:

    * genre rate: percentage of recommendations whose genre list contains the
      input game's first genre;
    * tag rate: percentage of recommendations whose first tag equals the input
      game's first tag.

    The rates are plotted against n, and the input game, the last (largest)
    recommendation list and the average rates are printed.

    Parameters
    ----------
    app_id : str or int
        AppID of the input game.
    max_recommendations : int, default 10
        Largest list size to evaluate; must be at least 1.

    Returns
    -------
    tuple
        ``(genre_match_rates, tag_match_rates, avg_genre_match_rate,
        avg_tag_match_rate)``; four ``None`` values when the game is not found
        or the recommendations are not a DataFrame.

    Raises
    ------
    ValueError
        If ``max_recommendations`` is below 1.
    """
    if max_recommendations < 1:
        raise ValueError("max_recommendations must be at least 1")

    genre_match_rates = []
    tag_match_rates = []

    for n in range(1, max_recommendations + 1):
        result = recommend_games(app_id, num_recommendations=n)

        # recommend_games reports an unknown ID through an 'error' key
        if 'error' in result:
            print(result['error'])
            return None, None, None, None

        input_game = result['input_game']
        primary_genre = input_game['Genres'].split(',')[0].strip().lower()
        primary_tag = input_game['Tags'].split(',')[0].strip().lower()

        recommendations_df = result['recommendations']

        if not isinstance(recommendations_df, pd.DataFrame):
            print("Recommendations data is not a DataFrame.")
            # Same arity as the normal return so callers can always unpack four values
            return None, None, None, None

        genre_match_count = 0
        tag_match_count = 0

        for _, row in recommendations_df.iterrows():
            rec_genres = [genre.strip().lower() for genre in str(row['Genres']).split(',')] if pd.notna(row['Genres']) else []
            rec_tag = str(row['Tags']).split(',')[0].strip().lower() if pd.notna(row['Tags']) else ''

            # Empty input fields never count as a match
            if primary_genre and primary_genre in rec_genres:
                genre_match_count += 1
            # Exact comparison: a substring test would treat an empty or partial tag as a match
            if primary_tag and rec_tag == primary_tag:
                tag_match_count += 1

        # Rate over the rows actually returned (may be fewer than n on a small dataset)
        returned = len(recommendations_df)
        genre_match_rate = (genre_match_count / returned) * 100 if returned else 0.0
        tag_match_rate = (tag_match_count / returned) * 100 if returned else 0.0

        genre_match_rates.append(genre_match_rate)
        tag_match_rates.append(tag_match_rate)

    # Calculate averages
    avg_genre_match_rate = sum(genre_match_rates) / len(genre_match_rates)
    avg_tag_match_rate = sum(tag_match_rates) / len(tag_match_rates)

    # Print input game
    print(f"Input Game: {input_game['Name'].title()}")

    # Visualization
    plt.figure(figsize=(10, 6))
    plt.plot(range(1, max_recommendations + 1), genre_match_rates, marker='o', label='Genre Matching Rate', color='skyblue')
    plt.plot(range(1, max_recommendations + 1), tag_match_rates, marker='o', label='Tag Matching Rate', color='salmon')
    plt.xlabel('Number of Recommendations')
    plt.ylabel('Matching Rate (%)')
    plt.title('Genre and Tag Matching Rates by Number of Recommendations')
    plt.ylim(0, 150)
    plt.xticks(range(1, max_recommendations + 1))
    plt.grid(True)
    plt.legend()
    plt.show()

    # Print the games from the last (largest) recommendation list
    print("Recommended Games:")
    for game in recommendations_df['Name']:
        print(f"- {str(game).title()}")

    # Print the averages
    print(f"Average Genre Matching Rate: {avg_genre_match_rate:.2f}%")
    print(f"Average Tag Matching Rate: {avg_tag_match_rate:.2f}%")

    return genre_match_rates, tag_match_rates, avg_genre_match_rate, avg_tag_match_rate

# Example usage: match rates for AppID '550' with the default max_recommendations (10)
genre_match, tag_match, avg_genre, avg_tag = genre_tag_matching_rate('550')


In [None]:
def evaluate_diversity(app_id, num_recommendations=10):
    """Return the genre diversity of the recommendations for ``app_id``.

    Diversity is the number of distinct genres divided by the total number of
    genre entries across all recommended games, so it lies in (0, 1]; it is
    0.0 when the recommendations carry no genre information at all.

    Parameters
    ----------
    app_id : str or int
        AppID of the input game.
    num_recommendations : int, default 10
        Number of recommendations to evaluate.

    Raises
    ------
    ValueError
        If ``recommend_games`` reports an error (e.g. unknown AppID).
    """
    result = recommend_games(app_id, num_recommendations)

    # recommend_games signals failure through an 'error' key rather than raising
    if 'error' in result:
        raise ValueError(result['error'])

    recommended_games = result['recommendations']

    # Flatten every game's comma-separated genres, skipping missing and empty entries
    all_genres = [
        genre.strip()
        for genres in recommended_games['Genres'].dropna()
        for genre in str(genres).split(',')
        if genre.strip()
    ]

    # No genre information: avoid dividing by zero
    if not all_genres:
        return 0.0

    return len(set(all_genres)) / len(all_genres)

# Example usage: genre diversity of the default 10 recommendations for one game
app_id = '550'  # Replace with your app ID
diversity = evaluate_diversity(app_id)
# Report the score rounded to two decimal places
print(f"Genre Diversity: {diversity:.2f}")
