# Video Game Recommendation
The following notebook will demonstrate a visual data analysis of various video game data and a video game recommendation algorithm. Selecting a platform from the "Platform" dropdown below will show three visual analyses related to that platform.
After a platform is selected, you may choose a genre from the "Genre" dropdown, at which point the games fitting into those categories will be listed in the "Game" dropdown box. Selecting a game from this box will populate the "Recommended"
dropdown with up to 5 game recommendations. These recommendations are based on multiple similarity factors, including developer, name, critic score, and user score, all of which can be examined in the table to the right of the dropdowns.

In [54]:
%matplotlib widget

import pandas as pd
import matplotlib as mpl
from sklearn.metrics.pairwise import cosine_similarity
from sklearn.feature_extraction.text import CountVectorizer
import numpy as np
import ipywidgets as widgets
import IPython.core.display
import IPython.display


def game_change(change):
    """Observer for the "Game" dropdown: rebuild the recommendations.

    Args:
        change: The change mapping handed to the observer. Only its
            ``"new"`` entry (the newly selected game name) is used.
    """
    newly_selected_game = change["new"]
    refresh_recommended_list(newly_selected_game)


def genre_change(change):
    """Observer for the "Genre" dropdown.

    Stores the newly chosen genre in the module-level ``selected_genre`` and
    then rebuilds the list of games for the current platform/genre pair.

    Args:
        change: The change mapping handed to the observer. Only its
            ``"new"`` entry is used.
    """
    global selected_genre

    # Record the choice first: refresh_games_list() reads selected_genre.
    selected_genre = change["new"]
    refresh_games_list()


def platform_change(change):
    """Observer for the "Platform" dropdown.

    Stores the newly chosen platform, rebuilds the genre dropdown from the
    genres present on that platform, selects the first genre, and then
    refreshes the games list and the plots.

    Args:
        change: The change mapping handed to the observer. Only its
            ``"new"`` entry (the newly selected platform) is used.

    Raises:
        IndexError: If no row of the selected platform has a genre.
    """
    global selected_platform
    global selected_genre
    global genre_dropdown_list

    selected_platform = change["new"]

    platform_frame = videogame_database[videogame_database["Platform"] == selected_platform]

    # Missing genres are read as NaN floats, which cannot be ordered against
    # the genre names (sorting a mixed list raises TypeError), so drop them
    # before sorting.
    genre_dropdown_list = sorted(platform_frame["Genre"].dropna().unique().tolist())

    # Set selected_genre before touching the widget: assigning new options
    # may already fire genre_change(), which refreshes the games list.
    selected_genre = genre_dropdown_list[0]
    genre_dropdown.options = genre_dropdown_list
    genre_dropdown.value = selected_genre

    refresh_games_list()
    update_plots()


def refresh_games_list():
    """Rebuild the "Game" dropdown for the current platform and genre.

    Filters ``videogame_database`` down to the rows matching
    ``selected_platform`` and ``selected_genre``, publishes the result as the
    module-level ``games_frame`` / ``games_list``, loads the sorted names into
    ``games_dropdown`` and refreshes the recommendations for the first game.
    """
    global games_list
    global games_frame

    on_platform = videogame_database["Platform"] == selected_platform
    in_genre = videogame_database["Genre"] == selected_genre
    games_frame = videogame_database[on_platform & in_genre]

    # Alphabetical order for the dropdown.
    games_list = sorted(games_frame["Name"].tolist())
    games_dropdown.options = games_list

    first_game = games_list[0]
    refresh_recommended_list(first_game)


def _token_overlap_similarity(selected_text, candidate_texts):
    """Score each candidate text by cosine similarity to ``selected_text``.

    The vocabulary is learned from ``selected_text`` alone, so only tokens
    that occur in the selected text contribute to a candidate's score.

    Args:
        selected_text: Text of the selected game; may be missing (NaN/None).
        candidate_texts: pandas Series of texts to compare against.

    Returns:
        1-D float array holding one similarity per candidate.
    """
    # CountVectorizer rejects NaN documents, so blank out missing text first.
    candidates = candidate_texts.fillna("").astype(str)
    selected = "" if pd.isna(selected_text) else str(selected_text)

    vectorizer = CountVectorizer()
    try:
        vectorizer.fit([selected])
    except ValueError:
        # Raised when the selected text yields no tokens (empty vocabulary).
        # Nothing can be shared with it, so every candidate scores zero.
        return np.zeros(len(candidates))

    selected_vector = vectorizer.transform([selected])
    candidate_vectors = vectorizer.transform(candidates)
    return cosine_similarity(selected_vector, candidate_vectors)[0]


def _score_similarity(scores, selected_position, scale):
    """Convert absolute score differences to the selected game into similarities.

    Args:
        scores: Series/array of scores convertible to float64.
        selected_position: Positional index of the selected game in ``scores``.
        scale: Divisor applied to the inverted difference (the caller passes
            100 for critic scores and 10 for user scores).

    Returns:
        1-D float array of ``(scale - |selected - score|) / scale``; entries
        where either score is missing are 0 instead of NaN.
    """
    values = np.array(scores).astype("float64")
    similarity = (scale - np.absolute(values[selected_position] - values)) / scale

    # A missing score would otherwise turn the summed similarity into NaN and
    # wipe out the developer/name contribution for that game.
    return np.where(np.isnan(similarity), 0.0, similarity)


def _rank_by_similarity(frame, selected_game_name):
    """Return the games in ``frame`` ranked by similarity to the selected game.

    The result has the columns Similarity, Name, Developer, Critic_Score and
    User_Score, sorted by descending Similarity. It is empty when no row of
    ``frame`` is named ``selected_game_name``.
    """
    columns = ["Name", "Developer", "Critic_Score", "User_Score"]
    is_selected = (frame["Name"] == selected_game_name).to_numpy()

    if not is_selected.any():
        ranked = frame[columns].iloc[:0].copy()
        ranked.insert(0, "Similarity", np.zeros(0))
        return ranked

    # If the name occurs more than once, compare against its first row.
    selected_position = int(np.flatnonzero(is_selected)[0])
    selected_row = frame.iloc[selected_position]

    # "tbd" user scores are counted as 0, as before.
    user_scores = frame["User_Score"].replace(to_replace="tbd", value="0")

    similarity = (
        _token_overlap_similarity(selected_row["Developer"], frame["Developer"])
        + _token_overlap_similarity(selected_row["Name"], frame["Name"])
        + _score_similarity(frame["Critic_Score"], selected_position, 100)
        + _score_similarity(user_scores, selected_position, 10)
    )

    # Work on a copy so the shared games_frame is never modified.
    ranked = frame[columns].copy()
    ranked.insert(0, "Similarity", similarity)
    return ranked.sort_values(by="Similarity", ascending=False)


def refresh_recommended_list(selected_game_name):
    """Recompute and display the recommendations for ``selected_game_name``.

    Every game in the module-level ``games_frame`` is scored against the
    selected game by summing four similarities: developer tokens, name tokens,
    critic score and user score. The six best-scoring rows are stored in
    ``recommended_games_frame`` and shown as a table; up to five names other
    than the selected game are stored in ``recommended_games_list`` and loaded
    into ``recommended_games_dropdown``. Finally the dropdown panel is
    re-rendered and ``update_output()`` is called.

    Missing developers or scores, a selected game that is not in
    ``games_frame``, and categories with a single game produce fewer (possibly
    zero) recommendations instead of raising.

    Args:
        selected_game_name: Name of the game to base the recommendations on.
    """
    global recommended_games_frame
    global recommended_games_list

    ranked = _rank_by_similarity(games_frame, selected_game_name)

    # The table keeps the six best rows (normally the selected game itself
    # plus five recommendations).
    recommended_games_frame = ranked.head(6)

    # Don't include the already selected game in the recommendations.
    other_games = ranked[ranked["Name"] != selected_game_name]
    recommended_games_list = other_games["Name"].head(5).tolist()

    recommended_games_dropdown.options = recommended_games_list
    if recommended_games_list:
        recommended_games_dropdown.value = recommended_games_list[0]

    with dropdown_output:
        dropdown_output.clear_output()

        items1 = widgets.VBox([platform_dropdown, genre_dropdown, games_dropdown, recommended_games_dropdown], layout=widgets.Layout(padding="50px 50px 0px 0px"))
        items2 = widgets.Output()
        with items2:
            display(recommended_games_frame)
        display(widgets.HBox([items1, items2]))

    update_output()


def update_output():
    """Clear the cell's output area and show both output widgets again.

    The dropdown panel (``dropdown_output``) is displayed first, followed by
    the plot panel (``plot_output``).
    """
    IPython.display.clear_output()

    for output_widget in (dropdown_output, plot_output):
        display(output_widget)


def update_plots():
    """Redraw the three charts for ``selected_platform`` inside ``plot_output``.

    Draws, in order: a horizontal bar chart of the mean global sales per
    genre, a pie chart of the number of games per genre, and a scatter plot of
    global sales against critic score. The new figures are stored in the
    module-level ``figure1``, ``figure2`` and ``figure3``; the figures from
    the previous call are closed first. Ends by calling ``update_output()``.
    """
    global figure1
    global figure2
    global figure3

    # Import the submodule explicitly: ``import matplotlib as mpl`` on its own
    # does not guarantee that ``mpl.pyplot`` is available as an attribute.
    import matplotlib.pyplot as plt

    with plot_output:
        plot_output.clear_output()

        # Close the figures left over from the previous redraw. They are
        # looked up by object rather than by the numbers 1-3, because other
        # open figures can shift the numbering.
        for stale_name in ("figure1", "figure2", "figure3"):
            stale_figure = globals().get(stale_name)
            if stale_figure is not None:
                plt.close(stale_figure)

        # Each caption is printed immediately before its figure is created so
        # that it precedes that figure in the output widget.
        print("This bar chart shows the average global sales among games on this platform by each genre available.")
        figure1 = plt.figure(figsize=(10, 7))
        print("This pie chart shows the percentage of games in each genre for this platform.")
        figure2 = plt.figure(figsize=(10, 10))
        print("This scatter plot shows possible correlations between critic scores and global sales.")
        figure3 = plt.figure(figsize=(10, 5))

        platform_frame = videogame_database[videogame_database["Platform"] == selected_platform]
        platform_frame = platform_frame.astype({"Global_Sales": "float64"})

        # groupby() sorts its keys and skips rows without a genre. Labels are
        # taken from the grouped results so they always line up with the
        # plotted values.
        games_by_genre = platform_frame.groupby("Genre")

        # Calculate the average global sales for each genre on the platform.
        mean_global_sales = games_by_genre["Global_Sales"].mean()

        title = "Average Global Sales by Genre on Platform: {0} (In Millions)".format(selected_platform)

        plt.figure(figure1.number)
        plt.barh(mean_global_sales.index.tolist(), mean_global_sales.tolist(), color="blue", align="edge")
        plt.title(title)
        plt.xlabel("Sales")
        plt.ylabel("Genre")

        # Calculate the share of games in each genre on the platform.
        genre_counts = games_by_genre["Name"].count()

        title = "Percentage of Games in Genre on Platform: {0}".format(selected_platform)

        plt.figure(figure2.number)
        plt.pie(genre_counts, labels=genre_counts.index.tolist())
        plt.title(title)
        plt.legend(loc="right")

        # Calculate the scatter of global sales and critic scores on the platform.
        sales = platform_frame["Global_Sales"]
        scores = platform_frame["Critic_Score"]

        title = "Distribution of Sales over Critic Score on Platform: {0}".format(selected_platform)

        plt.figure(figure3.number)
        plt.scatter(scores, sales)
        plt.title(title)
        plt.xlabel("Scores")
        plt.ylabel("Sales")

    update_output()


# Load the data set. The path is relative, so it is resolved against the
# current working directory.
videogame_database = pd.read_csv("Video_Games_Sales_as_at_22_Dec_2016.csv")

# Platforms in order of first appearance; rows without a platform are skipped
# so the dropdown never offers a NaN entry.
platform_dropdown_list = videogame_database["Platform"].dropna().unique().tolist()

# Start on the first platform and its alphabetically first genre.
selected_platform = platform_dropdown_list[0]
selected_platform_frame = videogame_database[videogame_database["Platform"] == selected_platform]

# Missing genres are NaN floats and cannot be sorted together with the genre
# names, so they are dropped before sorting.
genre_dropdown_list = sorted(selected_platform_frame["Genre"].dropna().unique().tolist())

selected_genre = genre_dropdown_list[0]

platform_dropdown = widgets.Dropdown(options=platform_dropdown_list, description="Platform:", value=selected_platform)
genre_dropdown = widgets.Dropdown(options=genre_dropdown_list, description="Genre:", value=selected_genre)

# The game and recommendation dropdowns start with a placeholder entry; they
# are filled in by refresh_games_list() below.
games_list = [None]
games_dropdown = widgets.Dropdown(options=games_list, description="Game:", value=None)

recommended_games_list = [None]
recommended_games_dropdown = widgets.Dropdown(options=recommended_games_list, description="Recommended:", value=None, style={"description_width": "initial"})

# Do initial output setup.
plot_output = widgets.Output()
dropdown_output = widgets.Output()

# Populate the dropdowns before the observers are attached, so this first
# fill does not trigger the change handlers.
refresh_games_list()

platform_dropdown.observe(platform_change, names="value")
genre_dropdown.observe(genre_change, names="value")
games_dropdown.observe(game_change, names="value")

update_plots()
update_output()

Output()

Output()