In [1]:
import pandas as pd

def categories_to_app_id(filepath: str) -> pd.DataFrame:
    """
    Read the CSV file into a DataFrame, filter based on conditions, and sort the DataFrame.

    Args:
    file_path (str): The file path of the CSV file.

    Returns:
    pd.DataFrame: The filtered and sorted DataFrame.
    """
    # Read the CSV file into a DataFrame
    df = pd.read_csv(filepath)

    # Filter the DataFrame based on conditions
    filtered_df = df[(df['Median playtime forever'] > 10) &
                     (df['Median playtime two weeks'] > 10) &
                     (df['Average playtime two weeks'] > 10) &
                     (df['Estimated owners'] != '0 - 0')]
    
    # Drop rows with missing categories
    filtered_df = filtered_df.dropna(subset=['Categories'])
    
    # Sort the filtered DataFrame by 'Average playtime two weeks' in descending order
    filtered_df = filtered_df.sort_values(by='AppID', ascending=True)

    # Remove extra columns
    filtered_df = filtered_df[['AppID', 'Name', 'Categories']]

    return filtered_df

# Main
categories_to_app_id_df = categories_to_app_id('games.csv')
categories_to_app_id_df.head()

Unnamed: 0,AppID,Name,Categories
39428,10,Counter-Strike,"Multi-player,PvP,Online PvP,Shared/Split Scree..."
49557,70,Half-Life,"Single-player,Multi-player,PvP,Online PvP,Stea..."
9907,80,Counter-Strike: Condition Zero,"Single-player,Multi-player,Valve Anti-Cheat en..."
36775,220,Half-Life 2,"Single-player,Steam Achievements,Steam Trading..."
21610,240,Counter-Strike: Source,"Multi-player,Cross-Platform Multiplayer,Steam ..."


In [2]:
import pandas as pd

def filter_played_games(data: pd.DataFrame) -> pd.DataFrame:
    """
    Filters the input DataFrame to only contain the game and the amount of hours played.

    Args:
    data (pd.DataFrame): The input DataFrame containing steam data.

    Returns:
    pd.DataFrame: A DataFrame containing filtered data with columns User, Game, and Hours.
    """
    filtered_data = data[data['Type'] == 'play'][['User', 'Game', 'Hours']]
    return filtered_data

# Read the CSV file into a DataFrame
steam_data = pd.read_csv('steam-200k.csv')
filtered_steam_data = filter_played_games(steam_data)
filtered_steam_data.head()

Unnamed: 0,User,Game,Hours
1,151603712,The Elder Scrolls V Skyrim,273.0
3,151603712,Fallout 4,87.0
5,151603712,Spore,14.9
7,151603712,Fallout New Vegas,12.1
9,151603712,Left 4 Dead 2,8.9


In [3]:
!pip install scikit-surprise

Defaulting to user installation because normal site-packages is not writeable
Looking in links: /usr/share/pip-wheels


In [27]:
# Import necessary libraries
from surprise import Dataset, Reader, SVD

filtered_steam_data = pd.read_csv('steam-200k.csv')

reader = Reader(rating_scale=(1, 10))  # Default rating scale as a fallback
datasets = {}
for user_id, user_data in filtered_steam_data.groupby('User'):
    max_rating = user_data['Hours'].max()
    datasets[user_id] = Dataset.load_from_df(user_data[['User', 'Game', 'Hours']], Reader(rating_scale=(1, max_rating)))

def get_game_recommendations(user_id):
    # Build an SVD collaborative filtering model
    data = datasets[user_id]
    model = SVD()
    
    # Train the model on the dataset
    trainset = data.build_full_trainset()
    model.fit(trainset)
            
    # Get the inner id of the user
    user_inner_id = trainset.to_inner_uid(user_id)

    # Get the list of all game ids
    all_game_ids = filtered_steam_data['Game'].unique()

    # Remove the games that the user has already rated
    games_rated_by_user = filtered_steam_data[filtered_steam_data['User'] == user_id]['Game']
    game_ids_to_predict = [game_id for game_id in all_game_ids if game_id not in games_rated_by_user]

    # Get the predicted ratings for the games
    game_ratings = [model.predict(user_inner_id, game_id).est for game_id in game_ids_to_predict]

    # Sort the games by predicted ratings and get the top 10 recommendations
    top_n = sorted(zip(game_ids_to_predict, game_ratings), key=lambda x: x[1], reverse=True)[:10]

    return top_n

# Example usage
user_id = 59945701
recommendations = get_game_recommendations(user_id)
print(recommendations)

[('Ultra Street Fighter IV', 20.075171840540897), ('FINAL FANTASY XIII', 9.412356500452885), ('The Elder Scrolls V Skyrim', 8.640108871469765), ("Sid Meier's Civilization V", 7.910995258167773), ('Fallout 4', 7.8584615384615395), ('Spore', 7.8584615384615395), ('HuniePop', 7.8584615384615395), ('Path of Exile', 7.8584615384615395), ('Poly Bridge', 7.8584615384615395), ('Left 4 Dead', 7.8584615384615395)]
