In [11]:
#!pip install scikit-surprise
import pandas as pd

try:
  from surprise import *
except ImportError:
    !pip install scikit-surprise
    from surprise import *
from surprise import accuracy
from surprise.model_selection import train_test_split,cross_validate,GridSearchCV
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity
from pandas import DataFrame


def gen_rating(hours):
  if(hours <= 100):
    return ((hours - 0) / (100 - 0) ) *4+1
  return 5;

df = pd.read_csv("steam-200k.csv", header = None, names = ['UserID', 'Game', 'Action', 'Hours', 'Other'])
print(df.head())
df['Hours_Played'] = df['Hours'].astype('float32')
df.loc[(df['Action'] == 'purchase') & (df['Hours'] == 1.0), 'Hours_Played'] = 0
df.UserID = df.UserID.astype('int')
df = df.sort_values(['UserID', 'Game', 'Hours_Played'])

clean_df = df.drop_duplicates(['UserID', 'Game'], keep = 'last').drop(['Action', 'Hours', 'Other'], axis = 1)

df_game_id = pd.DataFrame()
df_game_id['Game'] = df['Game']
df_game_id = df_game_id.drop_duplicates(['Game'])
df_game_id['GameID'] = df_game_id.index
df_game_id = df_game_id.sort_values(['GameID','Game'])
df_game_id.reset_index(drop=True, inplace=True)


merged_df = pd.merge(clean_df, df_game_id[['GameID', 'Game']], on='Game')
merged_df['Hours_Played'] = merged_df['Hours_Played'].apply(gen_rating)
df=merged_df
merged_df = merged_df.drop(columns='Game')


print(clean_df.head())
print(merged_df.head())
print(df_game_id.head())
print('#####')
n_users = len(merged_df.UserID.unique())
n_games = len(merged_df.GameID.unique())
sparsity = merged_df.shape[0] / float(n_users * n_games);
print('Number of users: {}'.format(n_users))
print('Number of games: {}'.format(n_games))
print('Number of interactions: {}'.format(len(merged_df)))
print('{:.2%} of the user-item matrix is filled'.format(sparsity))

reader = Reader(rating_scale=(1, 5))

data = Dataset.load_from_df(merged_df[['UserID', 'GameID', 'Hours_Played']], reader)


      UserID                        Game    Action  Hours  Other
0  151603712  The Elder Scrolls V Skyrim  purchase    1.0      0
1  151603712  The Elder Scrolls V Skyrim      play  273.0      0
2  151603712                   Fallout 4  purchase    1.0      0
3  151603712                   Fallout 4      play   87.0      0
4  151603712                       Spore  purchase    1.0      0
       UserID                   Game  Hours_Played
65430    5250            Alien Swarm           4.9
65424    5250        Cities Skylines         144.0
65435    5250         Counter-Strike           0.0
65436    5250  Counter-Strike Source           0.0
65437    5250          Day of Defeat           0.0
    UserID  Hours_Played  GameID
0     5250         1.196   65429
1    76767         1.032   65429
2   298950         1.064   65429
3   975449         1.392   65429
4  1364546         1.036   65429
                                Game  GameID
0  GUILTY GEAR XX ACCENT CORE PLUS R     102
1             Gu

In [12]:
trainset, testset = train_test_split(data, test_size=0.2)

sim_item = {
    "name": "cosine",
    "user_based": False,
}

sim_user = {
    "name": "cosine",
    "user_based": True,
}

benchmark = []
algos = [(KNNBasic(sim_options = sim_user),'KNNBasic User-based'), (KNNBasic(sim_options = sim_item), 'KNNBasic Item-based'),(KNNWithMeans(sim_options = sim_user),'KNNWithMeans User-based'), (KNNWithMeans(sim_options = sim_item), 'KNNWithMeans Item-based'), (SVD(), 'SVD'), (NormalPredictor(), 'NormalPredictor'), (SlopeOne(), 'SlopeOne'),(SVDpp(), 'SVDpp')]
for algorithm, st in algos:
    results = cross_validate(algorithm, data, measures=['RMSE','MAE'], cv=3, verbose=False)
    tmp = pd.DataFrame.from_dict(results).mean(axis=0)
    tmp = pd.concat([tmp,pd.Series(st, index=['Algorithm'])])
    benchmark.append(tmp)
pd.DataFrame(benchmark).set_index('Algorithm').sort_values('test_rmse')

Computing the cosine similarity matrix...
Done computing similarity matrix.
Computing the cosine similarity matrix...
Done computing similarity matrix.
Computing the cosine similarity matrix...
Done computing similarity matrix.
Computing the cosine similarity matrix...
Done computing similarity matrix.
Computing the cosine similarity matrix...
Done computing similarity matrix.
Computing the cosine similarity matrix...
Done computing similarity matrix.
Computing the cosine similarity matrix...
Done computing similarity matrix.
Computing the cosine similarity matrix...
Done computing similarity matrix.
Computing the cosine similarity matrix...
Done computing similarity matrix.
Computing the cosine similarity matrix...
Done computing similarity matrix.
Computing the cosine similarity matrix...
Done computing similarity matrix.
Computing the cosine similarity matrix...
Done computing similarity matrix.


Unnamed: 0_level_0,test_rmse,test_mae,fit_time,test_time
Algorithm,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
SVDpp,0.800078,0.439608,18.334788,7.151919
SVD,0.814787,0.450213,1.861465,0.29618
KNNBasic User-based,0.830335,0.433964,8.344363,9.999701
SlopeOne,0.837254,0.452465,2.119854,5.258794
KNNWithMeans Item-based,0.841906,0.45775,1.9705,6.106755
KNNWithMeans User-based,0.867308,0.471028,8.389267,10.384435
KNNBasic Item-based,0.988275,0.52829,1.925922,5.797064
NormalPredictor,1.20059,0.792866,0.206611,0.412048


In [13]:
param_grid_k = {
    'k': [10, 20],
    'min_k': [1, 3],
    'sim_options': {
        'name': ['cosine'],
        'user_based': [True, False]
    }
}
param_grid_s = {
    'n_factors': [50, 100],
    'n_epochs': [20],
    'lr_all': [0.002, 0.01],
    'reg_all': [0.02, 0.1]
}

param_grid_spp = {
    'n_factors': [50, 100],
    'n_epochs': [20],
    'lr_all': [0.002, 0.01],
    'reg_all': [0.02, 0.1]
}

gsKNN = GridSearchCV(KNNWithMeans, param_grid_k, measures=['rmse'], cv=3)
gsSVD = GridSearchCV(SVD, param_grid_s, measures=['rmse'], cv=3)
gsSVDpp = GridSearchCV(SVDpp, param_grid_spp, measures=['rmse'], cv=3)

gsKNN.fit(data)
gsSVD.fit(data)
gsSVDpp.fit(data)



best_params_k = gsKNN.best_params['rmse']
best_params_s = gsSVD.best_params['rmse']
best_params_spp = gsSVDpp.best_params['rmse']

algo_s = SVD(n_factors=best_params_s['n_factors'], n_epochs=best_params_s['n_epochs'],
             lr_all=best_params_s['lr_all'], reg_all=best_params_s['reg_all'])


algo_k = KNNWithMeans(k=best_params_k['k'], min_k=best_params_k['min_k'], sim_options=best_params_k['sim_options'])
algo_spp = SVDpp(n_factors=best_params_spp['n_factors'], n_epochs=best_params_spp['n_epochs'],
             lr_all=best_params_spp['lr_all'], reg_all=best_params_spp['reg_all'])

print(best_params_k)
print(best_params_s)
print(best_params_spp)

Computing the cosine similarity matrix...
Done computing similarity matrix.
Computing the cosine similarity matrix...
Done computing similarity matrix.
Computing the cosine similarity matrix...
Done computing similarity matrix.
Computing the cosine similarity matrix...
Done computing similarity matrix.
Computing the cosine similarity matrix...
Done computing similarity matrix.
Computing the cosine similarity matrix...
Done computing similarity matrix.
Computing the cosine similarity matrix...
Done computing similarity matrix.
Computing the cosine similarity matrix...
Done computing similarity matrix.
Computing the cosine similarity matrix...
Done computing similarity matrix.
Computing the cosine similarity matrix...
Done computing similarity matrix.
Computing the cosine similarity matrix...
Done computing similarity matrix.
Computing the cosine similarity matrix...
Done computing similarity matrix.
Computing the cosine similarity matrix...
Done computing similarity matrix.
Computing th

In [14]:
algo_s.fit(trainset)
algo_k.fit(trainset)
algo_spp.fit(trainset)

predictions_s = algo_s.test(testset)
predictions_k = algo_k.test(testset)
predictions_spp = algo_spp.test(testset)

print(predictions_s[:10])
print(predictions_k[:10])
print(predictions_spp[:10])

accuracy.rmse(predictions_s)
accuracy.rmse(predictions_k)
accuracy.rmse(predictions_spp)


Computing the cosine similarity matrix...
Done computing similarity matrix.
[Prediction(uid=52321307, iid=73822, r_ui=5.0, est=3.8265628972345453, details={'was_impossible': False}), Prediction(uid=59825286, iid=59846, r_ui=1.1439999961853027, est=1.0627106522802954, details={'was_impossible': False}), Prediction(uid=135147743, iid=142801, r_ui=1.1160000038146973, est=2.043095391858776, details={'was_impossible': False}), Prediction(uid=64350600, iid=29355, r_ui=1.0, est=1.0590221114705476, details={'was_impossible': False}), Prediction(uid=202011768, iid=65433, r_ui=1.0680000019073486, est=1.7668578960008636, details={'was_impossible': False}), Prediction(uid=172381385, iid=55942, r_ui=1.88, est=3.3537897963250325, details={'was_impossible': False}), Prediction(uid=190420357, iid=179599, r_ui=1.0, est=1, details={'was_impossible': False}), Prediction(uid=76892907, iid=186447, r_ui=1.0, est=1.101999200953902, details={'was_impossible': False}), Prediction(uid=107377573, iid=39420, r_ui

0.7936298868981558

In [15]:
import requests

def search_game_by_title(game_title):
    search_url = f"https://store.steampowered.com/api/storesearch/?term={game_title}&cc=us"
    response = requests.get(search_url)

    if response.status_code != 200:
        print(f"Errore: {response.status_code}")
        return None

    search_data = response.json()

    if search_data['total'] == 0:
        print("Nessun gioco trovato")
        return None

    app_id = search_data['items'][0]['id']
    print(f"Gioco: {search_data['items'][0]['name']} (App ID: {app_id})")
    return app_id

def get_game_tags(app_id):
    url = f"http://store.steampowered.com/api/appdetails?appids={app_id}"
    response = requests.get(url)

    if response.status_code != 200:
        print(f"Error: {response.status_code}")
        return

    game_data = response.json()

    if not game_data[str(app_id)]['success']:
        print("App id non valido")
        return

    game_info = game_data[str(app_id)]['data']

    genres = game_info.get('genres', [])

    print("\nGenres:")
    lst=[]
    for genre in genres:
        lst.append(genre['description'])
        print(genre['description'])
    return lst

game_title = "Portal 2"
app_id = search_game_by_title(game_title)

if app_id:
    print(get_game_tags(app_id))

Gioco: Portal 2 (App ID: 620)

Genres:
Action
Adventure
['Action', 'Adventure']


In [16]:


idf_df = df_game_id[['GameID','Game']]
print(idf_df)
print(idf_df.shape)
vectorizer = TfidfVectorizer()

print(idf_df.head())

tag_matrix = vectorizer.fit_transform(idf_df['Game'])

print(DataFrame(tag_matrix.toarray()))
cosine_sim_matrix1 = cosine_similarity(tag_matrix, tag_matrix)

print(cosine_sim_matrix1)


def recommend_games(game_title, games_df, cosine_sim_matrix):
    if game_title not in games_df['Game'].values:
        print(f"Game '{game_title}' not found in the dataset.")
        return None

    game_idx = games_df[games_df['Game'] == game_title].index[0]

    similarity_scores = pd.DataFrame(cosine_sim_matrix[game_idx], columns=["score"])
    similarity_scores = similarity_scores.sort_values("score", ascending=False)
    print(similarity_scores)

    sim_scores = list(enumerate(cosine_sim_matrix[game_idx]))


    sim_scores = sorted(sim_scores, key=lambda x: x[1], reverse=True)

    sim_scores = sim_scores[1:10]

    game_indices = [i[0] for i in sim_scores]

    print(game_title)
    return games_df['Game'].iloc[game_indices]
df = df_game_id
df['tags'] = df['Game'].apply(lambda x: get_game_tags(search_game_by_title(x)))


[1;30;43mOutput streaming troncato alle ultime 5000 righe.[0m
Error: 429
Nessun gioco trovato
Error: 429
Nessun gioco trovato
Error: 429
Nessun gioco trovato
Error: 429
Gioco: Avernum 2: Crystal Souls (App ID: 337850)

Genres:
Adventure
Indie
RPG
Strategy
Gioco: Bierzerkers (App ID: 348460)
Error: 429
Gioco: Death to Spies (App ID: 9800)
Error: 429
Gioco: Death Track®: Resurrection (App ID: 7840)
Error: 429
Gioco: Double Dragon Trilogy (App ID: 314150)
Error: 429
Gioco: EvilQuest (App ID: 263820)
Error: 429
Gioco: Fantasy Wars (App ID: 63900)
Error: 429
Gioco: Haegemonia: Legions of Iron (App ID: 294770)
Error: 429
Gioco: Hunters Of The Dead (App ID: 318570)
Error: 429
Gioco: Hyperdimension Neptunia Re;Birth3 V Generation (App ID: 353270)
Error: 429
Gioco: Off-Road Drive (App ID: 200230)
Error: 429
Gioco: OH! RPG! (App ID: 421810)
Error: 429
Gioco: One Piece Pirate Warriors 3 (App ID: 331600)
Error: 429
Nessun gioco trovato
Error: 429
Nessun gioco trovato
Error: 429
Gioco: Reign: Con

In [17]:
df['tags'] = df['tags'].apply(lambda x: ' '.join(x) if isinstance(x, list) else x)
print(df)
df['tags'] = df.apply(lambda row: str(row['tags']) +" "+ row['Game'] if row['tags'] is not None else row['Game'], axis=1)
print(df)

vectorizer = TfidfVectorizer()

print(idf_df.head())

tag_matrix = vectorizer.fit_transform(df['tags'])

print(DataFrame(tag_matrix.toarray()))

cosine_sim_matrix2 = cosine_similarity(tag_matrix, tag_matrix)

print(cosine_sim_matrix2)

print(recommend_games('Sam & Max 101 Culture Shock', df_game_id, cosine_sim_matrix1))

print(recommend_games('Sam & Max 101 Culture Shock', df, cosine_sim_matrix2))


                                   Game  GameID                           tags
0     GUILTY GEAR XX ACCENT CORE PLUS R     102                         Action
1                Guilty Gear X2 #Reload     112                           None
2         Rebuild 3 Gangs of Deadsville     219  Indie RPG Simulation Strategy
3                 Epic Battle Fantasy 4     275                  Adventure RPG
4                            NOT A HERO     285         Action Adventure Indie
...                                 ...     ...                            ...
5150                        Tompi Jones  199406                           None
5151                       Space Colony  199583            Simulation Strategy
5152                       Life is Hard  199661                           None
5153                  Executive Assault  199665                           None
5154                         Dig or Die  199705                           None

[5155 rows x 3 columns]
                           