# Games Recommender

## Part 2 - Building the recommender

In [1]:
import pickle
import random
import re
import traceback
from urllib.parse import quote

import numpy as np
import pandas as pd
from IPython.display import Image, display, clear_output, HTML

pd.set_option('display.max_columns',None)

### Loading the datasets and models

In [2]:
# Games table with a cluster label per game; the first CSV column is used as the index
game_clusters=pd.read_csv('datasets/games_wt_clusters.csv',index_col=0)

In [3]:
# Preview the first rows to check the available columns
game_clusters.head()

Unnamed: 0,title,release_date,developers,summary,platforms,genres,rating,plays,playing,backlogs,wishlist,lists,reviews,clusters
0,Elden Ring,2022-02-25,"['FromSoftware', 'Bandai Namco Entertainment']","Elden Ring is a fantasy, action and open world...","['windows pc', 'playstation 4', 'xbox one', 'p...","['adventure', 'rpg']",4.5,21000,4100,5600,5500,4600,3000,28
1,The Legend of Zelda: Breath of the Wild,2017-03-03,"['Nintendo', 'Nintendo EPD Production Group No...",The Legend of Zelda: Breath of the Wild is the...,"['wii u', 'nintendo switch']","['adventure', 'puzzle']",4.4,35000,3100,5600,3000,5100,3000,43
2,Hades,2018-12-07,['Supergiant Games'],A rogue-lite hack and slash dungeon crawler in...,"['windows pc', 'mac', 'playstation 4', 'xbox o...","['adventure', 'brawler', 'indie', 'rpg']",4.3,25000,3500,7300,4000,3200,2100,8
3,Hollow Knight,2017-02-24,['Team Cherry'],A 2D metroidvania with an emphasis on close co...,"['windows pc', 'mac', 'linux', 'nintendo switch']","['adventure', 'indie', 'platform']",4.4,25000,2700,9600,2600,3400,2100,11
4,Undertale,2015-09-15,"['tobyfox', '8-4']","A small child falls into the Underground, wher...","['windows pc', 'mac', 'linux', 'playstation 4'...","['adventure', 'indie', 'rpg', 'turn based stra...",4.2,32000,728,5700,2100,3900,2500,18


In [4]:
# Column names of the platform and genre indicator features.
# NOTE: pickle.load can execute arbitrary code - only load files you trust.
# Context managers make sure the file handles are closed after loading.
with open('datasets/pl_cols.pkl', 'rb') as pl_file:
    pl_cols = list(pickle.load(pl_file))
with open('datasets/ge_cols.pkl', 'rb') as ge_file:
    ge_cols = list(pickle.load(ge_file))

In [5]:
# Fitted scaler for the numeric stats and the model used to predict a game's cluster.
# NOTE: pickle.load can execute arbitrary code - only load files you trust.
# Context managers make sure the file handles are closed after loading.
with open('models/scaler.sav', 'rb') as scaler_file:
    scaler = pickle.load(scaler_file)
with open('models/cluster_model.sav', 'rb') as model_file:
    model = pickle.load(model_file)

### Recommendation using Web Scraping

- Website used for scraping game data: \
https://www.backloggd.com/games/lib/popular/

In [6]:
from bs4 import BeautifulSoup
import requests

In [7]:
# Never truncate long cell contents when a dataframe is displayed
pd.options.display.max_colwidth = None

In [8]:
def clean_name(name):

    '''
    Turns a game title into a url-style slug
    Lowercases the title and drops apostrophes
    Replaces é with e (needed for the pokémon games)
    Collapses every run of other non-alphanumeric characters into a single -
    '''

    slug = name.lower().replace("'", '').replace("é", 'e')
    return re.sub(r'[^a-zA-Z0-9]+', '-', slug)

In [9]:
# Store titles as slugs so they can be compared with, and used in, game-page urls
game_clusters['title'] = game_clusters['title'].map(clean_name)

- The cleaned title must match the slug in the site's game-page URL so that the game's info can be fetched \
https://www.backloggd.com/games/super-mario-world

- Super Mario World -> super-mario-world

In [10]:
def start():

    '''
    Entry point of the interactive recommender
    Prompts the user for a game they have played
    'Quit' (any casing) prints a goodbye message and stops
    Anything else is handed to get_info() as the search term
    '''

    answer = input("Enter the game you have played ('Quit' to exit): \n")

    if answer.lower() == 'quit':
        clear_output(wait=True)
        print('Hope you enjoy the recommendation!')
        print('Have fun playing!')
        return

    get_info(answer)

In [11]:
def _slug_attempts(title):

    '''
    Yields (label, slug) pairs for a search-result title
    Each slug is one spelling of the title to try as a game-page url, in order
    '''

    # mirror's edge -> mirrors-edge
    yield '\nFirst try', clean_name(title)
    # '&' -> 'and'
    yield 'Second try', clean_name(title.replace('&', 'and'))
    # mirror's edge -> mirror-s-edge
    yield 'Third try', re.sub(r'[^a-zA-Z0-9é]+', '-', title).lower().replace('é', 'e')
    # '+' -> 'plus'
    yield 'Last try', clean_name(title.replace('+', 'plus'))


def get_info(game):

    '''
    Searches for the game given by user
    Shows top searches and tells user to select their game from it
    If their game is not available in the top search, -1 to search game again
    Once selected a game from search, calls function get_game_data()
    Makes cluster prediction using the retrieved game data
    Calls function recommend()

    game: search term typed by the user
    If the selection is not a listed number, or no slug spelling of the
    selected title works, prints 'Unavailable!' and shows the search again
    '''

    # escape the user's text so characters such as '?', '#' or spaces
    # stay part of the search term instead of changing the url
    url = f'https://www.backloggd.com/search/games/{quote(game)}'
    response = requests.get(url)
    soup = BeautifulSoup(response.content, "html.parser")

    # dataframe consisting of search results (built in one go, not row by row)
    search = pd.DataFrame({'name': [n.get_text().strip() for n in soup.select('div > a > h3')]})

    display(search)

    # user-input to select a game from search df
    print('If you dont like what you see, type: -1', '\n')
    b = input('Enter the number you see your title on: ').strip()

    if b == '-1':
        clear_output()
        print('Search again- ')
        start()
        return

    # anything that is not a row number of the table counts as "no title"
    try:
        position = int(b)
    except ValueError:
        position = -1
    title = search['name'].iloc[position] if 0 <= position < len(search) else None

    if title is not None:
        for label, slug in _slug_attempts(title):
            print(label)
            try:
                final_df, rating = get_game_data(slug)
                cluster = model.predict(final_df)[0]
                clear_output(wait=True)
                print(title)
                print('Rating: ', rating, '\n')
                recommend(slug, rating, cluster)
                return
            except Exception:
                # best-effort fallback: any failure moves on to the next spelling.
                # Exception (not a bare except) lets KeyboardInterrupt stop the loop.
                continue

    clear_output(wait=True)
    print('Unavailable!', '\n')
    print('Try another...')
    get_info(game)

In [12]:
def get_game_data(name):

    '''
    Scrapes the game page for the slug and builds the single-row input for the model
    Platforms and genres become 0/1 columns taken from pl_cols and ge_cols
    Numeric stats come from numeric_data() and are passed through the scaler
    Returns the combined dataframe and the unscaled rating
    '''

    page = requests.get(f"https://www.backloggd.com/games/{name}/")
    soup = BeautifulSoup(page.content, "html.parser")

    def one_hot(selector, columns):
        # a row of zeros, set to 1 for every scraped label that is a known column
        frame = pd.DataFrame(0, index=[0], columns=columns)
        for tag in soup.select(selector):
            label = tag.get_text().strip().lower().replace(' ', '_')
            if label in frame.columns:
                frame[label] = 1
        return frame

    # categorical data: platforms and genres
    platforms = one_hot('div > a.game-page-platform ', pl_cols)
    genres = one_hot('div > p.genre-tag > a', ge_cols)

    # numerical data, raw and scaled
    num_data = numeric_data(soup)
    num_trans = pd.DataFrame(scaler.transform(num_data), columns=num_data.columns)

    final_data = pd.concat([num_trans, platforms, genres], axis=1)

    return final_data, num_data['rating'][0]

In [13]:
def recommend(name,rating,cluster):
    '''
    Chooses a random game based on the predicted cluster for the user game
    Tries to recommend game with rating higher or equal to user game
    Else recommends any game from the cluster
    Retrieves game data and cover art for the recommended game
    Asks for a new recommendation for the same user game
    If no start again or quit

    name: slug of the user's game (excluded from the recommendations)
    rating: rating of the user's game
    cluster: cluster predicted for the user's game
    Raises IndexError when the cluster contains no other game
    '''

    # the candidate pool only depends on the arguments, so it is built once
    candidates = game_clusters[(game_clusters['clusters']==cluster) & (game_clusters['title']!=name)].reset_index(drop=True)

    # plain lists keep random.choice's emptiness check unambiguous
    preferred = candidates.loc[candidates['rating']>=rating, 'title'].tolist()
    titles = preferred if preferred else candidates['title'].tolist()

    while True:

        game = random.choice(titles)

        url3 = f"https://www.backloggd.com/games/{game}/"
        response = requests.get(url3)
        soup = BeautifulSoup(response.content, "html.parser")

        #getting the title, release date and developers for the recommended game
        title = pd.DataFrame(0, index=[0], columns=['Title','Release Date'])

        title['Title'] = soup.select(' #title > div > div > div > h1')[0].get_text().strip()
        title['Release Date'] = soup.select('span > a')[0].get_text().strip()

        title['Developers & Publishers'] = ', '.join(
            d.get_text().strip() for d in soup.select('#title > div > a:nth-child(n)'))

        #available platforms for the recommended
        title['Available Platforms'] = ', '.join(
            p.get_text().strip() for p in soup.select('div > a.game-page-platform '))

        #genres of the recommended
        title['Genres'] = ', '.join(
            g.get_text().strip() for g in soup.select('div > p.genre-tag > a'))

        print("Here's my recommendation-")

        #getting the ratings and player stats for the recommended
        players = numeric_data(soup)
        players.columns = [i.capitalize() for i in players.columns]

        # cover art: use the last matching image of THIS page only, so a page
        # without a cover neither fails nor shows the previous game's image
        covers = soup.select('#interaction-sidebar > div:nth-child(n) > div > div > div > img')
        img_url = covers[-1].get('src') if covers else None

        #displaying details of the recommended game
        if img_url:
            display(Image(img_url))
        display(title.T.rename(columns={0:'Game Info'}))
        display(players.T.rename(columns={0:'Rating and Stats'}))

        html_link = f'<a href={url3} target="_blank">click here</a>'
        display(HTML("To know more about this game and read some reviews, "
                     +html_link))

        print('\n')
        c = input('Would you like another recommendation your game (Y) or new recommendation (N): ')
        clear_output(wait=True)
        if c.lower() != 'y':
            start()
            break

In [14]:
# multipliers for abbreviated counters; only 'K' is known to appear on the
# site, 'M' is accepted on the assumption that millions are abbreviated alike
_COUNT_SUFFIXES = {'K': 1e3, 'M': 1e6}


def _parse_count(text):

    '''
    Converts a counter such as "728" or "4.1K" into an int
    Parses the text directly instead of evaluating scraped text as an expression
    Rounds rather than truncates so float error cannot lose a unit
    '''

    token = text.strip().replace(',', '')
    multiplier = _COUNT_SUFFIXES.get(token[-1:].upper(), 1)
    if multiplier != 1:
        token = token[:-1]
    return int(round(float(token) * multiplier))


def numeric_data(soup):

    '''
    Reads the rating and the player stats from a parsed game page
    soup: parsed game page (anything with a BeautifulSoup-style select())
    Returns a single-row dataframe with the columns
    rating, plays, playing, backlogs, wishlist, lists, reviews
    Raises IndexError when an expected element is missing from the page
    Raises ValueError when a value cannot be parsed as a number
    '''

    stats = {'rating': float(soup.select('#score > h1')[0].get_text().strip())}

    # plays / playing / backlogs / wishlist are listed in this order on the page
    counters = soup.select('div.col-auto.ml-auto.pl-0 > p')
    for index, column in enumerate(['plays', 'playing', 'backlogs', 'wishlist']):
        stats[column] = _parse_count(counters[index].get_text())

    # side cards read like "4.6K Lists": keep only the leading number
    sidecards = soup.select('div > a > p.game-page-sidecard')
    for index, column in enumerate(['lists', 'reviews']):
        stats[column] = _parse_count(sidecards[index].get_text().strip().split(' ')[0])

    return pd.DataFrame(stats, index=[0])
        

In [15]:
# Launch the interactive session (keeps prompting until the user types 'Quit')
start()

Hope you enjoy the recommendation!
Have fun playing!
