In [318]:
import pandas as pd
import warnings
# Silence every warning for the rest of the session so library warnings do not
# clutter cell output. Note that this hides all categories, not just noise.
warnings.simplefilter("ignore")

In [330]:
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity

In [429]:
# Read only the four columns used by the recommender from the Kaggle dataset.
GAMES_CSV = "/kaggle/input/popular-video-games-1980-2023/games.csv"
GAMES_COLUMNS = ["Title", "Rating", "Genres", "Summary"]

vg = pd.read_csv(GAMES_CSV, usecols=GAMES_COLUMNS)
vg.head()

Unnamed: 0,Title,Rating,Genres,Summary
0,Elden Ring,4.5,"['Adventure', 'RPG']","Elden Ring is a fantasy, action and open world..."
1,Hades,4.3,"['Adventure', 'Brawler', 'Indie', 'RPG']",A rogue-lite hack and slash dungeon crawler in...
2,The Legend of Zelda: Breath of the Wild,4.4,"['Adventure', 'RPG']",The Legend of Zelda: Breath of the Wild is the...
3,Undertale,4.2,"['Adventure', 'Indie', 'RPG', 'Turn Based Stra...","A small child falls into the Underground, wher..."
4,Hollow Knight,4.4,"['Adventure', 'Indie', 'Platform']",A 2D metroidvania with an emphasis on close co...


<div style="border-radius:10px; border:#6B8BA0 solid; padding: 15px; background-color: #F2EADF; font-size:100%; text-align:left">

<h3 align="left"><font color='#6B8BA0'>👀 Features: </font></h3>


1. **Title:** The title or name of the video game. 

2. **Rating:** The rating assigned to the video game. 

3. **Genres:** The different categories or genres to which the video game belongs. This feature categorizes the game based on its fundamental characteristics, gameplay style, or thematic elements. Games can belong to multiple genres.

4. **Summary:** A brief text providing an overview or summary of the video game. This description includes key information about the game's storyline, features, gameplay mechanics, and other relevant details. It serves as a concise introduction to the game.

</div>


In [430]:
# Count the missing values in each column.
vg.isna().sum()

Title       0
Rating     13
Genres      0
Summary     1
dtype: int64

In [431]:
# Drop every row that has a missing value in any column. vg is rebound to the
# result instead of being mutated in place, so the frame produced by the load
# cell is not changed behind any other reference to it.
vg = vg.dropna(axis=0)
vg

Unnamed: 0,Title,Rating,Genres,Summary
0,Elden Ring,4.5,"['Adventure', 'RPG']","Elden Ring is a fantasy, action and open world..."
1,Hades,4.3,"['Adventure', 'Brawler', 'Indie', 'RPG']",A rogue-lite hack and slash dungeon crawler in...
2,The Legend of Zelda: Breath of the Wild,4.4,"['Adventure', 'RPG']",The Legend of Zelda: Breath of the Wild is the...
3,Undertale,4.2,"['Adventure', 'Indie', 'RPG', 'Turn Based Stra...","A small child falls into the Underground, wher..."
4,Hollow Knight,4.4,"['Adventure', 'Indie', 'Platform']",A 2D metroidvania with an emphasis on close co...
...,...,...,...,...
1507,Back to the Future: The Game,3.2,"['Adventure', 'Point-and-Click']",Back to the Future: The Game is one of Telltal...
1508,Team Sonic Racing,2.9,"['Arcade', 'Racing']",Team Sonic Racing combines the best elements o...
1509,Dragon's Dogma,3.7,"['Brawler', 'RPG']","Set in a huge open world, Dragon’s Dogma: Dark..."
1510,Baldur's Gate 3,4.1,"['Adventure', 'RPG', 'Strategy', 'Tactical', '...","An ancient evil has returned to Baldur's Gate,..."


In [432]:
# Number of rows that repeat an earlier row in every column.
vg.duplicated(keep="first").sum()

394

In [None]:
# Keep only the first occurrence of each fully identical row.
vg = vg[~vg.duplicated(keep="first")]
vg

In [None]:
# Overwrite the Summary column with the result of decode() applied to it.
# NOTE(review): decode is not defined in any cell shown here, and this cell has
# no execution count — confirm decode is defined above before a fresh run.
vg.loc[:,"Summary"] = decode(vg["Summary"])

In [435]:
# Renumber the rows 0..n-1 after the row drops above. The lookup cells further
# down take a row label from vg and use it as a row position in the similarity
# matrix, so labels and positions have to agree.
# vg is rebound rather than mutated in place so no other reference to the
# frame is changed as a side effect.
vg = vg.reset_index(drop=True)

In [412]:
# Turn each game summary into a TF-IDF vector, tokenising with lemmatize and
# removing scikit-learn's built-in English stop words.
# NOTE(review): lemmatize is not defined in the cells shown here — confirm it
# is defined before this cell on a fresh kernel. The recorded vocabulary
# starts with whitespace-only entries, so it looks like lemmatize lets blank
# tokens through — TODO confirm and filter them out.
tfidf2 = TfidfVectorizer(
    tokenizer=lemmatize,
    stop_words="english",
)
tfidf_matrix2 = tfidf2.fit_transform(vg["Summary"])
tfidf2.get_feature_names_out()

array([' ', '                          ', '                           ',
       ..., 'zoom', 'zur', 'zx'], dtype=object)

In [413]:
# Pairwise cosine similarity between all summary vectors. With a single
# argument scikit-learn compares the matrix against itself.
cosine_sim2 = cosine_similarity(tfidf_matrix2)
cosine_sim2

array([[1.        , 0.        , 0.0337736 , ..., 0.02851176, 0.        ,
        0.03163872],
       [0.        , 1.        , 0.03250327, ..., 0.03133605, 0.        ,
        0.        ],
       [0.0337736 , 0.03250327, 1.        , ..., 0.05683583, 0.03013112,
        0.05234871],
       ...,
       [0.02851176, 0.03133605, 0.05683583, ..., 1.        , 0.        ,
        0.00986341],
       [0.        , 0.        , 0.03013112, ..., 0.        , 1.        ,
        0.01874014],
       [0.03163872, 0.        , 0.05234871, ..., 0.00986341, 0.01874014,
        1.        ]])

### <p style="font-family:JetBrains Mono; font-weight:bold; letter-spacing: 2px; color:#BE5F78; font-size:140%; text-align:left;padding: 0px; border-bottom: 3px solid #EDCCAF">Suggest - PUBG</p>

<center><img src="https://i.imgur.com/M5zLuie.png"></center>

In [437]:
# Show every game whose title contains "pubg", ignoring case.
pubg_title_mask = vg["Title"].str.contains(r"pubg", case=False, regex=True)
vg.loc[pubg_title_mask].drop_duplicates()

Unnamed: 0,Title,Rating,Genres,Summary
818,PUBG: Battlegrounds,2.4,['Shooter'],PUBG Battlegrounds is a battle royale shooter ...


In [452]:
# Row label of the first game whose title contains "pubg" (case-insensitive).
# The next cell uses it as a row position in cosine_sim2, which relies on vg
# carrying a fresh 0..n-1 index.
pubg_matches = vg.loc[
    vg["Title"].str.contains(r"pubg", regex=True, case=False)
].index
if pubg_matches.empty:
    # Fail with a readable message instead of a bare out-of-bounds error.
    raise IndexError('no game title in vg contains "pubg"')
index2 = pubg_matches[0]

In [453]:
# Similarity of every game to the query game, one row per game in vg order.
similarity_scores2 = pd.DataFrame(cosine_sim2[index2],
                                 columns=["score"])

# Five most similar games. The query game is removed by its own row rather
# than by skipping the first sorted entry: another game with an identical
# summary also scores 1.0 and can sort ahead of it, which would put the query
# game into its own recommendations.
games2 = similarity_scores2["score"].drop(index2).nlargest(5).index

vg['Title'].iloc[games2]

547        Fortnite
589    Apex Legends
370     Rollerdrome
641      Splatoon 2
237        Valorant
Name: Title, dtype: object