In [None]:
import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

import matplotlib.pyplot as plt
import seaborn as sns

from sklearn.metrics.pairwise import cosine_similarity
from sklearn.feature_extraction.text import CountVectorizer

In [None]:
# Location of the dataset CSV under the Kaggle input directory.
DATA_PATH = "/kaggle/input/best-video-games-of-all-time/data.csv"
df = pd.read_csv(DATA_PATH)

In [None]:
# Preview the first rows of the loaded DataFrame to check the columns.
df.head()

# Distribution of Metascore (histogram)

In [None]:
# Histogram of the 'Metascore' column with a KDE curve overlaid, drawn in green.
sns.histplot(data=df, x="Metascore", kde=True, color="g")

# Mean Metascore per Platform (bar plot) and share of entries per Platform (pie chart)

In [None]:
# Aggregate once: mean Metascore per platform and number of rows per platform.
grouped = df.groupby('Platform')
mean_scores = grouped['Metascore'].mean()
platform_counts = df['Platform'].value_counts()

# One figure, two side-by-side panels.
fig, axes = plt.subplots(nrows=1, ncols=2, figsize=(20, 10))
bar_ax, pie_ax = axes

# Left panel: mean Metascore on x, platform on y, each bar annotated with its value.
sns.barplot(x=mean_scores, y=mean_scores.index, ax=bar_ax)
for container in bar_ax.containers:
    bar_ax.bar_label(container, color='black', size=15)

# Right panel: percentage of rows belonging to each platform.
pie_ax.pie(platform_counts, labels=platform_counts.index, autopct='%0.2f%%')

plt.show()

# Text preprocessing

In [None]:
def text_processing(x):
    """Return ``x`` joined into one string with every "[" and "]" removed.

    Parameters
    ----------
    x : str or iterable of str
        Text to clean. A string is processed character by character; for any
        other iterable, only items exactly equal to "[" or "]" are dropped.

    Returns
    -------
    str
        The remaining pieces concatenated in their original order.

    Raises
    ------
    TypeError
        If ``x`` is not iterable, or if it yields items that are not strings.
    """
    # A single filtering pass replaces the repeated "scan, then list.remove"
    # loops, which rescanned the list for every bracket (quadratic in the
    # number of brackets) while producing the same result.
    return "".join(piece for piece in x if piece not in ("[", "]"))

In [None]:
# Strip "[" and "]" from every value of the 'Title' column and store the result as 'new_desc'.
# NOTE(review): similar_game looks games up through the 'Name' column — confirm that
# 'Title' is the text column intended to drive the similarity vectors.
df['new_desc'] = df['Title'].apply(text_processing)

In [None]:
# Bag-of-words counts over 'new_desc': English stop words removed, vocabulary
# capped at 5000 features.
cv = CountVectorizer(stop_words='english', max_features=5000)
term_counts = cv.fit_transform(df['new_desc'])
# Densify the count matrix; one row per DataFrame row, one column per kept term.
vectors = term_counts.toarray()

In [None]:
# Pairwise cosine similarity of the rows of `vectors` against themselves:
# similarity[i][j] scores row i against row j.
similarity = cosine_similarity(vectors)

# Recommendation System function

In [None]:
def similar_game(name, platform, top_n=7):
    """Print the games most similar to the given game on the given platform.

    Reads the module-level ``df`` (columns 'Name' and 'Platform') and the
    module-level ``similarity`` matrix. Row ``i`` of ``similarity`` is assumed
    to describe the ``i``-th row of ``df`` by position (TODO confirm if ``df``
    is ever filtered or re-ordered after the matrix is built).

    Parameters
    ----------
    name : str
        Value to match exactly against the 'Name' column.
    platform : str
        Value to match exactly against the 'Platform' column.
    top_n : int, optional
        Maximum number of similar games to print (default 7). Negative values
        are treated as 0.

    Returns
    -------
    None
        One line "<Name> <Platform>" is printed per similar game, most
        similar first.

    Raises
    ------
    IndexError
        If no row matches both ``name`` and ``platform``.
    """
    # Locate the query row by POSITION: the similarity matrix is indexed by
    # position, so index labels must not be used to address it.
    is_query = ((df['Name'] == name) & (df['Platform'] == platform)).to_numpy()
    hits = is_query.nonzero()[0]
    if len(hits) == 0:
        raise IndexError(
            f"No game named {name!r} on platform {platform!r} in the dataset"
        )
    query_pos = int(hits[0])

    scores = similarity[query_pos]
    # sorted() is stable, so equally similar games keep their dataset order.
    ranked = sorted(range(len(scores)), key=lambda pos: scores[pos], reverse=True)

    # Drop the query row explicitly rather than assuming it sorts first: a
    # different row with an identical vector also scores 1.0 and may precede it.
    limit = max(int(top_n), 0)
    neighbours = [pos for pos in ranked if pos != query_pos][:limit]

    names = df['Name']
    platforms = df['Platform']
    for pos in neighbours:
        print(names.iloc[pos], platforms.iloc[pos])

In [None]:
# Example query: print the games ranked most similar to "Grand Theft Auto IV" on Xbox 360.
similar_game("Grand Theft Auto IV", 'Xbox 360')

In [None]:
# Example query: print the games ranked most similar to "BioShock" on PC.
similar_game("BioShock", "PC")