# Chatbot for game recommendation

In [4]:
#!pip install langchain
#!pip install langchain_openai

In [3]:
#!pip uninstall openai langchain langchain_openai -y
#!pip install openai langchain langchain_openai --upgrade

In [5]:
#!pip show typing_extensions

In [40]:
import json
import os
import random

import numpy as np
from langchain_core.messages import HumanMessage
from langchain_openai import ChatOpenAI
from scipy.spatial.distance import cosine
from sentence_transformers import SentenceTransformer

### Embedding

In [41]:
from sentence_transformers import SentenceTransformer
import json

# Sentence-embedding model used to encode text into vectors; the model files
# are stored under ./cache.
embedding_model = SentenceTransformer(
    'all-MiniLM-L6-v2',
    cache_folder="./cache",
)


In [42]:

# Charger les jeux IGDB
#with open("igdb_games.json", "r") as f:
    #games_data = json.load(f)

#for game in games_data:
 #   game["embedding"] = embedding_model.encode(game.get("summary", "No description")).tolist()  

# Sauvegarde en JSON
#with open("igdb_games_embedding.json", "w") as f:
   # json.dump(games_data, f, indent=4)  

#print("Base de données des jeux enregistrée avec embeddings !")

### Sentiment analysis

In [45]:
# Load steam reviews
import re
import pandas as pd
import nltk
from nltk.corpus import stopwords

# Read the Steam reviews CSV. header=0 treats the first row of the file as a
# header to be replaced by the explicit column names given here.
steam_reviews = pd.read_csv(
    "./steam_reviews.csv",
    header=0,
    names=['id', 'app_id', 'content', 'author_id', 'sentiment'],
)

In [46]:
# Preprocess the reviews
from nltk.tokenize import word_tokenize
from nltk.stem import WordNetLemmatizer
# Fetch the NLTK corpora this cell relies on: 'stopwords' backs `stop_words`,
# and 'wordnet' is required by WordNetLemmatizer (lemmatize() raises
# LookupError when the corpus is missing).
nltk.download('stopwords')
nltk.download('wordnet')
def clean_text(text):
    """Normalise a raw review into space-separated ASCII letters.

    Steps, in order: strip HTML tags, drop URLs, drop @mentions, then replace
    every character that is not an ASCII letter (digits and punctuation
    included) with a space.

    Args:
        text: Any value; it is coerced with ``str()`` first, so non-string
            cells (e.g. NaN or None) do not raise.

    Returns:
        The cleaned string. Case is preserved and runs of spaces are not
        collapsed.
    """
    text = str(text)

    # remove html tags
    text = re.sub(r'<.*?>', '', text)

    # remove urls: consume everything up to the next whitespace so that paths,
    # query strings and characters such as '-', '_', '?', '=' do not survive
    # as stray words
    text = re.sub(r'https?://\S+', '', text)

    # remove @mentions
    text = re.sub(r'@[A-Za-z0-9]+', '', text)

    # keep only ASCII letters; anything else (digits included) becomes a space
    text = re.sub(r'[^a-zA-Z]', ' ', text)

    return text

# English stop words from NLTK, held in a set for membership tests.
stop_words = set(stopwords.words('english'))


def remove_stopwords(tokens):
    """Return the tokens whose lower-cased form is not an English stop word."""
    kept = []
    for tok in tokens:
        if tok.lower() in stop_words:
            continue
        kept.append(tok)
    return kept

def remove_special_chars(tokens):
    """Keep only the tokens made up entirely of alphanumeric characters."""
    alnum_only = []
    for tok in tokens:
        if tok.isalnum():
            alnum_only.append(tok)
    return alnum_only

# Single WordNet lemmatizer instance shared by every lemmatize() call.
lemmatizer = WordNetLemmatizer()


def lemmatize(tokens):
    """Return a list with each token replaced by its WordNet lemma."""
    lemmas = []
    for tok in tokens:
        lemmas.append(lemmatizer.lemmatize(tok))
    return lemmas

def preprocess(tokens):
    """Drop non-alphanumeric tokens, then lemmatize the remaining ones.

    Stop-word removal is not applied here; the variant that also called
    remove_stopwords() is disabled.
    """
    alnum_tokens = remove_special_chars(tokens)
    return lemmatize(alnum_tokens)

# Keep only the review text and its label, clean the text, lower-case the
# label, then preview the first rows.
steam_reviews = steam_reviews[['content', 'sentiment']].assign(
    content=lambda frame: frame['content'].apply(clean_text),
    sentiment=lambda frame: frame['sentiment'].apply(lambda label: str(label).lower()),
)
steam_reviews.head()

[nltk_data] Downloading package stopwords to /root/nltk_data...
[nltk_data]   Package stopwords is already up-to-date!


Unnamed: 0,content,sentiment
0,At least its a counter strike,negative
1,Uh So far my playthrough has not been great...,negative
2,Better mechanics than cs,negative
3,buggy mess and NOT fun to play at all,negative
4,Whoever came up with this is gonna fucking ge...,negative


In [47]:
# Use tfidf to vectorize the reviews
from sklearn.feature_extraction.text import TfidfVectorizer

df = steam_reviews

# TF-IDF vectoriser with its built-in lower-casing, accent stripping and
# custom preprocessing all switched off.
tfidf = TfidfVectorizer(
    lowercase=False,
    preprocessor=None,
    strip_accents=None,
)

review_texts = df['content']
X = tfidf.fit_transform(review_texts)
X.shape  # (n_samples, n_features); the recorded run gave (201151, 97755)

(201151, 97755)

In [48]:
# Train the logistic regression model
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression

from sklearn.metrics import accuracy_score

# Fixed seed so the train/test split, and therefore the reported accuracy, is
# the same on every "Restart & Run All".
SPLIT_SEED = 42

y = df['sentiment']  # target variable
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=SPLIT_SEED)

lr = LogisticRegression(solver='liblinear')
lr.fit(X_train, y_train)    # fit the model
preds = lr.predict(X_test)  # predictions on the held-out split

# accuracy_score takes (y_true, y_pred) in that order
accuracy_score(y_test, preds)

0.8424872733057588

In [49]:
# Predict the sentiment of a review
def predict_sentiment(review):
    """Predict the sentiment label of a single review.

    The text goes through clean_text() and the fitted TF-IDF vectoriser
    before being classified by the logistic-regression model.

    Args:
        review: Raw review text.

    Returns:
        A ``(label, probability)`` tuple where ``probability`` is the model's
        probability for the predicted label.
    """
    features = tfidf.transform([clean_text(review)])
    sentiment = lr.predict(features)[0]
    # Find the probability column through lr.classes_ instead of assuming the
    # classes are exactly ['negative', 'positive'] in that order.
    class_index = list(lr.classes_).index(sentiment)
    proba = lr.predict_proba(features)[0][class_index]
    return sentiment, proba

predict_sentiment("I love game about war")

('positive', 0.9627468844601628)

### Information Extraction

In [17]:
#!pip install fuzzywuzzy
#!python -m spacy download en_core_web_md

Collecting fuzzywuzzy
  Downloading fuzzywuzzy-0.18.0-py2.py3-none-any.whl.metadata (4.9 kB)
Downloading fuzzywuzzy-0.18.0-py2.py3-none-any.whl (18 kB)
Installing collected packages: fuzzywuzzy
Successfully installed fuzzywuzzy-0.18.0
[0m2025-03-25 20:50:46.815641: E external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:9261] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
2025-03-25 20:50:46.815731: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:607] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
2025-03-25 20:50:46.818038: E external/local_xla/xla/stream_executor/cuda/cuda_blas.cc:1515] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
2025-03-25 20:50:46.827821: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use av

In [52]:
import spacy
from spacy.matcher import PhraseMatcher
#python -m spacy download en_core_web_md
from fuzzywuzzy import process  # Pour la recherche approximative

# Load the games JSON file
with open("igdb_games_embedding.json", "r", encoding="utf-8") as file:
    games_data = json.load(file)

# Work on a random subset of the games. A dedicated seeded generator keeps the
# sample identical between runs without touching the global random state, and
# min() avoids the ValueError random.sample raises when the file holds fewer
# games than requested.
SAMPLE_SIZE = 1000
SAMPLE_SEED = 42
sample_games = random.Random(SAMPLE_SEED).sample(
    games_data, min(SAMPLE_SIZE, len(games_data))
)

# Names of the sampled games (entries without a "name" key are skipped)
games_db = [game["name"] for game in sample_games if "name" in game]

# Distinct genre names across the sampled games; built as a set to drop
# duplicates, then sorted so the resulting list order is deterministic.
categories_db = sorted({
    genre["name"]
    for game in sample_games
    if "genres" in game and isinstance(game["genres"], list)
    for genre in game["genres"]
    if "name" in genre
})

In [59]:

# Afficher le résultat
#print("Jeux détectés:", games_db[:10])
#print("Catégories détectées:", categories_db[:10])

In [56]:
# Load the spaCy model
nlp = spacy.load("en_core_web_md")  # or "fr_core_news_md" for French

# PhraseMatcher comparing on the lower-cased token text (case-insensitive)
matcher = PhraseMatcher(nlp.vocab, attr="LOWER")

game_to_use = games_db
# Build the patterns with nlp.make_doc (tokenisation only): matching on LOWER
# needs no tagger/parser/NER output, and running the full pipeline on every
# name is needlessly slow.
patterns_games = [nlp.make_doc(game) for game in game_to_use]
patterns_categories = [nlp.make_doc(cat) for cat in categories_db]

matcher.add("GAME", patterns_games)
matcher.add("CATEGORY", patterns_categories)

def fuzzy_match_game(text, threshold=80):
    """Find the known game whose name is closest to ``text``, tolerating typos.

    Args:
        text: Candidate string to look up; falsy values yield ``None``.
        threshold: Minimum fuzzywuzzy score for a match to be accepted.

    Returns:
        The best-matching name from ``game_to_use`` when its score is at
        least ``threshold``, otherwise ``None``.
    """
    if not text:
        return None

    result = process.extractOne(text, game_to_use)
    if result is None:
        # extractOne returns None when there is nothing to compare against
        return None

    best_match, score = result[0], result[1]
    return best_match if score >= threshold else None

def extract_games_and_categories(text):
    """Detect game names and categories mentioned in ``text``.

    Exact (case-insensitive) phrase matches come from the PhraseMatcher; the
    remaining tokens are then fuzzy-matched against the known game names.

    Args:
        text: User-provided text.

    Returns:
        A dict ``{"games": set, "categories": set}``. Exact matches are
        stored as they appear in ``text``; fuzzy matches are stored under
        the name returned by fuzzy_match_game().
    """
    doc = nlp(text)  # tokenisation

    extracted_info = {"games": set(), "categories": set()}
    # Indices of tokens already covered by an exact match. Tracking token
    # positions (rather than the matched span text) also excludes each word
    # of a multi-word match from the fuzzy pass below.
    exact_token_ids = set()

    # Exact games and categories found by the PhraseMatcher
    for match_id, start, end in matcher(doc):
        label = nlp.vocab.strings[match_id]
        entity = doc[start:end].text

        if label == "GAME":
            extracted_info["games"].add(entity)
        elif label == "CATEGORY":
            extracted_info["categories"].add(entity)

        exact_token_ids.update(range(start, end))

    # Approximate game lookup on the remaining tokens
    for token in doc:
        if (token.i in exact_token_ids   # already matched exactly
                or token.is_punct        # punctuation
                or token.is_stop         # stop words
                or len(token.text) <= 2):  # very short tokens
            continue

        fuzzy_game = fuzzy_match_game(token.text)
        if fuzzy_game:
            extracted_info["games"].add(fuzzy_game)

    return extracted_info


In [62]:
# Demo input; `text` and `result` must be defined here so that this cell runs
# on a fresh kernel instead of relying on leftover state.
text = "J'adore Elden Ring, c'est un excellent RPG. Je cherche un jeu d'action qui ressemble à Hades. J'ai aussi aimé Dark Sols."

result = extract_games_and_categories(text)
print(result)

{'games': {'Jrago II: Guardians of Eden', 'Action Fighter', 'Quiz Magic Academy 6', 'Shin Megami Tensei J', 'Magic of Spring', 'Flowering Nightshade', 'Dark Fantasy: Epic Jigsaw Puzzle', 'AIR Summer Solstice'}, 'categories': set()}


### Function for recommendations

In [67]:
import numpy as np
from scipy.spatial.distance import cosine
import random

# Similarity-based recommendation function
def recommend_game(user_query, top_k=3, negative_threshold=0.70):
    """Recommend games from ``sample_games`` for a free-text user query.

    Games mentioned in the query are looked up, and when any of them has a
    summary those summaries replace the query as the text to embed. Every
    other game with an embedding is then ranked by cosine similarity to it.
    A confidently negative query flips the ranking so that the least similar
    games come first.

    Args:
        user_query: The user's message.
        top_k: Number of recommendations to return.
        negative_threshold: A 'negative' prediction must exceed this
            probability to switch to the dissimilar ranking.

    Returns:
        A list of up to ``top_k`` tuples ``(name, similarity, rating)``.
    """
    # Extract the games (and categories) mentioned in the query
    extract = extract_games_and_categories(user_query)

    # Case-insensitive name -> game lookup: exact matches are reported with
    # the user's casing, which may differ from the stored name.
    games_by_name = {}
    for game in sample_games:
        name = game.get("name")
        if name:
            games_by_name[name.lower()] = game

    # Summaries of the mentioned games, keyed by their stored name
    game_descriptions = {}
    mentioned_names = set()
    for game_name in extract['games']:
        game = games_by_name.get(game_name.lower())
        if game is None:
            continue
        mentioned_names.add(game["name"])
        summary = game.get("summary")
        if summary:  # some games have no summary to embed
            game_descriptions[game["name"]] = summary

    # Sentiment analysis on the original query
    sentiment, proba = predict_sentiment(user_query)
    recommand_similar_game = not (sentiment == 'negative' and proba > negative_threshold)

    if game_descriptions:
        # Use the combined summaries of the mentioned games as the query text
        user_query = " ".join(game_descriptions.values())
    query_embedding = embedding_model.encode(user_query)

    similarities = []
    for game in sample_games:
        name = game.get("name")
        if not name or "embedding" not in game:
            continue
        if name in mentioned_names:
            continue  # do not recommend a game the user just mentioned
        similarity = 1 - cosine(query_embedding, game["embedding"])
        if np.isnan(similarity):
            continue  # degenerate (e.g. all-zero) embedding
        similarities.append((name, similarity, game.get("rating") or 0))

    if recommand_similar_game:
        # Most similar first, higher rating breaking ties
        similarities.sort(key=lambda item: (item[1], item[2]), reverse=True)
    else:
        # Least similar first, still preferring the higher rating on ties
        similarities.sort(key=lambda item: (item[1], -item[2]))

    return similarities[:top_k]

# Test
#user_input = "I don't love war game"
#print("Jeux recommandés :", recommend_game(user_input))


### LLM

In [35]:
class ChatSession:
    """Conversation wrapper that keeps a token-bounded history around an LLM.

    Each history entry is a ``(message, token_count)`` tuple. When the total
    token count exceeds ``memsize``, older messages are summarised by the
    LLM, falling back to dropping the oldest messages.
    """

    def __init__(self, memsize, llm):
        """Create an empty session.

        Args:
            memsize: Maximum total token count allowed in the history.
            llm: Chat model exposing ``invoke(messages)`` and
                ``get_num_tokens(text)``.
        """
        self.llm = llm
        self.memsize = memsize
        self.history = []
        self.llmID = "ASSISTANT:"
        self.userID = "USER:"

    def summarizeStrategy(self):
        """Replace everything but the last message with an LLM summary.

        Falls back to fifoStrategy() when there is nothing to summarise or
        when summary plus last message still exceed ``memsize``.
        """
        if len(self.history) < 2:
            # No earlier messages: skip the pointless LLM call
            self.fifoStrategy()
            return

        summaryPrompt = "Résume la conversation suivante en gardant un maximum d'informations fournies par " + self.userID + " :\n"
        lastMessage, lastMessageLength = self.history[-1]
        history = "".join(m + "\n" for m, _ in self.history[:-1])
        summary = self.llm.invoke([HumanMessage(content=summaryPrompt + "\n" + history)]).content
        summaryLength = self.estimateLength(summary)
        if summaryLength + lastMessageLength > self.memsize:
            self.fifoStrategy()
            return
        self.history = [(summary, summaryLength), (lastMessage, lastMessageLength)]

    def fifoStrategy(self):
        """Drop the oldest messages until the history fits in ``memsize``."""
        # The emptiness check guarantees termination even if memsize < 0
        while self.history and self.historyLength() > self.memsize:
            self.history = self.history[1:]

    def historyLength(self):
        """Return the total token count of the stored messages."""
        return sum(length for _, length in self.history)

    def estimateLength(self, text):
        """Return the token count of ``text`` as reported by the LLM."""
        return self.llm.get_num_tokens(text)

    def addToHistory(self, prompt):
        """Append a message and shrink the history if it exceeds ``memsize``."""
        self.history.append((prompt, self.estimateLength(prompt)))
        if self.historyLength() > self.memsize:
            self.summarizeStrategy()

    def _stripAssistantPrefix(self, content):
        """Remove a leading assistant marker (``self.llmID``) from a reply."""
        # llmID already ends with ':', so test for the marker itself; the
        # previous check appended a second ': ' and could never match.
        if content.startswith(self.llmID):
            return content[len(self.llmID):].lstrip()
        return content

    def chat(self, prompt):
        """Answer ``prompt`` using the history and game recommendations.

        The user message is stored, recommendations are computed with
        recommend_game(), the LLM is asked to phrase a reply, and that reply
        is stored in the history as well.

        Args:
            prompt: The user's message.

        Returns:
            The LLM reply text, without any leading assistant marker.
        """
        self.addToHistory(self.userID + " " + prompt)

        recommendations = recommend_game(prompt)
        games_list = ", ".join(game[0] for game in recommendations)

        chat_prompt = f"""
        L'utilisateur cherche un jeu correspondant à : "{prompt}".
        Voici les jeux recommandés : {games_list}.
        Formule une réponse naturelle et engageante.
        """

        fullPrompt = 'Historique de conversation :\n'
        for msg, _ in self.history:
            fullPrompt += msg + "\n"

        fullPrompt += "\n" + chat_prompt

        resp = self.llm.invoke([HumanMessage(content=fullPrompt)])
        content = self._stripAssistantPrefix(resp.content)

        self.addToHistory(self.llmID + " " + content)
        return content

In [37]:
 # Read the OpenRouter key from the environment instead of committing it to
 # the notebook. The key that was previously hard-coded here should be
 # treated as leaked and revoked.
 llm = ChatOpenAI(
     openai_api_key=os.environ["OPENROUTER_API_KEY"],
     openai_api_base="https://openrouter.ai/api/v1",
     model_name="mistralai/mistral-small-3.1-24b-instruct:free",
 )

In [38]:
# Token budget for the conversation history kept by the chat session.
CONTEXT_LENGTH = 500
session = ChatSession(memsize=CONTEXT_LENGTH, llm=llm)

### Using the chatbot

In [39]:
print("\n Welcome to the video game recommendation chatbot with memory!")
print("\n Ask me a question (e.g.: “I want an RPG with a good scenario”).")
print("\n Type “exit” to quit.\n")

# Interactive loop: read a message, answer it, repeat until the user quits.
while True:
    try:
        # strip() so that " exit " or a trailing newline still quits
        user_input = input("👤 Vous : ").strip()
    except (EOFError, KeyboardInterrupt):
        # stdin closed or the kernel was interrupted: end the session cleanly
        print("À bientôt !")
        break

    if user_input.lower() == "exit":
        print("À bientôt !")
        break
    if not user_input:
        continue  # nothing typed; do not spend an LLM call on it

    response = session.chat(user_input)
    print(f"🤖 Bot : {response}\n")



 Welcome to the video game recommendation chatbot with memory!

 Ask me a question (e.g.: “I want an RPG with a good scenario”).

 Type “exit” to quit.



👤 Vous :  I want a adventure game like naruto or call of duty


🤖 Bot : Salut ! Si tu cherches un jeu d'aventure au style similaire à Naruto ou Call of Duty, je te propose de jeter un œil à ces trois options :

1. **Adventures of the Cat Leopold** : Bien que ce ne soit pas exactement Naruto, ce jeu offre une aventure pleine d'action et des combats intéressants. Tu devras explorer un vaste monde tout en accomplissant des missions captivantes.

2. **Munlay Online** : Il combine des éléments de combat en équipe à la manière de Call of Duty, mais dans un environnement fantastique. C'est parfait pour ceux qui aiment des défis coopératifs et des combats stratégiques.

3. **Piworld** : Il te plaira si tu cherches des aventures qui mélangent exploration et combat. C'est un jeu où chaque décision compte et où tu devras utiliser tes compétences pour progresser.

Ces jeux offrent chacun une expérience unique qui, je l'espère, répondra à ton envie d'aventure et d'action ! Si tu as besoin de plus de recommandations ou de précisions, fais-le moi savoir !



👤 Vous :  exit


À bientôt !
