In [13]:
!pip install llama-index-vector-stores-mongodb
!pip install llama-index
!pip install pymongo
!pip install torch
!pip install psutil
!pip install ollama



Configurando conexão MongoDB

In [1]:
import os
import pymongo
from pymongo import MongoClient
from llama_index.vector_stores.mongodb import MongoDBAtlasVectorSearch
from llama_index.core import VectorStoreIndex, StorageContext, SimpleDirectoryReader
from transformers import AutoTokenizer, AutoModelForCausalLM
import torch
import ollama
import psutil

# Read the MongoDB connection string from the environment so that credentials
# never live in the notebook or in its version-control history.
# NOTE(review): the password that used to be hardcoded on this line should be
# treated as leaked and rotated.
MONGODB_URI = os.environ.get("MONGODB_URI")
if not MONGODB_URI:
    raise RuntimeError(
        "Defina a variável de ambiente MONGODB_URI com a URI de conexão do MongoDB."
    )

DB_NAME = "SV-Collections"
EMBEDDINGS_COLLECTION = "Embeddings"

client = MongoClient(MONGODB_URI)
db = client[DB_NAME]
games_db = db['Games']
embeddings_db = db[EMBEDDINGS_COLLECTION]
print("ok")

# Configure the vector store. The database and collection are passed explicitly
# so the store is bound to the same collection as `embeddings_db`; without them
# the store falls back to the library's default database/collection names.
# NOTE(review): the vector index name and the text/metadata keys are left at
# their defaults — confirm they match the Atlas index and the document schema
# before querying through the store.
store = MongoDBAtlasVectorSearch(
    client,
    db_name=DB_NAME,
    collection_name=EMBEDDINGS_COLLECTION,
)
storage_context = StorageContext.from_defaults(vector_store=store)

# Helper that reports how much memory this process is using
def check_memory():
    """Print the resident set size (RSS) of the current process.

    The value is the RSS in bytes divided by 1024 ** 2 and is labelled "MB"
    in the printed message. Nothing is returned.
    """
    bytes_per_mb = 1024 ** 2
    rss_bytes = psutil.Process().memory_info().rss
    print(f"Uso de memória: {rss_bytes / bytes_per_mb} MB")


In [5]:
# Count every document in the Games collection (the empty filter matches all
# documents) and show the total.
total_games = games_db.count_documents({})
print(total_games)

Carregar Modelo de Linguagem

In [2]:
# Pull the 'llama3' model through Ollama and show the response of the pull call.
# NOTE(review): presumably this needs a reachable Ollama server — confirm.
response = ollama.pull('llama3')
print(response)


Carregar dados e criar Embeddings

In [6]:
# Function that generates the embedding for a piece of text
def generate_embeddings(prompt, model_name):
    """Request an embedding for ``prompt`` from an Ollama model.

    Args:
        prompt: Text to embed; passed as ``prompt`` to ``ollama.embeddings``.
        model_name: Model identifier; passed as ``model`` to ``ollama.embeddings``.

    Returns:
        The value stored under the ``'embedding'`` key of the response.

    Raises:
        KeyError: If the response does not contain the ``'embedding'`` key.
    """
    response = ollama.embeddings(model=model_name, prompt=prompt)

    # The raw response is intentionally not printed: it contains the whole
    # vector, and dumping it on every call floods the notebook output.

    # Make sure the 'embedding' key is present before reading it.
    if 'embedding' not in response:
        raise KeyError("A chave 'embedding' não está presente na resposta.")
    return response['embedding']


# Fetch the games from MongoDB and create the embeddings that are still missing
def _join_field(document, key):
    """Join the values stored under ``key`` with ', ' ('' when missing or null)."""
    return ', '.join(str(value) for value in (document.get(key) or []))


def update_game_embeddings(games_db, embeddings_db, batch_size=50, model_name='llama3'):
    """Create and store an embedding for every game that does not have one yet.

    Each document of ``games_db`` is turned into one text block (title,
    description, developers, genres, platforms, rating, price, tags and release
    date), embedded with ``generate_embeddings`` and upserted into
    ``embeddings_db`` keyed by ``game_id``. Games that already have a document
    in ``embeddings_db`` are skipped.

    Args:
        games_db: Collection to read the games from (``find`` is called on it).
        embeddings_db: Collection the embeddings are written to (``find_one``
            and ``update_one`` are called on it).
        batch_size: Batch size handed to ``games_db.find``; also the interval,
            in processed games, between progress messages. ``0`` disables the
            progress messages.
        model_name: Model name forwarded to ``generate_embeddings``.

    Returns:
        None. Progress is reported through ``print``.

    Raises:
        KeyError: If a game document has no ``'_id'``.
    """
    processed_games = 0

    # Iterate the cursor directly instead of polling ``cursor.alive``: ``alive``
    # may still be True when no documents are left, in which case
    # ``cursor.next()`` raises StopIteration (e.g. on an empty collection).
    for game in games_db.find(batch_size=batch_size):
        game_id = game['_id']
        title = game.get('title', '')

        # Skip games that already have an embedding. Only existence matters, so
        # project down to ``_id`` to avoid transferring the stored vector.
        already_stored = embeddings_db.find_one({'game_id': game_id}, projection={'_id': 1})
        if already_stored is not None:
            print(f"Embedding já existe para o jogo: {title}, pulando.")
            continue

        description = game.get('description', '')
        developers = _join_field(game, 'developers')
        genres = _join_field(game, 'genres')
        platforms = _join_field(game, 'platforms')
        rating = str(game.get('rating', ''))
        price = str(game.get('price', ''))
        tags = _join_field(game, 'tags')
        release_date = str(game.get('release_date', ''))

        # Combine all fields into a single text used to generate the embedding.
        combined_text = (
            f"Title: {title}\n"
            f"Description: {description}\n"
            f"Developers: {developers}\n"
            f"Genres: {genres}\n"
            f"Platforms: {platforms}\n"
            f"Rating: {rating}\n"
            f"Price: {price}\n"
            f"Tags: {tags}\n"
            f"Release Date: {release_date}"
        )

        print(f"Gerando embedding para o jogo: {title}")
        embedding = generate_embeddings(combined_text, model_name)

        # Document stored in the embeddings collection.
        embedding_data = {
            'game_id': game_id,
            'title': title,
            'description': description,
            'embedding': embedding
        }

        # Upsert so that re-running the cell never duplicates a game.
        embeddings_db.update_one(
            {'game_id': game_id},
            {'$set': embedding_data},
            upsert=True
        )
        print(f"Embedding salvo com sucesso para o jogo: {title}")

        processed_games += 1
        # Guard against batch_size == 0, which would make the modulo fail.
        if batch_size and processed_games % batch_size == 0:
            print(f"{processed_games} jogos processados até agora.")

# Update the embeddings and store them in the embeddings collection

update_game_embeddings(games_db, embeddings_db)


In [None]:
Verificar Embeddings Salvos

In [7]:
# Check the saved embeddings
def verificar_embeddings_salvos(collection=None):
    """Print how many documents the embeddings collection contains.

    Args:
        collection: Collection to count; ``count_documents`` is called on it.
            Defaults to the notebook-level ``embeddings_db``, which is the
            collection ``update_game_embeddings`` is called with in this
            notebook. The vector store's private ``_collection`` attribute is
            not used because the store is created without a database or
            collection name and so is not tied to ``embeddings_db``.

    Returns:
        None. The count is reported through ``print``.
    """
    if collection is None:
        collection = embeddings_db
    count = collection.count_documents({})
    print(f"Número de embeddings salvos: {count}")

# Report the number of saved embeddings.
verificar_embeddings_salvos()
