In [9]:
from pinecone import Pinecone
from langchain.document_loaders.csv_loader import CSVLoader
from langchain_cohere.embeddings import CohereEmbeddings
from langchain_community.vectorstores import FAISS
from langchain_text_splitters import  CharacterTextSplitter
from langchain_pinecone import PineconeVectorStore
from langchain_groq import ChatGroq
import os

Optional flags and directories

In [10]:
# Toggle for the ingestion step: when True the CSV is loaded, split and
# embedded; when False that step is skipped.
LOAD_DATA = False

# Location of the source dataset (a relative path, resolved against the
# current working directory).
csv_file_path = '../Dataset/games.csv'

Load the API keys and initialize the clients, embedding model, vector store, text splitter, and CSV loader needed for embedding/RAG/LLM

In [11]:
# Credentials are read from the environment so no secret is stored in the
# notebook. Each value is None when its variable is unset.
pc_api_key = os.environ.get('PINECONE_API_KEY')
cohere_api_key = os.environ.get('COHERE_API_KEY')
# The Groq key is only read here; nothing in this cell consumes it.
groq_api_key = os.environ.get('GROQ_API_KEY')

# Names of the Pinecone index and of the Cohere embedding model.
index_name = 'general-index'
embed_model_name = 'embed-multilingual-v3.0'

# Pinecone client handle.
pc = Pinecone(api_key=pc_api_key)

# Embedding model handed to the vector store below.
embedding_model = CohereEmbeddings(model=embed_model_name, cohere_api_key=cohere_api_key)

# Vector store bound to `index_name`, embedding with `embedding_model`.
docsearch = PineconeVectorStore(
    index_name=index_name,
    embedding=embedding_model,
    pinecone_api_key=pc_api_key,
)

# Splitter and loader used by the ingestion step (chunks of up to 2000
# characters, no overlap; comma-delimited UTF-8 CSV).
text_splitter = CharacterTextSplitter(chunk_size=2000, chunk_overlap=0)
csv_loader = CSVLoader(
    csv_file_path,
    encoding='utf-8',
    csv_args={'delimiter': ','},
)


Import the dataset from the CSV file and embed it into the vector store (runs only when `LOAD_DATA` is `True`)

In [8]:
if LOAD_DATA:
    # Ingestion step: read the CSV into documents, then split them into chunks
    # with the configured text splitter.
    documents = csv_loader.load()
    docs = text_splitter.split_documents(documents)

    # Insert through the already-configured `docsearch` instance.
    # `from_documents` is a constructor-style classmethod: calling it on the
    # instance builds a second vector store and throws it away, bypassing the
    # settings `docsearch` was created with. `add_documents` embeds the chunks
    # with the store's own embedding model and writes them to its index.
    docsearch.add_documents(docs)
    print('Data embedded...')
    
    


Data embedded...


Initialize LLM