In [2]:
from llama_index.readers.google import GoogleDriveReader
loader = GoogleDriveReader()

  from .autonotebook import tqdm as notebook_tqdm


In [3]:
help(GoogleDriveReader)

Help on class GoogleDriveReader in module llama_index.readers.google.drive.base:

class GoogleDriveReader(llama_index.core.readers.base.BaseReader)
 |  GoogleDriveReader(credentials_path: str = 'credentials.json', token_path: str = 'token.json', pydrive_creds_path: str = 'creds.txt') -> None
 |  
 |  Google drive reader.
 |  
 |  Method resolution order:
 |      GoogleDriveReader
 |      llama_index.core.readers.base.BaseReader
 |      abc.ABC
 |      builtins.object
 |  
 |  Methods defined here:
 |  
 |  __init__(self, credentials_path: str = 'credentials.json', token_path: str = 'token.json', pydrive_creds_path: str = 'creds.txt') -> None
 |      Initialize with parameters.
 |  
 |  load_data(self, folder_id: str = None, file_ids: List[str] = None, mime_types: List[str] = None) -> List[llama_index.core.schema.Document]
 |      Load data from the folder id and file ids.
 |      
 |      Args:
 |          folder_id: folder id of the folder in google drive.
 |          file_ids: file i

In [3]:
help(loader.load_data)

Help on method load_data in module llama_index.readers.google.drive.base:

load_data(folder_id: str = None, file_ids: List[str] = None, mime_types: List[str] = None) -> List[llama_index.core.schema.Document] method of llama_index.readers.google.drive.base.GoogleDriveReader instance
    Load data from the folder id and file ids.
    
    Args:
        folder_id: folder id of the folder in google drive.
        file_ids: file ids of the files in google drive.
        mime_types: the mimeTypes you want to allow e.g.: "application/vnd.google-apps.document"
    Returns:
        List[Document]: A list of documents.



In [11]:
def load_data(folder_id: str):
    docs = loader.load_data(folder_id=folder_id)
    for doc in docs:
        # print(doc.metadata)
        print(doc.id_)
    return docs


docs = load_data(folder_id="1RFhr3-KmOZCR5rtp4dlOMNl3LKe1kOA5")

1-ZGC-6UZC_lq_iImaW4nvgFct_jlWpJEZhHwKJHS6Mc
1Dte8R_SjzzQtoq5go8l0I9--U9P5fssdvusSGyiqAw4
1qSzKrTyj30SUy93zN03st2f8CFBGeScRUZ0ogGb-P3E
1ce7wdv5LO8nWHbx-TnjVn7N-b4w4IUmRmB6NuOZSqj4
1TGiETOFt86El-hI4FbE5lYjo65Cw_l5b6yjKlvl7TVg


## Google Drive API Experiments 

In [1]:
import os.path

from google.auth.transport.requests import Request
from google.oauth2.credentials import Credentials
from google_auth_oauthlib.flow import InstalledAppFlow
from googleapiclient.discovery import build
from googleapiclient.errors import HttpError


In [7]:


# If modifying these scopes, delete the file token.json.
CLIENT_SECRET = 'client_secret_944885290760-eol8kggkm9kv1bgmlpvisr619v2mk4ul.apps.googleusercontent.com.json'

credential=None
SCOPES = ["https://www.googleapis.com/auth/drive.metadata.readonly"]
if os.path.exists("token.json"):
  credential = Credentials.from_authorized_user_file("token.json", SCOPES)
# If there are no (valid) credentials available, let the user log in.
if not credential or not credential.valid:
  if credential and credential.expired and credential.refresh_token:
    credential.refresh(Request())
  else:
    flow = InstalledAppFlow.from_client_secrets_file(
        CLIENT_SECRET, SCOPES
    )
    credential = flow.run_local_server(port=0)
  # Save the credentials for the next run
  with open("token.json", "w") as token:
    token.write(credential.to_json())

try:
  service = build("drive", "v3", credentials=credential)

  # Call the Drive v3 API
  results = (
      service.files()
      .list(pageSize=10, fields="nextPageToken, files(id, name)")
      .execute()
  )
  items = results.get("files", [])

  if not items:
    print("No files found.")
  else:
    print("Files:")
    for item in items:
      print(f"{item['name']} ({item['id']})")
except HttpError as error:
  # TODO(developer) - Handle errors from drive API.
  print(f"An error occurred: {error}")




Files:
Example (1XRYcaHI_5nrg_Ht82hF9q42wuzVbtmqZUtUecNlsx14)
Storage (1sLmLETXRAUA1NAoJDf6TxnJ21SJiXzQ3)
Llama_Index_Data_Retriever (1fyXW1wVWpfn6isaGAcYEFRNBQlQbGRLG)
workspace.json (12AMj8sIjQn5SUm_m2Ov2dVc3aeVFpSRt)
community-plugins.json (1dhnLlfbpqTAyySZXDRESro48w3wfDtea)
core-plugins-migration.json (1b58eFIStIsaiA-vhdQ9y13Eq1pswDfqB)
core-plugins.json (1Un9UDfv-wkpI7vKuG853OakyWLoPes-A)
data.json (1CmgpShmDhqcdHpYUdbfMhEzNvwyIZ_HN)
data.json (1ioKNPhTFXa5ZjvkE6Z2_gB2XzMB8Yvb9)
data.json (1KScy5Yj4Guu81olNk_F-uMfOXlNufHY8)


## Project Experiments 

In [1]:
import chromadb
from llama_index.vector_stores.chroma import ChromaVectorStore
from llama_index.core.schema import BaseNode
from llama_index.core.embeddings import BaseEmbedding
from llama_index.core import StorageContext, VectorStoreIndex 
from llama_index.vector_stores.chroma import ChromaVectorStore
from constants import EMBED_MODEL

from typing import Union,Sequence,Optional
from pathlib import Path 


class ChromaVectorStoreIndex(object):
    '''This class is a simple control tool for ChromaDB 
    '''
    
    def __init__(self,nodes:Sequence[BaseNode],persist_dir : Union[Path,str], collection:str='default',**kwargs):    
       
        _persist_dir:str = persist_dir if type(persist_dir)==str else persist_dir.resolve().__str__()
        self.chroma_client = chromadb.PersistentClient(path=_persist_dir)
        self.chroma_collection = self.chroma_client.get_or_create_collection(collection)
        self.chroma_vector_store = ChromaVectorStore(chroma_collection=self.chroma_collection)    
        if 'embed_model' not in kwargs :
            kwargs['embed_model'] = EMBED_MODEL
        kwargs['nodes']=nodes
        kwargs['storage_context']=StorageContext.from_defaults(
                vector_store=self.chroma_vector_store) 
        print(_persist_dir)
        self.__index = VectorStoreIndex(**kwargs)
      
    def get_chroma_index(self):
        return self.__index
    
    def load_vector_store(self, persist_dir:Union[Path,str], collection_name:str,embed_model:Optional[BaseEmbedding]=None) :
        print("Create create_index_from_documents call Chroma Vector Store")
        
        _persist_dir:str = persist_dir if type(persist_dir)==str else persist_dir.resolve().__str__()        
        client = chromadb.PersistentClient(path=_persist_dir)
        chroma_collection = client.get_or_create_collection(collection_name)
        vector_store = ChromaVectorStore(chroma_collection=chroma_collection)
        if not embed_model:
            embed_model = EMBED_MODEL
        index =VectorStoreIndex.from_vector_store(
            vector_store,
            embed_model=embed_model)
        self.__index = index


# lOADING TEXT AFTER SELECT 






  from .autonotebook import tqdm as notebook_tqdm


In [2]:
from llama_index.readers.google import GoogleDriveReader
from constants import GOOGLE_CREDENTIALS_PATH
loader = GoogleDriveReader(credentials_path=GOOGLE_CREDENTIALS_PATH)

def load_data_from_drive_folder(folder_id: str):
    docs = loader.load_data(folder_id=folder_id)
    if not docs :
        raise Exception("Content Load Error","No content is loaded")
        
    return docs



In [3]:
from constants import DRIVE_FOLDER_ID,VECTOR_STORE_PATH
from llama_index.core.node_parser import SentenceSplitter



In [4]:
    documents = load_data_from_drive_folder(DRIVE_FOLDER_ID)    
    node_parser = SentenceSplitter(chunk_size=1024, chunk_overlap=20)

    nodes = node_parser.get_nodes_from_documents(
        documents, show_progress=False
    )
    # print(nodes)
    chroma_index = ChromaVectorStoreIndex(nodes,persist_dir=VECTOR_STORE_PATH, collection='Set1')
    index = chroma_index.get_chroma_index()
    # print("****************************************************",index)
    index2 = chroma_index.load_vector_store(persist_dir=VECTOR_STORE_PATH, collection_name ='Set1')
    print(index2)

C:\Users\Admin\AritraRanjanChowdhury\GEN_AI\Document_RAG_from_GDrive_with_Llama_Index\WebService\ChromaDB
Create create_index_from_documents call Chroma Vector Store
None


In [6]:
help(index.as_chat_engine)

Help on method as_chat_engine in module llama_index.core.indices.base:

as_chat_engine(chat_mode: llama_index.core.chat_engine.types.ChatMode = <ChatMode.BEST: 'best'>, llm: Union[str, llama_index.core.llms.llm.LLM, ForwardRef('BaseLanguageModel'), NoneType] = None, **kwargs: Any) -> llama_index.core.chat_engine.types.BaseChatEngine method of llama_index.core.indices.vector_store.base.VectorStoreIndex instance



In [7]:
help(index.as_query_engine)


Help on method as_query_engine in module llama_index.core.indices.base:

as_query_engine(llm: Union[str, llama_index.core.llms.llm.LLM, ForwardRef('BaseLanguageModel'), NoneType] = None, **kwargs: Any) -> llama_index.core.base.base_query_engine.BaseQueryEngine method of llama_index.core.indices.vector_store.base.VectorStoreIndex instance



In [10]:
help(index.as_retriever)


Help on method as_retriever in module llama_index.core.indices.vector_store.base:

as_retriever(**kwargs: Any) -> llama_index.core.base.base_retriever.BaseRetriever method of llama_index.core.indices.vector_store.base.VectorStoreIndex instance

