In [1]:
import os
from google.auth.transport.requests import Request
from google.oauth2.credentials import Credentials
from google_auth_oauthlib.flow import InstalledAppFlow
from googleapiclient.discovery import build
from googleapiclient.errors import HttpError
from googleapiclient.http import MediaIoBaseDownload
import io


import difflib
from googleapiclient.errors import HttpError

In [2]:
# OAuth scope requested for the Drive API (the read-only Drive scope).
# get_drive_service() passes this list both when loading cached credentials
# and when running the OAuth flow.
SCOPES = ['https://www.googleapis.com/auth/drive.readonly']

# Setting up Drive Service

In [6]:
# Path to the OAuth client-secrets JSON file that get_drive_service() hands to
# InstalledAppFlow.from_client_secrets_file(). It is read from the
# GOOGLE_CLIENT_SECRET_FILE environment variable so the path does not have to be
# hard-coded in the notebook; when the variable is unset the original empty
# default is kept.
client_secret = os.environ.get("GOOGLE_CLIENT_SECRET_FILE", r"")

In [7]:
def get_drive_service():
    """Build an authenticated Google Drive v3 API client.

    Cached user credentials are loaded from ``token.json`` when that file
    exists. If they are missing or invalid, they are refreshed (when expired
    and a refresh token is available) or obtained by running the installed-app
    OAuth flow with the client-secrets file at ``client_secret``. Refreshed or
    newly obtained credentials are written back to ``token.json``.

    Returns:
        The service object produced by ``build('drive', 'v3', ...)``.

    Raises:
        FileNotFoundError: if the OAuth flow has to run but ``client_secret``
            is empty or does not point to an existing file.
    """
    creds = None
    # token.json stores the user's access and refresh tokens and is created
    # the first time the authorization flow completes.
    if os.path.exists('token.json'):
        creds = Credentials.from_authorized_user_file('token.json', SCOPES)
    # If there are no (valid) credentials available, let the user log in.
    if not creds or not creds.valid:
        if creds and creds.expired and creds.refresh_token:
            creds.refresh(Request())
        else:
            # Fail early with an actionable message instead of an opaque error
            # from inside the OAuth library when the path was never filled in.
            if not client_secret or not os.path.isfile(client_secret):
                raise FileNotFoundError(
                    "OAuth client secrets file not found: "
                    f"{client_secret!r}. Set `client_secret` to the path of "
                    "the downloaded client-secrets JSON file."
                )
            flow = InstalledAppFlow.from_client_secrets_file(
                client_secret, SCOPES)
            creds = flow.run_local_server(port=0)
        # Save the credentials for the next run
        with open('token.json', 'w') as token:
            token.write(creds.to_json())

    return build('drive', 'v3', credentials=creds)

In [8]:
# Build the shared Drive client. get_file() and fuzzy_match_drive_pdfs() read
# this module-level name rather than taking the service as an argument.
drive_service = get_drive_service()

In [9]:
def search_drive_file(service, file_name):
    """Find a non-trashed Drive file by exact name and return its ID.

    Args:
        service: Drive API service object exposing ``files().list(...)``.
        file_name: Exact file name to look for.

    Returns:
        The ID of the first matching file, or ``None`` when nothing matches
        or the API call raises ``HttpError``. The outcome is also printed.
    """
    # The value sits inside a single-quoted literal in the Drive query, so
    # backslashes and single quotes must be backslash-escaped; otherwise a
    # name such as "O'Brien.pdf" produces a malformed query.
    escaped_name = str(file_name).replace("\\", "\\\\").replace("'", "\\'")
    try:
        results = service.files().list(
            q=f"name='{escaped_name}' and trashed=false",
            spaces='drive',
            fields='files(id, name)').execute()
        items = results.get('files', [])
        if not items:
            print(f"No file found with name: {file_name}")
            return None
        # Several files may share a name; the first one returned is used.
        print(f"Found file: {items[0]['name']} (ID: {items[0]['id']})")
        return items[0]['id']
    except HttpError as error:
        print(f'An error occurred: {error}')
        return None

In [10]:
def download_drive_file_content(service, file_id, mime_type='text/plain'):
    """Download a Drive file and return its bytes decoded as UTF-8 text.

    When ``mime_type`` starts with ``application/vnd.google-apps.`` the file is
    requested through ``export_media`` as ``text/plain``; otherwise the raw
    file is requested through ``get_media``. Progress is printed per chunk.

    Returns the decoded text, or ``None`` after printing a message if any
    exception is raised.

    Note: a later cell in this notebook defines another function with this
    same name; once that cell runs, it replaces this definition.
    """
    try:
        # Google Workspace documents are exported; everything else is fetched raw.
        media_request = (
            service.files().export_media(fileId=file_id, mimeType='text/plain')
            if mime_type.startswith('application/vnd.google-apps.')
            else service.files().get_media(fileId=file_id)
        )

        buffer = io.BytesIO()
        chunked = MediaIoBaseDownload(buffer, media_request)
        while True:
            progress, finished = chunked.next_chunk()
            print(f"Download {int(progress.progress() * 100)}%.")
            if finished is not False:
                break
        return buffer.getvalue().decode('utf-8')
    except HttpError as error:
        print(f'An error occurred during download: {error}')
        return None
    except Exception as e:
        print(f"An unexpected error occurred: {e}")
        return None

In [11]:
import io
import fitz  # PyMuPDF
from googleapiclient.http import MediaIoBaseDownload
from googleapiclient.errors import HttpError

def download_drive_file_content(service, file_id, mime_type='application/pdf'):
    """Download a PDF from Google Drive and return its extracted text.

    Args:
        service: Drive API service object exposing ``files().get_media(...)``.
        file_id: ID of the file to download. A falsy value (for example the
            ``None`` returned by a failed search) short-circuits to ``None``
            without calling the API.
        mime_type: Accepted for interface compatibility; this implementation
            does not read it and always treats the download as a PDF.

    Returns:
        The text of all pages joined with newlines, or ``None`` when no file
        ID was given or any error occurred (a message is printed).
    """
    if not file_id:
        print("No file ID provided; skipping download.")
        return None
    try:
        # Request file download
        request = service.files().get_media(fileId=file_id)
        fh = io.BytesIO()
        downloader = MediaIoBaseDownload(fh, request)
        done = False
        while not done:
            status, done = downloader.next_chunk()
            print(f"Download {int(status.progress() * 100)}%.")

        # Open the PDF from memory with explicit keyword arguments; the
        # context manager closes the document even if extraction fails.
        with fitz.open(stream=fh.getvalue(), filetype="pdf") as pdf_document:
            return "\n".join(page.get_text("text") for page in pdf_document)
    except HttpError as error:
        print(f"An error occurred during download: {error}")
        return None
    except Exception as e:
        print(f"An unexpected error occurred: {e}")
        return None

In [12]:
# Exact Drive file name that the next cell passes to search_drive_file().
file_name_to_find = "Abhinav Rohilla Resume.pdf"

In [13]:
# Drive service
drive_service = get_drive_service()
# Look up the file ID by exact name (None when nothing matches)
file_id = search_drive_file(drive_service, file_name_to_find)
# Download the file and extract its text (None on failure)
file_content = download_drive_file_content(drive_service, file_id)
# Preview the first 200 characters. Guarded because slicing None would raise
# TypeError when the lookup or the download failed.
file_content[:200] if file_content is not None else None

Found file: Abhinav Rohilla Resume.pdf (ID: 1vNVEllr6J6IyX3su_mZQpWyAC5Uju3Kz)
Download 100%.


' \nAbhinav Rohilla \nData Scientist with 6 years of experience in leveraging machine learning, LLMs and \nstatistics to drive impactful business solutions. Proven ability to understand client \nrequiremen'

# Creating Agent

In [14]:
import os
import json
from dotenv import load_dotenv
from openai import OpenAI
import gradio as gr

import ollama
from ollama import ChatResponse
from IPython.display import display, Markdown, clear_output

In [15]:
# System prompt sent as the first message of every chat() conversation.
# The trailing space after the first sentence keeps the two sentences from
# running together ("assistant.Your") in the prompt.
system_message = (
    "You are an AI-powered document retrieval and summarization assistant. "
    "Your primary function is to fetch PDFs from Google Drive, extract relevant text, and generate concise, informative summaries."
)

# Tool 1: Fetch a file by exact name

In [16]:
#Defining TOOL!
def get_file(file_name_to_find):
    """Tool entry point: fetch a Drive file by exact name and return its text.

    Uses the module-level ``drive_service`` to search for the file and, when
    found, download it and extract its content.

    Args:
        file_name_to_find: Exact file name to search for.

    Returns:
        Whatever ``download_drive_file_content`` returns for the file, or
        ``None`` when no file with that name was found.
    """
    file_id = search_drive_file(drive_service, file_name_to_find)
    if file_id is None:
        # search_drive_file already reported the miss; do not issue a download
        # request for a file ID that does not exist.
        return None
    # Get the file and extract its text
    return download_drive_file_content(drive_service, file_id)
# Tool schema for get_file: the name, the description the model sees, and a
# JSON-schema style declaration of the single required string argument.
file_function = dict(
    name="get_file",
    description=(
        "Useful for retrieving a specific document from Google Drive when the user provides an exact filename. \n"
        "            This tool ensures precise file access without searching for similar names.\n"
        "            For example user can ask:\n"
        "            can you fetch Abhinav Rohilla Resume.pdf from my google drive?"
    ),
    parameters=dict(
        type="object",
        properties=dict(
            file_name_to_find=dict(
                type="string",
                description="User input file name",
            ),
        ),
        required=["file_name_to_find"],
        additionalProperties=False,
    ),
)

# Tool 2: Fuzzy-match PDF file names

In [17]:
def fuzzy_match_drive_pdfs(search_term, cutoff=0.4):
    """Return the names of Drive PDFs that resemble ``search_term``.

    Lists every non-trashed PDF visible to the module-level ``drive_service``
    (following ``nextPageToken`` so results beyond the first page are not
    missed) and keeps a name when either the search term is a substring of it
    or the ``difflib.SequenceMatcher`` ratio reaches ``cutoff``. Both checks
    ignore letter case.

    Args:
        search_term: Text to look for in file names.
        cutoff: Minimum similarity ratio (0..1) for a fuzzy match.

    Returns:
        A list of matching file names in the order Drive returned them, or a
        message string when there are no PDFs, no matches, or the API call
        raised ``HttpError``.
    """
    try:
        # Collect PDF names across all result pages.
        file_names = []
        page_token = None
        while True:
            results = drive_service.files().list(
                q="mimeType='application/pdf' and trashed=false",
                fields="nextPageToken, files(id, name)",
                pageToken=page_token,
            ).execute()
            file_names.extend(file["name"] for file in results.get("files", []))
            page_token = results.get("nextPageToken")
            if not page_token:
                break
        if not file_names:
            return "No PDFs found in Google Drive."

        # Compare lowercase forms so "aviation" scores the same as "Aviation",
        # consistent with the case-insensitive substring check.
        needle = search_term.lower()
        filtered_matches = [
            name
            for name in file_names
            if needle in name.lower()
            or difflib.SequenceMatcher(None, needle, name.lower()).ratio() >= cutoff
        ]
        return filtered_matches if filtered_matches else "No relevant matches found."
    except HttpError as error:
        return f"An error occurred: {error}"

# Tool schema for fuzzy_match_drive_pdfs: one required string argument,
# `search_term`; no other properties are allowed.
fuzzy_match_function = dict(
    name="fuzzy_match_drive_pdfs",
    description="Useful for finding similar files in Google Drive based on a search term.",
    parameters=dict(
        type="object",
        properties=dict(
            search_term=dict(
                type="string",
                description="User input search term for fuzzy matching.",
            ),
        ),
        required=["search_term"],
        additionalProperties=False,
    ),
)

In [18]:
# Tool list passed to the chat model: each schema wrapped in a
# {"type": "function", "function": ...} envelope, get_file first.
tools = [
    {"type": "function", "function": schema}
    for schema in (file_function, fuzzy_match_function)
]

In [19]:
def chat(message, history):
    """Answer one user turn, running at most one tool call along the way.

    The conversation sent to the model is the system prompt, the prior
    ``history``, and the new user ``message``. If the model's first reply asks
    for a tool, the request is dispatched through ``handle_tool_call``, the
    assistant message and the tool result are appended, and the model is
    queried again (without tools) for the final answer.

    Returns the text content of the last model reply.
    """
    system_turn = {"role": "system", "content": system_message}
    user_turn = {"role": "user", "content": message}
    conversation = [system_turn] + history + [user_turn]

    reply = ollama.chat(model='llama3.2', messages=conversation, tools=tools)
    print(reply)
    if not reply.message.tool_calls:
        return reply.message.content

    # The model requested a tool: run it and let the model see the result.
    assistant_turn = reply.message
    tool_turn, _tool_output = handle_tool_call(assistant_turn)
    conversation.append(assistant_turn)
    conversation.append(tool_turn)
    print('FINAL Prompt', conversation)
    final_reply = ollama.chat(model='llama3.2', messages=conversation)
    return final_reply.message.content

In [20]:
# Function to flush memory
  # Clears chat history and returns empty response
# Gradio UI with chat and clear button

In [21]:
def flush_memory():
    """Return a new empty list, i.e. an empty chat history."""
    return list()

In [22]:

# Dispatch table used by handle_tool_call: tool name -> implementing callable.
TOOLS = dict(
    get_file=get_file,  # Function to fetch file content
    fuzzy_match_drive_pdfs=fuzzy_match_drive_pdfs,  # Function to fuzzy match PDF names
)

def handle_tool_call(message):
    """Run the first tool call in ``message`` and package its result.

    Only ``message.tool_calls[0]`` is handled. The tool is looked up by name
    in ``TOOLS`` and called with the model-supplied arguments as keyword
    arguments. The model can name a tool that does not exist or pass arguments
    that do not fit the signature; both cases are reported back as the tool
    result instead of raising, so the chat handler keeps working.

    Args:
        message: Assistant message with a non-empty ``tool_calls`` sequence
            whose entries expose ``function.name`` and ``function.arguments``.

    Returns:
        A ``(response, content)`` tuple: ``response`` is the ``role: "tool"``
        message dict to append to the conversation, and ``content`` is the raw
        tool result (or the error text).
    """
    tool_call = message.tool_calls[0]
    tool_name = tool_call.function.name
    arguments = tool_call.function.arguments or {}

    tool = TOOLS.get(tool_name)
    if tool is None:
        content = f"Unknown tool requested: {tool_name}"
    else:
        try:
            content = tool(**arguments)
        except TypeError as error:
            # Typically a missing or unexpected keyword argument from the model.
            content = f"Invalid arguments for tool {tool_name}: {error}"

    response = {
        "role": "tool",
        "content": json.dumps({"file_content": content}),
        'name': tool_name
    }
    return response, content

In [31]:
"""
2. List all aviation topics from my drive
3. any research topics you can find in my drive?
"""

'\n1. can you find all PDFs from my drive on topic Aviation\n2. List all resume files from my drive\n\n'

In [23]:

# Launch the Gradio chat UI; each user turn is handled by chat().
# NOTE(review): chat() concatenates `history` directly with role/content dicts,
# so it presumably relies on type="messages" delivering history in that shape;
# confirm against the installed Gradio version.
gr.ChatInterface(fn=chat, type="messages").launch()
# with gr.Blocks() as demo:
#     # chatbot.render()
#     # clear_button = gr.Button("Clear Memory")  # Add flush button
#     # clear_button.click(flush_memory, inputs=[], outputs=[chatbot])

# demo.launch()


* Running on local URL:  http://127.0.0.1:7860

To create a public link, set `share=True` in `launch()`.




model='llama3.2' created_at='2025-06-07T09:35:16.0860057Z' done=True done_reason='stop' total_duration=2658464000 load_duration=1619506800 prompt_eval_count=325 prompt_eval_duration=234000000 eval_count=23 eval_duration=270000000 message=Message(role='assistant', content='', thinking=None, images=None, tool_calls=[ToolCall(function=Function(name='fuzzy_match_drive_pdfs', arguments={'search_term': 'Aviation'}))])
FINAL Prompt [{'role': 'system', 'content': 'You are an AI-powered document retrieval and summarization assistant.Your primary function is to fetch PDFs from Google Drive, extract relevant text, and generate concise, informative summaries.'}, {'role': 'user', 'content': 'can you find and list all PDFs on Aviation topic'}, Message(role='assistant', content='', thinking=None, images=None, tool_calls=[ToolCall(function=Function(name='fuzzy_match_drive_pdfs', arguments={'search_term': 'Aviation'}))]), {'role': 'tool', 'content': '{"file_content": ["Sustainable Contrails(Aviation).p