In [None]:
# imports

import os
import glob
from dotenv import load_dotenv
import gradio as gr

In [None]:
# imports for langchain

from langchain.document_loaders import DirectoryLoader, TextLoader
from langchain.text_splitter import CharacterTextSplitter
from langchain.schema import Document
from langchain_openai import OpenAIEmbeddings, ChatOpenAI
from langchain_chroma import Chroma
import numpy as np
from sklearn.manifold import TSNE
import plotly.graph_objects as go
from langchain.memory import ConversationBufferMemory
from langchain.chains import ConversationalRetrievalChain
from langchain.prompts import PromptTemplate

In [None]:


# Notebook-wide settings: the directory name for the persisted Chroma store
# and the OpenAI chat model used by the conversation chain.
db_name = "chroma_vector_db"
MODEL = "gpt-4o-mini"

In [None]:
# Read settings from a file called .env (loaded with override=True), then make
# sure OPENAI_API_KEY is present, falling back to a placeholder when it is not.

load_dotenv(override=True)
if 'OPENAI_API_KEY' not in os.environ:
    os.environ['OPENAI_API_KEY'] = 'your-key-if-not-using-env'

In [None]:
#install google api connector
!pip install --upgrade google-api-python-client google-auth-httplib2 google-auth-oauthlib

# Fetch Email Content

In [None]:
import base64
import email
from googleapiclient.discovery import build
from google_auth_oauthlib.flow import InstalledAppFlow

# Read-only Gmail scope: this notebook only lists and fetches messages.
SCOPES = ['https://www.googleapis.com/auth/gmail.readonly']


def _decode_body(data):
    """Decode a base64url-encoded message body into text.

    Args:
        data: The base64url string stored under ``body['data']`` of a part.

    Returns:
        The decoded text. Bytes that are not valid UTF-8 are replaced instead
        of raising, so one oddly encoded email cannot abort the whole fetch.
    """
    # Restore any stripped '=' padding; urlsafe_b64decode rejects unpadded input.
    padded = data + '=' * (-len(data) % 4)
    return base64.urlsafe_b64decode(padded).decode('utf-8', errors='replace')


def _find_plain_text(part):
    """Return the first non-empty text/plain body in ``part`` or its sub-parts.

    Parts can themselves contain parts (for example multipart/alternative
    nested inside multipart/mixed), so the tree is searched depth-first
    rather than only at the top level.

    Args:
        part: A message part dict (the payload itself or one of its parts).

    Returns:
        The decoded text, or None when no text/plain body with data exists.
    """
    if part.get("mimeType") == "text/plain":
        data = part.get('body', {}).get('data')
        if data:
            return _decode_body(data)
    for child in part.get('parts', []):
        text = _find_plain_text(child)
        if text:
            return text
    return None


# Interactive OAuth flow using the client secrets in credentials.json.
flow = InstalledAppFlow.from_client_secrets_file('credentials.json', SCOPES)
creds = flow.run_local_server(port=0)
service = build('gmail', 'v1', credentials=creds)

# Ask for up to 5 messages carrying the INBOX label.
results = service.users().messages().list(userId='me', labelIds=['INBOX'], maxResults=5).execute()
messages = results.get('messages', [])

email_texts = []

for msg in messages:
    msg_data = service.users().messages().get(userId='me', id=msg['id'], format='full').execute()
    payload = msg_data['payload']

    if payload.get('parts'):
        # Multipart message: look for a text/plain body anywhere in the part tree.
        email_body = _find_plain_text(payload)
    else:
        # Sometimes no parts, use body directly
        data = payload['body'].get('data')
        email_body = _decode_body(data) if data else None

    # Messages without a usable text body are skipped.
    if email_body:
        email_texts.append(email_body)


# Process the email content

In [None]:
# Wrap each fetched email body in a Document, then split the documents into
# overlapping chunks (chunk_size=1000, chunk_overlap=200).
docs = [Document(page_content=body) for body in email_texts]
splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=200)
chunks = splitter.split_documents(docs)

In [None]:
# Display how many chunks the splitter produced.
len(chunks)

In [None]:
embeddings = OpenAIEmbeddings()

# Start from a clean slate: when a persisted store already exists from an
# earlier run, drop its collection first. Without this, from_documents() below
# would add the same chunks again on every re-run of the notebook.
if os.path.exists(db_name):
    Chroma(persist_directory=db_name, embedding_function=embeddings).delete_collection()

# Create vectorstore

vectorstore = Chroma.from_documents(documents=chunks, embedding=embeddings, persist_directory=db_name)
print(f"Vectorstore created with {vectorstore._collection.count()} documents")


In [None]:
# Fetch a single stored record together with its embedding and report
# the length of that vector.

collection = vectorstore._collection
sample_record = collection.get(limit=1, include=["embeddings"])
sample_embedding = sample_record["embeddings"][0]
dimensions = len(sample_embedding)
print(f"The vectors have {dimensions:,} dimensions")

In [None]:
# Prompt template with {context} and {question} placeholders; it is handed to
# the chain below as the prompt of its combine-docs step.
custom_prompt = PromptTemplate.from_template(
    """
    You are an expert assistant that reads email content and answers clearly and concisely.

    {context}

    Question: {question}
    """
)

# Building blocks of the chain: a retriever over the vector store, a buffer
# memory keyed as "chat_history", and the chat model at temperature 0.
retriever = vectorstore.as_retriever()
memory = ConversationBufferMemory(memory_key="chat_history", return_messages=True)
llm = ChatOpenAI(model_name=MODEL, temperature=0)

conversation_chain = ConversationalRetrievalChain.from_llm(
    llm=llm,
    retriever=retriever,
    memory=memory,
    combine_docs_chain_kwargs={"prompt": custom_prompt},
)


In [None]:
# Try the chain once with a fixed question and print the answer it returns.
query = "Summarize the recent emails"
result = conversation_chain.invoke({"question": query})
answer = result["answer"]
print(answer)

In [None]:
def chat(message, history):
    """Answer one chat message through the module-level conversation chain.

    Args:
        message: The user's latest message, forwarded as the chain's "question".
        history: Accepted for the caller's sake but not used here; the
            conversation memory is held by ``conversation_chain`` instead.

    Returns:
        The value stored under "answer" in the chain's result.
    """
    response = conversation_chain.invoke({"question": message})
    return response["answer"]

In [None]:
# Expose chat() through a Gradio chat interface and open it in the browser.
# `view` holds whatever launch() returns.

chat_ui = gr.ChatInterface(fn=chat, type="messages")
view = chat_ui.launch(inbrowser=True)