<a href="https://colab.research.google.com/github/damola936/AI-ML-LLM/blob/main/PersonalWorker.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [2]:
!pip install gradio



In [3]:
import os
import glob
import gradio

In [4]:
!pip install -U langchain-community



In [5]:
!pip install openai



In [9]:
# Imports

from langchain.document_loaders import DirectoryLoader, TextLoader, UnstructuredWordDocumentLoader
from langchain.text_splitter import CharacterTextSplitter
from langchain.schema import Document
from langchain.embeddings import OpenAIEmbeddings
from langchain.chat_models import ChatOpenAI, ChatOllama
from langchain_community.vectorstores import Chroma
import matplotlib.pyplot as plt
from sklearn.manifold import TSNE
import numpy as np
import plotly.graph_objects as go
from langchain.memory import ConversationBufferMemory
from langchain.chains import ConversationalRetrievalChain
from langchain.embeddings import HuggingFaceEmbeddings

In [13]:
# price is a factor for our company, so we're going to use a low cost model

MODEL = "gpt-4o-mini"
db_name = "vector_db"

In [5]:
from google.colab import userdata
OPENAI_API_KEY = userdata.get('OPENAI_API_KEY')

In [9]:
!pip install unstructured



In [10]:
!pip install python-docx docx



In [10]:
# Step 1: Get list of directories or individual files
base_path = "/content/drive/MyDrive"

# Optional: only get directories or specific folders if needed
documents = []
text_loader_kwargs = {'encoding': 'utf-8'}

# Step 2: Use DirectoryLoader only on folders, not individual files
loader = DirectoryLoader(
    path=base_path,  # not a list, a single folder path
    glob="**/*.docx",
    loader_cls=UnstructuredWordDocumentLoader,
    loader_kwargs=text_loader_kwargs
)

# Step 3: Load all matching files recursively
documents = loader.load()

# Step 4: Split into chunks
text_splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=200)
chunks = text_splitter.split_documents(documents)
print(f"Total number of chunks: {len(chunks)}")



Total number of chunks: 26


Creating embeddings

In [11]:
from langchain.embeddings import HuggingFaceEmbeddings
embeddings = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")

  embeddings = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")


In [13]:
!pip install chromadb



In [14]:
# Create vectorstore

vectorstore = Chroma.from_documents(documents=chunks, embedding=embeddings, persist_directory=db_name)
print(f"Vectorstore created with {vectorstore._collection.count()} documents")

Vectorstore created with 78 documents


In [15]:
# Let's investigate the vectors

collection = vectorstore._collection
count = collection.count()

sample_embedding = collection.get(limit=1, include=["embeddings"])["embeddings"][0]
dimensions = len(sample_embedding)
print(f"There are {count:,} vectors with {dimensions:,} dimensions in the vector store")

There are 78 vectors with 384 dimensions in the vector store


In [16]:
result = collection.get(include=['embeddings', 'documents', 'metadatas'])
vectors = np.array(result['embeddings'])
documents = result['documents']
metadatas = result['metadatas']

In [17]:
# We humans find it easier to visalize things in 2D!
# Reduce the dimensionality of the vectors to 2D using t-SNE
# (t-distributed stochastic neighbor embedding)

tsne = TSNE(n_components=2, random_state=42, perplexity=20)
reduced_vectors = tsne.fit_transform(vectors)

# Create the 2D scatter plot
fig = go.Figure(data=[go.Scatter(
    x=reduced_vectors[:, 0],
    y=reduced_vectors[:, 1],
    mode='markers',
    marker=dict(size=5, opacity=0.8),
    text=[f"<br>Text: {d[:100]}..." for d in documents],
    hoverinfo='text'
)])

fig.update_layout(
    title='2D Chroma Vector Store Visualization',
    scene=dict(xaxis_title='x',yaxis_title='y'),
    width=800,
    height=600,
    margin=dict(r=20, b=10, l=10, t=40)
)

fig.show()

In [18]:
tsne = TSNE(n_components=3, random_state=42, perplexity=20)
reduced_vectors = tsne.fit_transform(vectors)

# Create the 3D scatter plot
fig = go.Figure(data=[go.Scatter3d(
    x=reduced_vectors[:, 0],
    y=reduced_vectors[:, 1],
    z=reduced_vectors[:, 2],
    mode='markers',
    marker=dict(size=5, opacity=0.8),
    text=[f"<br>Text: {d[:100]}..." for d in documents],
    hoverinfo='text'
)])

fig.update_layout(
    title='3D Chroma Vector Store Visualization',
    scene=dict(xaxis_title='x',yaxis_title='y', zaxis_title='z'),
    width=900,
    height=700,
    margin=dict(r=20, b=10, l=10, t=40)
)

fig.show()

## Creating a UI

In [35]:
llm = ChatOpenAI(temperature=0.7, model=MODEL, openai_api_key=OPENAI_API_KEY) # model
memory = ConversationBufferMemory(memory_key="chat_history", return_messages=True) # model memory
retriever = vectorstore.as_retriever(search_kwargs={"k": 30}) # vectorstore as retriever
conversation_chain = ConversationalRetrievalChain.from_llm(llm=llm, retriever=retriever, memory=memory) # creating conversion chain


Please see the migration guide at: https://python.langchain.com/docs/versions/migrating_memory/



In [36]:
def chat(question, history):
    result = conversation_chain.invoke({"question":question})
    return result["answer"]

In [37]:
import gradio as gr
view = gr.ChatInterface(chat, type="messages").launch(inbrowser=False)

It looks like you are running Gradio on a hosted a Jupyter notebook. For the Gradio app to work, sharing must be enabled. Automatically setting `share=True` (you can turn this off by setting `share=False` in `launch()` explicitly).

Colab notebook detected. To show errors in colab notebook, set debug=True in launch()
* Running on public URL: https://1a526339c2590db960.gradio.live

This share link expires in 1 week. For free permanent hosting and GPU upgrades, run `gradio deploy` from the terminal in the working directory to deploy to Hugging Face Spaces (https://huggingface.co/spaces)


## Using Opensource Models

In [1]:
!pip install -q torch accelerate bitsandbytes transformers

[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m363.4/363.4 MB[0m [31m4.9 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m13.8/13.8 MB[0m [31m69.3 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m24.6/24.6 MB[0m [31m48.4 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m883.7/883.7 kB[0m [31m36.1 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m664.8/664.8 MB[0m [31m2.8 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m211.5/211.5 MB[0m [31m7.5 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m56.3/56.3 MB[0m [31m10.7 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m127.9/127.9 MB[0m [31m9.3 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━

In [7]:
from huggingface_hub import login
hf_token = userdata.get("HF_TOKEN")
login(hf_token, add_to_git_credential=True)

Token has not been saved to git credential helper.


[1m[31mCannot authenticate through git-credential as no helper is defined on your machine.
You might have to re-authenticate when pushing to the Hugging Face Hub.
Run the following command in your terminal in case you want to set the 'store' credential helper as default.

git config --global credential.helper store

Read https://git-scm.com/book/en/v2/Git-Tools-Credential-Storage for more details.[0m


In [None]:
from langchain_community.llms import HuggingFacePipeline
from transformers import pipeline, AutoModelForCausalLM, AutoTokenizer
import torch

model_id="meta-llama/Llama-3.1-8B"

# Load in 8-bit precision to reduce memory usage
tokenizer = AutoTokenizer.from_pretrained(model_id)
model = AutoModelForCausalLM.from_pretrained(
    model_id,
    load_in_8bit=True,  # Quantization to save memory
    device_map="auto"
)

# Create pipeline with more controlled parameters
hf_pipeline = pipeline(
    "text-generation",
    model=model,
    tokenizer=tokenizer,
    max_new_tokens=256,  # Limit output size
    do_sample=True,
    temperature=0.7,
    top_p=0.9
)

In [None]:
# Wrap in LangChain's HuggingFacePipeline
llm = HuggingFacePipeline(pipeline=hf_pipeline) # opensource model
memory = ConversationBufferMemory(memory_key="chat_history", return_messages=True) # model memory
retriever = vectorstore.as_retriever(search_kwargs={"k": 30}) # vectorstore as retriever
conversation_chain = ConversationalRetrievalChain.from_llm(llm=llm, retriever=retriever, memory=memory) # creating conversion chain

In [18]:
def chat(question, history):
    result = conversation_chain.invoke({"question":question})
    return result["answer"]

In [19]:
import gradio as gr
view = gr.ChatInterface(chat, type="messages").launch(inbrowser=False) # Opensource model not good enough as 8B parameters

It looks like you are running Gradio on a hosted a Jupyter notebook. For the Gradio app to work, sharing must be enabled. Automatically setting `share=True` (you can turn this off by setting `share=False` in `launch()` explicitly).

Colab notebook detected. To show errors in colab notebook, set debug=True in launch()
* Running on public URL: https://fc79151637d9bf0bdc.gradio.live

This share link expires in 1 week. For free permanent hosting and GPU upgrades, run `gradio deploy` from the terminal in the working directory to deploy to Hugging Face Spaces (https://huggingface.co/spaces)
