In [1]:
#!git lfs install

In [2]:
#!git clone https://huggingface.co/microsoft/Phi-3-vision-128k-instruct-onnx-cpu

In [2]:
#!pip install --pre onnxruntime-genai

#### Download the ONNX model locally

In [20]:
import argparse
import os
import readline
import glob

import onnxruntime_genai as og

In [21]:
# Load the ONNX model from the local directory given below.
modelDir = "./phi3vision/cpu-int4-rtn-block-32-acc-level-4"
model = og.Model(modelDir)

In [22]:
def getImageAnswer(question, imagePath=None):
    """Ask the loaded model a question, optionally about an image, and stream the reply.

    Builds a chat-style prompt, runs token-by-token generation on the
    module-level ``model`` and prints every decoded token as soon as it is
    produced.

    Args:
        question: Text of the user question.
        imagePath: Optional path to an image file. When falsy, the prompt is
            text-only and contains no ``<|image_1|>`` placeholder.

    Returns:
        None. The answer is written to stdout.
    """
    processor = model.create_multimodal_processor()
    tokenizer_stream = processor.create_stream()

    prompt = "<|user|>\n"
    image = None
    if imagePath:
        image = og.Images.open(imagePath)
        # The image placeholder belongs in the prompt only when an image is
        # actually attached; emitting it for a text-only question would
        # reference an image that is never passed to the processor.
        prompt += "<|image_1|>\n"

    prompt += f"{question}<|end|>\n<|assistant|>\n"

    if image is not None:
        print("Processing image and prompt...")
    # `image` stays None for text-only questions.
    inputs = processor(prompt, images=image)

    print("Generating response...")
    params = og.GeneratorParams(model)
    params.set_inputs(inputs)
    params.set_search_options(max_length=3072)

    generator = og.Generator(model, params)

    # Stream the answer: decode and print each token as it is generated.
    while not generator.is_done():
        generator.compute_logits()
        generator.generate_next_token()

        new_token = generator.get_next_tokens()[0]
        print(tokenizer_stream.decode(new_token), end='', flush=True)

    # Leave some blank space after the streamed answer.
    for _ in range(3):
        print()

    # Delete the generator to free the captured graph before creating another one
    del generator

In [23]:
# Run the helper on a sample image and stream the description.
getImageAnswer(
    question="Describe the image and details ",
    imagePath='./Data/Gru/images/1.jpg',
)

Processing image and prompt...
Generating response...
The image depicts a lively scene from a 'Despicable Me' movie set. In the center, Gru, the main character, is seated at a round table with a blue tablecloth, surrounded by various Minions. He is wearing a white lab coat and is holding a clipboard, suggesting he is in a scientific or research role. The Minions are in different poses, some are standing, and others are seated on stools around the table. The background features a large, open space with a staircase leading to a higher level, and the walls are adorned with various mechanical and scientific equipment, giving the impression of a laboratory or workshop. The overall atmosphere is one of organized chaos, typical of a 'Despicable Me' movie set.</s>




#### Call Phi3 using Ollama

##### List of models in library - https://ollama.com/library

##### Phi-3 vision is not yet supported by Ollama; once it is, only the model name will need to change - https://github.com/ollama/ollama/blob/main/docs/api.md
https://github.com/ollama/ollama

##### Download/serve the model using Ollama: `ollama run phi3`
##### If required, set the environment variable `OLLAMA_HOST=127.0.0.1:11435`

In [24]:
from langchain_community.llms import Ollama

In [25]:
# Client for the phi3 model at the base URL below.
ollama = Ollama(model="phi3", base_url='http://localhost:11435')

# Quick smoke test: send one prompt and print the completion.
skyAnswer = ollama.invoke("why is the sky blue")
print(skyAnswer)

 The sky appears blue to us because of a phenomenon called Rayleigh scattering. When sunlight reaches Earth's atmosphere, it encounters molecules and small particles that are present in the air. Sunlight is composed of different colors of light, each with its own wavelength. Blue light has a shorter wavelength compared to other visible colors like red or yellow.

As sunlight travels through Earth's atmosphere, it interacts with these molecules and particles, causing the blue light to scatter in all directions more efficiently than the other colors due to its shorter wavelength. This scattered blue light reaches our eyes from different angles, which gives us the impression that the sky is predominantly blue during daytime.

In contrast, at sunrise or sunset, when the Sun's rays have to travel through a greater thickness of Earth's atmosphere, most of the shorter wavelength colors (blue and green) are scattered away from our line of sight, leaving behind longer-wavelength colors like red

##### QnA using LangChain and Phi3 locally on the existing index we created in 2_AskQuestion.ipynb

In [35]:
import json  
from openai import OpenAI, AzureOpenAI, AsyncAzureOpenAI
from Utilities.envVars import *
import os
from langchain.chains.qa_with_sources import load_qa_with_sources_chain
from langchain.chat_models import AzureChatOpenAI, ChatOpenAI
from langchain.embeddings.openai import OpenAIEmbeddings
from Utilities.cogSearch import performCogSearch
from langchain.docstore.document import Document
from langchain.prompts import PromptTemplate
from IPython.display import display, HTML
from langchain.chat_models import AzureChatOpenAI, ChatOpenAI
from langchain_openai import AzureOpenAIEmbeddings
from langchain.vectorstores.azuresearch import AzureSearch
from langchain import hub
from langchain.schema.runnable import RunnablePassthrough
from langchain.schema import StrOutputParser
from operator import itemgetter
from langchain.schema.runnable import RunnableMap

# Azure Search field-name settings exported as environment variables
# (presumably consumed by the AzureSearch vector store - TODO confirm).
os.environ.update(
    {
        "AZURESEARCH_FIELDS_ID": "id",
        "AZURESEARCH_FIELDS_CONTENT": "content",
        "AZURESEARCH_FIELDS_CONTENT_VECTOR": "contentVector",
        "AZURESEARCH_FIELDS_TAG": "{}",
    }
)

# Index name and Azure OpenAI connection settings, copied from the names
# brought in by the Utilities.envVars wildcard import.
indexName = SearchIndex

azure_endpoint = f"{OpenAiEndPoint}"
api_key = OpenAiKey
api_version = OpenAiVersion

# Settings for the embedding model and generation.
embeddingModelType = "azureopenai"
temperature = 0.3
tokenLength = 1000

# Embedding client; its embed_query method is later handed to the vector store.
embeddings = AzureOpenAIEmbeddings(
    azure_endpoint=OpenAiEndPoint,
    azure_deployment=OpenAiEmbedding,
    api_key=OpenAiKey,
    openai_api_type="azure",
)

In [34]:
chainType = "stuff"
topK = 3
query = "What is Microsoft Fabric"

# The documents are already indexed, so run the query against the index to retrieve the "TopK" matches
r = performCogSearch(OpenAiEndPoint, OpenAiKey, OpenAiVersion, OpenAiApiKey, SearchService, SearchKey, embeddingModelType, OpenAiEmbedding, query, indexName, topK)

# Use an identity check for None: `==` can be overridden by the result
# object's __eq__, whereas `is None` is always exact.
if r is None:
    docs = [Document(page_content="No results found")]
else:
    # Wrap each search hit in a LangChain Document, keeping its id and source file as metadata.
    docs = [
        Document(page_content=doc['content'], metadata={"id": doc['id'], "source": doc['sourcefile']})
        for doc in r
    ]

In [36]:
rawDocs = []

# Vector store over the search index, embedding queries with the Azure OpenAI client.
searchEndpoint = f"https://{SearchService}.search.windows.net"
csVectorStore: AzureSearch = AzureSearch(
    azure_search_endpoint=searchEndpoint,
    azure_search_key=SearchKey,
    index_name=indexName,
    embedding_function=embeddings.embed_query,
    semantic_configuration_name="semanticConfig",
)

# Similarity retriever limited to the 3 closest documents.
retrieverOptions = {"k": 3}
retriever = csVectorStore.as_retriever(search_type="similarity", search_kwargs=retrieverOptions)

# Fetch the matches for the query and keep just their text.
retrievedDocs = retriever.get_relevant_documents(query)
rawDocs.extend(retrieved.page_content for retrieved in retrievedDocs)

# Pull the "rlm/rag-prompt" prompt from the LangChain hub.
prompt = hub.pull("rlm/rag-prompt")

In [37]:
def formatDocs(docs):
    """Join the ``page_content`` of every document, separated by a blank line.

    Args:
        docs: Iterable of objects exposing a ``page_content`` string.

    Returns:
        A single string with the documents' text joined by ``"\\n\\n"``.
    """
    separator = "\n\n"
    pageTexts = [entry.page_content for entry in docs]
    return separator.join(pageTexts)

In [40]:
# Map the incoming question to the prompt inputs: the retrieved context
# (formatted as one string) and the question itself, passed through unchanged.
ragInputs = {"context": retriever | formatDocs, "question": RunnablePassthrough()}

# Pipeline: build inputs -> fill prompt -> local phi3 via Ollama -> plain string.
ragChain = ragInputs | prompt | ollama | StrOutputParser()

try:
    rawAnswer = ragChain.invoke(query)
    # Strip the "Answer: " prefix from the model output before printing.
    modifiedAnswer = rawAnswer.replace("Answer: ", '')
    print("Modified Answer: " + modifiedAnswer)
except Exception as chainError:
    # Best-effort cell: report the failure instead of stopping the notebook.
    print("Error in RAG Chain: " + str(chainError))

Modified Answer:  Microsoft Fabric is a unified analytics platform that integrates various experiences like Data Engineering, Data Factory, Data Science, and more onto a shared SaaS foundation for easy accessibility and governance within an organization's data ecosystem.
