In [1]:
# Install required libraries
!pip install transformers azure-storage-blob torch



In [2]:
# Import required modules
from azure.storage.blob import BlobServiceClient
from transformers import AutoModelForQuestionAnswering, AutoTokenizer, pipeline
import tempfile
import os

In [3]:
# Step 1: Azure Blob Storage Connection
# Read the connection string from the environment so the secret is never saved inside the
# notebook. When the variable is unset this falls back to "" (the original placeholder value).
connection_string = os.environ.get("AZURE_STORAGE_CONNECTION_STRING", "")
container_name = "ml-poc-container"  # Replace with the container name where the model is stored
# Blob names that make up the saved model; each one is downloaded by name from the container.
model_files = [
    "config.json",
    "tokenizer.json",
    "tokenizer_config.json",
    "special_tokens_map.json",
    "vocab.txt",
    "model.safetensors",
]


In [8]:
# Step 2: Fetch Model Files from Azure Blob Storage and Write to a Temporary Directory
blob_service_client = BlobServiceClient.from_connection_string(connection_string)
container_client = blob_service_client.get_container_client(container_name)

# The temporary directory and everything downloaded into it are removed when this
# `with` block exits, so model loading and inference are kept inside it.
with tempfile.TemporaryDirectory() as temp_dir:
    for file_name in model_files:
        blob_client = container_client.get_blob_client(file_name)
        temp_file_path = os.path.join(temp_dir, file_name)

        # Stream the blob straight into the file instead of buffering it with readall(),
        # so a full copy of each file (notably the weights) is never held in memory.
        with open(temp_file_path, "wb") as temp_file:
            blob_client.download_blob().readinto(temp_file)

    # Step 3: Load the Model and Tokenizer from Temporary Directory
    model = AutoModelForQuestionAnswering.from_pretrained(temp_dir)
    tokenizer = AutoTokenizer.from_pretrained(temp_dir)

    # Step 4: Create a Question-Answering Pipeline
    qa_pipeline = pipeline('question-answering', model=model, tokenizer=tokenizer)

    # Step 5: Perform Question-Answering
    context = """
    Large Language Models (LLMs) are deep learning algorithms capable of performing various natural language processing tasks.
    They are trained on massive datasets, enabling them to recognize, translate, predict, and generate text or other content.
    """
    question = "What is LLM?"

    # Get the answer using the QA pipeline; the result is indexed by 'answer' below
    result = qa_pipeline(question=question, context=context)

    # Step 6: Print the Result
    print(f"Question: {question}")
    print(f"Answer: {result['answer']}")

Device set to use mps:0


Question: What is LLM?
Answer: Large Language Models
