In [64]:
from google.cloud import aiplatform
from google.auth import credentials, load_credentials_from_dict
import json
from google.protobuf.json_format import MessageToDict
from datetime import datetime

# Load credentials
# NOTE: the assignment below rebinds the name `credentials`, shadowing the
# `google.auth.credentials` module imported above; from here on the name refers
# to the loaded credentials object.
# The key file is read inside a context manager so the handle is closed
# deterministically instead of being left to the garbage collector.
with open('research-paper-rag-0a8819b735b9.json') as key_file:
    credentials_info = json.load(key_file)
credentials, project_id = load_credentials_from_dict(credentials_info)

# Initialize client (regional endpoint must match `location` below)
client_options = {"api_endpoint": "us-central1-aiplatform.googleapis.com"}
client = aiplatform.gapic.PredictionServiceClient(
    client_options=client_options,
    credentials=credentials
)

# Endpoint configuration: fully-qualified resource name of the deployed endpoint
project = "research-paper-rag"
location = "us-central1"
endpoint_id = "3729166308927864832"
endpoint = f"projects/{project}/locations/{location}/endpoints/{endpoint_id}"

# Input data: a single instance sent to the endpoint
input_data = {
    "instances": [{
        "query": "Give me an idea on Transformers",
        'max_tokens': 500,
        'num_papers': 2
    }]
}

def format_references(references):
    """Format the references list from MapComposite objects into plain dicts.

    Args:
        references: Iterable of mapping-like reference entries; each entry
            must be convertible with ``dict()``.

    Returns:
        list[dict]: One dict per entry with the keys ``title``, ``authors``,
        ``categories``, ``relevance_score`` and ``citation``. Missing keys fall
        back to ``''`` (``[]`` for authors, ``0.0`` for the relevance score).
    """
    formatted_refs = []
    for ref in references:
        ref_dict = dict(ref)
        formatted_refs.append({
            'title': ref_dict.get('title', ''),
            # Author names can arrive with embedded line breaks or runs of
            # spaces (e.g. "Venkatram\n  Vishwanath"); collapse them to single
            # spaces so printed output and saved JSON stay on one line.
            'authors': [
                ' '.join(author.split()) if isinstance(author, str) else author
                for author in ref_dict.get('authors', [])
            ],
            'categories': ref_dict.get('categories', ''),
            'relevance_score': ref_dict.get('relevance_score', 0.0),
            'citation': ref_dict.get('citation', '')
        })
    return formatted_refs

# Query the endpoint, print the first prediction and save it as JSON.
# Any failure is reported as a one-line message rather than a traceback.
try:
    # Send prediction request
    response = client.predict(endpoint=endpoint, instances=input_data["instances"])

    # An empty result would otherwise surface as an opaque
    # "list index out of range"; report it explicitly instead.
    if len(response.predictions) == 0:
        raise ValueError("endpoint returned no predictions")

    # Get the first prediction
    prediction = dict(response.predictions[0])

    # Format the output (plain Python containers so it can be dumped to JSON)
    formatted_output = {
        'query': prediction['query'],
        'response': prediction['response'],
        'references': format_references(prediction['references']),
        'metadata': dict(prediction['metadata']),
        'timestamp': prediction['timestamp']
    }

    # Pretty print the results
    print("\n=== Prediction Results ===\n")
    print(f"Query: {formatted_output['query']}\n")
    print(f"Response: {formatted_output['response']}\n")
    print("References:")
    for i, ref in enumerate(formatted_output['references'], 1):
        print(f"\n[{i}] {ref['title']}")
        print(f"    Authors: {', '.join(ref['authors'])}")
        print(f"    Categories: {ref['categories']}")
        print(f"    Relevance Score: {ref['relevance_score']:.2f}")

    print(f"\nTimestamp: {formatted_output['timestamp']}")

    # Optionally, save the full response to a file
    with open('prediction_response.json', 'w') as f:
        json.dump(formatted_output, f, indent=2)

except Exception as e:
    print(f"\nError during prediction: {str(e)}")


=== Prediction Results ===

Query: Give me an idea on Transformers

Response: A comprehensive survey of techniques for optimizing transformer inference.

References:

[1] An Introduction to Transformers
    Authors: Richard E. Turner
    Categories: cs.LG cs.AI
    Relevance Score: 0.48

[2] A Survey of Techniques for Optimizing Transformer Inference
    Authors: Krishna Teja Chitty-Venkata, Sparsh Mittal, Murali Emani, Venkatram
  Vishwanath, Arun K. Somani
    Categories: cs.LG cs.AR cs.CL cs.CV
    Relevance Score: 0.46

Timestamp: 2024-12-20T23:13:03.616904


In [70]:
from google.cloud import aiplatform
from google.auth import credentials, load_credentials_from_dict
import json
from google.protobuf.json_format import MessageToDict
from datetime import datetime
import textwrap

# Load credentials
# NOTE: the assignment below rebinds the name `credentials`, shadowing the
# `google.auth.credentials` module imported above; from here on the name refers
# to the loaded credentials object.
# The key file is read inside a context manager so the handle is closed
# deterministically instead of being left to the garbage collector.
with open('research-paper-rag-0a8819b735b9.json') as key_file:
    credentials_info = json.load(key_file)
credentials, project_id = load_credentials_from_dict(credentials_info)

# Initialize client (regional endpoint must match `location` below)
client_options = {"api_endpoint": "us-central1-aiplatform.googleapis.com"}
client = aiplatform.gapic.PredictionServiceClient(
    client_options=client_options,
    credentials=credentials
)

# Endpoint configuration: fully-qualified resource name of the deployed endpoint
project = "research-paper-rag"
location = "us-central1"
endpoint_id = "3729166308927864832"
endpoint = f"projects/{project}/locations/{location}/endpoints/{endpoint_id}"

# Input data: a single instance sent to the endpoint
input_data = {
    "instances": [{
        "query": "what is Transformer in Deep Learning?",
        'max_tokens': 500,  # value sent as the instance's max_tokens field
        'num_papers': 1     # request a single reference paper
    }]
}

def format_references(references):
    """Format the references list from MapComposite objects into plain dicts.

    Args:
        references: Iterable of mapping-like reference entries; each entry
            must be convertible with ``dict()``.

    Returns:
        list[dict]: One dict per entry with the keys ``title``, ``authors``,
        ``categories``, ``relevance_score`` and ``citation``. Missing keys fall
        back to ``''`` (``[]`` for authors, ``0.0`` for the relevance score).
    """
    formatted_refs = []
    for ref in references:
        ref_dict = dict(ref)
        formatted_refs.append({
            'title': ref_dict.get('title', ''),
            # Author names can arrive with embedded line breaks or runs of
            # spaces; collapse them to single spaces so each name prints on
            # one line and is stored cleanly in the saved JSON.
            'authors': [
                ' '.join(author.split()) if isinstance(author, str) else author
                for author in ref_dict.get('authors', [])
            ],
            'categories': ref_dict.get('categories', ''),
            'relevance_score': ref_dict.get('relevance_score', 0.0),
            'citation': ref_dict.get('citation', '')
        })
    return formatted_refs

def print_wrapped_text(label, text, width=80):
    """Print a labelled block of text, wrapped to ``width`` columns.

    A blank line and ``"<label>:"`` are printed first, followed by ``text``
    re-flowed so that no line is longer than ``width`` characters.
    """
    print(f"\n{label}:")
    # textwrap.fill(text, width) is the wrapped lines joined by newlines.
    lines = textwrap.wrap(text, width=width)
    print("\n".join(lines))

# Query the endpoint, print the first prediction as a sectioned report and
# save it as JSON. Any failure is reported as a one-line message rather than
# a traceback.
try:
    # Send prediction request
    response = client.predict(endpoint=endpoint, instances=input_data["instances"])

    # An empty result would otherwise surface as an opaque
    # "list index out of range"; report it explicitly instead.
    if len(response.predictions) == 0:
        raise ValueError("endpoint returned no predictions")

    # Get the first prediction
    prediction = dict(response.predictions[0])

    # Format the output (plain Python containers so it can be dumped to JSON)
    formatted_output = {
        'query': prediction['query'],
        'response': prediction['response'],
        'references': format_references(prediction['references']),
        'metadata': dict(prediction['metadata']),
        'timestamp': prediction['timestamp']
    }

    # Print complete results with proper formatting
    print("\n" + "="*80)
    print("\nPREDICTION RESULTS")
    print("="*80)

    print_wrapped_text("QUERY", formatted_output['query'])

    print("\n" + "-"*80 + "\n")
    print_wrapped_text("RESPONSE", formatted_output['response'])

    print("\n" + "-"*80)
    print("\nREFERENCES:")
    for i, ref in enumerate(formatted_output['references'], 1):
        print(f"\n[{i}] Title: {ref['title']}")
        print(f"    Authors: {', '.join(ref['authors'])}")
        print(f"    Categories: {ref['categories']}")
        print(f"    Relevance Score: {ref['relevance_score']:.2f}")
        print(f"    Citation: {ref['citation']}")

    print("\n" + "-"*80)
    # Plain string: the original f-string had no placeholders.
    print("\nMetadata:")
    for key, value in formatted_output['metadata'].items():
        print(f"    {key}: {value}")

    print(f"\nTimestamp: {formatted_output['timestamp']}")
    print("\n" + "="*80)

    # Save complete response to file
    with open('prediction_response.json', 'w') as f:
        json.dump(formatted_output, f, indent=2)
    print("\nFull response saved to 'prediction_response.json'")

except Exception as e:
    print(f"\nError during prediction: {str(e)}")
    



PREDICTION RESULTS

QUERY:
what is Transformer in Deep Learning?

--------------------------------------------------------------------------------


RESPONSE:
A neural network component that can be used to learn useful representations of
sequences or sets of data-points.

--------------------------------------------------------------------------------

REFERENCES:

[1] Title: An Introduction to Transformers
    Authors: Richard E. Turner
    Categories: cs.LG cs.AI
    Relevance Score: 0.64
    Citation: Richard E. Turner. "An Introduction to Transformers". cs.LG cs.AI.

--------------------------------------------------------------------------------

Metadata:
    model: google/flan-t5-base
    style: academic
    max_tokens: 500.0
    num_papers: 1.0

Timestamp: 2024-12-20T23:20:26.841521


Full response saved to 'prediction_response.json'


In [None]:
from google.cloud import aiplatform
from google.auth import credentials, load_credentials_from_dict
import json
from google.protobuf.json_format import MessageToDict
from datetime import datetime

# Load credentials and initialize client
# NOTE: the assignment below rebinds the name `credentials`, shadowing the
# `google.auth.credentials` module imported above; from here on the name refers
# to the loaded credentials object.
# The key file is read inside a context manager so the handle is closed
# deterministically instead of being left to the garbage collector.
with open('research-paper-rag-0a8819b735b9.json') as key_file:
    credentials_info = json.load(key_file)
credentials, project_id = load_credentials_from_dict(credentials_info)

# Regional endpoint must match `location` below
client_options = {"api_endpoint": "us-central1-aiplatform.googleapis.com"}
client = aiplatform.gapic.PredictionServiceClient(
    client_options=client_options,
    credentials=credentials
)

# Endpoint configuration: fully-qualified resource name of the deployed endpoint
project = "research-paper-rag"
location = "us-central1"
endpoint_id = "3729166308927864832"
endpoint = f"projects/{project}/locations/{location}/endpoints/{endpoint_id}"

def format_references(references):
    """Format references into a readable string.

    Args:
        references: Iterable of mapping-like reference entries; each entry
            must be convertible with ``dict()``.

    Returns:
        str: One numbered, indented paragraph per reference (title, authors,
        categories, relevance score to two decimals, citation), or ``""`` when
        ``references`` is empty. Missing fields render as empty values
        (``0.00`` for the relevance score).
    """
    entries = []
    for i, ref in enumerate(references, 1):
        ref_dict = dict(ref)
        # Author names can arrive with embedded line breaks or runs of spaces;
        # collapse them so each name stays on the "Authors:" line.
        authors = ', '.join(
            ' '.join(name.split()) for name in ref_dict.get('authors', [])
        )
        entries.append(
            f"\n[{i}] Title: {ref_dict.get('title', '')}\n"
            f"    Authors: {authors}\n"
            f"    Categories: {ref_dict.get('categories', '')}\n"
            f"    Relevance Score: {ref_dict.get('relevance_score', 0.0):.2f}\n"
            f"    Citation: {ref_dict.get('citation', '')}\n\n"
        )
    # Join once instead of growing a string with += inside the loop.
    return "".join(entries)

def predict_query(query, max_tokens, num_papers=2):
    """Send one query to the prediction endpoint and return answer and references.

    Uses the module-level ``client``, ``endpoint`` and ``format_references``.

    Args:
        query: Text sent as the instance's ``query`` field.
        max_tokens: Value sent as the instance's ``max_tokens`` field.
        num_papers: Value sent as the instance's ``num_papers`` field.
            Defaults to 2, the value that was previously hard-coded.

    Returns:
        tuple: ``(main_response, references)`` where ``main_response`` is the
        first prediction's ``response`` field and ``references`` is the string
        produced by ``format_references``. On any failure the function does
        not raise; it returns
        ``("Error: <message>", "Error retrieving references")``.
    """
    try:
        # Prepare input data
        input_data = {
            "instances": [{
                "query": query,
                'max_tokens': max_tokens,
                'num_papers': num_papers
            }]
        }

        # Get prediction
        response = client.predict(endpoint=endpoint, instances=input_data["instances"])

        # An empty result would otherwise surface as an opaque
        # "list index out of range"; report it explicitly instead.
        if len(response.predictions) == 0:
            raise ValueError("endpoint returned no predictions")
        prediction = dict(response.predictions[0])

        # Format response
        main_response = prediction['response']
        references = format_references(prediction['references'])

        return main_response, references

    except Exception as e:
        return f"Error: {str(e)}", "Error retrieving references"
    
# Example call; as the cell's last expression, the returned tuple is displayed.
predict_query(query="What is the Transformer model?", max_tokens=300)

In [83]:
# Use a pipeline as a high-level helper
from transformers import pipeline

# Build a text2text-generation pipeline backed by the google/flan-t5-base checkpoint
pipe = pipeline(
    task="text2text-generation",
    model="google/flan-t5-base",
)

# Run one prompt through the pipeline and print the raw result
output = pipe("what is Transformers and does it useful ?")
print(output)

Device set to use cuda:0


[{'generated_text': 'no'}]
