# Azure OpenAI API RAG Storage Container

In [1]:
import os
from openai import AzureOpenAI

In [2]:
from azure_oai_rag_password import endpoint, deployment, model_version, subscription_key, search_endpoint, search_key, search_index

# Initialize Client Service with Key-Based Authentication

In [3]:
# Build the Azure OpenAI client using key-based authentication.
# All three values are imported from the local `azure_oai_rag_password` module,
# so no credential is written into the notebook itself.
# NOTE(review): `model_version` is passed as `api_version`; confirm the config
# value holds a service API version string rather than a model version.
client = AzureOpenAI(
    azure_endpoint=endpoint,
    api_key=subscription_key,
    api_version=model_version,
)

# Prepare the Prompt

In [4]:
# System-level guidance that sets the tone of every answer.
instructions = "Provide answers like a university scholar."
# The single user question asked in both the baseline and the RAG request.
query = "What is the Twitter geolocation hybrid approach?"

# Chat history sent to the model: the system instruction first, then the user turn.
messages = [
    dict(role="system", content=instructions),
    dict(role="user", content=query),
]

# Generate Completion without Data

In [5]:
# Generation controls for the baseline request (no retrieval data attached).
# Kept in one mapping so the tunable knobs are visible at a glance.
sampling_options = {
    "max_tokens": 800,
    "temperature": 0.7,
    "top_p": 0.95,
    "frequency_penalty": 0,
    "presence_penalty": 0,
    "stop": None,
    "stream": False,
}

# Ask the deployed model directly, relying only on what the model already knows.
completion = client.chat.completions.create(
    model=deployment,
    messages=messages,
    **sampling_options,
)

In [6]:
# Show the prompt pieces and the model's answer, each value on its own
# indented line beneath its label.
print(f"Instructions: \n    {instructions}")
print(f"Query: \n    {query}")
print(f"Response: \n    {completion.choices[0].message.content}")

Instructions: 
    Provide answers like a university scholar.
Query: 
    What is the Twitter geolocation hybrid approach?
Response: 
    The "Twitter geolocation hybrid approach" refers to a methodology that combines multiple techniques or data sources to infer the geographic location of Twitter users or their tweets. This approach is particularly valuable in studies where precise geolocation information is necessary, yet direct geolocation data (e.g., GPS coordinates) is often missing or sparse. Only a small fraction of tweets (estimated to be less than 1%) include explicit geotags, so researchers and analysts must rely on hybrid methods to infer location with greater coverage and accuracy.

### Components of the Hybrid Approach

A hybrid approach typically integrates both explicit and implicit signals to estimate geolocation. Some of the key components include:

1. **Explicit Geotags**:
   - Some tweets include GPS coordinates or geotagged metadata provided by the user. This is the 

# RAG - Generate Completion with Data

The data used is a PDF manuscript from the ACM journal *Transactions on Knowledge Discovery from Data*.

The manuscript is titled "Twitter Geolocation: A Hybrid Approach".

In [7]:
# Azure AI Search data source attached to the request so the service can
# retrieve passages from the indexed manuscript.
# Endpoint, index name and key come from the local config module.
azure_search_source = {
    "type": "azure_search",
    "parameters": {
        "endpoint": search_endpoint,
        "index_name": search_index,
        "semantic_configuration": "default",
        "query_type": "semantic",
        "fields_mapping": {},
        # NOTE(review): presumably limits answers to the retrieved documents;
        # confirm against the service documentation.
        "in_scope": True,
        "role_information": "",
        "filter": None,
        # NOTE(review): presumably the relevance-filter strictness and the number
        # of retrieved documents passed to the model; confirm the valid ranges.
        "strictness": 3,
        "top_n_documents": 5,
        "authentication": {
            "type": "api_key",
            "key": search_key,
        },
    },
}

# Same prompt and generation settings as the baseline call, plus the data source.
# This rebinds `completion`, replacing the baseline result held under that name.
completion = client.chat.completions.create(
    model=deployment,
    messages=messages,
    max_tokens=800,
    temperature=0.7,
    top_p=0.95,
    frequency_penalty=0,
    presence_penalty=0,
    stop=None,
    stream=False,
    # Fields the SDK has no keyword for are supplied through `extra_body`.
    extra_body={"data_sources": [azure_search_source]},
)

In [8]:
# Show the same prompt alongside the retrieval-grounded answer; each label is
# joined to its value with the same separator the multi-argument print produced.
print(" ".join(["Instructions:", "\n   ", str(instructions)]))
print(" ".join(["Query:", "\n   ", str(query)]))
print(" ".join(["Response:", "\n   ", str(completion.choices[0].message.content)]))

Instructions: 
    Provide answers like a university scholar.
Query: 
    What is the Twitter geolocation hybrid approach?
Response: 
    The Twitter geolocation hybrid approach combines text-based features and network-based features to predict the geographic location of tweets, leveraging the strengths of both methodologies. This model uses Gaussian Mixture Models (GMMs) to map spatial distributions and estimate the probability of a tweet's origin within a spatial domain. It has been shown to outperform other geotagging algorithms in terms of prediction accuracy and precision [doc1][doc3].

### Key Features of the Hybrid Approach:
1. **Integration of Text and Network Information**: The model jointly utilizes text features (e.g., geographically narrow n-grams) and network features (e.g., distributions of friends) as predictors, weighting them according to their geographic scope [doc2].
2. **Flexibility**: The approach can operate using either text or network features alone if one set i