# SUPERSMARTHOME


### 0: INSTALLING LIBRARIES

In [1]:
# !pip install llama_index chroma chromadb openai logging llama-index-experimental
# !pip install -r requirements.txt


### 0: IMPORTING LIBRARIES
Using pre-built class for directory reading.

In [2]:
import os
import sys
import logging
import chromadb
import pandas as pd
from IPython.display import Markdown, display

from llama_index.core import StorageContext
from llama_index.core import Settings
from llama_index.core import SimpleDirectoryReader
from llama_index.experimental.query_engine import PandasQueryEngine
from llama_index.embeddings.openai import OpenAIEmbedding


In [3]:
logging.basicConfig(stream=sys.stdout, level=logging.INFO)
logging.getLogger().addHandler(logging.StreamHandler(stream=sys.stdout))

### 1: IMPORTING OPENAPI KEY

In [4]:
# first, load and set openaikey from a txt file I stored it in
with open('oaikey.txt') as keyfile:
    oaikey = keyfile.read().strip()
    
os.environ["OPENAI_API_KEY"] = oaikey

#### 2: IMPORTING DATASET 

In [5]:
# File paths for each room
room_files = {
    'QDORO': './MQTT Client/Room MQTT Client/data/airQDORO.csv',
    'QFOYER': './MQTT Client/Room MQTT Client/data/airQFOYER.csv',
    'QHANS': './MQTT Client/Room MQTT Client/data/airQHANS.csv',
    'QMOMO': './MQTT Client/Room MQTT Client/data/airQMOMO.csv',
    'QRITA': './MQTT Client/Room MQTT Client/data/airQRITA.csv',
    'QROB': './MQTT Client/Room MQTT Client/data/airQROB.csv'
}

# Load each room's data into separate DataFrames
df_QDORO = pd.read_csv(room_files['QDORO'])
df_QFOYER = pd.read_csv(room_files['QFOYER'])
df_QHANS = pd.read_csv(room_files['QHANS'])
df_QMOMO = pd.read_csv(room_files['QMOMO'])
df_QRITA = pd.read_csv(room_files['QRITA'])
df_QROB = pd.read_csv(room_files['QROB'])

df_rooftop = pd.read_csv('./MQTT Client/Roof MQTT Client/data/pivoted_data.csv')


In [6]:
# Optionally, convert timestamp to datetime if needed
df_QDORO['timestamp'] = pd.to_datetime(df_QDORO['timestamp'])
df_QFOYER['timestamp'] = pd.to_datetime(df_QFOYER['timestamp'])
df_QHANS['timestamp'] = pd.to_datetime(df_QHANS['timestamp'])
df_QMOMO['timestamp'] = pd.to_datetime(df_QMOMO['timestamp'])
df_QRITA['timestamp'] = pd.to_datetime(df_QRITA['timestamp'])
df_QROB['timestamp'] = pd.to_datetime(df_QROB['timestamp'])

df_rooftop['timestamp'] = pd.to_datetime(df_rooftop['timestamp_utc'])

### 3: BUILDING PANDAS QUERY ENGINE

In [7]:
# Internal Rooms
query_engine_DORO = PandasQueryEngine(df=df_QDORO, verbose=True)
query_engine_FOYER = PandasQueryEngine(df=df_QFOYER, verbose=True)
query_engine_HANS = PandasQueryEngine(df=df_QHANS, verbose=True)
query_engine_MOMO = PandasQueryEngine(df=df_QMOMO, verbose=True)
query_engine_RITA = PandasQueryEngine(df=df_QRITA, verbose=True)
query_engine_ROB = PandasQueryEngine(df=df_QROB, verbose=True)

# External Rooms
query_engine_rooftop = PandasQueryEngine(df=df_rooftop, verbose=True)

### 4: QUERYING THE DATA

##### 4.1: ROOFTOP QUERYING

In [8]:
internal_external_choice = "external"   # "external", "internal"

# If the choice is internal, I need to set the Room Number.
# internal_room_choice = "ROOFTOP"   # "DORO", "FOYER", "HANS", "MOMO", "RITA", "ROB"
internal_room_choice = "ROOFTOP"

In [9]:
# Set query_engine_choice based on the internal/external choice and the room choice
if internal_external_choice == "internal":
    
    if internal_room_choice == "QDORO":
        query_engine_choice = query_engine_DORO
    elif internal_room_choice == "QFOYER":
        query_engine_choice = query_engine_FOYER
    elif internal_room_choice == "QHANS":
        query_engine_choice = query_engine_HANS
    elif internal_room_choice == "QMOMO":
        query_engine_choice = query_engine_MOMO
    elif internal_room_choice == "QRITA":
        query_engine_choice = query_engine_RITA
    elif internal_room_choice == "QROB":
        query_engine_choice = query_engine_ROB
    else:
        raise ValueError(f"Unknown internal room choice: {internal_room_choice}")

elif internal_external_choice == "external":
    
    query_engine_choice = query_engine_rooftop

else:
    raise ValueError(f"Unknown choice for internal_external_choice: {internal_external_choice}")


In [10]:
df_rooftop

Unnamed: 0,timestamp_utc,sensor_sn,Air Temperature ( °C),Atmospheric Pressure ( kPa),EC ( mS/cm),Gust Speed ( m/s),Max Air Temperature ( °C),Max Precip Rate ( mm/h),Min Air Temperature ( °C),Precipitation ( mm),RH Sensor Temp ( °C),Solar Radiation ( W/m²),Tilt Angle (°),VPD ( kPa),Vapor Pressure ( kPa),Wind Direction (°),Wind Speed ( m/s),timestamp
0,1719784800,A4100209,20.0,98.00,0.0,2.11,20.1,0.0,20.0,0.0,19.7,0.0,1.4,0.28,2.067,289.7,0.68,1970-01-01 00:00:01.719784800
1,1719785100,A4100209,20.1,98.00,0.0,2.48,20.1,0.0,20.0,0.0,19.8,0.0,1.4,0.29,2.060,302.9,0.75,1970-01-01 00:00:01.719785100
2,1719785400,A4100209,20.1,98.01,0.0,2.38,20.2,0.0,20.0,0.0,19.8,0.0,1.4,0.30,2.054,298.2,0.72,1970-01-01 00:00:01.719785400
3,1719785700,A4100209,20.1,98.01,0.0,1.80,20.2,0.0,20.0,0.0,19.9,0.0,1.3,0.30,2.047,292.3,0.85,1970-01-01 00:00:01.719785700
4,1719786000,A4100209,20.1,98.02,0.0,3.53,20.2,0.0,20.0,0.0,19.9,0.0,1.3,0.32,2.036,286.0,0.99,1970-01-01 00:00:01.719786000
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
572,1719956400,A4100209,15.8,98.04,0.0,0.64,15.9,0.0,15.7,0.0,15.9,0.0,1.3,0.41,1.380,197.7,0.34,1970-01-01 00:00:01.719956400
573,1719956700,A4100209,15.7,98.04,0.0,0.54,15.8,0.0,15.6,0.0,15.8,0.0,1.4,0.40,1.378,194.1,0.24,1970-01-01 00:00:01.719956700
574,1719957000,A4100209,15.7,98.03,0.0,0.74,15.8,0.0,15.5,0.0,15.7,0.0,1.4,0.41,1.370,148.3,0.25,1970-01-01 00:00:01.719957000
575,1719957300,A4100209,15.6,98.03,0.0,1.05,15.7,,15.5,0.0,15.7,0.0,1.3,0.41,1.362,172.4,0.49,1970-01-01 00:00:01.719957300


In [11]:
query = f"Please provide me the set of air temperature values during last day in {internal_room_choice}"

response = query_engine_choice.query(
    query,
)

INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
> Pandas Instructions:
```
df[df['timestamp_utc'] >= df['timestamp_utc'].max() - 86400]['Air Temperature ( °C)']
```
> Pandas Output: 288    16.3
289    16.3
290    16.3
291    16.2
292    16.3
       ... 
572    15.8
573    15.7
574    15.7
575    15.6
576    15.6
Name: Air Temperature ( °C), Length: 289, dtype: float64


In [12]:
# Assuming you have a Response object named 'response'
actual_response = response.response

# Now you can print or further process the actual response string
print(actual_response)

288    16.3
289    16.3
290    16.3
291    16.2
292    16.3
       ... 
572    15.8
573    15.7
574    15.7
575    15.6
576    15.6
Name: Air Temperature ( °C), Length: 289, dtype: float64


##### 4.2: DORO QUERYING

In [23]:
# Internal Rooms
query_engine_DORO = PandasQueryEngine(df=df_QDORO, verbose=True)
query_engine_FOYER = PandasQueryEngine(df=df_QFOYER, verbose=True)
query_engine_HANS = PandasQueryEngine(df=df_QHANS, verbose=True)
query_engine_MOMO = PandasQueryEngine(df=df_QMOMO, verbose=True)
query_engine_RITA = PandasQueryEngine(df=df_QRITA, verbose=True)
query_engine_ROB = PandasQueryEngine(df=df_QROB, verbose=True)

# External Rooms
query_engine_rooftop = PandasQueryEngine(df=df_rooftop, verbose=True)

In [24]:
internal_external_choice = "internal"   # "external", "internal"

# If the choice is internal, I need to set the Room Number.
internal_room_choice = "QRITA"   # "QDORO", "QFOYER", "QHANS", "QMOMO", "QRITA", "QROB"


In [25]:
df_QRITA

Unnamed: 0,oxygen,health,dewpt,no2,h2s,Status,humidity,sound,temperature,sound_max,...,DeviceID,pressure,performance,pm2_5,TypPS,pm1,humidity_abs,timestamp,tvoc,o3
0,"[20.922, 1.63]",333,"[19.05, 0.84]","[62.57, 29.1]","[1.63, 0.4]",OK,"[76.847, 5.0]","[34.93, 8.4]","[23.137, 0.53]","[43.8, 4.3]",...,2476110a610045cbe5f347a25c6eecb5,"[988.8499, 1.0]",618,"[24.9, 12.5]",2.1,"[17.2, 11.9]","[15.989, 0.82]",1970-01-01 00:28:45.009675,"[0, 0]","[31.97, 9.2]"
1,"[20.922, 1.63]",337,"[19.044, 0.84]","[62.55, 29.1]","[1.62, 0.4]",OK,"[76.794, 4.99]","[35.62, 8.0]","[23.142, 0.53]","[43.8, 4.3]",...,2476110a610045cbe5f347a25c6eecb5,"[988.8499, 1.0]",619,"[24.3, 12.4]",2.1,"[17.1, 11.8]","[15.983, 0.82]",1970-01-01 00:28:45.009707,"[0, 0]","[31.96, 9.2]"
2,"[20.922, 1.63]",338,"[19.042, 0.84]","[62.54, 29.1]","[1.61, 0.4]",OK,"[76.776, 4.99]","[35.87, 8.0]","[23.144, 0.53]","[43.8, 4.3]",...,2476110a610045cbe5f347a25c6eecb5,"[988.8499, 1.0]",619,"[24.3, 12.4]",2.1,"[17.0, 11.8]","[15.981, 0.82]",1970-01-01 00:28:45.009715,"[0, 0]","[31.96, 9.2]"
3,"[20.922, 1.63]",338,"[19.04, 0.84]","[62.53, 29.1]","[1.61, 0.4]",OK,"[76.758, 4.99]","[35.87, 7.9]","[23.146, 0.53]","[43.8, 4.3]",...,2476110a610045cbe5f347a25c6eecb5,"[988.8499, 1.0]",619,"[24.4, 12.4]",2.1,"[17.0, 11.8]","[15.979, 0.82]",1970-01-01 00:28:45.009723,"[0, 0]","[31.96, 9.2]"
4,"[20.922, 1.63]",338,"[19.038, 0.84]","[62.52, 29.1]","[1.61, 0.4]",OK,"[76.741, 4.99]","[35.9, 7.9]","[23.148, 0.53]","[43.8, 4.3]",...,2476110a610045cbe5f347a25c6eecb5,"[988.8499, 1.0]",619,"[24.3, 12.4]",2.1,"[17.0, 11.9]","[15.977, 0.82]",1970-01-01 00:28:45.009731,"[0, 0]","[31.96, 9.2]"
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
87,"[20.924, 1.63]",341,"[18.996, 0.83]","[60.8, 28.4]","[1.31, 0.4]",OK,"[76.19, 4.94]","[39.97, 7.0]","[23.224, 0.53]","[42.2, 4.7]",...,2476110a610045cbe5f347a25c6eecb5,"[988.73, 1.0]",612,"[23.9, 12.2]",2.2,"[16.9, 11.8]","[15.931, 0.82]",1970-01-01 00:28:45.010416,"[0, 0]","[32.15, 9.3]"
88,"[20.925, 1.63]",343,"[18.998, 0.83]","[60.72, 28.4]","[1.27, 0.4]",OK,"[76.182, 4.94]","[38.54, 7.6]","[23.228, 0.53]","[42.2, 4.7]",...,2476110a610045cbe5f347a25c6eecb5,"[988.73, 1.0]",612,"[23.7, 12.2]",2.1,"[16.8, 11.8]","[15.933, 0.82]",1970-01-01 00:28:45.010452,"[0, 0]","[32.2, 9.3]"
89,"[20.925, 1.63]",343,"[18.998, 0.83]","[60.7, 28.4]","[1.27, 0.4]",OK,"[76.175, 4.94]","[38.24, 7.7]","[23.228, 0.53]","[42.2, 4.7]",...,2476110a610045cbe5f347a25c6eecb5,"[988.73, 1.0]",612,"[23.8, 12.2]",2.1,"[16.8, 11.8]","[15.932, 0.82]",1970-01-01 00:28:45.010460,"[0, 0]","[32.21, 9.3]"
90,"[20.925, 1.63]",343,"[18.998, 0.83]","[60.7, 28.4]","[1.27, 0.4]",OK,"[76.175, 4.94]","[38.24, 7.7]","[23.228, 0.53]","[42.2, 4.7]",...,2476110a610045cbe5f347a25c6eecb5,"[988.73, 1.0]",612,"[23.8, 12.2]",2.1,"[16.8, 11.8]","[15.932, 0.82]",1970-01-01 00:28:45.010460,"[0, 0]","[32.21, 9.3]"


In [26]:
# Set query_engine_choice based on the internal/external choice and the room choice
if internal_external_choice == "internal":
    
    if internal_room_choice == "QDORO":
        query_engine_choice = query_engine_DORO
    elif internal_room_choice == "QFOYER":
        query_engine_choice = query_engine_FOYER
    elif internal_room_choice == "QHANS":
        query_engine_choice = query_engine_HANS
    elif internal_room_choice == "QMOMO":
        query_engine_choice = query_engine_MOMO
    elif internal_room_choice == "QRITA":
        query_engine_choice = query_engine_RITA
    elif internal_room_choice == "QROB":
        query_engine_choice = query_engine_ROB
    else:
        raise ValueError(f"Unknown internal room choice: {internal_room_choice}")

elif internal_external_choice == "external":
    
    query_engine_choice = query_engine_rooftop

else:
    raise ValueError(f"Unknown choice for internal_external_choice: {internal_external_choice}")


In [27]:
df_QDORO

Unnamed: 0,oxygen,health,dewpt,no2,h2s,Status,humidity,sound,temperature,sound_max,...,DeviceID,pressure,performance,pm2_5,TypPS,pm1,humidity_abs,timestamp,tvoc,o3
0,"[20.713, 1.63]",479,"[19.664, 0.84]","[40.64, 18.2]","[8.26, 0.7]",OK,"[76.874, 4.99]","[66.84, 2.6]","[23.765, 0.53]","[74.7, 1.9]",...,61c61e378e5095bf25c28a285822f338,"[988.6, 1.0]",191,"[15.0, 11.4]",1.7,"[12.0, 11.0]","[16.577, 0.85]",1970-01-01 00:28:45.009418,"[577, 87]","[19.13, 6.1]"
1,"[20.718, 1.63]",497,"[19.626, 0.84]","[36.43, 16.8]","[8.35, 0.7]",OK,"[76.598, 4.97]","[71.64, 2.1]","[23.785, 0.53]","[78.1, 1.8]",...,61c61e378e5095bf25c28a285822f338,"[988.56, 1.0]",210,"[14.1, 11.2]",1.7,"[11.5, 11.0]","[16.537, 0.85]",1970-01-01 00:28:45.009723,"[568, 85]","[18.06, 6.0]"
2,"[20.718, 1.63]",497,"[19.626, 0.84]","[36.43, 16.8]","[8.35, 0.7]",OK,"[76.598, 4.97]","[71.64, 2.1]","[23.785, 0.53]","[78.1, 1.8]",...,61c61e378e5095bf25c28a285822f338,"[988.56, 1.0]",210,"[14.1, 11.2]",1.7,"[11.5, 11.0]","[16.537, 0.85]",1970-01-01 00:28:45.009723,"[568, 85]","[18.06, 6.0]"
3,"[20.721, 1.63]",508,"[19.584, 0.84]","[33.37, 15.7]","[8.33, 0.7]",OK,"[76.347, 4.95]","[72.37, 2.0]","[23.797, 0.53]","[77.3, 1.8]",...,61c61e378e5095bf25c28a285822f338,"[988.5099, 1.0]",222,"[13.7, 11.2]",1.7,"[11.2, 11.0]","[16.493, 0.85]",1970-01-01 00:28:45.010020,"[568, 85]","[17.74, 5.9]"
4,"[20.725, 1.63]",524,"[19.573, 0.84]","[31.61, 15.1]","[8.33, 0.7]",OK,"[76.203, 4.94]","[68.63, 2.3]","[23.817, 0.53]","[75.4, 1.9]",...,61c61e378e5095bf25c28a285822f338,"[988.46, 1.0]",230,"[12.5, 11.1]",1.6,"[10.7, 11.0]","[16.481, 0.85]",1970-01-01 00:28:45.010326,"[582, 87]","[18.15, 6.0]"


In [29]:
query = f"Please provide me health values during last day in the room {internal_room_choice}"

response = query_engine_choice.query(
    query,
)

INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
> Pandas Instructions:
```
df[df['DeviceID'] == '2476110a610045cbe5f347a25c6eecb5']['health']
```
> Pandas Output: 0     333
1     337
2     338
3     338
4     338
     ... 
87    341
88    343
89    343
90    343
91    342
Name: health, Length: 92, dtype: int64


### STILL NOT TESTED

In [None]:
df = pd.read_csv("./MQTT Client/Room MQTT Client/data/airQDORO.csv") 
df


Unnamed: 0,oxygen,health,dewpt,no2,h2s,Status,humidity,sound,temperature,sound_max,...,DeviceID,pressure,performance,pm2_5,TypPS,pm1,humidity_abs,timestamp,tvoc,o3
0,"[20.713, 1.63]",479,"[19.664, 0.84]","[40.64, 18.2]","[8.26, 0.7]",OK,"[76.874, 4.99]","[66.84, 2.6]","[23.765, 0.53]","[74.7, 1.9]",...,61c61e378e5095bf25c28a285822f338,"[988.6, 1.0]",191,"[15.0, 11.4]",1.7,"[12.0, 11.0]","[16.577, 0.85]",1725009418000,"[577, 87]","[19.13, 6.1]"
1,"[20.718, 1.63]",497,"[19.626, 0.84]","[36.43, 16.8]","[8.35, 0.7]",OK,"[76.598, 4.97]","[71.64, 2.1]","[23.785, 0.53]","[78.1, 1.8]",...,61c61e378e5095bf25c28a285822f338,"[988.56, 1.0]",210,"[14.1, 11.2]",1.7,"[11.5, 11.0]","[16.537, 0.85]",1725009723000,"[568, 85]","[18.06, 6.0]"
2,"[20.718, 1.63]",497,"[19.626, 0.84]","[36.43, 16.8]","[8.35, 0.7]",OK,"[76.598, 4.97]","[71.64, 2.1]","[23.785, 0.53]","[78.1, 1.8]",...,61c61e378e5095bf25c28a285822f338,"[988.56, 1.0]",210,"[14.1, 11.2]",1.7,"[11.5, 11.0]","[16.537, 0.85]",1725009723000,"[568, 85]","[18.06, 6.0]"
3,"[20.721, 1.63]",508,"[19.584, 0.84]","[33.37, 15.7]","[8.33, 0.7]",OK,"[76.347, 4.95]","[72.37, 2.0]","[23.797, 0.53]","[77.3, 1.8]",...,61c61e378e5095bf25c28a285822f338,"[988.5099, 1.0]",222,"[13.7, 11.2]",1.7,"[11.2, 11.0]","[16.493, 0.85]",1725010020000,"[568, 85]","[17.74, 5.9]"
4,"[20.725, 1.63]",524,"[19.573, 0.84]","[31.61, 15.1]","[8.33, 0.7]",OK,"[76.203, 4.94]","[68.63, 2.3]","[23.817, 0.53]","[75.4, 1.9]",...,61c61e378e5095bf25c28a285822f338,"[988.46, 1.0]",230,"[12.5, 11.1]",1.6,"[10.7, 11.0]","[16.481, 0.85]",1725010326000,"[582, 87]","[18.15, 6.0]"


In [None]:
query_engine = PandasQueryEngine(df=df, verbose=True)
response = query_engine.query(
    "Please provide me the humidity in room DORO during last day",
)

#### Wrap in LLamaIndex objects for easier handling/compatability

In [None]:
documents = SimpleDirectoryReader("preprocessed").load_data()

In [None]:
# ... basically, we are just specifying the storage to be used as the ChromaDB
vector_store = ChromaVectorStore(chroma_collection=chroma_collection)
storage_context = StorageContext.from_defaults(vector_store=vector_store)

#### Specify the embedding model 

In [None]:

# let's use OpenAI out-of-the box embeddings.
embed_model = OpenAIEmbedding(model="text-embedding-3-small")

### Start building the database from our config!
Note that you could add multiple processing steps here, such as:
- Using an [ingestion pipeline](https://docs.llamaindex.ai/en/stable/module_guides/loading/ingestion_pipeline/) for further preprocessing
- [modifying chunk size and overlap](https://docs.llamaindex.ai/en/stable/optimizing/basic_strategies/basic_strategies/#chunk-sizes) or introduce specific chunking strategy
- and others

In [None]:
# You could modify chunk size and overlap like this
# Settings.chunk_size = 512
# Settings.chunk_overlap = 50

In [None]:
from llama_index.core import VectorStoreIndex
# now let's build an index for the database using pre-built functionality
# - Chunk the documents
# - Retrieve embeddings for document chunks
# - Create nodes in db based on docs/chunks
# - Index database for fast retrieval
index = VectorStoreIndex.from_documents(
    documents, storage_context=storage_context, embed_model=embed_model,
    show_progress=True
)

Parsing nodes:   0%|          | 0/41 [00:00<?, ?it/s]

Generating embeddings:   0%|          | 0/65 [00:00<?, ?it/s]

In [None]:
# in case we are loading from disk, uncomment
# from llama_index.core import load_index_from_storage
# index2 = load_index_from_storage(storage_context)

#### Test created embedding/chunks


### Specify LLM-Chat interface
Now, we want to build the communication between an LLM and our database that resembles our typical RAG setup:
![Typical RAG pipeline](RAG_pipeline.png)



Using LlamaIndex, this is deceptively easy.

#### Specify details about retrieval from vector db

In [None]:
from llama_index.core.retrievers import VectorIndexRetriever
from llama_index.core.query_engine import RetrieverQueryEngine

import logging
import sys

# Let's do some logging
logging.basicConfig(stream=sys.stdout, level=logging.DEBUG)
logging.getLogger().addHandler(logging.StreamHandler(stream=sys.stdout))

#### Configure retrieval from VectorDB 

In [None]:
# this specifies the details for retrieving the k closest elements to the user query
retriever = VectorIndexRetriever(
    index=index,
    similarity_top_k=5, # how many documents should we consider? Let's do 5
    verbose=True
)

#### Specify the used LLM
in this case, we use the OpenAI GPT4o-mini (very performant and cheap)

In [None]:
# %pip install llama-index-llms-openai

In [None]:
from llama_index.llms.openai import OpenAI
# this is an OpenAI wrapper for llama_index
llm = OpenAI(model="gpt-4o-mini") 

#### Specify the prompt

Do a simple [RAG-prompt](https://docs.llamaindex.ai/en/stable/examples/prompts/prompts_rag/)

In [None]:
from llama_index.core import PromptTemplate
from llama_index.core import get_response_synthesizer

In [None]:
# Let's specify a prompt similar to what we have learned earlier
custom_query = """
    You are an information chatbot that informs users about the Interdisciplinary Transformation University Austria (ITU) in Linz, Austria. 
    
    Here is the context information:
    ---------------------
    {context_str}
    ---------------------
    Given the context information, this prompt, and no prior knowledge, answer the query. 
    The answer must be 100 words or less.
    
    Query: {query_str}
    Answer: """

In [None]:
# this specifies how we utilize the retrieved chunks/text in the response
# configure response synthesizer
rag_prompt = PromptTemplate(custom_query) # use LLama_index wrapper to create our query

# Build response synthesizer:
# i.e., object that combines user prompt, retrieved context, and our RAG prompt and sends it to the LLM (GPT-4o-mini)
response_synthesizer = get_response_synthesizer(
    llm=llm, text_qa_template=rag_prompt, verbose=True)

#### "Assemble" query engine 
Combine other config into the actual logic that will do the querying for us.

Again, we will stick to the basics here.


In [None]:
from llama_index.core.query_engine import RetrieverQueryEngine
#node_postprocessors=[SimilarityPostprocessor(similarity_cutoff=0.7)],

# assemble query engine
query_engine = RetrieverQueryEngine(
    retriever=retriever, # configuration for retrieval of vector chunks
    response_synthesizer=response_synthesizer, # config for synthesizing LLM prompt/response
)

#### Let's prompt away!

In [None]:
oai_response = query_engine.query('Who is the founding president?')

In [None]:
oai_response.response

'The Founding President of the Interdisciplinary Transformation University Austria (ITU) is Prof. Dr. Stefanie Lindstaedt.'

In [None]:
# which files were used as context information? 
oai_response.metadata

{'542fde3f-28d1-4690-a8a9-dd7722665809': {'file_path': '/home/jovyan/work/preprocessed/en_public-notice_provisional-bylaws-of-itu-idsa.txt',
  'file_name': 'en_public-notice_provisional-bylaws-of-itu-idsa.txt',
  'file_type': 'text/plain',
  'file_size': 36144,
  'creation_date': '2024-08-26',
  'last_modified_date': '2024-08-26'},
 '2b55da0d-4516-441d-bc60-44fb958bd8c3': {'file_path': '/home/jovyan/work/preprocessed/en_digital-transformation-university_organization.txt',
  'file_name': 'en_digital-transformation-university_organization.txt',
  'file_type': 'text/plain',
  'file_size': 2374,
  'creation_date': '2024-08-26',
  'last_modified_date': '2024-08-26'},
 '81983a30-d6ca-4fce-a39b-6b0ae3c756c5': {'file_path': '/home/jovyan/work/preprocessed/en_public-notice_provisional-bylaws-of-itu-idsa.txt',
  'file_name': 'en_public-notice_provisional-bylaws-of-itu-idsa.txt',
  'file_type': 'text/plain',
  'file_size': 36144,
  'creation_date': '2024-08-26',
  'last_modified_date': '2024-08-2

In [None]:
# some more details
example_node_id = oai_response.source_nodes[1].node_id

print(f'Gathered information from {len(oai_response.source_nodes)} text chunks, for example:\n'
      f'Node ID: {example_node_id}\n'
      f'Document: {oai_response.metadata[example_node_id]["file_name"]}\n'
      f'Text:\n{oai_response.source_nodes[1].text}')

Gathered information from 3 text chunks, for example:
Node ID: 2b55da0d-4516-441d-bc60-44fb958bd8c3
Document: en_digital-transformation-university_organization.txt
Text:
:study:careerhome : about : organization
© Felix Büchele - IT:U:organization© Lunghammer – TU GrazDipl.-Ing.in Claudia von der Linden, MBA (IMD)Chairwoman of the Founding Convent© Antje Wolm – IT:UProf. Dr.in Stefanie LindstaedtFounding President© Felix Büchele – IT:UGabriele Költringer, EMBAManaging Directorinternational strategic advisory boardfounding conventfounding presidentfounding advisory boardmanaging directorFounding ConventThe Founding Convent is the strategic body of the university during the founding phase. Two of its members were nominated by the province of Upper Austria, three by the Federal Ministry of Education, Science and Research (BMBWF), two by the Federal Ministry for Climate Protection, Environment, Energy, Mobility, Innovation and Technology (BMK), one by the Austrian Science Fund (FWF) and one

#### Some other queries

In [None]:
oai_response2 = query_engine.query('Is there a summer school?')

In [None]:
oai_response2.response

"Yes, the Interdisciplinary Transformation University Austria (ITU) is hosting a Summer School in 2024, which has attracted over 200 applicants from 66 countries. The program emphasizes interdisciplinary collaboration and diverse academic backgrounds, with approximately 40 participants expected to be selected for this unique learning opportunity. The review committee is currently evaluating applications, and updates on the selection process will be provided as preparations continue. For more information, you can check the university's website."

In [None]:
oai_response3 = query_engine.query('Are they hiring?')

In [None]:
oai_response3.response

'Yes, the Interdisciplinary Transformation University Austria (ITU) is currently hiring. They have up to 12 postdoctoral positions available in the field of Computational X, as well as openings for a LMS Administrator – Full Stack Developer, Content Creator, Financial Controller, Project Controller, and Software Developer. Interested candidates can apply online and are encouraged to submit their applications, including a CV and cover letter. The application deadline for the postdoctoral positions is September 15th, 2024.'