<a href="https://colab.research.google.com/github/antonum/Timescale-Workshops/blob/main/RAG_in_SQL.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# RAG in a single SQL statement

![Timescale logo](https://docs.timescale.com/static/logo-white-c6cf9b4c58cd066908a6335ece1957fd.svg)





In [1]:
import psycopg2
from google.colab import userdata
# Example of the expected connection-string shape (placeholders only, never commit real credentials):
#CONNECTION="postgres://tsdbadmin:<password>@<host>.tsdb.cloud.timescale.com:39966/tsdb?sslmode=require"
# Read the connection string from the Colab secret named TS_CONNECTION instead of hardcoding it.
CONNECTION=userdata.get('TS_CONNECTION')
# One connection and one cursor, reused by every later cell in this notebook.
conn = psycopg2.connect(CONNECTION)
cursor = conn.cursor()

In [2]:
import pandas as pd
# helper function to convert SQL Results to the dataframe
def sql_results_to_df(cursor):
  """Return the pending result set of ``cursor`` as a pandas DataFrame.

  Args:
    cursor: A DB-API cursor on which a statement has already been executed.

  Returns:
    A DataFrame with one column per entry of ``cursor.description`` (named
    after the first field of each entry) and one row per fetched record.
    An empty DataFrame is returned when the last statement produced no
    result set.
  """
  # DB-API cursors expose description=None after statements that return no
  # rows (e.g. DDL); iterating it would raise TypeError.
  if cursor.description is None:
    return pd.DataFrame()
  columns = [desc[0] for desc in cursor.description]
  return pd.DataFrame(cursor.fetchall(), columns=columns)

In [31]:
# Start from a clean slate: remove the dataset table if an earlier run left it behind.
# CASCADE also drops database objects that depend on the table.
query = """
DROP TABLE IF EXISTS wiki_movie_plots_with_summaries CASCADE;
"""
cursor.execute(query)
conn.commit()

In [32]:
# Load the 'vishnupriyavr/wiki-movie-plots-with-summaries' dataset via pgai's ai.load_dataset.
# if_table_exists=>'drop' asks pgai to replace a table left over from a previous load.
# NOTE(review): presumably fetched from the Hugging Face hub; later cells read the result as
# "wiki_movie_plots_with_summaries" -- confirm the table-naming rule against the pgai docs.
query = """
select ai.load_dataset('vishnupriyavr/wiki-movie-plots-with-summaries', if_table_exists=>'drop' );
"""
cursor.execute(query)
conn.commit()

In [64]:
# Preview the loaded table: ten rows taken from the most recent release years.
# Only "Release Year" is ordered, so which rows appear within one year is not fixed by the query.
query = """
select "Release Year", "Title", "Plot" from "wiki_movie_plots_with_summaries"
ORDER BY "Release Year" DESC
LIMIT 10;
"""
cursor.execute(query)
# Last expression of the cell, so the DataFrame is rendered as the cell output.
sql_results_to_df(cursor)

Unnamed: 0,Release Year,Title,Plot
0,2017,The Book of Love,After the accidental death of his free-spirite...
1,2017,The Resurrection of Gavin Stone,Former child star Gavin Stone is now a washed-...
2,2017,The Bye Bye Man,"In 1969, a mass murder occurs in which a man k..."
3,2017,100 Streets,The film centers on three characters who have ...
4,2017,Split,Casey Cooke is an emotionally withdrawn teenag...
5,2017,xXx: Return of Xander Cage,NSA Agent Augustus Gibbons attempts to recruit...
6,2017,Underworld: Blood Wars,The remaining vampire covens are on the verge ...
7,2017,Monster Trucks,Terravex Oil is in the midst of a fracking ope...
8,2017,Sleepless,"In Las Vegas, vice LVMPD policemen Vincent Dow..."
9,2017,Trespass Against Us,"To give a better future to his son, Chad Cutle..."


In [34]:
# End whatever transaction is still open on the shared connection before running DDL.
# NOTE(review): looks like a workaround for a transaction left aborted by a failed cell -- confirm.
conn.commit()
# Give every movie row a surrogate integer primary key; the vectorizer created in a
# later cell is built on this table (pgai vectorizers expect a primary key on the source).
# IF NOT EXISTS keeps the cell idempotent: re-running it no longer fails with
# "column already exists" and no longer leaves the transaction aborted.
query = """
ALTER TABLE wiki_movie_plots_with_summaries
ADD COLUMN IF NOT EXISTS id SERIAL PRIMARY KEY;
"""
cursor.execute(query)
conn.commit()


In [71]:
# Standalone commit with no statement of its own in this cell.
# NOTE(review): looks like a leftover from interactive debugging (its execution count is
# out of order with the surrounding cells) -- consider deleting this cell.
conn.commit()

In [35]:
# Create a pgai vectorizer on the movies table:
#   - destination: results are exposed as 'movies_embeddings3', which later cells query
#   - embedding:   OpenAI 'text-embedding-3-small' with 1536 dimensions
#   - chunking:    the "Plot" column is split with a recursive character text splitter
#   - indexing:    a DiskANN index is requested for the embeddings
query = """
SELECT ai.create_vectorizer(
   'wiki_movie_plots_with_summaries'::regclass,
   destination => 'movies_embeddings3',
   embedding => ai.embedding_openai('text-embedding-3-small', 1536),
   chunking => ai.chunking_recursive_character_text_splitter('Plot'),
   indexing => ai.indexing_diskann()
);
"""
cursor.execute(query)
# Commit first; the rows returned by execute() stay available on the client-side cursor.
conn.commit()
# End the cell on the DataFrame so the value returned by create_vectorizer is actually
# displayed (previously it was built mid-cell and silently discarded).
sql_results_to_df(cursor)

In [39]:
# Peek at the vectorizer destination: as the output below shows, each row pairs one text
# chunk (chunk_seq, chunk) and its embedding with the columns of the source movie.
# There is no ORDER BY, so the ten rows are whatever the database returns first.
query = """
select * from movies_embeddings3
limit 10;
"""
cursor.execute(query)
sql_results_to_df(cursor)

Unnamed: 0,embedding_uuid,chunk_seq,chunk,embedding,Release Year,Title,Origin/Ethnicity,Director,Cast,Genre,Wiki Page,Plot,PlotSummary,id
0,4afe84a3-1b94-453b-9b01-e33dce512dd2,0,"A bartender is working at a saloon, serving dr...","[-0.011869899,0.039477732,-0.002416647,0.02663...",1901,Kansas Saloon Smashers,American,Unknown,,unknown,https://en.wikipedia.org/wiki/Kansas_Saloon_Sm...,"A bartender is working at a saloon, serving dr...",Carrie Nation and her followers burst into a s...,1
1,78b2c385-daba-458f-bdef-2f649aab844b,0,"The moon, painted with a smiling face hangs ov...","[0.008271699,-0.009916825,-0.013569008,-0.0024...",1901,Love by the Light of the Moon,American,Unknown,,unknown,https://en.wikipedia.org/wiki/Love_by_the_Ligh...,"The moon, painted with a smiling face hangs ov...","The moon, painted with a smiling face hangs ov...",2
2,3a43d48f-4ffe-487b-973f-07a507043996,0,"The film, just over a minute long, is composed...","[-0.01929859,0.007857089,0.0142286215,0.005434...",1901,The Martyred Presidents,American,Unknown,,unknown,https://en.wikipedia.org/wiki/The_Martyred_Pre...,"The film, just over a minute long, is composed...","The film, just over a minute long, is composed...",3
3,e56cc832-88ba-414e-85e4-00d9f363d312,0,Lasting just 61 seconds and consisting of two ...,"[-0.007654703,-0.0008206655,-0.008439056,-0.01...",1901,"Terrible Teddy, the Grizzly King",American,Unknown,,unknown,"https://en.wikipedia.org/wiki/Terrible_Teddy,_...",Lasting just 61 seconds and consisting of two ...,The first shot is set in a wood during winter ...,4
4,9b77c8b5-5d1d-49a2-8166-4845caf0e361,1,". ""Teddy"" aims his rifle upward at the tree an...","[0.039645053,0.007756961,-0.0022020945,-0.0145...",1901,"Terrible Teddy, the Grizzly King",American,Unknown,,unknown,"https://en.wikipedia.org/wiki/Terrible_Teddy,_...",Lasting just 61 seconds and consisting of two ...,The first shot is set in a wood during winter ...,4
5,ff0e853c-039d-43b2-9f80-4786a066707f,0,The earliest known adaptation of the classic f...,"[0.0042759217,0.0065219556,0.0072925612,-0.020...",1902,Jack and the Beanstalk,American,"George S. Fleming, Edwin S. Porter",,unknown,https://en.wikipedia.org/wiki/Jack_and_the_Bea...,The earliest known adaptation of the classic f...,The earliest known adaptation of the classic f...,5
6,ec5f58fa-7285-4b7f-b7b7-26482f8f2aef,0,"Alice follows a large white rabbit down a ""Rab...","[0.0014899907,-0.034087874,0.046704993,-0.0035...",1903,Alice in Wonderland,American,Cecil Hepworth,May Clark,unknown,https://en.wikipedia.org/wiki/Alice_in_Wonderl...,"Alice follows a large white rabbit down a ""Rab...","Alice follows a large white rabbit down a ""Rab...",6
7,39cdd043-b0e5-4961-b3d6-6dd4696caf92,1,"She enters a kitchen, in which there is a cook...","[0.008044878,-0.029636292,0.022837073,0.014130...",1903,Alice in Wonderland,American,Cecil Hepworth,May Clark,unknown,https://en.wikipedia.org/wiki/Alice_in_Wonderl...,"Alice follows a large white rabbit down a ""Rab...","Alice follows a large white rabbit down a ""Rab...",6
8,a85686af-ac89-4177-9111-12fcd7a05acc,0,"Described as ""a liberal adaptation of Mrs. She...","[0.015609496,0.03383736,-0.068681784,0.0091894...",1910,Frankenstein,American,J. Searle Dawley,"Augustus Phillips, Charles Stanton Ogle, Mary ...",unknown,https://en.wikipedia.org/wiki/Frankenstein_(19...,"Described as ""a liberal adaptation of Mrs. She...",The plot description in the Edison Kinetogram ...,7
9,989a8d3a-8c2e-4b38-821c-7dc59adfd66f,0,"In this film's version of the story, four of t...","[-0.020853277,0.053743005,-0.042499077,-0.0053...",1955,Rage at Dawn,American,Tim Whelan,"Randolph Scott, Forrest Tucker",western,https://en.wikipedia.org/wiki/Rage_at_Dawn,"In this film's version of the story, four of t...","In this film's version of the story, four of t...",8


In [21]:
# Embed one ad-hoc string directly from SQL, using the same model name and dimension
# count (1536) that the vectorizer cell configures.
query = """
--Generate single embedding
SELECT ai.openai_embed('text-embedding-3-small', 'good food', dimensions=>1536);
"""
cursor.execute(query)
sql_results_to_df(cursor)

Unnamed: 0,openai_embed
0,"[-0.012252327,-0.014671666,-0.027942661,-0.012..."


In [73]:
# Semantic search: embed the query text in SQL and rank the rows of movies_embeddings3 by
# the pgvector `<=>` (cosine distance) operator; smaller vector_distance means closer.
# Rows are per-chunk while the selected "Plot" is the movie's full plot, so the same
# title/plot can appear several times in the top 10 (see the output below).
query = """
SELECT
"Release Year", "Title", "Plot",
    embedding <=> ai.openai_embed(
        'text-embedding-3-small',
        'James Bond in Rome, Italy', -- your RAG query
        dimensions => 1536
    ) AS vector_distance
FROM
    movies_embeddings3
ORDER BY
    vector_distance
LIMIT 10;
"""
cursor.execute(query)
sql_results_to_df(cursor)

Unnamed: 0,Release Year,Title,Plot,vector_distance
0,2008,Quantum of Solace,James Bond is driving from Lake Garda to Siena...,0.403066
1,2008,Quantum of Solace,James Bond is driving from Lake Garda to Siena...,0.403066
2,1963,From Russia with Love,Seeking to exact revenge on James Bond (007) f...,0.418277
3,1963,From Russia with Love,Seeking to exact revenge on James Bond (007) f...,0.418851
4,1963,From Russia with Love,Seeking to exact revenge on James Bond (007) f...,0.424854
5,1969,On Her Majesty's Secret Service,"In Portugal, James Bond – agent 007, sometimes...",0.428215
6,2008,Quantum of Solace,James Bond is driving from Lake Garda to Siena...,0.432349
7,2008,Quantum of Solace,James Bond is driving from Lake Garda to Siena...,0.432349
8,2015,Spectre,A posthumous message from the previous M leads...,0.437267
9,2008,Quantum of Solace,James Bond is driving from Lake Garda to Siena...,0.437724


In [66]:
# Baseline without retrieval: send a system + user message pair to 'gpt-4o' through
# ai.openai_chat_complete and pull the first choice's message text out of the JSON reply.
# The extracted expression has no alias, so the result column is reported as "?column?".
query = """
-- Ask a question to OpenAI using pgAI
SELECT ai.openai_chat_complete(
  'gpt-4o',
  jsonb_build_array(
    jsonb_build_object(
      'role', 'system',
      'content', 'You are a helpful assistant.'
    ),
    jsonb_build_object(
      'role', 'user',
      'content', 'What famous movies are set in Rome?'
    )
  )
)->'choices'->0->'message'->>'content';
"""
cursor.execute(query)
sql_results_to_df(cursor)


Unnamed: 0,?column?
0,Rome has been the backdrop for many famous mov...


# RAG in a single SQL query

Combining semantic search with a call to a generative AI model gives us a full RAG solution in a single SQL query.

In [74]:
# The user's question. It is bound once as a named psycopg2 parameter and used twice in
# the SQL below (in the LLM prompt and in the embedding call), so the two can never drift.
question = 'James Bond in Rome, Italy'

# Full RAG in one statement:
#   1. the inner query embeds the question and keeps the 10 nearest chunks, formatting
#      each as " Title: ... Year: ... Plot: <chunk>";
#   2. the outer query concatenates those chunks into the user prompt, calls 'gpt-4o',
#      and returns the answer (summary) next to the raw context (raw_descriptions).
# The query must not contain a literal '%' now that parameters are passed to execute().
query = """
-- Summarization of vector query results
SELECT
    ai.openai_chat_complete(
        'gpt-4o',
        jsonb_build_array(
            jsonb_build_object(
                'role', 'system',
                'content', 'you are a helpful assistant'
            ),
            jsonb_build_object(
                'role', 'user',
                'content', concat('Recommend the movie for ', %(question)s, '. Use only the context below context: ',string_agg(x.report, E'\n\n'))
            )
        )
    )->'choices'->0->'message'->>'content' AS summary,
    string_agg(x.report, E'\n\n') AS raw_descriptions
FROM (
    SELECT
        id,
        ' Title: ' || "Title" ||' Year: ' || "Release Year" ||  ' Plot: ' || Chunk AS report,
        embedding <=> ai.openai_embed(
            'text-embedding-3-small',
            %(question)s, -- your semantic query
            dimensions => 1536
        ) AS distance
    FROM
        movies_embeddings3
    ORDER BY
        distance
    LIMIT 10
) x;

"""
cursor.execute(query, {"question": question})
sql_results_to_df(cursor)

Unnamed: 0,summary,raw_descriptions
0,"I recommend the movie **""Spectre"" (2015)** for...",Title: Quantum of Solace Year: 2008 Plot: Jam...
