# Imports

In [1]:
import sqlite3
import pandas as pd

In [2]:
from langchain.chains import RetrievalQA
from langchain.vectorstores import FAISS
from langchain.embeddings.openai import OpenAIEmbeddings
from langchain_openai import ChatOpenAI

In [3]:
import re
import sqlparse

In [4]:
from IPython.display import Markdown, display

def print_markdown(text):
    """Render ``text`` as Markdown in the notebook output area."""
    rendered = Markdown(text)
    display(rendered)

In [59]:
from IPython.display import clear_output

def clear_notebook_output():
    """Clear the current cell's output via ``clear_output`` with ``wait=True``."""
    # wait=True asks IPython to hold the wipe until replacement output is available.
    defer_until_new_output = True
    clear_output(wait=defer_until_new_output)

In [5]:
import warnings

# Suppress all warnings.
# NOTE(review): this blanket 'ignore' filter also hides deprecation warnings
# raised by the libraries imported above.
warnings.filterwarnings('ignore')


# Load Vector Store

In [6]:
# Load the FAISS index stored under "faiss_index", using the OpenAI
# "text-embedding-ada-002" model as the embedding function.
# NOTE(review): allow_dangerous_deserialization=True opts in to loading
# serialized (presumably pickled) data; only load an index you created or trust.
vectorstore = FAISS.load_local(
    "faiss_index", 
    OpenAIEmbeddings(model="text-embedding-ada-002"), 
    allow_dangerous_deserialization=True
)

# Initialize the language model (chat model) that backs the RAG chain below.
# max_tokens=None and timeout=None pass no explicit limit; max_retries=5
# presumably caps how often a failed request is retried.
llm = ChatOpenAI(model="gpt-4o",
                max_tokens=None,
                timeout=None,
                max_retries=5)

# Setup RAG 

In [7]:
# Set up Retrieval Augmented Generation (RAG): a RetrievalQA chain built from
# the chat model and a retriever over the FAISS vector store.
qa_chain = RetrievalQA.from_chain_type(llm, retriever=vectorstore.as_retriever())

In [8]:
def run_chat_query(query):
    """Send ``query`` through the retrieval QA chain and return its reply."""
    response = qa_chain.run(query)
    return response

In [9]:
run_chat_query("What is the main topic of the conference [<10 words]")

'Python programming, technology, and innovation in diverse fields.'

# Setup SQL Database

In [10]:
def run_sql_query(query, debug=False, db_path='data/conference_data.db'):
    """
    Run a SQL query against the SQLite database and return the result.

    A fresh connection is opened for each call and closed before returning.

    Parameters
    ----------
    query : str
        SQL text. It is executed exactly as given; no validation is done here.
    debug : bool, default False
        When True, print the query and the resulting DataFrame.
    db_path : str, default 'data/conference_data.db'
        SQLite database file to open.

    Returns
    -------
    pandas.DataFrame or None
        The query result, or None when connecting or executing failed (the
        error and the offending query are printed).
    """
    # Stays None when connect() itself raises, so the cleanup below does not
    # fail on an unbound name and mask the original error.
    conn = None
    try:
        # Create a new connection each time
        conn = sqlite3.connect(db_path)

        # Execute the query and fetch the result
        result = pd.read_sql_query(query, conn)

        if debug:
            print("Running SQL Query:")
            print(query)
            print(result)
        return result
    except Exception as e:
        print(f"Error occurred: {e}")
        print("### QUERY ###")
        print(query)
        print("#############")
        return None
    finally:
        # Always close the connection after execution (if one was opened)
        if conn is not None:
            conn.close()

In [11]:
# List each table's columns and declared types by joining sqlite_master
# against the pragma_table_info() table-valued function.
query = '''
SELECT 
    m.name AS table_name, 
    p.name AS column_name, 
    p.type AS data_type
FROM 
    sqlite_master m 
JOIN 
    pragma_table_info(m.name) p 
WHERE 
    m.type = 'table'
ORDER BY 
    table_name, p.cid;


'''
run_sql_query(query)

Unnamed: 0,table_name,column_name,data_type
0,github,speaker_id,INTEGER
1,github,github_url,TEXT
2,github,username,TEXT
3,github,name,TEXT
4,github,public_repos,INTEGER
5,github,total_stars,INTEGER
6,github,most_starred_repo_name,TEXT
7,github,most_starred_repo_stars,INTEGER
8,github,last_commit_datetime,TEXT
9,github,hours_since_last_commit,REAL


In [12]:
# Sanity check: count the rows in the talks table.
query = '''
SELECT COUNT(*)
FROM talks t;
'''
run_sql_query(query)

Unnamed: 0,COUNT(*)
0,36


# Agent Prompts

In [13]:
question = "How many talks are there about artificial intelligence?"

In [14]:
def get_prompt_decide_LLM_or_SQL(question):
    """
    Build the routing prompt that asks the model to reply "SQL" or "LLM".

    ``question`` is interpolated into a fixed template that also embeds
    hard-coded sample rows from the github, speakers, talks and topics tables.

    NOTE(review): the sample headers for speakers/talks/topics ("Speaker ID",
    "Talk ID", ...) differ from the snake_case column names listed in the
    SQL-writing prompt; confirm which matches the database.
    """
    return f'''
    You are an expert analyst who can decide whether it is better to rely on a database of documents and general reasoning or whether we should use a sql query to answer a question
        
    Your job is to decide whether the input QUESTION would be better answered with SQL or LLM.
    
    A question answered better with SQL will tend to be analytical or questions about numbers.
    
    A question better answered with an LLM that has documents in its context via RAG would be something like "what is alex conway's talk about"
    
    The QUESTION is:
    {question}
    
    When you believe it is better to answer with the LLM and the document store, reply "LLM" and nothing else.
    
    When you believe it is better to write a SQL query and check the database to answer the question then reply "SQL" and nothing else.
        
        IMPORTANT instructions:
        1) Only reply with a single word - either SQL or LLM
    
    
    Here is the table structure of the sql data:
    github table:
    speaker_id,github_url,username,name,public_repos,total_stars,most_starred_repo_name,most_starred_repo_stars,last_commit_datetime,hours_since_last_commit,followers,following,created_at,company,bio,location,blog,email
    8,https://github.com/sixfeetup,sixfeetup,Six Feet Up,127,210,scaf,79,2024-09-25T17:04:43+00:00,164.0354158066667,19,0,2011-09-08T20:39:11+00:00,,"Accelerating IMPACT with App Dev, AI & Big Data","Fishers, IN",http://www.sixfeetup.com,info@sixfeetup.com
    20,https://github.com/gijzelaerr,gijzelaerr,Gijs Molenaar,172,748,python-snap7,648,2024-09-17T16:20:59+00:00,356.8318311875,188,111,2010-07-08T11:22:39+00:00,spotify,"astro software composer, music brewer, beer programmer, senior system engineer @spotify",Amsterdam - Windhoek,http://pythonic.nl,gijsmolenaar@gmail.com
    21,https://github.com/luisdza,luisdza,Luis de Sousa,8,3,aerialmzansi-website,2,2024-08-22T13:24:29+00:00,983.7772850216667,23,112,2014-04-12T05:41:28+00:00,@Syeop,Creating and dreaming,"Johannesburg, South Africa",,
    ...
    
    speakers table:
    Speaker ID,Name,Profile URL,Photo URL,Twitter,GitHub,Fediverse,Bio Part 1,Bio Part 2,Bio Part 3
    1,Alex Conway,https://2024.za.pycon.org/users/alxcnwy/,No photo available,https://twitter.com/alxcnwy,https://github.com/alxcnwy,,Twitter Profile link:https://twitter.com/alxcnwy,Other Social:https://www.linkedin.com/in/alxcnwy/,github profile:https://github.com/alxcnwy
    2,Nunudzai Mrewa,https://2024.za.pycon.org/users/JustNunuz/,No photo available,,https://github.com/JustNunuz,,Twitter Profile link:https://x.com/_absolute_z3r0,Other Social:https://www.linkedin.com/in/nunuz/,github profile:https://github.com/JustNunuz
    3,Brenden Taylor,https://2024.za.pycon.org/users/brenden.taylor22/,No photo available,,,,"I am a data scientist at Praelexis, in Stellenbosch. I have mostly worked in financial services, specifically in time series forecasting in Python.",,
    ...
    
    talks table:
    Talk ID,Title,Abstract,Speaker Name,Speaker Profile URL,Talk URL,Speaker ID
    1,Applying AI with Python,"Artificial Intelligence (AI) is transforming industries and our lives by enabling computers to solve increasingly complex tasks, and Python stands at the forefront of this revolution. In this talk, we will explore how Python's rich ecosystem of AI libraries and frameworks can be harnessed to quickly solve real-world problems without needing a PhD. From computer vision to time series analysis to natural language processing and AI agents, this session will demonstrate practical AI applications in real-world use cases that leverage Python's versatility so you can get started using AI right away.
    ...
    
    topics table:
    Talk ID,Assigned Topic
    1,AI and Machine Learning
    2,Community and Tech Education
    3,Time Series Analysis
    4,AI and Machine Learning
    5,Natural Language Processing (NLP)
    6,Community and Tech Education
    7,Community and Tech Education
    8,Community and Tech Education
    ...
    
    
    '''

In [15]:
# Route the example question; the prompt asks for the single word "SQL" or "LLM".
prompt_decide_LLM_or_SQL = get_prompt_decide_LLM_or_SQL(question)
run_chat_query(prompt_decide_LLM_or_SQL)

'SQL'

## SQL type of Query:

In [16]:
def get_prompt_write_SQL_query_for_question(question):
    """
    Build the prompt that asks the model to write one SQL query answering ``question``.

    The returned text embeds a hard-coded schema listing, sample rows and
    column dumps from the github, speakers, talks and topics tables, followed
    by numbered instructions. ``question`` is interpolated twice.

    NOTE(review): the "bad query" example filters on "Assigned Topic" and names
    "Artificial Intelligence" as a topic, while the schema listing in this same
    prompt uses ``assigned_topic`` and the topic dump contains "AI and Machine
    Learning"; confirm the example is intentional. Instruction 5 also says the
    topics are provided "below" although they appear above it.
    """
    return f'''
    You are a Senior SQL Programmer who can understand the complex table structures and form SQL queries as per the user's question.
        
    Your job is to write a SQL query that answers the question and return the SQL code for the query and nothing else.
    
    The QUESTION is:
    {question}
    
    Here is the table structure of the sql data:
    
       table_name              column_name data_type
    0      github               speaker_id   INTEGER
    1      github               github_url      TEXT
    2      github                 username      TEXT
    3      github                     name      TEXT
    4      github             public_repos   INTEGER
    5      github              total_stars   INTEGER
    6      github   most_starred_repo_name      TEXT
    7      github  most_starred_repo_stars   INTEGER
    8      github     last_commit_datetime      TEXT
    9      github  hours_since_last_commit      REAL
    10     github                followers   INTEGER
    11     github                following   INTEGER
    12     github               created_at      TEXT
    13     github                  company      TEXT
    14     github                      bio      TEXT
    15     github                 location      TEXT
    16     github                     blog      TEXT
    17     github                    email      TEXT
    18   speakers               speaker_id   INTEGER
    19   speakers                     name      TEXT
    20   speakers              profile_url      TEXT
    21   speakers                photo_url      TEXT
    22   speakers                  twitter      TEXT
    23   speakers                   github      TEXT
    24   speakers                fediverse      TEXT
    25   speakers               bio_part_1      TEXT
    26   speakers               bio_part_2      TEXT
    27   speakers               bio_part_3      TEXT
    28      talks                  talk_id   INTEGER
    29      talks                    title      TEXT
    30      talks                 abstract      TEXT
    31      talks             speaker_name      TEXT
    32      talks      speaker_profile_url      TEXT
    33      talks                 talk_url      TEXT
    34      talks               speaker_id   INTEGER
    35     topics                  talk_id   INTEGER
    36     topics           assigned_topic      TEXT
    
    
    
    Here is some smaple rows from the database:
    
    github table:
    speaker_id,github_url,username,name,public_repos,total_stars,most_starred_repo_name,most_starred_repo_stars,last_commit_datetime,hours_since_last_commit,followers,following,created_at,company,bio,location,blog,email
    8,https://github.com/sixfeetup,sixfeetup,Six Feet Up,127,210,scaf,79,2024-09-25T17:04:43+00:00,164.0354158066667,19,0,2011-09-08T20:39:11+00:00,,"Accelerating IMPACT with App Dev, AI & Big Data","Fishers, IN",http://www.sixfeetup.com,info@sixfeetup.com
    20,https://github.com/gijzelaerr,gijzelaerr,Gijs Molenaar,172,748,python-snap7,648,2024-09-17T16:20:59+00:00,356.8318311875,188,111,2010-07-08T11:22:39+00:00,spotify,"astro software composer, music brewer, beer programmer, senior system engineer @spotify",Amsterdam - Windhoek,http://pythonic.nl,gijsmolenaar@gmail.com
    21,https://github.com/luisdza,luisdza,Luis de Sousa,8,3,aerialmzansi-website,2,2024-08-22T13:24:29+00:00,983.7772850216667,23,112,2014-04-12T05:41:28+00:00,@Syeop,Creating and dreaming,"Johannesburg, South Africa",,
    ...
    
    speakers table:
    Speaker ID,Name,Profile URL,Photo URL,Twitter,GitHub,Fediverse,Bio Part 1,Bio Part 2,Bio Part 3
    1,Alex Conway,https://2024.za.pycon.org/users/alxcnwy/,No photo available,https://twitter.com/alxcnwy,https://github.com/alxcnwy,,Twitter Profile link:https://twitter.com/alxcnwy,Other Social:https://www.linkedin.com/in/alxcnwy/,github profile:https://github.com/alxcnwy
    2,Nunudzai Mrewa,https://2024.za.pycon.org/users/JustNunuz/,No photo available,,https://github.com/JustNunuz,,Twitter Profile link:https://x.com/_absolute_z3r0,Other Social:https://www.linkedin.com/in/nunuz/,github profile:https://github.com/JustNunuz
    3,Brenden Taylor,https://2024.za.pycon.org/users/brenden.taylor22/,No photo available,,,,"I am a data scientist at Praelexis, in Stellenbosch. I have mostly worked in financial services, specifically in time series forecasting in Python.",,
    ...
    
    talks table:
    Talk ID,Title,Abstract,Speaker Name,Speaker Profile URL,Talk URL,Speaker ID
    1,Applying AI with Python,"Artificial Intelligence (AI) is transforming industries and our lives by enabling computers to solve increasingly complex tasks, and Python stands at the forefront of this revolution. In this talk, we will explore how Python's rich ecosystem of AI libraries and frameworks can be harnessed to quickly solve real-world problems without needing a PhD. From computer vision to time series analysis to natural language processing and AI agents, this session will demonstrate practical AI applications in real-world use cases that leverage Python's versatility so you can get started using AI right away.
    ...
    
    topics table:
    Talk ID,Assigned Topic
    1,AI and Machine Learning
    2,Community and Tech Education
    3,Time Series Analysis
    4,AI and Machine Learning
    5,Natural Language Processing (NLP)
    6,Community and Tech Education
    7,Community and Tech Education
    8,Community and Tech Education
    ...

    When doing joins or where searches, make sure that you filter / join on an object that exists.
    For example the query 'SELECT COUNT(*)  FROM topics  WHERE "Assigned Topic" LIKE \'%AI%\'' is bad becauase AI isn't a topic, "Artificial Intelligence" is.

    Here is select talk_id, assigned_topic from topics:
1	AI and Machine Learning
2	Community and Tech Education
3	Time Series Analysis
4	AI and Machine Learning
5	Natural Language Processing (NLP)
6	Community and Tech Education
7	Community and Tech Education
8	Community and Tech Education
9	Python Libraries and Tools
10	Web Development and Frontend
11	Data Science and Decision Engines
12	Python Libraries and Tools
13	Python Libraries and Tools
14	Python Libraries and Tools
15	Data Quality
16	AI and Machine Learning
17	Event Streaming and Kafka
18	Community and Tech Education
19	Python Libraries and Tools
20	Robotics
21	Open Source and Software Development
22	Python Libraries and Tools
23	AI and Machine Learning
24	Python Libraries and Tools
25	IoT and MicroPython
26	Community and Tech Education
27	Natural Language Processing (NLP)
28	Python Libraries and Tools
29	Open Source and Software Development
30	Web Development and Frontend
31	Web Development and Frontend
32	Community and Tech Education
33	Community and Tech Education
34	Open Source and Software Development
35	Open Source and Software Development
36	Python Libraries and Tools


    Here are the speaker id and speaker name columns from the speakers table:
1	Alex Conway
2	Nunudzai Mrewa
3	Brenden Taylor
4	Ruan Pretorius
5	Alta Saunders
6	Schalk Venter
7	Mthetho Sovara
8	Roché Compaan
9	Shaun De Ponte
10	Sholto Armstrong
11	Du Toit Spies
12	Binjamin Barsch
13	Gordon Inggs
14	Ariella Rink
15	Jolanda Becker
16	Johan Beyers
17	Sheena O'Connell
18	Bruce Merry
19	:David Campey
20	Gijs Molenaar
21	Luis de Sousa
22	Rajdeep Singh
23	Sourav Saha
24	Jon Nordby
25	Adeline Makokha
26	Divya Rani
27	Rupali Kavale
28	Robson Kanhalelo
29	Cory Zue
30	Sheena O'Connell
31	Sheena O'Connell
32	Neil Muller
33	Marco Slaviero
34	Marco Slaviero
35	SARAO

here is select talk_id, speaker_id, title from talks:
1	1	Applying AI with Python
2	2	Harnessing the Power of Community: Lessons from Speedrunning for the Python Ecosystem and Beyond
3	3	It’s About Time: Time-Series Forecasting with Darts
4	4	Monitoring and Evaluating LLM Apps with Langfuse
5	5	Leveraging the NLTK library for Translation: A Case Study of Dyula-French Translation
6	6	Creating Personalised Images with Python's Stable Diffusion
7	6	Mental Illness and Vulnerability in Tech
8	7	Bridging Language Barriers: Making Programming Education Accessible to All
9	8	Level up your developer experience with Kubernetes
10	9	A Novel Way of Creating Multi-Tenant Apps using Django... No Third Party Packages Apply!
11	10	Building a Decisioning Engine for Data Scientists: A Practical Guide
12	11	Implementing an HPy Backend for Cython
13	12	Streamlining HPC Operations: Integrating Django Helpdesk in Resource-Constrained Environments
14	13	You don't need a data service, you just need an object store and some JSON files
15	14	Great Expectations About Data Quality
16	15	Made you look: Using Siamese Neural Networks for Building Change Detection at the City of Cape Town
17	16	Kafka in practice: Lessons learned at Takealot
18	17	Power to the people who teach the people to code
19	18	Adventures in Garbage Collection
20	19	& robotics for all!
21	20	Maintaining a PLC communication library called python-snap7 without owning a PLC
22	21	Duck, Duck, Python: OLAP Data with DuckDB
23	22	Differentiation Engines: The Elves behind the AI Christmas
24	23	Accelerate your pandas workload using FireDucks at zero manual effort
25	24	Sensor data processing on microcontrollers with MicroPython
26	25	Building a Thriving Tech Community
27	26	Building a Code Search Engine: Using NLP to Find Similar Methods Across Libraries
28	27	Beyond the Pause: Exploring the Inner Workings of Python’s `sleep()`
29	28	Exploring the Adoption & Role of Open Source Software in Schools
30	29	Using Coding Skills to Make Passive Income
31	30	Modern web frontend development with Python, HTMX and friends
32	31	Modern web frontend development with Python, HTMX and friends (part 2)
33	32	PyCon Africa & PyConZA 2025 discussion
34	33	Community & Contributions
35	34	Professional Software Isn’t
36	35	Software Development at the South African Radio Astronomy Observatory

here is select speaker_id, public_repos, total_stars, most_starred_repo_stars,  followers, created_at from github:
8	127	210	79	19	2011-09-08T20:39:11+00:00
20	172	748	648	188	2010-07-08T11:22:39+00:00
21	8	3	2	23	2014-04-12T05:41:28+00:00
22	56	51	12	63	2015-03-13T17:19:37+00:00
32	30	5	1	7	2011-02-28T09:19:10+00:00
26	190	17	4	134	2016-08-24T09:28:22+00:00
25	39	0	0	13	2021-06-08T13:20:20+00:00
31	76	22	6	77	2011-03-30T11:39:05+00:00
17	76	22	6	77	2011-03-30T11:39:05+00:00
29	67	1016	464	343	2009-03-24T15:34:08+00:00
30	76	22	6	77	2011-03-30T11:39:05+00:00
24	2	546	483	23	2018-07-18T12:41:12+00:00
13	44	20	4	37	2018-07-03T06:38:25+00:00
27	75	3	3	4	2017-07-29T09:42:56+00:00
28	54	2	1	10	2019-02-27T16:55:09+00:00
1	18	104	44	44	2014-09-02T18:24:14+00:00
16	42	57	36	15	2010-06-28T10:04:31+00:00
2	10	0	0	4	2019-12-23T09:07:22+00:00
11	11	1	1	0	2021-03-31T11:46:28+00:00
12	74	7	2	6	2017-06-23T19:03:23+00:00
6	171	165	17	365	2015-09-13T08:55:34+00:00
10	27	131	112	20	2018-12-29T20:55:43+00:00
9	16	4	2	5	2018-06-12T12:05:02+00:00
4	71	112	68	19	2019-12-05T13:17:03+00:00


IMPORTANT instructions:
1) Use JOINs if necessary, check the table and its usable columns, if using JOIN do it with extra CAUTION
2) Dont forget to apply inverted commas when required.
3) Give no explanation and directly provide answers.
4) Take care when joining tables, use right set of table-column configuration
5) When asked a question, consider slight variations / broader queries where relevant e.g. if the query is asking about talks on the topic of "community", a like statement would not be appropriate because talks about "Open Source and Software Development" should also be included and not just topics that contain the word community. I have provided all topics below.
6) When asked a question about one of the tables e.g. github / speakers / talks / topics - first consider querying the table directly 
7) When asked a question that requires calculation, work directly with the table most related to the question and first consider which columns to work with e.g. counting how many speakers have github accounts will require doing a count(*) on the github table, while identifying who has the most stars on github will require sorting on star count with limit 1. If you needed to find which speaker has the newest github you would query with something like SELECT s.name, g.created_at FROM github g JOIN speakers s ON g.speaker_id = s.speaker_id ORDER BY g.created_at DESC LIMIT 1;
8) Do not select frin pragma_table_info so do not return queries like: SELECT      m.name AS table_name,      p.name AS column_name,      p.type AS data_type FROM      sqlite_master m  JOIN      pragma_table_info(m.name) p  WHERE      m.type = 'table' ORDER BY      table_name, p.cid
9) Be extra careful to only use columns that are available in where and join and group by clauses
10) Feel free to use group by, limit, join where needed
11) Come up with a sql query that best answers the question: {question}
12) Return only a SQL query that answers the question and nothing else

    
    '''

In [17]:
prompt_write_SQL_query_for_question = get_prompt_write_SQL_query_for_question(question)

In [18]:
# Ask the model for a SQL query and show the raw reply (it may include a code fence).
sql_query = run_chat_query(prompt_write_SQL_query_for_question)
print(sql_query)

```sql
SELECT COUNT(*) 
FROM talks 
JOIN topics ON talks.talk_id = topics.talk_id 
WHERE topics.assigned_topic LIKE '%AI and Machine Learning%'
```


In [19]:
def sanitize_sql_query(query):
    """
    Extract a single-line SQL statement from an LLM reply.

    Handles replies wrapped in a Markdown code fence (optionally tagged
    ``sql``), inline backticks, or a bare leading ``sql`` tag. Only the
    wrapping markup is removed: text inside the statement, including
    backtick-quoted identifiers and string literals that happen to contain
    "sql ", is left intact.

    Parameters
    ----------
    query : str
        Raw model reply. ``None`` or an empty string yields ``""``.

    Returns
    -------
    str
        The statement with line breaks replaced by spaces, backslash-escaped
        single quotes unescaped, and trailing semicolons/whitespace removed.
    """
    if not query:
        return ""

    text = query.strip()
    lang_tag = r"(?:[ \t]*(?:sqlite3|sqlite|sql)\b)?"

    # Prefer the contents of a fenced block so prose around it is dropped.
    fenced = re.search(r"```" + lang_tag + r"(.*?)```", text, flags=re.DOTALL | re.IGNORECASE)
    if fenced:
        text = fenced.group(1)
    else:
        # Unterminated fence or inline code: peel off only the wrapping markers.
        text = re.sub(r"^```" + lang_tag, "", text, flags=re.IGNORECASE).strip()
        if len(text) >= 2 and text.startswith("`") and text.endswith("`"):
            text = text[1:-1]

    # A reply may start with a bare language tag, e.g. "sql SELECT ...".
    text = re.sub(r"^\s*(?:sqlite3|sqlite|sql)\b\s*", "", text, flags=re.IGNORECASE)

    # Unescape \' (kept from the original behaviour).
    text = text.replace("\\'", "'")

    # Remove line breaks; inner spacing is preserved so string literals are untouched.
    text = text.replace("\r", "").replace("\n", " ")

    # Remove any trailing semicolons and surrounding whitespace.
    return re.sub(r"[;\s]+$", "", text).strip()


In [20]:
# Scrub the model's reply down to a bare SQL statement and display it.
sql_query = sanitize_sql_query(sql_query)
sql_query

"SELECT COUNT(*)  FROM talks  JOIN topics ON talks.talk_id = topics.talk_id  WHERE topics.assigned_topic LIKE '%AI and Machine Learning%'"

In [21]:
# Query the SQL database with the generated query.
# run_sql_query returns None (after printing the error) when the query fails.
sql_results = run_sql_query(sql_query)

In [22]:
def get_prompt_analyze_sql(question, sql_results):
    """
    Build the prompt that asks the model to answer ``question`` from ``sql_results``.

    Both arguments are interpolated into a fixed template via f-string
    formatting; the template requests a concise Markdown answer with an
    insights section.
    """
    prompt = f'''
    You are an expert analyst who can answer questions using supporting data which was returned from a SQL query on a dataset about the conference.
        
    Your job is to answer the input QUESTION, using the SQL_RESULTS in your answer and identifying any insights or trends you can from the results of the SQL query.
    
    The QUESTION is:
    {question}
    
    The SQL_RESULTS are:
    {sql_results}
    
    Important instructions
    * return your answer in markdown
    * be concise
    * do not reference the SQL database or use the term SQL unless it's from a part of the data itself
    * try to make points with short bullet points
    * have a separate insights section with a markdown h2 and bullets for any insights you can deduce from the sql_results with respect to the question
    * return the sql resutlts dataset as part of your response
    
    '''
    return prompt

In [34]:
# Have the model turn the SQL result into a Markdown answer and render it.
prompt_analyze_sql = get_prompt_analyze_sql(question, sql_results)
print_markdown(run_chat_query(prompt_analyze_sql))

- There are a total of 4 talks about artificial intelligence.

### Insights
- The number of AI-related talks suggests a significant interest in the topic at the conference.
- AI appears to be a major area of focus, likely reflecting its growing importance across various industries.
- Attendees interested in AI have multiple sessions to choose from, indicating a diverse range of subtopics or applications within AI.

### SQL Results Dataset
|   COUNT(*) |
|------------|
|          4 |

> Works well!

## LLM Type of Query:

In [25]:
def get_prompt_analyze_llm(question):
    """
    Build the prompt that asks the model to answer ``question`` from documents.

    ``question`` is interpolated into a fixed template that requests a concise
    Markdown answer supported only by the provided context.
    """
    prompt = f'''
    You are an expert analyst who can answer questions using supporting documents.
        
    Your job is to answer the input QUESTION, using the documents in your answer and identifying any insights.
    
    The QUESTION is:
    {question}
    
    Important instructions
    * answer in markdown
    * be concise and try to make points with short bullet points
    * answer only from the context from the documents, do not make up answers that are not supported by data
    
    '''
    return prompt

In [26]:
print(question)

How many talks are there about artificial intelligence?


In [50]:
# Answer the same question through the document-retrieval path, for comparison
# with the SQL-backed answer above.
prompt_analyze_llm = get_prompt_analyze_llm(question)
answer = run_chat_query(prompt_analyze_llm)
print_markdown(answer)

Based on the provided context, there are two talks related to artificial intelligence:

- **Talk ID: 1**
  - Title: Applying AI with Python
  - Speaker: Alex Conway
  - Focus: Exploring how Python's AI libraries can solve real-world problems.

- **Talk ID: 11**
  - Title: Building a Decisioning Engine for Data Scientists: A Practical Guide
  - Speaker: Sholto Armstrong
  - Focus: Introducing a new open-source decisioning framework for predictive decision-making using AI.

These talks cover various aspects of AI application and decision-making frameworks in Python.

> fails as expected

# Does the answer actually answer the question?

In [47]:
def get_prompt_check_answer(question, answer):
    """
    Build the prompt that asks the model whether ``answer`` addresses ``question``.

    The template instructs the model to reply with only YES or NO and to lean
    towards "YES" when unsure.
    """
    prompt = f'''
    You are an expert analyst who can check if a given question is answered adequately by a given answer.
        
    Your job is to check if the input ANSWER actually answers the input QUESTION and answer YES or NO.
    
    The QUESTION is:
    {question}

    The ANSWER is:
    {answer}
    
    Important instructions
    * respond with only YES or NO
    * if you are not sure then lean towards "YES"
    
    '''
    return prompt

In [52]:
print (answer)

Based on the provided context, there are two talks related to artificial intelligence:

- **Talk ID: 1**
  - Title: Applying AI with Python
  - Speaker: Alex Conway
  - Focus: Exploring how Python's AI libraries can solve real-world problems.

- **Talk ID: 11**
  - Title: Building a Decisioning Engine for Data Scientists: A Practical Guide
  - Speaker: Sholto Armstrong
  - Focus: Introducing a new open-source decisioning framework for predictive decision-making using AI.

These talks cover various aspects of AI application and decision-making frameworks in Python.


In [53]:
print(question)

How many talks are there about artificial intelligence?


In [54]:
# Ask the model whether the RAG answer actually addresses the question.
# The prompt gets its own name instead of reusing `prompt_analyze_llm`, which
# holds the RAG analysis prompt built earlier.
prompt_check_answer = get_prompt_check_answer(question, answer)
answers_question = run_chat_query(prompt_check_answer)
answers_question

'YES'

# Rewrite the question

In [55]:
def get_prompt_rewrite_question(question):
    """
    Build the prompt that asks the model to rephrase ``question`` more clearly.

    The template instructs the model to reply with only the rewritten question,
    ending in a question mark.
    """
    prompt = f'''
    You are an expert analyst who can answer questions using supporting documents.
        
    Your job is to rewrite the input question to be more understandable
    
    The QUESTION is:
    {question}
    
    Important instructions
    * respond with only a rewritten question 
    * your response must end in a question mark
    * answer only from the context from the documents, do not make up answers that are not supported by data
    
    '''
    return prompt

In [56]:
# Ask the model to rephrase the question and render the result.
# The prompt gets its own name instead of reusing `prompt_analyze_llm`.
prompt_rewrite_question = get_prompt_rewrite_question(question)
print_markdown(run_chat_query(prompt_rewrite_question))

How many presentations focus on artificial intelligence?

# Write function to run pipeline

In [64]:
def agent(question, max_retries=3, debug=False):
    """
    Answer ``question`` with SQL or RAG, retrying with a rewritten question.

    Each attempt asks the model whether the question suits SQL or the document
    store, builds an answer on that path, renders it as Markdown, and then asks
    the model whether the answer addresses the question. If it does not, the
    question is rewritten and the next attempt starts.

    Parameters
    ----------
    question : str
        The user's question.
    max_retries : int, default 3
        Maximum number of attempts.
    debug : bool, default False
        When True, print the routing decision and let ``run_sql_query`` print
        the query and its result.

    Returns
    -------
    str or None
        The last answer produced (already rendered once via
        ``print_markdown``), or None when ``max_retries`` is less than 1.
        End the calling cell with ``;`` to avoid echoing the raw string.
    """
    answer = None
    for attempt in range(1, max_retries + 1):
        if attempt > 1:
            # Clear before printing: clear_notebook_output() uses
            # clear_output(wait=True), so a message printed before the clear
            # would be wiped by the next output.
            clear_notebook_output()
            print(f"> Rewriting question {attempt}: {question}")

        # figure out if SQL or LLM question; tolerate whitespace, quotes, case
        route_reply = run_chat_query(get_prompt_decide_LLM_or_SQL(question))
        question_type = str(route_reply).strip().strip("\"'.").upper()
        if debug:
            print(f"> Query type: {question_type}")

        sql_results = None
        use_sql = question_type.startswith("SQL")
        if use_sql:
            print("Using SQL to inform answer")
            # write sql query
            raw_sql = run_chat_query(get_prompt_write_SQL_query_for_question(question))

            # scrub sqlquery
            sql_query = sanitize_sql_query(raw_sql)
            print(f"> Running SQL Query: {sql_query}")

            # query sql database with generated query (None means it failed)
            sql_results = run_sql_query(sql_query, debug=debug)
            if sql_results is None:
                print("> SQL query failed; falling back to RAG")

        if sql_results is not None:
            # construct answer with insights from the data
            answer = run_chat_query(get_prompt_analyze_sql(question, sql_results))
        else:
            if not use_sql:
                print("Using RAG to inform answer")
            answer = run_chat_query(get_prompt_analyze_llm(question))

        # Render separately: print_markdown returns None, so its result must
        # not be stored as the answer.
        print_markdown(answer)

        # check if answer actually answers the question
        verdict = str(run_chat_query(get_prompt_check_answer(question, answer)))
        if verdict.strip().lower().startswith("yes"):
            break

        # rewrite question only when another attempt will follow
        if attempt < max_retries:
            question = run_chat_query(get_prompt_rewrite_question(question))
    return answer
        

In [63]:
agent("How many talks are there about artificial intelligence?")

Using SQL to inform answer
> Running SQL Query: SELECT COUNT(*)  FROM talks  WHERE talk_id IN (     SELECT talk_id      FROM topics      WHERE assigned_topic = 'AI and Machine Learning' )


- There are 4 talks about artificial intelligence.

|   COUNT(*) |
|------------|
|          4 |

## Insights
- AI is a significant topic at the conference, with multiple sessions dedicated to it.
- The focus on AI may indicate a strong interest or demand for knowledge in this area among attendees.
- The presence of these talks suggests the importance and relevance of AI in current technological discussions and advancements.

In [65]:
agent("Which speaker has the oldest github account?")

Using SQL to inform answer
> Running SQL Query: SELECT s.name, g.created_at  FROM github g  JOIN speakers s ON g.speaker_id = s.speaker_id  ORDER BY g.created_at ASC  LIMIT 1


**Answer:**

- The speaker who created their GitHub account first is Cory Zue.

**SQL Results:**

| name    | created_at              |
|---------|-------------------------|
| Cory Zue | 2009-03-24T15:34:08+00:00 |

## Insights

- **Early Adopter**: Cory Zue established his GitHub account in 2009, indicating he was an early adopter of the platform.
- **Potential Experience**: Having been on GitHub for a long time, Cory Zue likely has extensive experience in using the platform for collaborative software development.

In [None]:
agent("What is the most popular topic?")

Using SQL to inform answer
> Running SQL Query: SELECT assigned_topic, COUNT(*) AS topic_count FROM topics GROUP BY assigned_topic ORDER BY topic_count DESC LIMIT 1


In [None]:
agent("Which 5 talks should I go to if I am interested in data science?")

In [None]:
agent("What is the main topic of the conference?")

In [None]:
agent("Which speaker has the most active github account?")